aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Android.bp2
-rw-r--r--CMakeLists.txt109
-rw-r--r--Dockerfile56
-rw-r--r--Makefile24
-rw-r--r--README.md6
-rw-r--r--abigail_reader.cc133
-rw-r--r--abigail_reader.h2
-rw-r--r--abigail_reader_test.cc5
-rw-r--r--btf_reader.cc10
-rw-r--r--cmake/FindJemalloc.cmake106
-rw-r--r--cmake/FindLibDw.cmake105
-rw-r--r--cmake/FindLibElf.cmake105
-rw-r--r--comparison.cc41
-rw-r--r--comparison.h10
-rw-r--r--doc/stg.md43
-rw-r--r--doc/stgdiff.md19
-rw-r--r--dwarf_processor.cc280
-rw-r--r--dwarf_processor.h8
-rw-r--r--dwarf_wrappers.cc187
-rw-r--r--dwarf_wrappers.h36
-rw-r--r--elf_loader.cc123
-rw-r--r--elf_loader.h3
-rw-r--r--elf_reader.cc130
-rw-r--r--elf_reader.h8
-rw-r--r--elf_reader_test.cc30
-rw-r--r--equality.h9
-rw-r--r--fidelity.cc72
-rw-r--r--fidelity.h9
-rw-r--r--filter.cc (renamed from symbol_filter.cc)151
-rw-r--r--filter.h (renamed from symbol_filter.h)26
-rw-r--r--filter_test.cc (renamed from symbol_filter_test.cc)6
-rw-r--r--fingerprint.cc15
-rw-r--r--fuzz/elf_reader_fuzzer.cc3
-rw-r--r--graph.cc11
-rw-r--r--graph.h90
-rw-r--r--hashing.h2
-rw-r--r--input.cc8
-rw-r--r--input.h6
-rw-r--r--naming.cc17
-rw-r--r--naming.h3
-rw-r--r--proto_reader.cc50
-rw-r--r--proto_reader.h6
-rw-r--r--proto_writer.cc52
-rw-r--r--proto_writer.h1
-rw-r--r--reporting.cc12
-rw-r--r--reporting.h4
-rw-r--r--reporting_test.cc19
-rw-r--r--stable_hash.cc17
-rw-r--r--stable_hash.h3
-rw-r--r--stg.cc53
-rw-r--r--stg.proto57
-rw-r--r--stgdiff.cc13
-rw-r--r--stgdiff_test.cc53
-rw-r--r--stginfo.cc3
-rw-r--r--substitution.h4
-rw-r--r--testdata/abigail_bad_elf_dwarf_link_0.xml26
-rw-r--r--testdata/abigail_bad_elf_dwarf_link_1.xml26
-rw-r--r--testdata/abigail_bad_elf_dwarf_link_2.xml26
-rw-r--r--testdata/abigail_duplicate_data_members_1.xml3
-rw-r--r--testdata/crc_change_0.stg2
-rw-r--r--testdata/crc_change_1.stg2
-rw-r--r--testdata/enum_underlying_type_0.stg16
-rw-r--r--testdata/enum_underlying_type_1.stg16
-rw-r--r--testdata/fidelity_diff_0.stg72
-rw-r--r--testdata/fidelity_diff_1.stg74
-rw-r--r--testdata/fidelity_diff_report2
-rw-r--r--testdata/interface_addition_0.stg2
-rw-r--r--testdata/interface_addition_1.stg2
-rw-r--r--testdata/member_size_0.stg12
-rw-r--r--testdata/member_size_1.stg20
-rw-r--r--testdata/primitive_type_encoding_0.stg16
-rw-r--r--testdata/primitive_type_encoding_1.stg16
-rw-r--r--testdata/qualifier_0.stg12
-rw-r--r--testdata/qualifier_1.stg16
-rw-r--r--testdata/symbol_list2
-rw-r--r--testdata/type_addition_0.stg2
-rw-r--r--testdata/type_addition_1.stg6
-rw-r--r--testdata/type_addition_2.stg13
-rw-r--r--testdata/type_definition_addition_small_diff3
-rw-r--r--type_normalisation.cc4
-rw-r--r--type_resolution.cc10
-rw-r--r--unification.cc10
-rw-r--r--unification.h12
83 files changed, 1899 insertions, 880 deletions
diff --git a/Android.bp b/Android.bp
index f2ad130..3b17599 100644
--- a/Android.bp
+++ b/Android.bp
@@ -76,6 +76,7 @@ cc_library_host_static {
"elf_reader.cc",
"fidelity.cc",
"file_descriptor.cc",
+ "filter.cc",
"fingerprint.cc",
"graph.cc",
"input.cc",
@@ -86,7 +87,6 @@ cc_library_host_static {
"proto_writer.cc",
"reporting.cc",
"stable_hash.cc",
- "symbol_filter.cc",
"stg.proto",
"type_normalisation.cc",
"type_resolution.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..38410d0
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions (the
+# "License"); you may not use this file except in compliance with the
+# License. You may obtain a copy of the License at
+#
+# https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Aleksei Vetrov
+
+cmake_minimum_required(VERSION 3.14)
+
+project(
+ stg
+ VERSION 0.0.1
+ LANGUAGES CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+
+add_compile_options(-fstrict-enums -Wall -Wextra)
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ # GCC has problems detecting "no-return" switches and destructors.
+ add_compile_options(-Wno-return-type)
+endif()
+
+# Enable LTO for release builds
+set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE)
+set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO TRUE)
+
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
+
+find_package(LibElf REQUIRED)
+find_package(LibDw REQUIRED)
+find_package(LibXml2 REQUIRED)
+find_package(Protobuf REQUIRED)
+
+set(COMMON_LIBRARIES
+ LibElf::LibElf
+ LibDw::LibDw
+ LibXml2::LibXml2
+ protobuf::libprotobuf)
+
+if(NOT Jemalloc_DISABLE)
+ find_package(Jemalloc)
+ if(Jemalloc_FOUND)
+ list(APPEND COMMON_LIBRARIES Jemalloc::Jemalloc)
+ else()
+ message(WARNING "jemalloc significantly improves performance, but is not functionally required to build STG.
+Use -DJemalloc_DISABLE=TRUE to disable jemalloc and suppress this warning.")
+ endif()
+endif()
+
+protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS stg.proto)
+
+add_library(libstg OBJECT
+ abigail_reader.cc
+ btf_reader.cc
+ comparison.cc
+ deduplication.cc
+ dwarf_processor.cc
+ dwarf_wrappers.cc
+ elf_loader.cc
+ elf_reader.cc
+ fidelity.cc
+ file_descriptor.cc
+ filter.cc
+ fingerprint.cc
+ graph.cc
+ input.cc
+ metrics.cc
+ naming.cc
+ post_processing.cc
+ proto_reader.cc
+ proto_writer.cc
+ reporting.cc
+ stable_hash.cc
+ type_normalisation.cc
+ type_resolution.cc
+ unification.cc
+ ${PROTO_SRCS}
+ ${PROTO_HDRS})
+# Needed for generated .pb.h files
+target_include_directories(libstg PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+target_link_libraries(libstg PRIVATE ${COMMON_LIBRARIES})
+
+set(STG_EXECUTABLE_TARGETS stg stgdiff stginfo)
+
+foreach(TARGET IN LISTS STG_EXECUTABLE_TARGETS)
+ add_executable("${TARGET}" "${TARGET}.cc")
+ target_link_libraries("${TARGET}" PRIVATE libstg)
+endforeach()
+
+# Installation and packaging
+
+include(GNUInstallDirs)
+
+install(
+ TARGETS ${STG_EXECUTABLE_TARGETS}
+ COMPONENT Binaries
+ RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
diff --git a/Dockerfile b/Dockerfile
index edc602c..ab60318 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,14 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Copyright 2022-2023 Google LLC
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
+# you may not use this file except in compliance with the License. You may
+# obtain a copy of the License at
+#
+# https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+#
+# Author: Vanessa Sochat
+# Author: Aleksei Vetrov
+
ARG debian_version=stable-slim
FROM debian:${debian_version} as builder
# docker build -t stg .
RUN apt-get update && \
- apt-get install -y build-essential \
- libelf-dev \
- linux-libc-dev \
- libxml2-dev
+ apt-get install -y \
+ build-essential \
+ pkg-config \
+ cmake \
+ libelf-dev \
+ libdw-dev \
+ libxml2-dev \
+ libprotobuf-dev \
+ protobuf-compiler \
+ libjemalloc-dev
WORKDIR /src
COPY . /src
-RUN make
-# uncomment for multistage build
-# FROM debian:${debian_version}
-# COPY --from=builder /src/stgdiff /usr/local/bin/stgdiff
+RUN mkdir -p build && \
+ cd build && \
+ cmake -DCMAKE_BUILD_TYPE=Release .. && \
+ cmake --build . --parallel && \
+ cmake --install . --strip
+# second stage
+FROM debian:${debian_version}
+RUN apt-get update && \
+ apt-get install -y \
+ libc6 \
+ libgcc-s1 \
+ libstdc++6 \
+ libdw1 \
+ libelf1 \
+ libjemalloc2 \
+ libprotobuf32 \
+ libxml2 && \
+ rm -rf /var/lib/apt/lists/*
+COPY --from=builder /usr/local /usr/local
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 97e641b..0000000
--- a/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-LDLIBS := -lelf -lxml2
-LDFLAGS :=
-CXXFLAGS := -fstrict-enums -Wall -Wextra -I/usr/include -I/usr/include/libxml2 -std=c++17
-
-SRCS := abigail_reader.cc btf_reader.cc elf_loader.cc elf_reader.cc \
- post_processing.cc reporting.cc stg.cc stgdiff.cc
-HDRS := abigail_reader.h btf_reader.h crc.h elf_loader.h elf_reader.h \
- error.h id.h order.h post_processing.h reporting.h scc.h stg.h
-
-OBJS := $(SRCS:.cc=.o)
-MAIN := stgdiff
-
-.PHONY: all
-
-all: $(MAIN)
-
-# Conservative header dependencies
-$(OBJS): $(HDRS)
-
-$(MAIN): $(OBJS)
- $(LINK.cc) $^ $(LDLIBS) -o $@
-
-clean:
- rm -f $(OBJS) $(MAIN)
diff --git a/README.md b/README.md
index 2bf1e68..3dbbe1c 100644
--- a/README.md
+++ b/README.md
@@ -23,10 +23,12 @@ Instructions are included for local and Docker builds.
### Local Build
-You can build as follows:
+STG can be built with CMake as follows:
```bash
-$ make
+$ mkdir build && cd build
+$ cmake ..
+$ cmake --build . --parallel
```
### Docker Build
diff --git a/abigail_reader.cc b/abigail_reader.cc
index 7082376..752af0e 100644
--- a/abigail_reader.cc
+++ b/abigail_reader.cc
@@ -43,6 +43,7 @@
#include <vector>
#include <libxml/parser.h>
+#include <libxml/tree.h>
#include "error.h"
#include "file_descriptor.h"
#include "graph.h"
@@ -118,8 +119,8 @@ std::string GetAttributeOrDie(xmlNodePtr node, const char* name) {
}
// Set an attribute value.
-void SetAttribute(xmlNodePtr node, const char* name, const char* value) {
- xmlSetProp(node, ToLibxml(name), ToLibxml(value));
+void SetAttribute(xmlNodePtr node, const char* name, const std::string &value) {
+ xmlSetProp(node, ToLibxml(name), ToLibxml(value.c_str()));
}
// Unset an attribute value.
@@ -463,6 +464,59 @@ void StripReachabilityAttributes(xmlNodePtr node) {
}
}
+// Fix bad DWARF -> ELF links caused by size zero symbol confusion.
+//
+// libabigail used to be confused by these sorts of symbols, resulting in
+// declarations pointing at the wrong ELF symbols:
+//
+// 573623: ffffffc0122383c0 256 OBJECT GLOBAL DEFAULT 33 vm_node_stat
+// 573960: ffffffc0122383c0 0 OBJECT GLOBAL DEFAULT 33 vm_numa_stat
+void FixBadDwarfElfLinks(xmlNodePtr root) {
+ std::unordered_map<std::string, size_t> elf_links;
+
+ // See which ELF symbol IDs have multiple declarations.
+ const std::function<void(xmlNodePtr)> count = [&](xmlNodePtr node) {
+ if (GetName(node) == "var-decl") {
+ const auto symbol_id = GetAttribute(node, "elf-symbol-id");
+ if (symbol_id) {
+ ++elf_links[symbol_id.value()];
+ }
+ }
+
+ for (auto* child = Child(node); child; child = Next(child)) {
+ count(child);
+ }
+ };
+ count(root);
+
+ // Fix up likely bad links from DWARF declaration to ELF symbol.
+ const std::function<void(xmlNodePtr)> fix = [&](xmlNodePtr node) {
+ if (GetName(node) == "var-decl") {
+ const auto name = GetAttributeOrDie(node, "name");
+ const auto mangled_name = GetAttribute(node, "mangled-name");
+ const auto symbol_id = GetAttribute(node, "elf-symbol-id");
+ if (mangled_name && symbol_id && name != symbol_id.value()
+ && elf_links[symbol_id.value()] > 1) {
+ if (mangled_name.value() == name) {
+ Warn() << "fixing up ELF symbol for '" << name
+ << "' (was '" << symbol_id.value() << "')";
+ SetAttribute(node, "elf-symbol-id", name);
+ } else if (mangled_name.value() == symbol_id.value()) {
+ Warn() << "fixing up mangled name and ELF symbol for '" << name
+ << "' (was '" << symbol_id.value() << "')";
+ SetAttribute(node, "mangled-name", name);
+ SetAttribute(node, "elf-symbol-id", name);
+ }
+ }
+ }
+
+ for (auto* child = Child(node); child; child = Next(child)) {
+ fix(child);
+ }
+ };
+ fix(root);
+}
+
// Tidy anonymous types in various ways.
//
// 1. Normalise anonymous type names by dropping the name attribute.
@@ -506,8 +560,8 @@ void TidyAnonymousTypes(xmlNodePtr node) {
}
}
-// Remove duplicate data members.
-void RemoveDuplicateDataMembers(xmlNodePtr root) {
+// Remove duplicate members.
+void RemoveDuplicateMembers(xmlNodePtr root) {
std::vector<xmlNodePtr> types;
// find all structs and unions
@@ -524,28 +578,28 @@ void RemoveDuplicateDataMembers(xmlNodePtr root) {
dfs(root);
for (const auto& node : types) {
- // filter data members
- std::vector<xmlNodePtr> data_members;
+ // partition members by node name
+ std::map<std::string_view, std::vector<xmlNodePtr>> member_map;
for (auto* child = Child(node); child; child = Next(child)) {
- if (GetName(child) == "data-member") {
- data_members.push_back(child);
- }
+ member_map[GetName(child)].push_back(child);
}
- // remove identical duplicate data members - O(n^2)
- for (size_t i = 0; i < data_members.size(); ++i) {
- xmlNodePtr& i_node = data_members[i];
- bool duplicate = false;
- for (size_t j = 0; j < i; ++j) {
- const xmlNodePtr& j_node = data_members[j];
- if (j_node != nullptr && EqualTree(i_node, j_node)) {
- duplicate = true;
- break;
+ // for each kind of member...
+ for (auto& [name, members] : member_map) {
+ // ... remove identical duplicate members - O(n^2)
+ for (size_t i = 0; i < members.size(); ++i) {
+ xmlNodePtr& i_node = members[i];
+ bool duplicate = false;
+ for (size_t j = 0; j < i; ++j) {
+ const xmlNodePtr& j_node = members[j];
+ if (j_node != nullptr && EqualTree(i_node, j_node)) {
+ duplicate = true;
+ break;
+ }
+ }
+ if (duplicate) {
+ RemoveNode(i_node);
+ i_node = nullptr;
}
- }
- if (duplicate) {
- Warn() << "found duplicate data-member";
- RemoveNode(i_node);
- i_node = nullptr;
}
}
}
@@ -702,13 +756,16 @@ namespace {
// Transform XML elements to improve their semantics.
void Tidy(xmlNodePtr root) {
+ // Fix bad ELF symbol links
+ FixBadDwarfElfLinks(root);
+
// Normalise anonymous type names.
// Reanonymise anonymous types.
// Discard naming typedef backlinks.
TidyAnonymousTypes(root);
- // Remove duplicate data members.
- RemoveDuplicateDataMembers(root);
+ // Remove duplicate members.
+ RemoveDuplicateMembers(root);
// Eliminate complete duplicates and extra fragments of types.
// Report conflicting duplicate defintions.
@@ -751,7 +808,7 @@ Id Abigail::GetEdge(xmlNodePtr element) {
Id Abigail::GetVariadic() {
if (!variadic_) {
- variadic_ = {graph_.Add<Variadic>()};
+ variadic_ = {graph_.Add<Special>(Special::Kind::VARIADIC)};
}
return *variadic_;
}
@@ -1033,7 +1090,7 @@ void Abigail::ProcessTypeDecl(Id id, xmlNodePtr type_decl) {
const auto bytes = bits / 8;
if (name == "void") {
- graph_.Set<Void>(id);
+ graph_.Set<Special>(id, Special::Kind::VOID);
} else {
// libabigail doesn't model encoding at all and we don't want to parse names
// (which will not always work) in an attempt to reconstruct it.
@@ -1079,7 +1136,7 @@ void Abigail::ProcessStructUnion(Id id, bool is_struct,
} else if (child_name == "base-class") {
base_classes.push_back(ProcessBaseClass(child));
} else if (child_name == "member-function") {
- methods.push_back(ProcessMemberFunction(child));
+ ProcessMemberFunction(methods, child);
} else {
Die() << "unrecognised " << kind << "-decl child element '" << child_name
<< "'";
@@ -1148,20 +1205,20 @@ std::optional<Id> Abigail::ProcessDataMember(bool is_struct,
return {graph_.Add<Member>(name, type, offset, 0)};
}
-Id Abigail::ProcessMemberFunction(xmlNodePtr method) {
+void Abigail::ProcessMemberFunction(std::vector<Id>& methods,
+ xmlNodePtr method) {
xmlNodePtr decl = GetOnlyChild(method);
CheckName("function-decl", decl);
- static const std::string missing = "{missing}";
- const auto mangled_name = ReadAttribute(decl, "mangled-name", missing);
- const auto name = GetAttributeOrDie(decl, "name");
+ // ProcessDecl creates symbol references so must be called unconditionally.
const auto type = ProcessDecl(false, decl);
const auto vtable_offset = ReadAttribute<uint64_t>(method, "vtable-offset");
- const auto kind = vtable_offset
- ? Method::Kind::VIRTUAL
- : ReadAttribute<bool>(method, "static", false)
- ? Method::Kind::STATIC
- : Method::Kind::NON_VIRTUAL;
- return graph_.Add<Method>(mangled_name, name, kind, vtable_offset, type);
+ if (vtable_offset) {
+ static const std::string missing = "{missing}";
+ const auto mangled_name = ReadAttribute(decl, "mangled-name", missing);
+ const auto name = GetAttributeOrDie(decl, "name");
+ methods.push_back(
+ graph_.Add<Method>(mangled_name, name, vtable_offset.value(), type));
+ }
}
void Abigail::ProcessMemberType(xmlNodePtr member_type) {
diff --git a/abigail_reader.h b/abigail_reader.h
index 90c2dc1..21ada8d 100644
--- a/abigail_reader.h
+++ b/abigail_reader.h
@@ -137,7 +137,7 @@ class Abigail {
Id ProcessBaseClass(xmlNodePtr base_class);
std::optional<Id> ProcessDataMember(bool is_struct, xmlNodePtr data_member);
- Id ProcessMemberFunction(xmlNodePtr method);
+ void ProcessMemberFunction(std::vector<Id>& methods, xmlNodePtr method);
void ProcessMemberType(xmlNodePtr member_type);
Id BuildSymbol(const SymbolInfo& info,
diff --git a/abigail_reader_test.cc b/abigail_reader_test.cc
index e1b191d..930342f 100644
--- a/abigail_reader_test.cc
+++ b/abigail_reader_test.cc
@@ -161,6 +161,11 @@ struct TidyTestCase {
TEST_CASE("Tidy") {
const auto test = GENERATE(
TidyTestCase(
+ {"bad DWARF ELF link",
+ {"abigail_bad_elf_dwarf_link_0.xml",
+ "abigail_bad_elf_dwarf_link_1.xml",
+ "abigail_bad_elf_dwarf_link_2.xml"}}),
+ TidyTestCase(
{"anonymous type normalisation",
{"abigail_anonymous_types_0.xml",
"abigail_anonymous_types_1.xml",
diff --git a/btf_reader.cc b/btf_reader.cc
index 5fd3921..991d530 100644
--- a/btf_reader.cc
+++ b/btf_reader.cc
@@ -86,7 +86,7 @@ Structs::Structs(Graph& graph, const bool verbose)
// Get the index of the void type, creating one if needed.
Id Structs::GetVoid() {
if (!void_) {
- void_ = {graph_.Add<Void>()};
+ void_ = {graph_.Add<Special>(Special::Kind::VOID)};
}
return *void_;
}
@@ -94,7 +94,7 @@ Id Structs::GetVoid() {
// Get the index of the variadic parameter type, creating one if needed.
Id Structs::GetVariadic() {
if (!variadic_) {
- variadic_ = {graph_.Add<Variadic>()};
+ variadic_ = {graph_.Add<Special>(Special::Kind::VARIADIC)};
}
return *variadic_;
}
@@ -253,9 +253,9 @@ Id Structs::BuildTypes(MemoryRange memory) {
std::cout << "Type section:\n";
}
- // Alas, BTF overloads type id 0 to mean both Void (for everything but
- // function parameters) and Variadic (for function parameters). We determine
- // which is intended and create Void and Variadic types on demand.
+ // Alas, BTF overloads type id 0 to mean both void (for everything but
+ // function parameters) and variadic (for function parameters). We determine
+ // which is intended and create void and variadic types on demand.
// The type section is parsed sequentially and each type's index is its id.
uint32_t btf_index = 1;
diff --git a/cmake/FindJemalloc.cmake b/cmake/FindJemalloc.cmake
new file mode 100644
index 0000000..0873cc0
--- /dev/null
+++ b/cmake/FindJemalloc.cmake
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
+# you may not use this file except in compliance with the License. You may
+# obtain a copy of the License at
+#
+# https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+#
+# Author: Aleksei Vetrov
+
+#[=======================================================================[.rst:
+FindJemalloc
+---------
+
+Finds the jemalloc library.
+
+Imported Targets
+^^^^^^^^^^^^^^^^
+
+This module provides the following imported targets, if found:
+
+``Jemalloc::Jemalloc``
+ The Jemalloc library
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This will define the following variables:
+
+``Jemalloc_FOUND``
+ True if the system has the Jemalloc library.
+``Jemalloc_VERSION``
+ The version of the Jemalloc library which was found.
+``Jemalloc_INCLUDE_DIRS``
+ Include directories needed to use Jemalloc.
+``Jemalloc_LIBRARIES``
+ Libraries needed to link to Jemalloc.
+``Jemalloc_DEFINITIONS``
+ the compiler switches required for using Jemalloc
+
+Cache Variables
+^^^^^^^^^^^^^^^
+
+The following cache variables may also be set:
+
+``Jemalloc_INCLUDE_DIR``
+ The directory containing ``jemalloc.h``.
+``Jemalloc_LIBRARY``
+ The path to the ``libjemalloc.so``.
+
+#]=======================================================================]
+
+find_package(PkgConfig)
+pkg_check_modules(PC_Jemalloc QUIET jemalloc)
+
+find_library(
+ Jemalloc_LIBRARY
+ NAMES jemalloc
+ HINTS ${PC_Jemalloc_LIBDIR} ${PC_Jemalloc_LIBRARY_DIRS})
+# Try the value from user if the library is not found.
+if(DEFINED Jemalloc_LIBRARIES AND NOT DEFINED Jemalloc_LIBRARY)
+ set(Jemalloc_LIBRARY ${Jemalloc_LIBRARIES})
+endif()
+mark_as_advanced(Jemalloc_LIBRARY)
+
+find_path(
+ Jemalloc_INCLUDE_DIR
+ NAMES jemalloc.h
+ PATH_SUFFIXES jemalloc
+ HINTS ${PC_Jemalloc_INCLUDEDIR} ${PC_Jemalloc_INCLUDE_DIRS})
+# Try the value from user if the library is not found.
+if(DEFINED Jemalloc_INCLUDE_DIRS AND NOT DEFINED Jemalloc_INCLUDE_DIR)
+ set(Jemalloc_INCLUDE_DIR ${Jemalloc_INCLUDE_DIRS})
+endif()
+mark_as_advanced(Jemalloc_INCLUDE_DIR)
+
+set(Jemalloc_VERSION ${PC_Jemalloc_VERSION})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+ Jemalloc
+ REQUIRED_VARS Jemalloc_LIBRARY Jemalloc_INCLUDE_DIR
+ VERSION_VAR Jemalloc_VERSION)
+
+if(Jemalloc_FOUND)
+ set(Jemalloc_LIBRARIES ${Jemalloc_LIBRARY})
+ set(Jemalloc_INCLUDE_DIRS ${Jemalloc_INCLUDE_DIR})
+ set(Jemalloc_DEFINITIONS ${PC_Jemalloc_CFLAGS_OTHER})
+endif()
+
+if(Jemalloc_FOUND AND NOT TARGET Jemalloc::Jemalloc)
+ add_library(Jemalloc::Jemalloc UNKNOWN IMPORTED)
+ set_target_properties(
+ Jemalloc::Jemalloc
+ PROPERTIES IMPORTED_LOCATION "${Jemalloc_LIBRARY}"
+ INTERFACE_COMPILE_OPTIONS "${PC_Jemalloc_CFLAGS_OTHER}"
+ INTERFACE_INCLUDE_DIRECTORIES "${Jemalloc_INCLUDE_DIR}")
+endif()
diff --git a/cmake/FindLibDw.cmake b/cmake/FindLibDw.cmake
new file mode 100644
index 0000000..352d672
--- /dev/null
+++ b/cmake/FindLibDw.cmake
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
+# you may not use this file except in compliance with the License. You may
+# obtain a copy of the License at
+#
+# https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+#
+# Author: Aleksei Vetrov
+
+#[=======================================================================[.rst:
+FindLibDw
+---------
+
+Finds the DWARF processing library (libdw).
+
+Imported Targets
+^^^^^^^^^^^^^^^^
+
+This module provides the following imported targets, if found:
+
+``LibDw::LibDw``
+ The LibDw library
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This will define the following variables:
+
+``LibDw_FOUND``
+ True if the system has the LibDw library.
+``LibDw_VERSION``
+ The version of the LibDw library which was found.
+``LibDw_INCLUDE_DIRS``
+ Include directories needed to use LibDw.
+``LibDw_LIBRARIES``
+ Libraries needed to link to LibDw.
+``LibDw_DEFINITIONS``
+ the compiler switches required for using LibDw
+
+Cache Variables
+^^^^^^^^^^^^^^^
+
+The following cache variables may also be set:
+
+``LibDw_INCLUDE_DIR``
+ The directory containing ``dwarf.h``.
+``LibDw_LIBRARY``
+ The path to the ``libdw.so``.
+
+#]=======================================================================]
+
+find_package(PkgConfig)
+pkg_check_modules(PC_LibDw QUIET libdw)
+
+find_library(
+ LibDw_LIBRARY
+ NAMES dw
+ HINTS ${PC_LibDw_LIBDIR} ${PC_LibDw_LIBRARY_DIRS})
+# Try the value from user if the library is not found.
+if(DEFINED LibDw_LIBRARIES AND NOT DEFINED LibDw_LIBRARY)
+ set(LibDw_LIBRARY ${LibDw_LIBRARIES})
+endif()
+mark_as_advanced(LibDw_LIBRARY)
+
+find_path(
+ LibDw_INCLUDE_DIR
+ NAMES dwarf.h
+ HINTS ${PC_LibDw_INCLUDEDIR} ${PC_LibDw_INCLUDE_DIRS})
+# Try the value from user if the library is not found.
+if(DEFINED LibDw_INCLUDE_DIRS AND NOT DEFINED LibDw_INCLUDE_DIR)
+ set(LibDw_INCLUDE_DIR ${LibDw_INCLUDE_DIRS})
+endif()
+mark_as_advanced(LibDw_INCLUDE_DIR)
+
+set(LibDw_VERSION ${PC_LibDw_VERSION})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+ LibDw
+ REQUIRED_VARS LibDw_LIBRARY LibDw_INCLUDE_DIR
+ VERSION_VAR LibDw_VERSION)
+
+if(LibDw_FOUND)
+ set(LibDw_LIBRARIES ${LibDw_LIBRARY})
+ set(LibDw_INCLUDE_DIRS ${LibDw_INCLUDE_DIR})
+ set(LibDw_DEFINITIONS ${PC_LibDw_CFLAGS_OTHER})
+endif()
+
+if(LibDw_FOUND AND NOT TARGET LibDw::LibDw)
+ add_library(LibDw::LibDw UNKNOWN IMPORTED)
+ set_target_properties(
+ LibDw::LibDw
+ PROPERTIES IMPORTED_LOCATION "${LibDw_LIBRARY}"
+ INTERFACE_COMPILE_OPTIONS "${PC_LibDw_CFLAGS_OTHER}"
+ INTERFACE_INCLUDE_DIRECTORIES "${LibDw_INCLUDE_DIR}")
+endif()
diff --git a/cmake/FindLibElf.cmake b/cmake/FindLibElf.cmake
new file mode 100644
index 0000000..35872c9
--- /dev/null
+++ b/cmake/FindLibElf.cmake
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
+# you may not use this file except in compliance with the License. You may
+# obtain a copy of the License at
+#
+# https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+#
+# Author: Aleksei Vetrov
+
+#[=======================================================================[.rst:
+FindLibElf
+----------
+
+Finds the ELF processing library (libelf).
+
+Imported Targets
+^^^^^^^^^^^^^^^^
+
+This module provides the following imported targets, if found:
+
+``LibElf::LibElf``
+ The LibElf library
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This will define the following variables:
+
+``LibElf_FOUND``
+ True if the system has the LibElf library.
+``LibElf_VERSION``
+ The version of the LibElf library which was found.
+``LibElf_INCLUDE_DIRS``
+ Include directories needed to use LibElf.
+``LibElf_LIBRARIES``
+ Libraries needed to link to LibElf.
+``LibElf_DEFINITIONS``
+ the compiler switches required for using LibElf
+
+Cache Variables
+^^^^^^^^^^^^^^^
+
+The following cache variables may also be set:
+
+``LibElf_INCLUDE_DIR``
+ The directory containing ``libelf.h``.
+``LibElf_LIBRARY``
+ The path to the ``libelf.so``.
+
+#]=======================================================================]
+
+find_package(PkgConfig)
+pkg_check_modules(PC_LibElf QUIET libelf)
+
+find_library(
+ LibElf_LIBRARY
+ NAMES elf
+ HINTS ${PC_LibElf_LIBDIR} ${PC_LibElf_LIBRARY_DIRS})
+# Try the value from user if the library is not found.
+if(DEFINED LibElf_LIBRARIES AND NOT DEFINED LibElf_LIBRARY)
+ set(LibElf_LIBRARY ${LibElf_LIBRARIES})
+endif()
+mark_as_advanced(LibElf_LIBRARY)
+
+find_path(
+ LibElf_INCLUDE_DIR
+ NAMES libelf.h
+ HINTS ${PC_LibElf_INCLUDEDIR} ${PC_LibElf_INCLUDE_DIRS})
+# Try the value from user if the library is not found.
+if(DEFINED LibElf_INCLUDE_DIRS AND NOT DEFINED LibElf_INCLUDE_DIR)
+ set(LibElf_INCLUDE_DIR ${LibElf_INCLUDE_DIRS})
+endif()
+mark_as_advanced(LibElf_INCLUDE_DIR)
+
+set(LibElf_VERSION ${PC_LibElf_VERSION})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+ LibElf
+ REQUIRED_VARS LibElf_LIBRARY LibElf_INCLUDE_DIR
+ VERSION_VAR LibElf_VERSION)
+
+if(LibElf_FOUND)
+ set(LibElf_LIBRARIES ${LibElf_LIBRARY})
+ set(LibElf_INCLUDE_DIRS ${LibElf_INCLUDE_DIR})
+ set(LibElf_DEFINITIONS ${PC_LibElf_CFLAGS_OTHER})
+endif()
+
+if(LibElf_FOUND AND NOT TARGET LibElf::LibElf)
+ add_library(LibElf::LibElf UNKNOWN IMPORTED)
+ set_target_properties(
+ LibElf::LibElf
+ PROPERTIES IMPORTED_LOCATION "${LibElf_LIBRARY}"
+ INTERFACE_COMPILE_OPTIONS "${PC_LibElf_CFLAGS_OTHER}"
+ INTERFACE_INCLUDE_DIRECTORIES "${LibElf_INCLUDE_DIR}")
+endif()
diff --git a/comparison.cc b/comparison.cc
index 5d94350..210cc26 100644
--- a/comparison.cc
+++ b/comparison.cc
@@ -44,15 +44,16 @@ struct IgnoreDescriptor {
Ignore::Value value;
};
-static constexpr std::array<IgnoreDescriptor, 8> kIgnores{{
- {"type_declaration_status", Ignore::TYPE_DECLARATION_STATUS},
- {"symbol_type_presence", Ignore::SYMBOL_TYPE_PRESENCE },
- {"primitive_type_encoding", Ignore::PRIMITIVE_TYPE_ENCODING},
- {"member_size", Ignore::MEMBER_SIZE },
- {"enum_underlying_type", Ignore::ENUM_UNDERLYING_TYPE },
- {"qualifier", Ignore::QUALIFIER },
- {"interface_addition", Ignore::INTERFACE_ADDITION },
- {"linux_symbol_crc", Ignore::SYMBOL_CRC },
+static constexpr std::array<IgnoreDescriptor, 9> kIgnores{{
+ {"type_declaration_status", Ignore::TYPE_DECLARATION_STATUS },
+ {"symbol_type_presence", Ignore::SYMBOL_TYPE_PRESENCE },
+ {"primitive_type_encoding", Ignore::PRIMITIVE_TYPE_ENCODING },
+ {"member_size", Ignore::MEMBER_SIZE },
+ {"enum_underlying_type", Ignore::ENUM_UNDERLYING_TYPE },
+ {"qualifier", Ignore::QUALIFIER },
+ {"linux_symbol_crc", Ignore::SYMBOL_CRC },
+ {"interface_addition", Ignore::INTERFACE_ADDITION },
+ {"type_definition_addition", Ignore::TYPE_DEFINITION_ADDITION },
}};
std::optional<Ignore::Value> ParseIgnore(std::string_view ignore) {
@@ -227,12 +228,12 @@ Result Compare::Mismatch() {
return Result().MarkIncomparable();
}
-Result Compare::operator()(const Void&, const Void&) {
- return {};
-}
-
-Result Compare::operator()(const Variadic&, const Variadic&) {
- return {};
+Result Compare::operator()(const Special& x1, const Special& x2) {
+ Result result;
+ if (x1.kind != x2.kind) {
+ return result.MarkIncomparable();
+ }
+ return result;
}
Result Compare::operator()(const PointerReference& x1,
@@ -289,12 +290,13 @@ Result Compare::operator()(const Array& x1, const Array& x2) {
return result;
}
+// return whether to continue comparing both definitions
bool Compare::CompareDefined(bool defined1, bool defined2, Result& result) {
- if (defined1 && defined2) {
- return true;
+ if (defined1 == defined2) {
+ return defined1;
}
- const bool ignore_diff = ignore.Test(Ignore::TYPE_DECLARATION_STATUS);
- if (!ignore_diff && defined1 != defined2) {
+ if (!ignore.Test(Ignore::TYPE_DECLARATION_STATUS)
+ && !(ignore.Test(Ignore::TYPE_DEFINITION_ADDITION) && defined2)) {
std::ostringstream os;
os << "was " << (defined1 ? "fully defined" : "only declared")
<< ", is now " << (defined2 ? "fully defined" : "only declared");
@@ -450,7 +452,6 @@ Result Compare::operator()(const Member& x1, const Member& x2) {
Result Compare::operator()(const Method& x1, const Method& x2) {
Result result;
- result.MaybeAddNodeDiff("kind", x1.kind, x2.kind);
result.MaybeAddNodeDiff("vtable offset", x1.vtable_offset, x2.vtable_offset);
result.MaybeAddEdgeDiff("", (*this)(x1.type_id, x2.type_id));
return result;
diff --git a/comparison.h b/comparison.h
index fc66580..b73cdc0 100644
--- a/comparison.h
+++ b/comparison.h
@@ -45,14 +45,17 @@ namespace stg {
struct Ignore {
enum Value {
+ // noise reduction
SYMBOL_TYPE_PRESENCE = 1<<0,
TYPE_DECLARATION_STATUS = 1<<1,
PRIMITIVE_TYPE_ENCODING = 1<<2,
MEMBER_SIZE = 1<<3,
ENUM_UNDERLYING_TYPE = 1<<4,
QUALIFIER = 1<<5,
- INTERFACE_ADDITION = 1<<6,
- SYMBOL_CRC = 1<<7,
+ SYMBOL_CRC = 1<<6,
+ // ABI compatibility testing
+ INTERFACE_ADDITION = 1<<7,
+ TYPE_DEFINITION_ADDITION = 1<<8,
};
using Bitset = std::underlying_type_t<Value>;
@@ -271,8 +274,7 @@ struct Compare {
bool CompareDefined(bool defined1, bool defined2, Result& result);
Result Mismatch();
- Result operator()(const Void&, const Void&);
- Result operator()(const Variadic&, const Variadic&);
+ Result operator()(const Special&, const Special&);
Result operator()(const PointerReference&, const PointerReference&);
Result operator()(const PointerToMember&, const PointerToMember&);
Result operator()(const Typedef&, const Typedef&);
diff --git a/doc/stg.md b/doc/stg.md
index 378e8e1..d84d268 100644
--- a/doc/stg.md
+++ b/doc/stg.md
@@ -14,12 +14,13 @@ stg
[-i|--info]
[-d|--keep-duplicates]
[-t|--types]
- [-S|--symbols <filter>]
+ [-F|--file-filter <filter>]
+ [-S|--symbols|--symbol-filter <filter>]
[--skip-dwarf]
[-a|--abi|-b|--btf|-e|--elf|-s|--stg] [file] ...
[{-o|--output} {filename|-}] ...
implicit defaults: --abi
-symbol filter syntax:
+filter syntax:
<filter> ::= <term> | <expression> '|' <term>
<term> ::= <factor> | <term> '&' <factor>
<factor> ::= <atom> | '!' <factor>
@@ -89,16 +90,41 @@ Symbols must be disjoint across all inputs.
### Types
-Merging is not yet supported with type roots.
+If duplicate type roots are found during merge, they are unified. If unification
+fails, the merge fails.
+
+Unification is a process which replaces references to forward declarations of
+types with references to full definitions, if that would result in equal types.
## Filter
-If a symbol filter is supplied, symbols not matching the filter are dropped.
+There are two types of filters that can be applied to STG output:
+
+1. `-F|--file-filter <filter>`
+
+ Filter type definitions by source location.
+
+ DWARF information includes type declaration source locations. If a struct,
+ union, class or enum is defined in a file whose name does not match the
+ filter, then its definition (layout, members etc.) is dropped, leaving only
+ a declaration.
+
+ File filters are only applicable to ELF binary objects containing DWARF with
+ source location information; any other kind of input will be unaffected.
+
+2. `-S|--symbols|--symbol-filter <filter>`
+
+ Filter ELF symbols by name (which may include a `@version` or `@@version`
+ suffix).
+
+ If a symbol filter is supplied, symbols not matching the filter are dropped.
+ Symbol filtering is universal across all input formats as it happens after
+ input processing.
The basic syntactical elements are:
* `glob` - a glob pattern matching symbol names
-* `:filename` - the name of a file containing a libabigail format symbol list
+* `:filename` - the name of a file containing a libabigail format filter list
Filter expressions can be combined with infix disjuction (`|`) and conjunction
(`&`) operators and negated with the prefix (`!`) operator; these obey the usual
@@ -107,6 +133,8 @@ Whitespace is not significant, except as a string delimiter.
### Examples
+Symbol filters:
+
* `jiffies |panic` - keep just the symbols `jiffies` and `panic`
* `str*` - keep symbols beginning with `str` such as `strncpy_from_user`
* `!(*@* & ! *@@*`) - drop versioned symbols that are not the default versions
@@ -114,6 +142,11 @@ Whitespace is not significant, except as a string delimiter.
* `:include & !:exclude ` - keep symbols that are in the symbol list file
`include` but not in the symbol list file `exclude`
+File filters:
+
+* `!*.cc` - exclude all type definitions found in source files named `*.cc`
+* `*.h` - include only type definitions found in source files named `*.h`
+
## Deduplication
ABI representations, particularly merged ones, almost always have some sets of
diff --git a/doc/stgdiff.md b/doc/stgdiff.md
index b3e7789..d0c89ab 100644
--- a/doc/stgdiff.md
+++ b/doc/stgdiff.md
@@ -20,7 +20,7 @@ stgdiff
implicit defaults: --abi --format plain
--exact (node equality) cannot be combined with --output
output formats: plain flat small short viz
-ignore options: type_declaration_status symbol_type_presence primitive_type_encoding member_size enum_underlying_type qualifier interface_addition linux_symbol_crc
+ignore options: type_declaration_status symbol_type_presence primitive_type_encoding member_size enum_underlying_type qualifier linux_symbol_crc interface_addition type_definition_addition
```
## Input
@@ -114,17 +114,24 @@ in how much (DWARF) information they preserve.
Ignore qualifiers during comparison. Both libabigail and STG interpret and
adjust type qualifiers but sometimes do so differently.
-* `interface_addition`
-
- Ignore interface additions during comparison. This can be useful for ABI
- comparisons where symbol / type additions are allowed.
-
* `linux_symbol_crc`
Ignore Linux kernel symbol CRC changes during comparison. This can be
useful for ABI comparisons across different toolchains, where CRC changes
are often large and not useful.
+These two options can be used for ABI compatibility testing where the first ABI
+is expected to be a subset of the second.
+
+* `interface_addition`
+
+ Ignore interface (symbol and type root) additions during comparison.
+
+* `type_definition_addition`
+
+ Ignore type definition additions during comparison. Any extra symbol and
+ type roots may reach extra definitions of existing types.
+
### Fidelity Reporting
* `-F|--fidelity`
diff --git a/dwarf_processor.cc b/dwarf_processor.cc
index 46f433f..bd106af 100644
--- a/dwarf_processor.cc
+++ b/dwarf_processor.cc
@@ -24,17 +24,19 @@
#include <algorithm>
#include <cstddef>
-#include <ios>
-#include <iostream>
+#include <cstdint>
+#include <memory>
#include <optional>
#include <sstream>
#include <string>
+#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
#include "dwarf_wrappers.h"
#include "error.h"
+#include "filter.h"
#include "graph.h"
#include "scope.h"
@@ -140,8 +142,9 @@ size_t GetNumberOfElements(Entry& entry) {
<< "Non-zero DW_AT_lower_bound is not supported";
std::optional<size_t> upper_bound_optional =
entry.MaybeGetUnsignedConstant(DW_AT_upper_bound);
- std::optional<size_t> number_of_elements_optional =
- entry.MaybeGetUnsignedConstant(DW_AT_count);
+ // Don't fail if DW_AT_count is not a constant and treat this as no count
+ // provided. This can happen if array has variable length.
+ std::optional<size_t> number_of_elements_optional = entry.MaybeGetCount();
if (upper_bound_optional && number_of_elements_optional) {
Die() << "Both DW_AT_upper_bound and DW_AT_count given";
} else if (upper_bound_optional) {
@@ -259,11 +262,13 @@ size_t GetDataBitOffset(Entry& entry, size_t bit_size,
class Processor {
public:
Processor(Graph& graph, Id void_id, Id variadic_id,
- bool is_little_endian_binary, Types& result)
+ bool is_little_endian_binary,
+ const std::unique_ptr<Filter>& file_filter, Types& result)
: graph_(graph),
void_id_(void_id),
variadic_id_(variadic_id),
is_little_endian_binary_(is_little_endian_binary),
+ file_filter_(file_filter),
result_(result) {}
void Process(Entry& entry) {
@@ -303,6 +308,9 @@ class Processor {
case DW_TAG_ptr_to_member_type:
ProcessPointerToMember(entry);
break;
+ case DW_TAG_unspecified_type:
+ ProcessUnspecifiedType(entry);
+ break;
case DW_TAG_compile_unit:
ProcessCompileUnit(entry);
break;
@@ -326,7 +334,10 @@ class Processor {
ProcessReference<Qualified>(entry, Qualifier::ATOMIC);
break;
case DW_TAG_variable:
- ProcessVariable(entry);
+ // Process only variables visible externally
+ if (entry.GetFlag(DW_AT_external)) {
+ ProcessVariable(entry);
+ }
break;
case DW_TAG_subroutine_type:
// Standalone function type, for example, used in function pointers.
@@ -337,8 +348,7 @@ class Processor {
ProcessFunction(entry);
break;
case DW_TAG_namespace:
- // TODO: handle scopes
- ProcessAllChildren(entry);
+ ProcessNamespace(entry);
break;
default:
@@ -390,6 +400,15 @@ class Processor {
}
void ProcessCompileUnit(Entry& entry) {
+ if (file_filter_ != nullptr) {
+ files_ = dwarf::Files(entry);
+ }
+ ProcessAllChildren(entry);
+ }
+
+ void ProcessNamespace(Entry& entry) {
+ auto name = GetNameOrEmpty(entry);
+ const PushScopeName push_scope_name(scope_, "namespace", name);
ProcessAllChildren(entry);
}
@@ -427,21 +446,37 @@ class Processor {
pointee_type_id);
}
+ void ProcessUnspecifiedType(Entry& entry) {
+ const std::string type_name = GetName(entry);
+ Check(type_name == "decltype(nullptr)")
+ << "Unsupported DW_TAG_unspecified_type: " << type_name;
+ AddProcessedNode<Special>(entry, Special::Kind::NULLPTR);
+ }
+
+ bool ShouldKeepDefinition(Entry& entry, const std::string& name) const {
+ if (file_filter_ == nullptr) {
+ return true;
+ }
+ const auto file = files_.MaybeGetFile(entry, DW_AT_decl_file);
+ if (!file) {
+ // Built in types that do not have DW_AT_decl_file should be preserved.
+ static constexpr std::string_view kBuiltinPrefix = "__";
+ // TODO: use std::string_view::starts_with
+ if (name.substr(0, kBuiltinPrefix.size()) == kBuiltinPrefix) {
+ return true;
+ }
+ Die() << "File filter is provided, but DWARF entry << "
+ << EntryToString(entry) << " << doesn't have DW_AT_decl_file";
+ }
+ return (*file_filter_)(*file);
+ }
+
void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) {
std::string name = GetNameOrEmpty(entry);
const std::string full_name = name.empty() ? std::string() : scope_ + name;
const PushScopeName push_scope_name(scope_, kind, name);
- if (entry.GetFlag(DW_AT_declaration)) {
- // It is expected to have only name and no children in declaration.
- // However, it is not guaranteed and we should do something if we find an
- // example.
- CheckNoChildren(entry);
- AddProcessedNode<StructUnion>(entry, kind, full_name);
- return;
- }
-
- auto byte_size = GetByteSize(entry);
+ std::vector<Id> base_classes;
std::vector<Id> members;
std::vector<Id> methods;
@@ -450,15 +485,21 @@ class Processor {
// All possible children of struct/class/union
switch (child_tag) {
case DW_TAG_member:
- members.push_back(GetIdForEntry(child));
- ProcessMember(child);
+ if (child.GetFlag(DW_AT_external)) {
+ // static members are interpreted as variables and not included in
+ // members.
+ ProcessVariable(child);
+ } else {
+ members.push_back(GetIdForEntry(child));
+ ProcessMember(child);
+ }
break;
case DW_TAG_subprogram:
- methods.push_back(GetIdForEntry(child));
- ProcessMethod(child);
+ ProcessMethod(methods, child);
break;
case DW_TAG_inheritance:
- // TODO: process base classes
+ base_classes.push_back(GetIdForEntry(child));
+ ProcessBaseClass(child);
break;
case DW_TAG_structure_type:
case DW_TAG_class_type:
@@ -470,19 +511,30 @@ class Processor {
break;
case DW_TAG_template_type_parameter:
case DW_TAG_template_value_parameter:
+ case DW_TAG_GNU_template_template_param:
+ case DW_TAG_GNU_template_parameter_pack:
// We just skip these as neither GCC nor Clang seem to use them
// properly (resulting in no references to such DIEs).
break;
default:
- Die() << "Unexpected tag for child of struct/class/union: 0x"
- << std::hex << child_tag;
+ Die() << "Unexpected tag for child of struct/class/union: "
+ << Hex(child_tag);
}
}
- // TODO: support base classes
+ if (entry.GetFlag(DW_AT_declaration) ||
+ !ShouldKeepDefinition(entry, name)) {
+ // Declaration may have partial information about members or method.
+ // We only need to parse children for information that will be needed in
+ // complete definition, but don't need to store them in incomplete node.
+ AddProcessedNode<StructUnion>(entry, kind, full_name);
+ return;
+ }
+
+ const auto byte_size = GetByteSize(entry);
+
const Id id = AddProcessedNode<StructUnion>(
- entry, kind, full_name, byte_size,
- /* base_classes = */ std::vector<Id>{},
+ entry, kind, full_name, byte_size, std::move(base_classes),
std::move(methods), std::move(members));
if (!full_name.empty()) {
AddNamedTypeNode(id);
@@ -504,7 +556,7 @@ class Processor {
GetDataBitOffset(entry, bit_size, is_little_endian_binary_), bit_size);
}
- void ProcessMethod(Entry& entry) {
+ void ProcessMethod(std::vector<Id>& methods, Entry& entry) {
Subprogram subprogram = GetSubprogram(entry);
auto id = graph_.Add<Function>(std::move(subprogram.node));
if (subprogram.external && subprogram.address) {
@@ -518,22 +570,47 @@ class Processor {
.address = *subprogram.address,
.id = id});
}
+ const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
+ .value_or(DW_VIRTUALITY_none);
+ if (virtuality == DW_VIRTUALITY_virtual ||
+ virtuality == DW_VIRTUALITY_pure_virtual) {
+ if (!subprogram.name_with_context.unscoped_name) {
+ Die() << "Method " << EntryToString(entry) << " should have name";
+ }
+ if (subprogram.name_with_context.specification) {
+ Die() << "Method " << EntryToString(entry)
+ << " shouldn't have specification";
+ }
+ const auto vtable_offset = entry.MaybeGetVtableOffset();
+ if (!vtable_offset) {
+ Die() << "Virtual method " << EntryToString(entry)
+ << " should have offset";
+ }
+ // TODO: proper handling of missing linkage name
+ methods.push_back(AddProcessedNode<Method>(
+ entry, subprogram.linkage_name.value_or("{missing}"),
+ *subprogram.name_with_context.unscoped_name, *vtable_offset, id));
+ }
+ }
- // TODO: support kind
- const Method::Kind kind = Method::Kind::NON_VIRTUAL;
- // TODO: support vtable_offset
- if (!subprogram.name_with_context.unscoped_name) {
- Die() << "Method " << EntryToString(entry) << " should have name";
+ void ProcessBaseClass(Entry& entry) {
+ const auto type_id = GetIdForReferredType(GetReferredType(entry));
+ const auto byte_offset = entry.MaybeGetMemberByteOffset();
+ if (!byte_offset) {
+ Die() << "No offset found for base class " << EntryToString(entry);
}
- if (subprogram.name_with_context.specification) {
- Die() << "Method " << EntryToString(entry)
- << " shouldn't have specification";
+ const auto bit_offset = *byte_offset * 8;
+ const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality)
+ .value_or(DW_VIRTUALITY_none);
+ BaseClass::Inheritance inheritance;
+ if (virtuality == DW_VIRTUALITY_none) {
+ inheritance = BaseClass::Inheritance::NON_VIRTUAL;
+ } else if (virtuality == DW_VIRTUALITY_virtual) {
+ inheritance = BaseClass::Inheritance::VIRTUAL;
+ } else {
+ Die() << "Unexpected base class virtuality: " << virtuality;
}
- // TODO: proper handling of missing linkage name
- AddProcessedNode<Method>(entry,
- subprogram.linkage_name.value_or("{missing}"),
- *subprogram.name_with_context.unscoped_name, kind,
- /* vtable_offset = */ std::nullopt, id);
+ AddProcessedNode<BaseClass>(entry, type_id, bit_offset, inheritance);
}
void ProcessArray(Entry& entry) {
@@ -562,7 +639,10 @@ class Processor {
}
void ProcessEnum(Entry& entry) {
- std::string name = GetNameOrEmpty(entry);
+ const std::optional<std::string> name_optional = MaybeGetName(entry);
+ const std::string name =
+ name_optional.has_value() ? scope_ + *name_optional : "";
+
if (entry.GetFlag(DW_AT_declaration)) {
// It is expected to have only name and no children in declaration.
// However, it is not guaranteed and we should do something if we find an
@@ -589,6 +669,10 @@ class Processor {
enumerators.emplace_back(enumerator_name,
static_cast<int64_t>(*value_optional));
}
+ if (!ShouldKeepDefinition(entry, name)) {
+ AddProcessedNode<Enumeration>(entry, name);
+ return;
+ }
const Id id = AddProcessedNode<Enumeration>(entry, name, underlying_type_id,
std::move(enumerators));
if (!name.empty()) {
@@ -676,24 +760,16 @@ class Processor {
}
void ProcessVariable(Entry& entry) {
- // Skip:
- // * anonymous variables (for example, anonymous union)
- // * variables not visible outside of its enclosing compilation unit
- if (!entry.GetFlag(DW_AT_external)) {
- return;
- }
- std::optional<std::string> name_optional = MaybeGetName(entry);
- if (!name_optional) {
- return;
- }
+ auto name_with_context = GetNameWithContext(entry);
auto referred_type = GetReferredType(entry);
auto referred_type_id = GetIdForEntry(referred_type);
if (auto address = entry.MaybeGetAddress(DW_AT_location)) {
// Only external variables with address are useful for ABI monitoring
+ const auto new_symbol_idx = result_.symbols.size();
result_.symbols.push_back(Types::Symbol{
- .name = *name_optional,
+ .name = GetScopedNameForSymbol(new_symbol_idx, name_with_context),
.linkage_name = entry.MaybeGetString(DW_AT_linkage_name),
.address = *address,
.id = referred_type_id});
@@ -719,7 +795,7 @@ class Processor {
Function node;
NameWithContext name_with_context;
std::optional<std::string> linkage_name;
- std::optional<size_t> address;
+ std::optional<Address> address;
bool external;
};
@@ -729,35 +805,51 @@ class Processor {
std::vector<Id> parameters;
for (auto& child : entry.GetChildren()) {
auto child_tag = child.GetTag();
- if (child_tag == DW_TAG_formal_parameter) {
- auto child_type = GetReferredType(child);
- auto child_type_id = GetIdForEntry(child_type);
- parameters.push_back(child_type_id);
- } else if (child_tag == DW_TAG_unspecified_parameters) {
- // Note: C++ allows a single ... argument specification but C does not.
- // However, "extern int foo();" (note lack of "void" in parameters) in C
- // will produce the same DWARF as "extern int foo(...);" in C++.
- CheckNoChildren(child);
- parameters.push_back(variadic_id_);
- } else if (child_tag == DW_TAG_enumeration_type ||
- child_tag == DW_TAG_label ||
- child_tag == DW_TAG_lexical_block ||
- child_tag == DW_TAG_structure_type ||
- child_tag == DW_TAG_class_type ||
- child_tag == DW_TAG_union_type ||
- child_tag == DW_TAG_typedef ||
- child_tag == DW_TAG_inlined_subroutine ||
- child_tag == DW_TAG_variable ||
- child_tag == DW_TAG_call_site ||
- child_tag == DW_TAG_GNU_call_site) {
- Process(child);
- } else if (child_tag == DW_TAG_template_type_parameter ||
- child_tag == DW_TAG_template_value_parameter) {
- // We just skip these as neither GCC nor Clang seem to use them properly
- // (resulting in no references to such DIEs).
- } else {
- Die() << "Unexpected tag for child of function: " << child_tag << ", "
- << EntryToString(child);
+ switch (child_tag) {
+ case DW_TAG_formal_parameter:
+ parameters.push_back(GetIdForReferredType(GetReferredType(child)));
+ break;
+ case DW_TAG_unspecified_parameters:
+ // Note: C++ allows a single ... argument specification but C does
+ // not. However, "extern int foo();" (note lack of "void" in
+ // parameters) in C will produce the same DWARF as "extern int
+ // foo(...);" in C++.
+ CheckNoChildren(child);
+ parameters.push_back(variadic_id_);
+ break;
+ case DW_TAG_enumeration_type:
+ case DW_TAG_label:
+ case DW_TAG_lexical_block:
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_union_type:
+ case DW_TAG_typedef:
+ case DW_TAG_inlined_subroutine:
+ case DW_TAG_variable:
+ case DW_TAG_call_site:
+ case DW_TAG_GNU_call_site:
+ // TODO: Do not leak local types outside this scope.
+ // TODO: It would be better to not process any
+ // information that is function local but there is a dangling
+ // reference Clang bug.
+ Process(child);
+ break;
+ case DW_TAG_imported_declaration:
+ case DW_TAG_imported_module:
+ // For now information there is useless for ABI monitoring, but we
+ // need to check that there is no missing information in descendants.
+ CheckNoChildren(child);
+ break;
+ case DW_TAG_template_type_parameter:
+ case DW_TAG_template_value_parameter:
+ case DW_TAG_GNU_template_template_param:
+ case DW_TAG_GNU_template_parameter_pack:
+ // We just skip these as neither GCC nor Clang seem to use them
+ // properly (resulting in no references to such DIEs).
+ break;
+ default:
+ Die() << "Unexpected tag for child of function: " << child_tag << ", "
+ << EntryToString(child);
}
}
@@ -784,6 +876,11 @@ class Processor {
return referred_type ? GetIdForEntry(*referred_type) : void_id_;
}
+ // Wrapper for GetIdForEntry to allow lvalues.
+ Id GetIdForReferredType(Entry referred_type) {
+ return GetIdForEntry(referred_type);
+ }
+
// Populate Id from method above with processed Node.
template <typename Node, typename... Args>
Id AddProcessedNode(Entry& entry, Args&&... args) {
@@ -800,21 +897,23 @@ class Processor {
Id void_id_;
Id variadic_id_;
bool is_little_endian_binary_;
+ const std::unique_ptr<Filter>& file_filter_;
Types& result_;
std::unordered_map<Dwarf_Off, Id> id_map_;
// Current scope.
Scope scope_;
std::vector<std::pair<Dwarf_Off, std::string>> scoped_names_;
std::vector<std::pair<Dwarf_Off, size_t>> unresolved_symbol_specifications_;
+ dwarf::Files files_;
};
Types ProcessEntries(std::vector<Entry> entries, bool is_little_endian_binary,
- Graph& graph) {
+ const std::unique_ptr<Filter>& file_filter, Graph& graph) {
Types result;
- Id void_id = graph.Add<Void>();
- Id variadic_id = graph.Add<Variadic>();
+ const Id void_id = graph.Add<Special>(Special::Kind::VOID);
+ const Id variadic_id = graph.Add<Special>(Special::Kind::VARIADIC);
Processor processor(graph, void_id, variadic_id, is_little_endian_binary,
- result);
+ file_filter, result);
for (auto& entry : entries) {
processor.Process(entry);
}
@@ -824,9 +923,10 @@ Types ProcessEntries(std::vector<Entry> entries, bool is_little_endian_binary,
return result;
}
-Types Process(Handler& dwarf, bool is_little_endian_binary, Graph& graph) {
+Types Process(Handler& dwarf, bool is_little_endian_binary,
+ const std::unique_ptr<Filter>& file_filter, Graph& graph) {
return ProcessEntries(dwarf.GetCompilationUnits(), is_little_endian_binary,
- graph);
+ file_filter, graph);
}
} // namespace dwarf
diff --git a/dwarf_processor.h b/dwarf_processor.h
index 7e398ae..7dc0ee5 100644
--- a/dwarf_processor.h
+++ b/dwarf_processor.h
@@ -26,6 +26,7 @@
#include <vector>
#include "dwarf_wrappers.h"
+#include "filter.h"
#include "graph.h"
namespace stg {
@@ -35,7 +36,7 @@ struct Types {
struct Symbol {
std::string name;
std::optional<std::string> linkage_name;
- size_t address;
+ Address address;
Id id;
};
@@ -47,11 +48,12 @@ struct Types {
// Process every compilation unit from DWARF and returns processed STG along
// with information needed for matching to ELF symbols.
-Types Process(Handler& dwarf, bool is_little_endian_binary, Graph& graph);
+Types Process(Handler& dwarf, bool is_little_endian_binary,
+ const std::unique_ptr<Filter>& file_filter, Graph& graph);
// Process every entry passed there. It should be used only for testing.
Types ProcessEntries(std::vector<Entry> entries, bool is_little_endian_binary,
- Graph& graph);
+ const std::unique_ptr<Filter>& file_filter, Graph& graph);
} // namespace dwarf
} // namespace stg
diff --git a/dwarf_wrappers.cc b/dwarf_wrappers.cc
index ed0c22a..fc08593 100644
--- a/dwarf_wrappers.cc
+++ b/dwarf_wrappers.cc
@@ -30,6 +30,7 @@
#include <ios>
#include <memory>
#include <optional>
+#include <ostream>
#include <string>
#include <utility>
#include <vector>
@@ -39,6 +40,10 @@
namespace stg {
namespace dwarf {
+std::ostream& operator<<(std::ostream& os, const Address& address) {
+ return os << Hex(address.value) << (address.is_tls ? " (TLS)" : "");
+}
+
namespace {
static const Dwfl_Callbacks kDwflCallbacks = {
@@ -89,6 +94,51 @@ void CheckOrDwflError(bool condition, const char* caller) {
}
}
+std::optional<uint64_t> MaybeGetUnsignedOperand(const Dwarf_Op& operand) {
+ switch (operand.atom) {
+ case DW_OP_addr:
+ case DW_OP_const1u:
+ case DW_OP_const2u:
+ case DW_OP_const4u:
+ case DW_OP_const8u:
+ case DW_OP_constu:
+ return operand.number;
+ case DW_OP_const1s:
+ case DW_OP_const2s:
+ case DW_OP_const4s:
+ case DW_OP_const8s:
+ case DW_OP_consts:
+ if (static_cast<int64_t>(operand.number) < 0) {
+ // Atom is not an unsigned constant
+ return std::nullopt;
+ }
+ return operand.number;
+ case DW_OP_lit0...DW_OP_lit31:
+ return operand.atom - DW_OP_lit0;
+ default:
+ return std::nullopt;
+ }
+}
+
+struct Expression {
+ const Dwarf_Op& operator[](size_t i) const {
+ return atoms[i];
+ }
+
+ Dwarf_Op* atoms = nullptr;
+ size_t length = 0;
+};
+
+Expression GetExpression(Dwarf_Attribute& attribute) {
+ Expression result;
+
+ Check(dwarf_getlocation(&attribute, &result.atoms, &result.length) ==
+ kReturnOk) << "dwarf_getlocation returned error";
+ Check(result.atoms != nullptr && result.length > 0)
+ << "dwarf_getlocation returned empty expression";
+ return result;
+}
+
} // namespace
Handler::Handler(const std::string& path) : dwfl_(dwfl_begin(&kDwflCallbacks)) {
@@ -211,8 +261,9 @@ std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) {
}
uint64_t value;
- Check(dwarf_formudata(&dwarf_attribute.value(), &value) == kReturnOk)
- << "dwarf_formudata returned error";
+ if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) {
+ Die() << "dwarf_formudata returned error";
+ }
return value;
}
@@ -245,27 +296,33 @@ std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) {
namespace {
-std::optional<uint64_t> GetAddressFromLocation(Dwarf_Attribute& attribute) {
- Dwarf_Op* expr = nullptr;
- size_t expr_len = 0;
-
- Check(dwarf_getlocation(&attribute, &expr, &expr_len) == kReturnOk)
- << "dwarf_getlocation returned error";
- Check(expr != nullptr && expr_len > 0)
- << "dwarf_getlocation returned empty expression";
+std::optional<Address> GetAddressFromLocation(Dwarf_Attribute& attribute) {
+ const auto expression = GetExpression(attribute);
Dwarf_Attribute result_attribute;
- if (dwarf_getlocation_attr(&attribute, expr, &result_attribute) ==
+ if (dwarf_getlocation_attr(&attribute, expression.atoms, &result_attribute) ==
kReturnOk) {
- uint64_t addr;
- Check(dwarf_formaddr(&result_attribute, &addr) == kReturnOk)
+ uint64_t address;
+ Check(dwarf_formaddr(&result_attribute, &address) == kReturnOk)
<< "dwarf_formaddr returned error";
- return addr;
+ return Address{.value = address, .is_tls = false};
}
- if (expr_len == 1 && expr->atom == DW_OP_addr) {
+ if (expression.length == 1 && expression[0].atom == DW_OP_addr) {
// DW_OP_addr is unsupported by dwarf_getlocation_attr, so we need to
// manually extract the address from expression.
- return expr->number;
+ return Address{.value = expression[0].number, .is_tls = false};
+ }
+ // TLS operation has different encodings in Clang and GCC:
+ // * Clang 14 uses DW_OP_GNU_push_tls_address
+ // * GCC 12 uses DW_OP_form_tls_address
+ if (expression.length == 2 &&
+ (expression[1].atom == DW_OP_GNU_push_tls_address ||
+ expression[1].atom == DW_OP_form_tls_address)) {
+ // TLS symbols address may be incorrect because of unsupported
+ // relocations. Resetting it to zero the same way as it is done in
+ // elf::Reader::MaybeAddTypeInfo.
+ // TODO: match TLS variables by address
+ return Address{.value = 0, .is_tls = true};
}
Die() << "Unsupported data location expression";
@@ -273,7 +330,7 @@ std::optional<uint64_t> GetAddressFromLocation(Dwarf_Attribute& attribute) {
} // namespace
-std::optional<uint64_t> Entry::MaybeGetAddress(uint32_t attribute) {
+std::optional<Address> Entry::MaybeGetAddress(uint32_t attribute) {
auto dwarf_attribute = GetAttribute(&die, attribute);
if (!dwarf_attribute) {
return {};
@@ -282,10 +339,11 @@ std::optional<uint64_t> Entry::MaybeGetAddress(uint32_t attribute) {
return GetAddressFromLocation(*dwarf_attribute);
}
- uint64_t addr;
- Check(dwarf_formaddr(&dwarf_attribute.value(), &addr) == kReturnOk)
+ Address address;
+ Check(dwarf_formaddr(&dwarf_attribute.value(), &address.value) == kReturnOk)
<< "dwarf_formaddr returned error";
- return addr;
+ address.is_tls = false;
+ return address;
}
std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() {
@@ -300,8 +358,93 @@ std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() {
return offset;
}
- // TODO: support location expressions
- Die() << "dwarf_formudata returned error, " << std::hex << GetOffset();
+ // Parse location expression
+ const auto expression = GetExpression(attribute.value());
+
+ // Parse virtual base classes offset, which looks like this:
+ // [0] = DW_OP_dup
+ // [1] = DW_OP_deref
+ // [2] = constant operand
+ // [3] = DW_OP_minus
+ // [4] = DW_OP_deref
+ // [5] = DW_OP_plus
+ // This form is not in the standard, but hardcoded in compilers:
+ // * https://github.com/llvm/llvm-project/blob/release/17.x/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp#L1611
+ // * https://github.com/gcc-mirror/gcc/blob/releases/gcc-13/gcc/dwarf2out.cc#L20029
+ if (expression.length == 6 &&
+ expression[0].atom == DW_OP_dup &&
+ expression[1].atom == DW_OP_deref &&
+ expression[3].atom == DW_OP_minus &&
+ expression[4].atom == DW_OP_deref &&
+ expression[5].atom == DW_OP_plus) {
+ const auto byte_offset = MaybeGetUnsignedOperand(expression[2]);
+ if (byte_offset) {
+ return byte_offset;
+ }
+ }
+
+ Die() << "Unsupported member offset expression, " << Hex(GetOffset());
+}
+
+std::optional<uint64_t> Entry::MaybeGetVtableOffset() {
+ auto attribute = GetAttribute(&die, DW_AT_vtable_elem_location);
+ if (!attribute) {
+ return {};
+ }
+
+ // Parse location expression
+ const Expression expression = GetExpression(attribute.value());
+
+ // We expect compilers to produce expression with one constant operand
+ if (expression.length == 1) {
+ const auto offset = MaybeGetUnsignedOperand(expression[0]);
+ if (offset) {
+ return offset;
+ }
+ }
+
+ Die() << "Unsupported vtable offset expression, " << Hex(GetOffset());
+}
+
+std::optional<uint64_t> Entry::MaybeGetCount() {
+ auto dwarf_attribute = GetAttribute(&die, DW_AT_count);
+ if (!dwarf_attribute) {
+ return {};
+ }
+
+ uint64_t value;
+ if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) {
+ // All errors are interpreted as "value of DW_AT_count is not a constant"
+ // and ignored. There is no stable API in libdw for checking the exact error
+ // reason.
+ // TODO: implement clean solution for separating "not a
+ // constant" errors from other errors.
+ return {};
+ }
+ return value;
+}
+
+Files::Files(Entry& compilation_unit) {
+ if (dwarf_getsrcfiles(&compilation_unit.die, &files_, &files_count_) !=
+ kReturnOk) {
+ Die() << "No source file information in DWARF";
+ }
+}
+
+std::optional<std::string> Files::MaybeGetFile(Entry& entry,
+ uint32_t attribute) const {
+ auto file_index = entry.MaybeGetUnsignedConstant(attribute);
+ if (!file_index) {
+ return std::nullopt;
+ }
+ Check(files_ != nullptr) << "dwarf::Files was not initialised";
+ if (*file_index >= files_count_) {
+ Die() << "File index is greater than or equal files count (" << *file_index
+ << " >= " << files_count_ << ")";
+ }
+ const char* result = dwarf_filesrc(files_, *file_index, nullptr, nullptr);
+ Check(result != nullptr) << "dwarf_filesrc returned error";
+ return result;
}
} // namespace dwarf
diff --git a/dwarf_wrappers.h b/dwarf_wrappers.h
index 89f7705..90ae21d 100644
--- a/dwarf_wrappers.h
+++ b/dwarf_wrappers.h
@@ -28,12 +28,30 @@
#include <cstdint>
#include <memory>
#include <optional>
+#include <ostream>
#include <string>
+#include <tuple>
#include <vector>
namespace stg {
namespace dwarf {
+struct Address {
+ // TODO: use auto operator<=>
+ bool operator<(const Address& other) const {
+ return std::tie(value, is_tls) < std::tie(other.value, other.is_tls);
+ }
+
+ bool operator==(const Address& other) const {
+ return value == other.value && is_tls == other.is_tls;
+ }
+
+ uint64_t value;
+ bool is_tls;
+};
+
+std::ostream& operator<<(std::ostream& os, const Address& address);
+
// C++ wrapper over Dwarf_Die, providing interface for its various properties.
struct Entry {
// All methods in libdw take Dwarf_Die by non-const pointer as libdw caches
@@ -61,8 +79,12 @@ struct Entry {
std::optional<uint64_t> MaybeGetUnsignedConstant(uint32_t attribute);
bool GetFlag(uint32_t attribute);
std::optional<Entry> MaybeGetReference(uint32_t attribute);
- std::optional<uint64_t> MaybeGetAddress(uint32_t attribute);
+ std::optional<Address> MaybeGetAddress(uint32_t attribute);
std::optional<uint64_t> MaybeGetMemberByteOffset();
+ std::optional<uint64_t> MaybeGetVtableOffset();
+ // Returns value of DW_AT_count if it is constant or nullptr if it is not
+ // defined or cannot be represented as constant.
+ std::optional<uint64_t> MaybeGetCount();
};
// C++ wrapper over libdw (DWARF library).
@@ -92,6 +114,18 @@ class Handler {
Dwarf* dwarf_ = nullptr;
};
+class Files {
+ public:
+ Files() = default;
+ explicit Files(Entry& compilation_unit);
+ std::optional<std::string> MaybeGetFile(Entry& entry,
+ uint32_t attribute) const;
+
+ private:
+ Dwarf_Files* files_ = nullptr;
+ size_t files_count_ = 0;
+};
+
} // namespace dwarf
} // namespace stg
diff --git a/elf_loader.cc b/elf_loader.cc
index 0163304..5b9a864 100644
--- a/elf_loader.cc
+++ b/elf_loader.cc
@@ -29,6 +29,7 @@
#include <cstring>
#include <functional>
#include <iostream>
+#include <limits>
#include <ostream>
#include <string>
#include <string_view>
@@ -143,6 +144,28 @@ std::string ElfSectionTypeToString(Elf64_Word elf_section_type) {
}
}
+GElf_Half GetMachine(Elf* elf) {
+ GElf_Ehdr header;
+ Check(gelf_getehdr(elf, &header) != nullptr) << "could not get ELF header";
+ return header.e_machine;
+}
+
+void AdjustAddress(GElf_Half machine, SymbolTableEntry& entry) {
+ if (machine == EM_ARM) {
+ if (entry.symbol_type == SymbolTableEntry::SymbolType::FUNCTION
+ || entry.symbol_type == SymbolTableEntry::SymbolType::GNU_IFUNC) {
+ // Clear bit zero of ARM32 addresses as per "ELF for the Arm Architecture"
+ // section 5.5.3. https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
+ entry.value &= ~1;
+ }
+ } else if (machine == EM_AARCH64) {
+ // Copy bit 55 over bits 56 to 63 which may be tag information.
+ entry.value = entry.value & (1ULL << 55)
+ ? entry.value | (0xffULL << 56)
+ : entry.value & ~(0xffULL << 56);
+ }
+}
+
std::vector<Elf_Scn*> GetSectionsIf(
Elf* elf, const std::function<bool(const GElf_Shdr&)>& predicate) {
std::vector<Elf_Scn*> result;
@@ -260,6 +283,68 @@ Elf_Scn* GetSymbolTableSection(Elf* elf, bool is_linux_kernel_binary,
<< "'";
}
+
+constexpr std::string_view kCFISuffix = ".cfi";
+
+bool IsCFISymbolName(std::string_view name) {
+ // Check if symbol name ends with ".cfi"
+ // TODO: use std::string_view::ends_with
+ return (name.size() >= kCFISuffix.size() &&
+ name.substr(name.size() - kCFISuffix.size()) == kCFISuffix);
+}
+
+} // namespace
+
+std::string_view UnwrapCFISymbolName(std::string_view cfi_name) {
+ Check(IsCFISymbolName(cfi_name))
+ << "CFI symbol " << cfi_name << " doesn't end with .cfi";
+ return cfi_name.substr(0, cfi_name.size() - kCFISuffix.size());
+}
+
+namespace {
+
+std::vector<SymbolTableEntry> GetSymbols(
+ Elf* elf, Elf_Scn* symbol_table_section, bool cfi) {
+ const auto machine = GetMachine(elf);
+ const auto [symbol_table_header, symbol_table_data] =
+ GetSectionInfo(symbol_table_section);
+ const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header);
+
+ std::vector<SymbolTableEntry> result;
+ result.reserve(number_of_symbols);
+
+ // GElf uses int for indexes in symbol table, prevent int overflow.
+ Check(number_of_symbols <= std::numeric_limits<int>::max())
+ << "number of symbols exceeds INT_MAX";
+ for (size_t i = 0; i < number_of_symbols; ++i) {
+ GElf_Sym symbol;
+ Check(gelf_getsym(symbol_table_data, static_cast<int>(i), &symbol) !=
+ nullptr)
+ << "symbol (i = " << i << ") was not found";
+
+ const auto name =
+ GetString(elf, symbol_table_header.sh_link, symbol.st_name);
+ if (cfi != IsCFISymbolName(name)) {
+ continue;
+ }
+ SymbolTableEntry entry{
+ .name = name,
+ .value = symbol.st_value,
+ .size = symbol.st_size,
+ .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)),
+ .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)),
+ .visibility =
+ ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)),
+ .section_index = symbol.st_shndx,
+ .value_type = ParseSymbolValueType(symbol.st_shndx),
+ };
+ AdjustAddress(machine, entry);
+ result.push_back(entry);
+ }
+
+ return result;
+}
+
bool IsLinuxKernelBinary(Elf* elf) {
// The Linux kernel itself has many specific sections that are sufficient to
// classify a binary as kernel binary if present, `__ksymtab_strings` is one
@@ -367,34 +452,20 @@ std::vector<SymbolTableEntry> ElfLoader::GetElfSymbols() const {
Check(symbol_table_section != nullptr)
<< "failed to find symbol table section";
- const auto [symbol_table_header, symbol_table_data] =
- GetSectionInfo(symbol_table_section);
- const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header);
-
- std::vector<SymbolTableEntry> result;
- result.reserve(number_of_symbols);
-
- for (size_t i = 0; i < number_of_symbols; ++i) {
- GElf_Sym symbol;
- Check(gelf_getsym(symbol_table_data, i, &symbol) != nullptr)
- << "symbol (i = " << i << ") was not found";
+ return GetSymbols(elf_, symbol_table_section, /* cfi = */ false);
+}
- const auto name =
- GetString(elf_, symbol_table_header.sh_link, symbol.st_name);
- result.push_back(SymbolTableEntry{
- .name = name,
- .value = symbol.st_value,
- .size = symbol.st_size,
- .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)),
- .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)),
- .visibility =
- ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)),
- .section_index = symbol.st_shndx,
- .value_type = ParseSymbolValueType(symbol.st_shndx),
- });
+std::vector<SymbolTableEntry> ElfLoader::GetCFISymbols() const {
+ // CFI symbols may be only in .symtab
+ Elf_Scn* symbol_table_section = MaybeGetSectionByType(elf_, SHT_SYMTAB);
+ if (symbol_table_section == nullptr) {
+ // It is possible for ET_DYN and ET_EXEC ELF binaries to not have .symtab,
+ // because it was trimmed away. We can't determine whether there were CFI
+ // symbols in the first place, so the best we can do is returning an empty
+ // list.
+ return {};
}
-
- return result;
+ return GetSymbols(elf_, symbol_table_section, /* cfi = */ true);
}
ElfSymbol::CRC ElfLoader::GetElfSymbolCRC(
diff --git a/elf_loader.h b/elf_loader.h
index 14faf6e..2679155 100644
--- a/elf_loader.h
+++ b/elf_loader.h
@@ -71,12 +71,15 @@ std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type);
std::ostream& operator<<(std::ostream& os, SymbolTableEntry::ValueType type);
+std::string_view UnwrapCFISymbolName(std::string_view cfi_name);
+
class ElfLoader final {
public:
explicit ElfLoader(Elf* elf, bool verbose = false);
std::string_view GetBtfRawData() const;
std::vector<SymbolTableEntry> GetElfSymbols() const;
+ std::vector<SymbolTableEntry> GetCFISymbols() const;
ElfSymbol::CRC GetElfSymbolCRC(const SymbolTableEntry& symbol) const;
std::string_view GetElfSymbolNamespace(const SymbolTableEntry& symbol) const;
size_t GetAbsoluteAddress(const SymbolTableEntry& symbol) const;
diff --git a/elf_reader.cc b/elf_reader.cc
index d87a7f9..404474f 100644
--- a/elf_reader.cc
+++ b/elf_reader.cc
@@ -25,6 +25,7 @@
#include <ios>
#include <iostream>
#include <map>
+#include <memory>
#include <optional>
#include <string>
#include <string_view>
@@ -35,6 +36,7 @@
#include "dwarf_wrappers.h"
#include "elf_loader.h"
#include "error.h"
+#include "filter.h"
#include "graph.h"
#include "metrics.h"
#include "reader_options.h"
@@ -134,6 +136,19 @@ NamespacesMap GetNamespacesMap(const SymbolTable& symbols,
return namespaces;
}
+AddressMap GetCFIAddressMap(const SymbolTable& symbols, const ElfLoader& elf) {
+ AddressMap name_to_address;
+ for (const auto& symbol : symbols) {
+ const std::string_view name_prefix = UnwrapCFISymbolName(symbol.name);
+ const size_t address = elf.GetAbsoluteAddress(symbol);
+ if (!name_to_address.emplace(name_prefix, address).second) {
+ Die() << "Multiple CFI symbols referring to symbol '" << name_prefix
+ << '\'';
+ }
+ }
+ return name_to_address;
+}
+
bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol) {
const auto symbol_type = symbol.symbol_type;
// Reject symbols that are not functions or variables.
@@ -180,34 +195,48 @@ namespace {
class Reader {
public:
Reader(Graph& graph, const std::string& path, ReadOptions options,
- Metrics& metrics)
+ const std::unique_ptr<Filter>& file_filter, Metrics& metrics)
: graph_(graph),
dwarf_(path),
elf_(dwarf_.GetElf(), options.Test(ReadOptions::INFO)),
options_(options),
+ file_filter_(file_filter),
metrics_(metrics) {}
Reader(Graph& graph, char* data, size_t size, ReadOptions options,
- Metrics& metrics)
+ const std::unique_ptr<Filter>& file_filter, Metrics& metrics)
: graph_(graph),
dwarf_(data, size),
elf_(dwarf_.GetElf(), options.Test(ReadOptions::INFO)),
options_(options),
+ file_filter_(file_filter),
metrics_(metrics) {}
Id Read();
private:
- using SymbolIndex = std::map<std::pair<size_t, std::string>, size_t>;
+ using SymbolIndex =
+ std::map<std::pair<dwarf::Address, std::string>, std::vector<size_t>>;
Id BuildRoot(const std::vector<std::pair<ElfSymbol, size_t>>& symbols) {
+ // On destruction, the unification object will remove or rewrite each graph
+ // node for which it has a mapping.
+ //
+ // Graph rewriting is expensive so an important optimisation is to restrict
+ // the nodes in consideration to the ones allocated by the DWARF processor
+ // here and any symbol or type roots that follow. This is done by setting
+ // the starting node ID to be the current graph limit.
+ Unification unification(graph_, graph_.Limit(), metrics_);
+
dwarf::Types types;
if (!options_.Test(ReadOptions::SKIP_DWARF)) {
- types = dwarf::Process(dwarf_, elf_.IsLittleEndianBinary(), graph_);
+ types = dwarf::Process(dwarf_, elf_.IsLittleEndianBinary(), file_filter_,
+ graph_);
}
- // Unification rewrites the graph on destruction.
- Unification unification(graph_, metrics_);
+ // A less important optimisation is avoiding copying the mapping array as it
+ // is populated. This is done by reserving space to the new graph limit.
+ unification.Reserve(graph_.Limit());
// fill address to id
//
@@ -225,18 +254,9 @@ class Reader {
for (size_t i = 0; i < types.symbols.size(); ++i) {
const auto& symbol = types.symbols[i];
- // TODO: support linkage_name to support C++
- auto [it, emplaced] = address_name_to_index.emplace(
- std::make_pair(symbol.address, symbol.name), i);
- if (!emplaced) {
- const auto& other = types.symbols[it->second];
- // TODO: allow "compatible" duplicates, for example
- // "void foo(int bar)" vs "void foo(const int bar)"
- if (!IsEqual(unification, symbol, other)) {
- Die() << "Duplicate DWARF symbol: address=" << Hex(symbol.address)
- << ", name=" << symbol.name;
- }
- }
+ const auto& name =
+ symbol.linkage_name.has_value() ? *symbol.linkage_name : symbol.name;
+ address_name_to_index[std::make_pair(symbol.address, name)].push_back(i);
}
std::map<std::string, Id> symbols_map;
@@ -244,7 +264,8 @@ class Reader {
// TODO: add VersionInfoToString to SymbolKey name
// TODO: check for uniqueness of SymbolKey in map after
// support for version info
- MaybeAddTypeInfo(address_name_to_index, types.symbols, address, symbol);
+ MaybeAddTypeInfo(address_name_to_index, types.symbols, address, symbol,
+ unification);
symbols_map.emplace(VersionedSymbolName(symbol),
graph_.Add<ElfSymbol>(symbol));
}
@@ -307,11 +328,20 @@ class Reader {
static void MaybeAddTypeInfo(
const SymbolIndex& address_name_to_index,
const std::vector<dwarf::Types::Symbol>& dwarf_symbols,
- const size_t address, ElfSymbol& node) {
+ size_t address_value, ElfSymbol& node, Unification& unification) {
+ const bool is_tls = node.symbol_type == ElfSymbol::SymbolType::TLS;
+ if (is_tls) {
+ // TLS symbols address may be incorrect because of unsupported
+ // relocations. Resetting it to zero the same way as it is done in
+ // dwarf::Entry::GetAddressFromLocation.
+ // TODO: match TLS variables by address
+ address_value = 0;
+ }
+ const dwarf::Address address{.value = address_value, .is_tls = is_tls};
// try to find the first symbol with given address
const auto start_it = address_name_to_index.lower_bound(
std::make_pair(address, std::string()));
- const dwarf::Types::Symbol* best_symbol = nullptr;
+ auto best_symbols_it = address_name_to_index.end();
bool matched_by_name = false;
size_t candidates = 0;
for (auto it = start_it;
@@ -319,28 +349,45 @@ class Reader {
++it) {
++candidates;
// We have at least matching addresses.
- const auto& candidate = dwarf_symbols[it->second];
if (it->first.second == node.symbol_name) {
// If we have also matching names we can stop looking further.
matched_by_name = true;
- best_symbol = &candidate;
+ best_symbols_it = it;
break;
}
- if (best_symbol == nullptr) {
+ if (best_symbols_it == address_name_to_index.end()) {
// Otherwise keep the first match.
- best_symbol = &candidate;
+ best_symbols_it = it;
}
}
- if (best_symbol != nullptr) {
+ if (best_symbols_it != address_name_to_index.end()) {
+ const auto& best_symbols = best_symbols_it->second;
+ Check(!best_symbols.empty()) << "best_symbols.empty()";
+ const auto& best_symbol = dwarf_symbols[best_symbols[0]];
+ for (size_t i = 1; i < best_symbols.size(); ++i) {
+ const auto& other = dwarf_symbols[best_symbols[i]];
+ // TODO: allow "compatible" duplicates, for example
+ // "void foo(int bar)" vs "void foo(const int bar)"
+ if (!IsEqual(unification, best_symbol, other)) {
+ Die() << "Duplicate DWARF symbol: address="
+ << best_symbol.address << ", name=" << best_symbol.name;
+ }
+ }
+ if (best_symbol.name.empty()) {
+ Die() << "DWARF symbol (address = " << best_symbol.address
+ << ", linkage_name = "
+ << best_symbol.linkage_name.value_or("{missing}")
+ << " should have a name";
+ }
// There may be multiple DWARF symbols with same address (zero-length
// arrays), or ELF symbol has different name from DWARF symbol (aliases).
// But if we have both situations at once, we can't match ELF to DWARF and
// it should be fixed in analysed binary source code.
Check(matched_by_name || candidates == 1)
<< "multiple candidates without matching names, best_symbol.name="
- << best_symbol->name;
- node.type_id = best_symbol->id;
- node.full_name = best_symbol->name;
+ << best_symbol.name;
+ node.type_id = best_symbol.id;
+ node.full_name = best_symbol.name;
}
}
@@ -350,6 +397,7 @@ class Reader {
dwarf::Handler dwarf_;
elf::ElfLoader elf_;
ReadOptions options_;
+ const std::unique_ptr<Filter>& file_filter_;
Metrics& metrics_;
};
@@ -370,6 +418,14 @@ Id Reader::Read() {
namespaces = GetNamespacesMap(all_symbols, elf_);
}
+ const auto cfi_address_map = GetCFIAddressMap(elf_.GetCFISymbols(), elf_);
+ if (options_.Test(ReadOptions::INFO) && !cfi_address_map.empty()) {
+ std::cout << "CFI symbols:\n";
+ for (const auto& [name, address] : cfi_address_map) {
+ std::cout << " " << name << " -> " << Hex(address) << '\n';
+ }
+ }
+
if (options_.Test(ReadOptions::INFO)) {
std::cout << "Public functions and variables:\n";
}
@@ -378,9 +434,12 @@ Id Reader::Read() {
for (const auto& symbol : all_symbols) {
if (IsPublicFunctionOrVariable(symbol) &&
(!is_linux_kernel || ksymtab_symbols.count(symbol.name))) {
+ const auto cfi_it = cfi_address_map.find(std::string(symbol.name));
+ const size_t address = cfi_it != cfi_address_map.end()
+ ? cfi_it->second
+ : elf_.GetAbsoluteAddress(symbol);
symbols.emplace_back(
- SymbolTableEntryToElfSymbol(crc_values, namespaces, symbol),
- elf_.GetAbsoluteAddress(symbol));
+ SymbolTableEntryToElfSymbol(crc_values, namespaces, symbol), address);
if (options_.Test(ReadOptions::INFO)) {
std::cout << " " << symbol.binding << ' ' << symbol.symbol_type << " '"
@@ -405,13 +464,14 @@ Id Reader::Read() {
} // namespace internal
Id Read(Graph& graph, const std::string& path, ReadOptions options,
- Metrics& metrics) {
- return internal::Reader(graph, path, options, metrics).Read();
+ const std::unique_ptr<Filter>& file_filter, Metrics& metrics) {
+ return internal::Reader(graph, path, options, file_filter, metrics).Read();
}
Id Read(Graph& graph, char* data, size_t size, ReadOptions options,
- Metrics& metrics) {
- return internal::Reader(graph, data, size, options, metrics).Read();
+ const std::unique_ptr<Filter>& file_filter, Metrics& metrics) {
+ return internal::Reader(graph, data, size, options, file_filter, metrics)
+ .Read();
}
} // namespace elf
diff --git a/elf_reader.h b/elf_reader.h
index d1f6680..12d5711 100644
--- a/elf_reader.h
+++ b/elf_reader.h
@@ -21,6 +21,7 @@
#define STG_ELF_READER_H_
#include <cstddef>
+#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
@@ -28,6 +29,7 @@
#include <vector>
#include "elf_loader.h"
+#include "filter.h"
#include "graph.h"
#include "metrics.h"
#include "reader_options.h"
@@ -36,9 +38,9 @@ namespace stg {
namespace elf {
Id Read(Graph& graph, const std::string& path, ReadOptions options,
- Metrics& metrics);
+ const std::unique_ptr<Filter>& file_filter, Metrics& metrics);
Id Read(Graph& graph, char* data, size_t size, ReadOptions options,
- Metrics& metrics);
+ const std::unique_ptr<Filter>& file_filter, Metrics& metrics);
// For unit tests only
namespace internal {
@@ -47,6 +49,7 @@ using SymbolTable = std::vector<SymbolTableEntry>;
using SymbolNameList = std::unordered_set<std::string_view>;
using CRCValuesMap = std::unordered_map<std::string, ElfSymbol::CRC>;
using NamespacesMap = std::unordered_map<std::string, std::string>;
+using AddressMap = std::unordered_map<std::string, size_t>;
ElfSymbol::SymbolType ConvertSymbolType(
SymbolTableEntry::SymbolType symbol_type);
@@ -54,6 +57,7 @@ SymbolNameList GetKsymtabSymbols(const SymbolTable& symbols);
CRCValuesMap GetCRCValuesMap(const SymbolTable& symbols, const ElfLoader& elf);
NamespacesMap GetNamespacesMap(const SymbolTable& symbols,
const ElfLoader& elf);
+AddressMap GetCFIAddressMap(const SymbolTable& symbols, const ElfLoader& elf);
bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol);
} // namespace internal
diff --git a/elf_reader_test.cc b/elf_reader_test.cc
index d550c26..58d69dc 100644
--- a/elf_reader_test.cc
+++ b/elf_reader_test.cc
@@ -17,17 +17,37 @@
//
// Author: Aleksei Vetrov
-#include "elf_reader.h"
+#include <string_view>
#include <catch2/catch.hpp>
+#include "elf_loader.h"
+#include "elf_reader.h"
+#include "graph.h"
namespace Test {
+using SymbolTable = stg::elf::internal::SymbolTable;
+using SymbolTableEntry = stg::elf::SymbolTableEntry;
+
+SymbolTableEntry MakeSymbol(std::string_view name) {
+ return {
+ .name = name,
+ .value = 0,
+ .size = 0,
+ .symbol_type = SymbolTableEntry::SymbolType::OBJECT,
+ .binding = SymbolTableEntry::Binding::GLOBAL,
+ .visibility = SymbolTableEntry::Visibility::DEFAULT,
+ .section_index = 0,
+ .value_type = SymbolTableEntry::ValueType::RELATIVE_TO_SECTION,
+ };
+}
+
+
TEST_CASE("GetKsymtabSymbols") {
- const stg::elf::internal::SymbolTable all_symbols = {
- {.name = "foo"},
- {.name = "__ksymtab_foo"},
- {.name = "bar"},
+ const SymbolTable all_symbols = {
+ MakeSymbol("foo"),
+ MakeSymbol("__ksymtab_foo"),
+ MakeSymbol("bar"),
};
const auto ksymtab = stg::elf::internal::GetKsymtabSymbols(all_symbols);
REQUIRE(ksymtab.size() == 1);
diff --git a/equality.h b/equality.h
index c9aabad..0747073 100644
--- a/equality.h
+++ b/equality.h
@@ -104,12 +104,8 @@ struct Equals {
return result && it1 == end1 && it2 == end2;
}
- bool operator()(const Void&, const Void&) {
- return true;
- }
-
- bool operator()(const Variadic&, const Variadic&) {
- return true;
+ bool operator()(const Special& x1, const Special& x2) {
+ return x1.kind == x2.kind;
}
bool operator()(const PointerReference& x1,
@@ -153,7 +149,6 @@ struct Equals {
bool operator()(const Method& x1, const Method& x2) {
return x1.mangled_name == x2.mangled_name
&& x1.name == x2.name
- && x1.kind == x2.kind
&& x1.vtable_offset == x2.vtable_offset
&& (*this)(x1.type_id, x2.type_id);
}
diff --git a/fidelity.cc b/fidelity.cc
index e0da3fd..89c3c8c 100644
--- a/fidelity.cc
+++ b/fidelity.cc
@@ -19,7 +19,6 @@
#include "fidelity.h"
-#include <algorithm>
#include <map>
#include <ostream>
#include <set>
@@ -35,48 +34,16 @@ namespace stg {
namespace {
-const std::unordered_map<SymbolFidelityTransition, FidelityDiffSeverity>
- kSymbolTransitionSeverity = {
- {{SymbolFidelity::ABSENT, SymbolFidelity::UNTYPED},
- FidelityDiffSeverity::SKIP},
- {{SymbolFidelity::ABSENT, SymbolFidelity::TYPED},
- FidelityDiffSeverity::SKIP},
- {{SymbolFidelity::UNTYPED, SymbolFidelity::ABSENT},
- FidelityDiffSeverity::SKIP},
- {{SymbolFidelity::UNTYPED, SymbolFidelity::TYPED},
- FidelityDiffSeverity::INFO},
- {{SymbolFidelity::TYPED, SymbolFidelity::ABSENT},
- FidelityDiffSeverity::SKIP},
- {{SymbolFidelity::TYPED, SymbolFidelity::UNTYPED},
- FidelityDiffSeverity::WARN},
-};
-
-const std::unordered_map<TypeFidelityTransition, FidelityDiffSeverity>
- kTypeTransitionSeverity = {
- {{TypeFidelity::ABSENT, TypeFidelity::DECLARATION_ONLY},
- FidelityDiffSeverity::WARN},
- {{TypeFidelity::ABSENT, TypeFidelity::FULLY_DEFINED},
- FidelityDiffSeverity::INFO},
- {{TypeFidelity::DECLARATION_ONLY, TypeFidelity::FULLY_DEFINED},
- FidelityDiffSeverity::INFO},
- {{TypeFidelity::DECLARATION_ONLY, TypeFidelity::ABSENT},
- FidelityDiffSeverity::WARN},
- {{TypeFidelity::FULLY_DEFINED, TypeFidelity::ABSENT},
- FidelityDiffSeverity::WARN},
- {{TypeFidelity::FULLY_DEFINED, TypeFidelity::DECLARATION_ONLY},
- FidelityDiffSeverity::WARN},
-};
-
struct Fidelity {
Fidelity(const Graph& graph, NameCache& name_cache)
- : graph(graph), describe(graph, name_cache), seen(graph.Limit())
- {}
+ : graph(graph), describe(graph, name_cache), seen(Id(0)) {
+ seen.Reserve(graph.Limit());
+ }
void operator()(Id);
void operator()(const std::vector<Id>&);
void operator()(const std::map<std::string, Id>&);
- void operator()(const Void&, Id);
- void operator()(const Variadic&, Id);
+ void operator()(const Special&, Id);
void operator()(const PointerReference&, Id);
void operator()(const PointerToMember&, Id);
void operator()(const Typedef&, Id);
@@ -117,9 +84,7 @@ void Fidelity::operator()(const std::map<std::string, Id>& x) {
}
}
-void Fidelity::operator()(const Void&, Id) {}
-
-void Fidelity::operator()(const Variadic&, Id) {}
+void Fidelity::operator()(const Special&, Id) {}
void Fidelity::operator()(const PointerReference& x, Id) {
(*this)(x.pointee_type_id);
@@ -214,16 +179,6 @@ std::set<std::string> GetKeys(
return keys;
}
-FidelityDiffSeverity GetTransitionSeverity(SymbolFidelityTransition x) {
- return x.first == x.second ? FidelityDiffSeverity::SKIP
- : kSymbolTransitionSeverity.at(x);
-}
-
-FidelityDiffSeverity GetTransitionSeverity(TypeFidelityTransition x) {
- return x.first == x.second ? FidelityDiffSeverity::SKIP
- : kTypeTransitionSeverity.at(x);
-}
-
void InsertTransition(FidelityDiff& diff, SymbolFidelityTransition transition,
const std::string& symbol) {
diff.symbol_transitions[transition].push_back(symbol);
@@ -243,11 +198,7 @@ void InsertTransitions(FidelityDiff& diff,
auto it2 = x2.find(key);
auto transition = std::make_pair(it1 == x1.end() ? T() : it1->second,
it2 == x2.end() ? T() : it2->second);
- auto transition_severity = GetTransitionSeverity(transition);
- if (transition_severity != FidelityDiffSeverity::SKIP) {
- diff.severity = std::max(diff.severity, transition_severity);
- InsertTransition(diff, transition, key);
- }
+ InsertTransition(diff, transition, key);
}
}
@@ -283,17 +234,6 @@ std::ostream& operator<<(std::ostream& os, TypeFidelityTransition x) {
return os << "type(s) changed from " << x.first << " to " << x.second;
}
-std::ostream& operator<<(std::ostream& os, FidelityDiffSeverity x) {
- switch (x) {
- case FidelityDiffSeverity::SKIP:
- return os << "NONE";
- case FidelityDiffSeverity::INFO:
- return os << "INFO";
- case FidelityDiffSeverity::WARN:
- return os << "WARN";
- }
-}
-
FidelityDiff GetFidelityTransitions(const Graph& graph, Id root1, Id root2) {
NameCache name_cache;
Fidelity fidelity1(graph, name_cache);
diff --git a/fidelity.h b/fidelity.h
index 2fa81a0..10cf4ee 100644
--- a/fidelity.h
+++ b/fidelity.h
@@ -25,6 +25,7 @@
#include <ostream>
#include <string>
#include <unordered_map>
+#include <utility>
#include <vector>
#include "graph.h"
@@ -43,12 +44,6 @@ enum class TypeFidelity {
FULLY_DEFINED = 2,
};
-enum class FidelityDiffSeverity {
- SKIP = 0,
- INFO = 1,
- WARN = 2,
-};
-
using SymbolFidelityTransition = std::pair<SymbolFidelity, SymbolFidelity>;
using TypeFidelityTransition = std::pair<TypeFidelity, TypeFidelity>;
@@ -56,7 +51,6 @@ std::ostream& operator<<(std::ostream& os, SymbolFidelity x);
std::ostream& operator<<(std::ostream& os, TypeFidelity x);
std::ostream& operator<<(std::ostream& os, SymbolFidelityTransition x);
std::ostream& operator<<(std::ostream& os, TypeFidelityTransition x);
-std::ostream& operator<<(std::ostream& os, FidelityDiffSeverity x);
} // namespace stg
@@ -85,7 +79,6 @@ struct FidelityDiff {
symbol_transitions;
std::unordered_map<TypeFidelityTransition, std::vector<std::string>>
type_transitions;
- FidelityDiffSeverity severity = FidelityDiffSeverity::SKIP;
};
FidelityDiff GetFidelityTransitions(const Graph& graph, Id root1, Id root2);
diff --git a/symbol_filter.cc b/filter.cc
index f1eedaa..d21dbcb 100644
--- a/symbol_filter.cc
+++ b/filter.cc
@@ -17,7 +17,7 @@
//
// Author: Giuliano Procida
-#include "symbol_filter.h"
+#include "filter.h"
#include <fnmatch.h>
@@ -32,6 +32,7 @@
#include <sstream>
#include <string>
#include <string_view>
+#include <unordered_set>
#include <utility>
#include "error.h"
@@ -39,16 +40,15 @@
namespace stg {
namespace {
-SymbolSet ReadAbigail(const std::string& filename) {
- static constexpr std::array<std::string_view, 2> section_suffices = {
- "symbol_list",
- "whitelist",
- };
- SymbolSet symbols;
+using Items = std::unordered_set<std::string>;
+
+Items ReadAbigail(const std::string& filename) {
+ static constexpr std::string_view kSectionSuffix = "list";
+ Items items;
std::ifstream file(filename);
- Check(file.good()) << "error opening symbol file '" << filename << ": "
+ Check(file.good()) << "error opening filter file '" << filename << ": "
<< Error(errno);
- bool in_symbol_section = false;
+ bool in_filter_section = false;
std::string line;
while (std::getline(file, line)) {
size_t start = 0;
@@ -69,100 +69,97 @@ SymbolSet ReadAbigail(const std::string& filename) {
if (start == limit) {
continue;
}
- // See if we are entering a symbol list section.
+ // See if we are entering a filter list section.
if (line[start] == '[' && line[limit - 1] == ']') {
std::string_view section(&line[start + 1], limit - start - 2);
- bool found = false;
- for (const auto& suffix : section_suffices) {
- if (section.size() >= suffix.size()
- && section.substr(section.size() - suffix.size()) == suffix) {
- found = true;
- break;
- }
- }
- in_symbol_section = found;
+ // TODO: use std::string_view::ends_with
+ const auto section_size = section.size();
+ const auto suffix_size = kSectionSuffix.size();
+ in_filter_section = section_size >= suffix_size &&
+ section.substr(section_size - suffix_size) == kSectionSuffix;
continue;
}
- // Add symbol.
- if (in_symbol_section) {
- symbols.insert(std::string(&line[start], limit - start));
+ // Add item.
+ if (in_filter_section) {
+ items.insert(std::string(&line[start], limit - start));
}
}
- Check(file.eof()) << "error reading symbol file '" << filename << ": "
+ Check(file.eof()) << "error reading filter file '" << filename << ": "
<< Error(errno);
- return symbols;
+ return items;
}
-// Inverted symbol filter.
-class NotFilter : public SymbolFilter {
+// Inverted filter.
+class NotFilter : public Filter {
public:
- NotFilter(std::unique_ptr<SymbolFilter> filter)
+ explicit NotFilter(std::unique_ptr<Filter> filter)
: filter_(std::move(filter)) {}
- bool operator()(const std::string& symbol) const final {
- return !(*filter_)(symbol);
+ bool operator()(const std::string& item) const final {
+ return !(*filter_)(item);
};
private:
- const std::unique_ptr<SymbolFilter> filter_;
+ const std::unique_ptr<Filter> filter_;
};
-// Conjunction of symbol filters.
-class AndFilter : public SymbolFilter {
+// Conjunction of filters.
+class AndFilter : public Filter {
public:
- AndFilter(std::unique_ptr<SymbolFilter> filter1,
- std::unique_ptr<SymbolFilter> filter2)
+ AndFilter(std::unique_ptr<Filter> filter1,
+ std::unique_ptr<Filter> filter2)
: filter1_(std::move(filter1)), filter2_(std::move(filter2)) {}
- bool operator()(const std::string& symbol) const final {
- return (*filter1_)(symbol) && (*filter2_)(symbol);
+ bool operator()(const std::string& item) const final {
+ return (*filter1_)(item) && (*filter2_)(item);
};
private:
- const std::unique_ptr<SymbolFilter> filter1_;
- const std::unique_ptr<SymbolFilter> filter2_;
+ const std::unique_ptr<Filter> filter1_;
+ const std::unique_ptr<Filter> filter2_;
};
-// Disjunction of symbol filters.
-class OrFilter : public SymbolFilter {
+// Disjunction of filters.
+class OrFilter : public Filter {
public:
- OrFilter(std::unique_ptr<SymbolFilter> filter1,
- std::unique_ptr<SymbolFilter> filter2)
+ OrFilter(std::unique_ptr<Filter> filter1,
+ std::unique_ptr<Filter> filter2)
: filter1_(std::move(filter1)), filter2_(std::move(filter2)) {}
- bool operator()(const std::string& symbol) const final {
- return (*filter1_)(symbol) || (*filter2_)(symbol);
+ bool operator()(const std::string& item) const final {
+ return (*filter1_)(item) || (*filter2_)(item);
};
private:
- const std::unique_ptr<SymbolFilter> filter1_;
- const std::unique_ptr<SymbolFilter> filter2_;
+ const std::unique_ptr<Filter> filter1_;
+ const std::unique_ptr<Filter> filter2_;
};
-// Glob symbol filter.
-class GlobFilter : public SymbolFilter {
+// Glob filter.
+class GlobFilter : public Filter {
public:
- GlobFilter(const std::string& pattern) : pattern_(pattern) {}
- bool operator()(const std::string& symbol) const final {
- return fnmatch(pattern_.c_str(), symbol.c_str(), 0) == 0;
+ explicit GlobFilter(const std::string& pattern) : pattern_(pattern) {}
+ bool operator()(const std::string& item) const final {
+ return fnmatch(pattern_.c_str(), item.c_str(), 0) == 0;
}
private:
const std::string pattern_;
};
-// Literal symbol list symbol filter.
-class SetFilter : public SymbolFilter {
+// Literal list filter.
+class SetFilter : public Filter {
public:
- SetFilter(SymbolSet&& symbols) : symbols_(std::move(symbols)) {}
- bool operator()(const std::string& symbol) const final {
- return symbols_.count(symbol);
+ explicit SetFilter(Items&& items)
+ : items_(std::move(items)) {}
+ bool operator()(const std::string& item) const final {
+ return items_.count(item) > 0;
};
private:
- const SymbolSet symbols_;
+ const Items items_;
};
static const char* kTokenCharacters = ":!()&|";
-// Split a symbol filter expression into tokens.
+// Split a filter expression into tokens.
//
// All tokens are just strings, but single characters from kTokenCharacters are
// recognised as special syntax. Whitespace can be used between tokens and will
@@ -186,7 +183,7 @@ std::queue<std::string> Tokenise(const std::string& filter) {
}
result.emplace(&*name, it - name);
} else {
- Die() << "unexpected character in symbol filter: '" << *it;
+ Die() << "unexpected character in filter: '" << *it;
}
}
@@ -194,10 +191,10 @@ std::queue<std::string> Tokenise(const std::string& filter) {
}
// The failing parser.
-std::unique_ptr<SymbolFilter> Fail(
+std::unique_ptr<Filter> Fail(
const std::string& message, std::queue<std::string>& tokens) {
std::ostringstream os;
- os << "syntax error in symbol expression: '" << message << "'; context:";
+ os << "syntax error in filter expression: '" << message << "'; context:";
for (size_t i = 0; i < 3; ++i) {
os << ' ';
if (tokens.empty()) {
@@ -210,12 +207,12 @@ std::unique_ptr<SymbolFilter> Fail(
Die() << os.str();
}
-std::unique_ptr<SymbolFilter> Expression(std::queue<std::string>& tokens);
+std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens);
-// Parse a symbol filter atom.
-std::unique_ptr<SymbolFilter> Atom(std::queue<std::string>& tokens) {
+// Parse a filter atom.
+std::unique_ptr<Filter> Atom(std::queue<std::string>& tokens) {
if (tokens.empty()) {
- return Fail("expected a symbol expression", tokens);
+ return Fail("expected a filter expression", tokens);
}
auto token = tokens.front();
tokens.pop();
@@ -235,14 +232,14 @@ std::unique_ptr<SymbolFilter> Atom(std::queue<std::string>& tokens) {
return std::make_unique<SetFilter>(ReadAbigail(token));
} else {
if (std::strchr(kTokenCharacters, token[0])) {
- return Fail("expected a symbol glob", tokens);
+ return Fail("expected a glob token", tokens);
}
return std::make_unique<GlobFilter>(token);
}
}
-// Parse a symbol filter factor.
-std::unique_ptr<SymbolFilter> Factor(std::queue<std::string>& tokens) {
+// Parse a filter factor.
+std::unique_ptr<Filter> Factor(std::queue<std::string>& tokens) {
bool invert = false;
while (!tokens.empty() && tokens.front() == "!") {
tokens.pop();
@@ -255,8 +252,8 @@ std::unique_ptr<SymbolFilter> Factor(std::queue<std::string>& tokens) {
return atom;
}
-// Parse a symbol filter term.
-std::unique_ptr<SymbolFilter> Term(std::queue<std::string>& tokens) {
+// Parse a filter term.
+std::unique_ptr<Filter> Term(std::queue<std::string>& tokens) {
auto factor = Factor(tokens);
while (!tokens.empty() && tokens.front() == "&") {
tokens.pop();
@@ -265,8 +262,8 @@ std::unique_ptr<SymbolFilter> Term(std::queue<std::string>& tokens) {
return factor;
}
-// Parse a symbol filter expression.
-std::unique_ptr<SymbolFilter> Expression(std::queue<std::string>& tokens) {
+// Parse a filter expression.
+std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens) {
auto term = Term(tokens);
while (!tokens.empty() && tokens.front() == "|") {
tokens.pop();
@@ -277,19 +274,19 @@ std::unique_ptr<SymbolFilter> Expression(std::queue<std::string>& tokens) {
} // namespace
-// Tokenise and parse a symbol filter expression.
-std::unique_ptr<SymbolFilter> MakeSymbolFilter(const std::string& filter)
+// Tokenise and parse a filter expression.
+std::unique_ptr<Filter> MakeFilter(const std::string& filter)
{
auto tokens = Tokenise(filter);
auto result = Expression(tokens);
if (!tokens.empty()) {
- return Fail("unexpected junk at end of symbol filter", tokens);
+ return Fail("unexpected junk at end of filter", tokens);
}
return result;
}
-void SymbolFilterUsage(std::ostream& os) {
- os << "symbol filter syntax:\n"
+void FilterUsage(std::ostream& os) {
+ os << "filter syntax:\n"
<< " <filter> ::= <term> | <expression> '|' <term>\n"
<< " <term> ::= <factor> | <term> '&' <factor>\n"
<< " <factor> ::= <atom> | '!' <factor>\n"
diff --git a/symbol_filter.h b/filter.h
index 3db07e5..2612ba6 100644
--- a/symbol_filter.h
+++ b/filter.h
@@ -17,34 +17,28 @@
//
// Author: Giuliano Procida
-#ifndef STG_SYMBOL_FILTER_H_
-#define STG_SYMBOL_FILTER_H_
+#ifndef STG_FILTER_H_
+#define STG_FILTER_H_
-#include <iostream>
#include <memory>
#include <ostream>
#include <string>
-#include <unordered_set>
-
-#include "error.h"
namespace stg {
-using SymbolSet = std::unordered_set<std::string>;
-
-// Abstract base class for filtering symbols.
-class SymbolFilter {
+// Abstract base class for filtering.
+class Filter {
public:
- virtual ~SymbolFilter() = default;
+ virtual ~Filter() = default;
// Filter predicate evaluation.
- virtual bool operator()(const std::string& symbol) const = 0;
+ virtual bool operator()(const std::string& item) const = 0;
};
-// Tokenise and parse a symbol filter expression.
-std::unique_ptr<SymbolFilter> MakeSymbolFilter(const std::string& filter);
+// Tokenise and parse a filter expression.
+std::unique_ptr<Filter> MakeFilter(const std::string& filter);
-void SymbolFilterUsage(std::ostream& os);
+void FilterUsage(std::ostream& os);
} // namespace stg
-#endif // STG_SYMBOL_FILTER_H_
+#endif // STG_FILTER_H_
diff --git a/symbol_filter_test.cc b/filter_test.cc
index abdf79a..7666965 100644
--- a/symbol_filter_test.cc
+++ b/filter_test.cc
@@ -17,7 +17,7 @@
//
// Author: Giuliano Procida
-#include "symbol_filter.h"
+#include "filter.h"
#include <sstream>
#include <string>
@@ -54,7 +54,7 @@ TEST_CASE("bad syntax cases") {
for (const auto& expression : cases) {
std::ostringstream os;
GIVEN("filter: " + expression) {
- CHECK_THROWS(stg::MakeSymbolFilter(expression));
+ CHECK_THROWS(stg::MakeFilter(expression));
}
}
}
@@ -106,7 +106,7 @@ TEST_CASE("hand-curated cases") {
for (const auto& [expression, ins, outs] : cases) {
std::ostringstream os;
GIVEN("filter: " + expression) {
- auto filter = stg::MakeSymbolFilter(expression);
+ auto filter = stg::MakeFilter(expression);
for (const auto& in : ins) {
GIVEN("in: " + in) {
CHECK((*filter)(in));
diff --git a/fingerprint.cc b/fingerprint.cc
index 38851d8..5d87677 100644
--- a/fingerprint.cc
+++ b/fingerprint.cc
@@ -41,12 +41,15 @@ struct Hasher {
non_trivial_scc_size(metrics, "fingerprint.non_trivial_scc_size") {}
// Graph function implementation
- HashValue operator()(const Void&) {
- return hash('O');
- }
-
- HashValue operator()(const Variadic&) {
- return hash('V');
+ HashValue operator()(const Special& x) {
+ switch (x.kind) {
+ case Special::Kind::VOID:
+ return hash('O');
+ case Special::Kind::VARIADIC:
+ return hash('V');
+ case Special::Kind::NULLPTR:
+ return hash('L');
+ }
}
HashValue operator()(const PointerReference& x) {
diff --git a/fuzz/elf_reader_fuzzer.cc b/fuzz/elf_reader_fuzzer.cc
index e4fc087..e2d8f55 100644
--- a/fuzz/elf_reader_fuzzer.cc
+++ b/fuzz/elf_reader_fuzzer.cc
@@ -34,7 +34,8 @@ extern "C" int LLVMFuzzerTestOneInput(char* data, size_t size) {
std::vector<char> data_copy(data, data + size);
stg::Graph graph;
stg::Metrics metrics;
- stg::elf::Read(graph, data_copy.data(), size, stg::ReadOptions(), metrics);
+ stg::elf::Read(graph, data_copy.data(), size, stg::ReadOptions(), nullptr,
+ metrics);
} catch (const stg::Exception&) {
// Pass as this is us catching invalid ELF properly.
}
diff --git a/graph.cc b/graph.cc
index df013e2..70ceae5 100644
--- a/graph.cc
+++ b/graph.cc
@@ -44,17 +44,6 @@ std::ostream& operator<<(std::ostream& os, BaseClass::Inheritance inheritance) {
}
}
-std::ostream& operator<<(std::ostream& os, Method::Kind kind) {
- switch (kind) {
- case Method::Kind::NON_VIRTUAL:
- return os << "non-virtual";
- case Method::Kind::STATIC:
- return os << "static";
- case Method::Kind::VIRTUAL:
- return os << "virtual";
- }
-}
-
namespace {
std::string_view ToString(StructUnion::Kind kind) {
diff --git a/graph.h b/graph.h
index 4eba9d7..32b74a7 100644
--- a/graph.h
+++ b/graph.h
@@ -83,10 +83,16 @@ struct hash<stg::Pair> {
namespace stg {
-struct Void {
-};
+struct Special {
+ enum class Kind {
+ VOID,
+ VARIADIC,
+ NULLPTR,
+ };
+ explicit Special(Kind kind)
+ : kind(kind) {}
-struct Variadic {
+ Kind kind;
};
struct PointerReference {
@@ -175,21 +181,17 @@ struct BaseClass {
std::ostream& operator<<(std::ostream& os, BaseClass::Inheritance inheritance);
struct Method {
- enum class Kind { NON_VIRTUAL, STATIC, VIRTUAL };
- Method(const std::string& mangled_name, const std::string& name, Kind kind,
- const std::optional<uint64_t> vtable_offset, Id type_id)
- : mangled_name(mangled_name), name(name), kind(kind),
+ Method(const std::string& mangled_name, const std::string& name,
+ uint64_t vtable_offset, Id type_id)
+ : mangled_name(mangled_name), name(name),
vtable_offset(vtable_offset), type_id(type_id) {}
std::string mangled_name;
std::string name;
- Kind kind;
- std::optional<uint64_t> vtable_offset;
+ uint64_t vtable_offset;
Id type_id;
};
-std::ostream& operator<<(std::ostream& os, Method::Kind kind);
-
struct Member {
Member(const std::string& name, Id type_id, uint64_t offset, uint64_t bitsize)
: name(name), type_id(type_id), offset(offset), bitsize(bitsize) {}
@@ -347,12 +349,9 @@ class Graph {
if (reference.first != Which::ABSENT) {
Die() << "node value already set: " << id;
}
- if constexpr (std::is_same_v<Node, Void>) {
- reference = {Which::VOID, void_.size()};
- void_.emplace_back(std::forward<Args>(args)...);
- } else if constexpr (std::is_same_v<Node, Variadic>) {
- reference = {Which::VARIADIC, variadic_.size()};
- variadic_.emplace_back(std::forward<Args>(args)...);
+ if constexpr (std::is_same_v<Node, Special>) {
+ reference = {Which::SPECIAL, special_.size()};
+ special_.emplace_back(std::forward<Args>(args)...);
} else if constexpr (std::is_same_v<Node, PointerReference>) {
reference = {Which::POINTER_REFERENCE, pointer_reference_.size()};
pointer_reference_.emplace_back(std::forward<Args>(args)...);
@@ -435,9 +434,8 @@ class Graph {
Result Apply(FunctionObject& function, Id id, Args&&... args);
template <typename Function>
- void ForEach(Function&& function) const {
- const size_t limit = Limit().ix_;
- for (size_t ix = 0; ix < limit; ++ix) {
+ void ForEach(Id start, Id limit, Function&& function) const {
+ for (size_t ix = start.ix_; ix < limit.ix_; ++ix) {
const Id id(ix);
if (Is(id)) {
function(id);
@@ -448,8 +446,7 @@ class Graph {
private:
enum class Which {
ABSENT,
- VOID,
- VARIADIC,
+ SPECIAL,
POINTER_REFERENCE,
POINTER_TO_MEMBER,
TYPEDEF,
@@ -468,8 +465,7 @@ class Graph {
std::vector<std::pair<Which, size_t>> indirection_;
- std::vector<Void> void_;
- std::vector<Variadic> variadic_;
+ std::vector<Special> special_;
std::vector<PointerReference> pointer_reference_;
std::vector<PointerToMember> pointer_to_member_;
std::vector<Typedef> typedef_;
@@ -492,10 +488,8 @@ Result Graph::Apply(FunctionObject& function, Id id, Args&&... args) const {
switch (which) {
case Which::ABSENT:
Die() << "undefined node: " << id;
- case Which::VOID:
- return function(void_[ix], std::forward<Args>(args)...);
- case Which::VARIADIC:
- return function(variadic_[ix], std::forward<Args>(args)...);
+ case Which::SPECIAL:
+ return function(special_[ix], std::forward<Args>(args)...);
case Which::POINTER_REFERENCE:
return function(pointer_reference_[ix], std::forward<Args>(args)...);
case Which::POINTER_TO_MEMBER:
@@ -538,11 +532,8 @@ Result Graph::Apply2(
switch (which1) {
case Which::ABSENT:
Die() << "undefined nodes: " << id1 << ", " << id2;
- case Which::VOID:
- return function(void_[ix1], void_[ix2],
- std::forward<Args>(args)...);
- case Which::VARIADIC:
- return function(variadic_[ix1], variadic_[ix2],
+ case Which::SPECIAL:
+ return function(special_[ix1], special_[ix2],
std::forward<Args>(args)...);
case Which::POINTER_REFERENCE:
return function(pointer_reference_[ix1], pointer_reference_[ix2],
@@ -649,20 +640,28 @@ struct InterfaceKey {
// key set limited to allocated Ids.
class DenseIdSet {
public:
- explicit DenseIdSet(Id limit) : ids_(limit.ix_, false) {}
+ explicit DenseIdSet(Id start) : offset_(start.ix_) {}
+ void Reserve(Id limit) {
+ ids_.reserve(limit.ix_ - offset_);
+ }
bool Insert(Id id) {
const auto ix = id.ix_;
- if (ix >= ids_.size()) {
- ids_.resize(ix + 1);
+ if (ix < offset_) {
+ Die() << "DenseIdSet: out of range access to " << id;
+ }
+ const auto offset_ix = ix - offset_;
+ if (offset_ix >= ids_.size()) {
+ ids_.resize(offset_ix + 1, false);
}
- if (ids_[ix]) {
+ if (ids_[offset_ix]) {
return false;
}
- ids_[ix] = true;
+ ids_[offset_ix] = true;
return true;
}
private:
+ size_t offset_;
std::vector<bool> ids_;
};
@@ -670,24 +669,27 @@ class DenseIdSet {
// but with constant time operations and key set limited to allocated Ids.
class DenseIdMapping {
public:
- explicit DenseIdMapping(Id limit) {
- ids_.reserve(limit.ix_);
- Populate(limit.ix_);
+ explicit DenseIdMapping(Id start) : offset_(start.ix_) {}
+ void Reserve(Id limit) {
+ ids_.reserve(limit.ix_ - offset_);
}
Id& operator[](Id id) {
const auto ix = id.ix_;
+ if (ix < offset_) {
+ Die() << "DenseIdMapping: out of range access to " << id;
+ }
Populate(ix + 1);
- return ids_[ix];
+ return ids_[ix - offset_];
}
private:
void Populate(size_t size) {
- const auto limit = ids_.size();
- for (size_t ix = limit; ix < size; ++ix) {
+ for (size_t ix = offset_ + ids_.size(); ix < size; ++ix) {
ids_.emplace_back(ix);
}
}
+ size_t offset_;
std::vector<Id> ids_;
};
diff --git a/hashing.h b/hashing.h
index 111bead..a1527e4 100644
--- a/hashing.h
+++ b/hashing.h
@@ -111,7 +111,7 @@ struct Hash {
}
// Hash std::string by constructing a std::string_view.
- constexpr HashValue operator()(const std::string& x) const {
+ HashValue operator()(const std::string& x) const {
return (*this)(std::string_view(x));
}
diff --git a/input.cc b/input.cc
index 43bbd76..c396b47 100644
--- a/input.cc
+++ b/input.cc
@@ -19,9 +19,12 @@
#include "input.h"
+#include <memory>
+
#include "abigail_reader.h"
#include "btf_reader.h"
#include "elf_reader.h"
+#include "filter.h"
#include "graph.h"
#include "metrics.h"
#include "proto_reader.h"
@@ -30,7 +33,8 @@
namespace stg {
Id Read(Graph& graph, InputFormat format, const char* input,
- ReadOptions options, Metrics& metrics) {
+ ReadOptions options, const std::unique_ptr<Filter>& file_filter,
+ Metrics& metrics) {
switch (format) {
case InputFormat::ABI: {
Time read(metrics, "read ABI");
@@ -42,7 +46,7 @@ Id Read(Graph& graph, InputFormat format, const char* input,
}
case InputFormat::ELF: {
Time read(metrics, "read ELF");
- return elf::Read(graph, input, options, metrics);
+ return elf::Read(graph, input, options, file_filter, metrics);
}
case InputFormat::STG: {
Time read(metrics, "read STG");
diff --git a/input.h b/input.h
index 108dd29..51894a2 100644
--- a/input.h
+++ b/input.h
@@ -20,6 +20,9 @@
#ifndef STG_INPUT_H_
#define STG_INPUT_H_
+#include <memory>
+
+#include "filter.h"
#include "graph.h"
#include "metrics.h"
#include "reader_options.h"
@@ -29,7 +32,8 @@ namespace stg {
enum class InputFormat { ABI, BTF, ELF, STG };
Id Read(Graph& graph, InputFormat format, const char* input,
- ReadOptions options, Metrics& metrics);
+ ReadOptions options, const std::unique_ptr<Filter>& file_filter,
+ Metrics& metrics);
} // namespace stg
diff --git a/naming.cc b/naming.cc
index 28064f8..8395291 100644
--- a/naming.cc
+++ b/naming.cc
@@ -24,6 +24,8 @@
#include <sstream>
#include <string>
+#include "graph.h"
+
namespace stg {
Name Name::Add(Side side, Precedence precedence,
@@ -113,12 +115,15 @@ Name Describe::operator()(Id id) {
return cached;
}
-Name Describe::operator()(const Void&) {
- return Name{"void"};
-}
-
-Name Describe::operator()(const Variadic&) {
- return Name{"..."};
+Name Describe::operator()(const Special& x) {
+ switch (x.kind) {
+ case Special::Kind::VOID:
+ return Name{"void"};
+ case Special::Kind::VARIADIC:
+ return Name{"..."};
+ case Special::Kind::NULLPTR:
+ return Name{"decltype(nullptr)"};
+ }
}
Name Describe::operator()(const PointerReference& x) {
diff --git a/naming.h b/naming.h
index 41a5910..1aaae39 100644
--- a/naming.h
+++ b/naming.h
@@ -58,8 +58,7 @@ using NameCache = std::unordered_map<Id, Name>;
struct Describe {
Describe(const Graph& graph, NameCache& names) : graph(graph), names(names) {}
Name operator()(Id id);
- Name operator()(const Void&);
- Name operator()(const Variadic&);
+ Name operator()(const Special&);
Name operator()(const PointerReference&);
Name operator()(const PointerToMember&);
Name operator()(const Typedef&);
diff --git a/proto_reader.cc b/proto_reader.cc
index b20813f..c7eacb7 100644
--- a/proto_reader.cc
+++ b/proto_reader.cc
@@ -55,6 +55,7 @@ struct Transformer {
void AddNodes(const google::protobuf::RepeatedPtrField<ProtoType>&);
void AddNode(const Void&);
void AddNode(const Variadic&);
+ void AddNode(const Special&);
void AddNode(const PointerReference&);
void AddNode(const PointerToMember&);
void AddNode(const Typedef&);
@@ -77,11 +78,11 @@ struct Transformer {
template <typename GetKey>
std::map<std::string, Id> Transform(GetKey,
const google::protobuf::RepeatedField<uint32_t>&);
+ stg::Special::Kind Transform(Special::Kind);
stg::PointerReference::Kind Transform(PointerReference::Kind);
stg::Qualifier Transform(Qualified::Qualifier);
stg::Primitive::Encoding Transform(Primitive::Encoding);
stg::BaseClass::Inheritance Transform(BaseClass::Inheritance);
- stg::Method::Kind Transform(Method::Kind);
stg::StructUnion::Kind Transform(StructUnion::Kind);
stg::ElfSymbol::SymbolType Transform(ElfSymbol::SymbolType);
stg::ElfSymbol::Binding Transform(ElfSymbol::Binding);
@@ -98,8 +99,9 @@ struct Transformer {
};
Id Transformer::Transform(const proto::STG& x) {
- AddNodes(x.void_());
- AddNodes(x.variadic());
+ AddNodes(x.void_()); // deprecated
+ AddNodes(x.variadic()); // deprecated
+ AddNodes(x.special());
AddNodes(x.pointer_reference());
AddNodes(x.pointer_to_member());
AddNodes(x.typedef_());
@@ -134,11 +136,15 @@ void Transformer::AddNodes(const google::protobuf::RepeatedPtrField<ProtoType>&
}
void Transformer::AddNode(const Void& x) {
- AddNode<stg::Void>(GetId(x.id()));
+ AddNode<stg::Special>(GetId(x.id()), stg::Special::Kind::VOID);
}
void Transformer::AddNode(const Variadic& x) {
- AddNode<stg::Variadic>(GetId(x.id()));
+ AddNode<stg::Special>(GetId(x.id()), stg::Special::Kind::VARIADIC);
+}
+
+void Transformer::AddNode(const Special& x) {
+ AddNode<stg::Special>(GetId(x.id()), x.kind());
}
void Transformer::AddNode(const PointerReference& x) {
@@ -177,10 +183,8 @@ void Transformer::AddNode(const BaseClass& x) {
}
void Transformer::AddNode(const Method& x) {
- const auto& vtable_offset =
- Transform<uint64_t>(x.has_vtable_offset(), x.vtable_offset());
- AddNode<stg::Method>(GetId(x.id()), x.mangled_name(), x.name(), x.kind(),
- vtable_offset, GetId(x.type_id()));
+ AddNode<stg::Method>(GetId(x.id()), x.mangled_name(), x.name(),
+ x.vtable_offset(), GetId(x.type_id()));
}
void Transformer::AddNode(const Member& x) {
@@ -279,6 +283,19 @@ std::map<std::string, Id> Transformer::Transform(
return result;
}
+stg::Special::Kind Transformer::Transform(Special::Kind x) {
+ switch (x) {
+ case Special::VOID:
+ return stg::Special::Kind::VOID;
+ case Special::VARIADIC:
+ return stg::Special::Kind::VARIADIC;
+ case Special::NULLPTR:
+ return stg::Special::Kind::NULLPTR;
+ default:
+ Die() << "unknown Special::Kind " << x;
+ }
+}
+
stg::PointerReference::Kind Transformer::Transform(PointerReference::Kind x) {
switch (x) {
case PointerReference::POINTER:
@@ -341,19 +358,6 @@ stg::BaseClass::Inheritance Transformer::Transform(BaseClass::Inheritance x) {
}
}
-stg::Method::Kind Transformer::Transform(Method::Kind x) {
- switch (x) {
- case Method::NON_VIRTUAL:
- return stg::Method::Kind::NON_VIRTUAL;
- case Method::STATIC:
- return stg::Method::Kind::STATIC;
- case Method::VIRTUAL:
- return stg::Method::Kind::VIRTUAL;
- default:
- Die() << "unknown Method::Kind " << x;
- }
-}
-
stg::StructUnion::Kind Transformer::Transform(StructUnion::Kind x) {
switch (x) {
case StructUnion::STRUCT:
@@ -434,7 +438,7 @@ Type Transformer::Transform(const Type& x) {
return x;
}
-const std::array<uint32_t, 2> kSupportedFormatVersions = {0, 1};
+const std::array<uint32_t, 3> kSupportedFormatVersions = {0, 1, 2};
void CheckFormatVersion(uint32_t version, std::optional<std::string> path) {
Check(std::count(kSupportedFormatVersions.begin(),
diff --git a/proto_reader.h b/proto_reader.h
index f8f6551..7639bf1 100644
--- a/proto_reader.h
+++ b/proto_reader.h
@@ -20,16 +20,10 @@
#ifndef STG_PROTO_READER_H_
#define STG_PROTO_READER_H_
-#include <cstdint>
-#include <istream>
-#include <optional>
#include <string>
#include <string_view>
-#include <unordered_map>
-#include <vector>
#include "graph.h"
-#include "stg.pb.h"
namespace stg {
namespace proto {
diff --git a/proto_writer.cc b/proto_writer.cc
index 4110908..741064d 100644
--- a/proto_writer.cc
+++ b/proto_writer.cc
@@ -61,8 +61,7 @@ struct Transform {
uint32_t operator()(Id);
- void operator()(const stg::Void&, uint32_t);
- void operator()(const stg::Variadic&, uint32_t);
+ void operator()(const stg::Special&, uint32_t);
void operator()(const stg::PointerReference&, uint32_t);
void operator()(const stg::PointerToMember&, uint32_t);
void operator()(const stg::Typedef&, uint32_t);
@@ -78,11 +77,11 @@ struct Transform {
void operator()(const stg::ElfSymbol&, uint32_t);
void operator()(const stg::Interface&, uint32_t);
+ Special::Kind operator()(stg::Special::Kind);
PointerReference::Kind operator()(stg::PointerReference::Kind);
Qualified::Qualifier operator()(stg::Qualifier);
Primitive::Encoding operator()(stg::Primitive::Encoding);
BaseClass::Inheritance operator()(stg::BaseClass::Inheritance);
- Method::Kind operator()(stg::Method::Kind);
StructUnion::Kind operator()(stg::StructUnion::Kind);
ElfSymbol::SymbolType operator()(stg::ElfSymbol::SymbolType);
ElfSymbol::Binding operator()(stg::ElfSymbol::Binding);
@@ -116,13 +115,10 @@ uint32_t Transform<MapId>::operator()(Id id) {
}
template <typename MapId>
-void Transform<MapId>::operator()(const stg::Void&, uint32_t id) {
- stg.add_void_()->set_id(id);
-}
-
-template <typename MapId>
-void Transform<MapId>::operator()(const stg::Variadic&, uint32_t id) {
- stg.add_variadic()->set_id(id);
+void Transform<MapId>::operator()(const stg::Special& x, uint32_t id) {
+ auto& special = *stg.add_special();
+ special.set_id(id);
+ special.set_kind((*this)(x.kind));
}
template <typename MapId>
@@ -191,10 +187,7 @@ void Transform<MapId>::operator()(const stg::Method& x, uint32_t id) {
method.set_id(id);
method.set_mangled_name(x.mangled_name);
method.set_name(x.name);
- method.set_kind((*this)(x.kind));
- if (x.vtable_offset) {
- method.set_vtable_offset(*x.vtable_offset);
- }
+ method.set_vtable_offset(x.vtable_offset);
method.set_type_id((*this)(x.type_id));
}
@@ -310,6 +303,19 @@ PointerReference::Kind Transform<MapId>::operator()(
}
template <typename MapId>
+Special::Kind Transform<MapId>::operator()(
+ stg::Special::Kind x) {
+ switch (x) {
+ case stg::Special::Kind::VOID:
+ return Special::VOID;
+ case stg::Special::Kind::VARIADIC:
+ return Special::VARIADIC;
+ case stg::Special::Kind::NULLPTR:
+ return Special::NULLPTR;
+ }
+}
+
+template <typename MapId>
Qualified::Qualifier Transform<MapId>::operator()(stg::Qualifier x) {
switch (x) {
case stg::Qualifier::CONST:
@@ -357,18 +363,6 @@ BaseClass::Inheritance Transform<MapId>::operator()(
}
template <typename MapId>
-Method::Kind Transform<MapId>::operator()(stg::Method::Kind x) {
- switch (x) {
- case stg::Method::Kind::NON_VIRTUAL:
- return Method::NON_VIRTUAL;
- case stg::Method::Kind::STATIC:
- return Method::STATIC;
- case stg::Method::Kind::VIRTUAL:
- return Method::VIRTUAL;
- }
-}
-
-template <typename MapId>
StructUnion::Kind Transform<MapId>::operator()(stg::StructUnion::Kind x) {
switch (x) {
case stg::StructUnion::Kind::STRUCT:
@@ -463,12 +457,14 @@ class HexPrinter : public google::protobuf::TextFormat::FastFieldValuePrinter {
uint32_t value,
google::protobuf::TextFormat::BaseTextGenerator* generator) const override {
std::ostringstream os;
- os << "0x" << std::hex << std::setw(8) << std::setfill('0') << value;
+ // 0x01234567
+ os << std::showbase << std::hex << std::setfill('0') << std::internal
+ << std::setw(10) << value;
generator->PrintString(os.str());
}
};
-const uint32_t kWrittenFormatVersion = 1;
+const uint32_t kWrittenFormatVersion = 2;
} // namespace
diff --git a/proto_writer.h b/proto_writer.h
index 59e2b38..ac78111 100644
--- a/proto_writer.h
+++ b/proto_writer.h
@@ -23,7 +23,6 @@
#include <ostream>
#include "graph.h"
-#include "stg.pb.h"
namespace stg {
namespace proto {
diff --git a/reporting.cc b/reporting.cc
index 4813f59..5c3c02c 100644
--- a/reporting.cc
+++ b/reporting.cc
@@ -36,8 +36,8 @@
#include "comparison.h"
#include "error.h"
-#include "graph.h"
#include "fidelity.h"
+#include "graph.h"
#include "post_processing.h"
namespace stg {
@@ -430,23 +430,26 @@ void Report(const Reporting& reporting, const Comparison& comparison,
}
}
-void FidelityDiff(const stg::FidelityDiff& diff, std::ostream& output) {
- auto print_bucket = [&diff, &output](auto&& from, auto&& to) {
+bool FidelityDiff(const stg::FidelityDiff& diff, std::ostream& output) {
+ bool diffs_reported = false;
+ auto print_bucket = [&diff, &output, &diffs_reported](auto&& from,
+ auto&& to) {
auto transition = std::make_pair(from, to);
if constexpr (std::is_same_v<decltype(from), SymbolFidelity&&>) {
auto it = diff.symbol_transitions.find(transition);
if (it != diff.symbol_transitions.end()) {
PrintFidelityReportBucket(transition, it->second, output);
+ diffs_reported = true;
}
} else if constexpr (std::is_same_v<decltype(from), TypeFidelity&&>) {
auto it = diff.type_transitions.find(transition);
if (it != diff.type_transitions.end()) {
PrintFidelityReportBucket(transition, it->second, output);
+ diffs_reported = true;
}
}
};
- output << "SEVERITY: " << diff.severity << "\n\n";
print_bucket(TypeFidelity::FULLY_DEFINED, TypeFidelity::ABSENT);
print_bucket(TypeFidelity::DECLARATION_ONLY, TypeFidelity::ABSENT);
print_bucket(TypeFidelity::FULLY_DEFINED, TypeFidelity::DECLARATION_ONLY);
@@ -455,6 +458,7 @@ void FidelityDiff(const stg::FidelityDiff& diff, std::ostream& output) {
print_bucket(TypeFidelity::DECLARATION_ONLY, TypeFidelity::FULLY_DEFINED);
print_bucket(SymbolFidelity::UNTYPED, SymbolFidelity::TYPED);
print_bucket(TypeFidelity::ABSENT, TypeFidelity::FULLY_DEFINED);
+ return diffs_reported;
}
} // namespace reporting
diff --git a/reporting.h b/reporting.h
index 8291536..edee19e 100644
--- a/reporting.h
+++ b/reporting.h
@@ -21,9 +21,9 @@
#define STG_REPORTING_H_
#include <cstddef>
-#include <string_view>
#include <optional>
#include <ostream>
+#include <string_view>
#include "comparison.h"
#include "fidelity.h"
@@ -54,7 +54,7 @@ struct Reporting {
void Report(const Reporting&, const Comparison&, std::ostream&);
-void FidelityDiff(const stg::FidelityDiff&, std::ostream&);
+bool FidelityDiff(const stg::FidelityDiff&, std::ostream&);
} // namespace reporting
} // namespace stg
diff --git a/reporting_test.cc b/reporting_test.cc
index a33dfb3..f41db1b 100644
--- a/reporting_test.cc
+++ b/reporting_test.cc
@@ -36,10 +36,18 @@ std::string filename_to_path(const std::string& f) {
TEST_CASE("fidelity diff") {
stg::FidelityDiff diff = {
- .symbol_transitions = {{{SymbolFidelity::TYPED, SymbolFidelity::UNTYPED},
- {"symbol1", "symbol2"}},
- {{SymbolFidelity::UNTYPED, SymbolFidelity::TYPED},
- {"symbol3"}}},
+ .symbol_transitions =
+ {
+ {{SymbolFidelity::TYPED, SymbolFidelity::UNTYPED},
+ {"symbol1", "symbol2"}},
+ {{SymbolFidelity::UNTYPED, SymbolFidelity::TYPED}, {"symbol3"}},
+ {{SymbolFidelity::ABSENT, SymbolFidelity::UNTYPED},
+ {"symbol4", "symbol5"}},
+ {{SymbolFidelity::ABSENT, SymbolFidelity::TYPED}, {"symbol6"}},
+ {{SymbolFidelity::TYPED, SymbolFidelity::ABSENT},
+ {"symbol7", "symbol8"}},
+ {{SymbolFidelity::UNTYPED, SymbolFidelity::ABSENT}, {"symbol9"}},
+ },
.type_transitions =
{{{TypeFidelity::FULLY_DEFINED, TypeFidelity::ABSENT},
{"struct s1", "union u1"}},
@@ -53,11 +61,10 @@ TEST_CASE("fidelity diff") {
{"enum e2", "union u3"}},
{{TypeFidelity::ABSENT, TypeFidelity::FULLY_DEFINED},
{"struct s3"}}},
- .severity = FidelityDiffSeverity::WARN,
};
std::ostringstream report;
- reporting::FidelityDiff(diff, report);
+ CHECK(reporting::FidelityDiff(diff, report));
std::ifstream expected_report_file(filename_to_path("fidelity_diff_report"));
std::ostringstream expected_report;
diff --git a/stable_hash.cc b/stable_hash.cc
index 88253f9..8621aed 100644
--- a/stable_hash.cc
+++ b/stable_hash.cc
@@ -68,12 +68,15 @@ HashValue StableHash::operator()(Id id) {
return it->second;
}
-HashValue StableHash::operator()(const Void&) {
- return hash_("void");
-}
-
-HashValue StableHash::operator()(const Variadic&) {
- return hash_("variadic");
+HashValue StableHash::operator()(const Special& x) {
+ switch (x.kind) {
+ case Special::Kind::VOID:
+ return hash_("void");
+ case Special::Kind::VARIADIC:
+ return hash_("variadic");
+ case Special::Kind::NULLPTR:
+ return hash_("nullptr");
+ }
}
HashValue StableHash::operator()(const PointerReference& x) {
@@ -110,7 +113,7 @@ HashValue StableHash::operator()(const BaseClass& x) {
}
HashValue StableHash::operator()(const Method& x) {
- return hash_(x.mangled_name, static_cast<uint32_t>(x.kind));
+ return hash_(x.mangled_name);
}
HashValue StableHash::operator()(const Member& x) {
diff --git a/stable_hash.h b/stable_hash.h
index 4e76af0..1732610 100644
--- a/stable_hash.h
+++ b/stable_hash.h
@@ -35,8 +35,7 @@ class StableHash {
explicit StableHash(const Graph& graph) : graph_(graph) {}
HashValue operator()(Id);
- HashValue operator()(const Void&);
- HashValue operator()(const Variadic&);
+ HashValue operator()(const Special&);
HashValue operator()(const PointerReference&);
HashValue operator()(const PointerToMember&);
HashValue operator()(const Typedef&);
diff --git a/stg.cc b/stg.cc
index 77fc8ad..b56996b 100644
--- a/stg.cc
+++ b/stg.cc
@@ -31,13 +31,13 @@
#include "deduplication.h"
#include "error.h"
+#include "filter.h"
#include "fingerprint.h"
#include "graph.h"
#include "input.h"
#include "metrics.h"
#include "proto_writer.h"
#include "reader_options.h"
-#include "symbol_filter.h"
#include "type_resolution.h"
#include "unification.h"
@@ -55,26 +55,33 @@ struct GetInterface {
}
};
-Id Merge(Graph& graph, const std::vector<Id>& roots) {
+Id Merge(Graph& graph, const std::vector<Id>& roots, Metrics& metrics) {
+ // this rewrites the graph on destruction
+ Unification unification(graph, Id(0), metrics);
+ unification.Reserve(graph.Limit());
std::map<std::string, Id> symbols;
+ std::map<std::string, Id> types;
GetInterface get;
for (auto root : roots) {
const auto& interface = graph.Apply<Interface&>(get, root);
- // TODO: Implement merging interfaces with type roots.
- if (!interface.types.empty()) {
- Die() << "merging interfaces with type roots is not yet supported";
- }
for (const auto& x : interface.symbols) {
if (!symbols.insert(x).second) {
Die() << "merge failed with duplicate symbol: " << x.first;
}
}
+ // TODO: test type roots merge
+ for (const auto& x : interface.types) {
+ const auto [it, inserted] = types.insert(x);
+ if (!inserted && !unification.Unify(x.second, it->second)) {
+ Die() << "merge failed with type conflict: " << x.first;
+ }
+ }
graph.Remove(root);
}
- return graph.Add<Interface>(symbols);
+ return graph.Add<Interface>(symbols, types);
}
-void Filter(Graph& graph, Id root, const SymbolFilter& filter) {
+void FilterSymbols(Graph& graph, Id root, const Filter& filter) {
std::map<std::string, Id> symbols;
GetInterface get;
auto& interface = graph.Apply<Interface&>(get, root);
@@ -109,7 +116,8 @@ int main(int argc, char* argv[]) {
// Process arguments.
bool opt_metrics = false;
bool opt_keep_duplicates = false;
- std::unique_ptr<stg::SymbolFilter> opt_symbols;
+ std::unique_ptr<stg::Filter> opt_file_filter;
+ std::unique_ptr<stg::Filter> opt_symbol_filter;
stg::ReadOptions opt_read_options;
stg::InputFormat opt_input_format = stg::InputFormat::ABI;
std::vector<const char*> inputs;
@@ -119,7 +127,9 @@ int main(int argc, char* argv[]) {
{"info", no_argument, nullptr, 'i' },
{"keep-duplicates", no_argument, nullptr, 'd' },
{"types", no_argument, nullptr, 't' },
+ {"file-filter", required_argument, nullptr, 'F' },
{"symbols", required_argument, nullptr, 'S' },
+ {"symbol-filter", required_argument, nullptr, 'S' },
{"abi", no_argument, nullptr, 'a' },
{"btf", no_argument, nullptr, 'b' },
{"elf", no_argument, nullptr, 'e' },
@@ -134,17 +144,18 @@ int main(int argc, char* argv[]) {
<< " [-i|--info]\n"
<< " [-d|--keep-duplicates]\n"
<< " [-t|--types]\n"
- << " [-S|--symbols <filter>]\n"
+ << " [-F|--file-filter <filter>]\n"
+ << " [-S|--symbols|--symbol-filter <filter>]\n"
<< " [--skip-dwarf]\n"
<< " [-a|--abi|-b|--btf|-e|--elf|-s|--stg] [file] ...\n"
<< " [{-o|--output} {filename|-}] ...\n"
<< "implicit defaults: --abi\n";
- stg::SymbolFilterUsage(std::cerr);
+ stg::FilterUsage(std::cerr);
return 1;
};
while (true) {
int ix;
- const int c = getopt_long(argc, argv, "-midtS:abeso:", opts, &ix);
+ const int c = getopt_long(argc, argv, "-midtS:F:abeso:", opts, &ix);
if (c == -1) {
break;
}
@@ -162,8 +173,11 @@ int main(int argc, char* argv[]) {
case 't':
opt_read_options.Set(stg::ReadOptions::TYPE_ROOTS);
break;
+ case 'F':
+ opt_file_filter = stg::MakeFilter(argument);
+ break;
case 'S':
- opt_symbols = stg::MakeSymbolFilter(argument);
+ opt_symbol_filter = stg::MakeFilter(argument);
break;
case 'a':
opt_input_format = stg::InputFormat::ABI;
@@ -201,15 +215,18 @@ int main(int argc, char* argv[]) {
roots.reserve(inputs.size());
for (auto input : inputs) {
roots.push_back(stg::Read(graph, opt_input_format, input,
- opt_read_options, metrics));
+ opt_read_options, opt_file_filter,
+ metrics));
}
- stg::Id root = roots.size() == 1 ? roots[0] : stg::Merge(graph, roots);
- if (opt_symbols) {
- stg::Filter(graph, root, *opt_symbols);
+ stg::Id root =
+ roots.size() == 1 ? roots[0] : stg::Merge(graph, roots, metrics);
+ if (opt_symbol_filter) {
+ stg::FilterSymbols(graph, root, *opt_symbol_filter);
}
if (!opt_keep_duplicates) {
{
- stg::Unification unification(graph, metrics);
+ stg::Unification unification(graph, stg::Id(0), metrics);
+ unification.Reserve(graph.Limit());
stg::ResolveTypes(graph, unification, {root}, metrics);
unification.Update(root);
}
diff --git a/stg.proto b/stg.proto
index 675468a..0937299 100644
--- a/stg.proto
+++ b/stg.proto
@@ -42,14 +42,28 @@ syntax = "proto3";
package stg.proto;
+// deprecated
message Void {
fixed32 id = 1;
}
+// deprecated
message Variadic {
fixed32 id = 1;
}
+message Special {
+ enum Kind {
+ KIND_UNSPECIFIED = 0;
+ VOID = 1;
+ VARIADIC = 2;
+ NULLPTR = 3;
+ }
+
+ fixed32 id = 1;
+ Kind kind = 2;
+}
+
message PointerReference {
enum Kind {
KIND_UNSPECIFIED = 0;
@@ -129,19 +143,11 @@ message BaseClass {
}
message Method {
- enum Kind {
- KIND_UNSPECIFIED = 0;
- NON_VIRTUAL = 1;
- STATIC = 2;
- VIRTUAL = 3;
- }
-
fixed32 id = 1;
string mangled_name = 2;
string name = 3;
- Kind kind = 4;
- optional uint64 vtable_offset = 5;
- fixed32 type_id = 6;
+ uint64 vtable_offset = 4;
+ fixed32 type_id = 5;
}
message Member {
@@ -252,19 +258,20 @@ message STG {
fixed32 root_id = 2;
repeated Void void = 3;
repeated Variadic variadic = 4;
- repeated PointerReference pointer_reference = 5;
- repeated PointerToMember pointer_to_member = 6;
- repeated Typedef typedef = 7;
- repeated Qualified qualified = 8;
- repeated Primitive primitive = 9;
- repeated Array array = 10;
- repeated BaseClass base_class = 11;
- repeated Method method = 12;
- repeated Member member = 13;
- repeated StructUnion struct_union = 14;
- repeated Enumeration enumeration = 15;
- repeated Function function = 16;
- repeated ElfSymbol elf_symbol = 17;
- repeated Symbols symbols = 18;
- repeated Interface interface = 19;
+ repeated Special special = 5;
+ repeated PointerReference pointer_reference = 6;
+ repeated PointerToMember pointer_to_member = 7;
+ repeated Typedef typedef = 8;
+ repeated Qualified qualified = 9;
+ repeated Primitive primitive = 10;
+ repeated Array array = 11;
+ repeated BaseClass base_class = 12;
+ repeated Method method = 13;
+ repeated Member member = 14;
+ repeated StructUnion struct_union = 15;
+ repeated Enumeration enumeration = 16;
+ repeated Function function = 17;
+ repeated ElfSymbol elf_symbol = 18;
+ repeated Symbols symbols = 19;
+ repeated Interface interface = 20;
}
diff --git a/stgdiff.cc b/stgdiff.cc
index b712a47..c256e4b 100644
--- a/stgdiff.cc
+++ b/stgdiff.cc
@@ -54,7 +54,8 @@ std::vector<stg::Id> Read(const Inputs& inputs, stg::Graph& graph,
stg::ReadOptions options, stg::Metrics& metrics) {
std::vector<stg::Id> roots;
for (const auto& [format, filename] : inputs) {
- roots.push_back(stg::Read(graph, format, filename, options, metrics));
+ roots.push_back(stg::Read(graph, format, filename, options, nullptr,
+ metrics));
}
return roots;
}
@@ -62,15 +63,15 @@ std::vector<stg::Id> Read(const Inputs& inputs, stg::Graph& graph,
int RunFidelity(const char* filename, const stg::Graph& graph,
const std::vector<stg::Id>& roots) {
std::ofstream output(filename);
- auto fidelity_diff = stg::GetFidelityTransitions(graph, roots[0], roots[1]);
- stg::reporting::FidelityDiff(fidelity_diff, output);
+ const auto fidelity_diff =
+ stg::GetFidelityTransitions(graph, roots[0], roots[1]);
+ const bool diffs_reported =
+ stg::reporting::FidelityDiff(fidelity_diff, output);
output << std::flush;
if (!output) {
stg::Die() << "error writing to " << '\'' << filename << '\'';
}
- return fidelity_diff.severity == stg::FidelityDiffSeverity::WARN
- ? kFidelityChange
- : 0;
+ return diffs_reported ? kFidelityChange : 0;
}
int RunExact(const Inputs& inputs, stg::ReadOptions options,
diff --git a/stgdiff_test.cc b/stgdiff_test.cc
index 7ae2da2..72c63d8 100644
--- a/stgdiff_test.cc
+++ b/stgdiff_test.cc
@@ -25,6 +25,7 @@
#include <string>
#include <catch2/catch.hpp>
+#include "comparison.h"
#include "graph.h"
#include "input.h"
#include "metrics.h"
@@ -52,7 +53,7 @@ stg::Id Read(stg::Graph& graph, stg::InputFormat format,
const std::string& input, stg::Metrics& metrics) {
const stg::ReadOptions opt_read_options(stg::ReadOptions::SKIP_DWARF);
return stg::Read(graph, format, filename_to_path(input).c_str(),
- opt_read_options, metrics);
+ opt_read_options, nullptr, metrics);
}
TEST_CASE("ignore") {
@@ -166,22 +167,31 @@ TEST_CASE("ignore") {
"empty",
true}),
IgnoreTestCase(
- {"interface addition",
+ {"CRC change",
stg::InputFormat::STG,
- "interface_addition_0.stg",
+ "crc_change_0.stg",
stg::InputFormat::STG,
- "interface_addition_1.stg",
+ "crc_change_1.stg",
stg::Ignore(),
- "interface_addition_small_diff",
+ "crc_change_small_diff",
false}),
IgnoreTestCase(
- {"type addition",
+ {"CRC change ignored",
stg::InputFormat::STG,
- "type_addition_0.stg",
+ "crc_change_0.stg",
stg::InputFormat::STG,
- "type_addition_1.stg",
+ "crc_change_1.stg",
+ stg::Ignore(stg::Ignore::SYMBOL_CRC),
+ "empty",
+ true}),
+ IgnoreTestCase(
+ {"interface addition",
+ stg::InputFormat::STG,
+ "interface_addition_0.stg",
+ stg::InputFormat::STG,
+ "interface_addition_1.stg",
stg::Ignore(),
- "type_addition_small_diff",
+ "interface_addition_small_diff",
false}),
IgnoreTestCase(
{"interface addition ignored",
@@ -193,6 +203,15 @@ TEST_CASE("ignore") {
"empty",
true}),
IgnoreTestCase(
+ {"type addition",
+ stg::InputFormat::STG,
+ "type_addition_0.stg",
+ stg::InputFormat::STG,
+ "type_addition_1.stg",
+ stg::Ignore(),
+ "type_addition_small_diff",
+ false}),
+ IgnoreTestCase(
{"type addition ignored",
stg::InputFormat::STG,
"type_addition_0.stg",
@@ -202,21 +221,21 @@ TEST_CASE("ignore") {
"empty",
true}),
IgnoreTestCase(
- {"CRC change",
+ {"type definition addition",
stg::InputFormat::STG,
- "crc_change_0.stg",
+ "type_addition_1.stg",
stg::InputFormat::STG,
- "crc_change_1.stg",
+ "type_addition_2.stg",
stg::Ignore(),
- "crc_change_small_diff",
+ "type_definition_addition_small_diff",
false}),
IgnoreTestCase(
- {"CRC change ignored",
+ {"type definition addition ignored",
stg::InputFormat::STG,
- "crc_change_0.stg",
+ "type_addition_1.stg",
stg::InputFormat::STG,
- "crc_change_1.stg",
- stg::Ignore(stg::Ignore::SYMBOL_CRC),
+ "type_addition_2.stg",
+ stg::Ignore(stg::Ignore::TYPE_DEFINITION_ADDITION),
"empty",
true})
);
diff --git a/stginfo.cc b/stginfo.cc
index 30fe125..4e0c98a 100644
--- a/stginfo.cc
+++ b/stginfo.cc
@@ -83,7 +83,8 @@ int main(int argc, char* const argv[]) {
try {
stg::Graph graph;
stg::Metrics metrics;
- (void)stg::Read(graph, format, filename, opt_read_options, metrics);
+ (void)stg::Read(graph, format, filename, opt_read_options, nullptr,
+ metrics);
} catch (const stg::Exception& e) {
std::cerr << e.what() << '\n';
return 1;
diff --git a/substitution.h b/substitution.h
index ebebd4c..5d3ba55 100644
--- a/substitution.h
+++ b/substitution.h
@@ -62,9 +62,7 @@ struct Substitute {
return graph.Apply<void>(*this, id);
}
- void operator()(Void&) {}
-
- void operator()(Variadic&) {}
+ void operator()(Special&) {}
void operator()(PointerReference& x) {
Update(x.pointee_type_id);
diff --git a/testdata/abigail_bad_elf_dwarf_link_0.xml b/testdata/abigail_bad_elf_dwarf_link_0.xml
new file mode 100644
index 0000000..3b0cb31
--- /dev/null
+++ b/testdata/abigail_bad_elf_dwarf_link_0.xml
@@ -0,0 +1,26 @@
+<!-- XML without the zero size object aliasing issue -->
+<abi-corpus version='2.1' path='vmlinux' architecture='elf-arm-aarch64'>
+ <elf-variable-symbols>
+ <elf-symbol name='vm_node_stat' size='256' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes' crc='0xe5c4cf93'/>
+ </elf-variable-symbols>
+ <abi-instr address-size='64' path='mm/vmstat.c' language='LANG_C89'>
+ <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/>
+ <typedef-decl name='__s64' type-id='1eb56b1e' filepath='include/uapi/asm-generic/int-ll64.h' line='30' column='1' id='49659421'/>
+ <typedef-decl name='s64' type-id='49659421' filepath='include/asm-generic/int-ll64.h' line='22' column='1' id='9b7c55ef'/>
+ <class-decl name='atomic64_t' size-in-bits='64' is-struct='yes' naming-typedef-id='28ee064c' visibility='default' filepath='include/linux/types.h' line='176' column='1' id='2fef71f1'>
+ <data-member access='public' layout-offset-in-bits='0'>
+ <var-decl name='counter' type-id='9b7c55ef' visibility='default' filepath='include/linux/types.h' line='177' column='1'/>
+ </data-member>
+ </class-decl>
+ <typedef-decl name='atomic64_t' type-id='2fef71f1' filepath='include/linux/types.h' line='178' column='1' id='28ee064c'/>
+ <typedef-decl name='atomic_long_t' type-id='28ee064c' filepath='include/asm-generic/atomic-long.h' line='12' column='1' id='f22a8abb'/>
+ <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='infinite' id='a922812c'>
+ <subrange length='infinite' type-id='7ff19f0f' id='031f2035'/>
+ </array-type-def>
+ <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='2048' id='99d12d7b'>
+ <subrange length='32' type-id='7ff19f0f' id='ae5bde82'/>
+ </array-type-def>
+ <var-decl name='vm_numa_stat' type-id='a922812c' mangled-name='vm_numa_stat' visibility='default' filepath='mm/vmstat.c' line='164' column='1' elf-symbol-id='vm_numa_stat'/>
+ <var-decl name='vm_node_stat' type-id='99d12d7b' mangled-name='vm_node_stat' visibility='default' filepath='mm/vmstat.c' line='165' column='1' elf-symbol-id='vm_node_stat'/>
+ </abi-instr>
+</abi-corpus>
diff --git a/testdata/abigail_bad_elf_dwarf_link_1.xml b/testdata/abigail_bad_elf_dwarf_link_1.xml
new file mode 100644
index 0000000..b8ab134
--- /dev/null
+++ b/testdata/abigail_bad_elf_dwarf_link_1.xml
@@ -0,0 +1,26 @@
+<!-- XML with the zero size object aliasing issue, mangled-name matches name -->
+<abi-corpus version='2.1' path='vmlinux' architecture='elf-arm-aarch64'>
+ <elf-variable-symbols>
+ <elf-symbol name='vm_node_stat' size='256' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes' crc='0xe5c4cf93'/>
+ </elf-variable-symbols>
+ <abi-instr address-size='64' path='mm/vmstat.c' language='LANG_C89'>
+ <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/>
+ <typedef-decl name='__s64' type-id='1eb56b1e' filepath='include/uapi/asm-generic/int-ll64.h' line='30' column='1' id='49659421'/>
+ <typedef-decl name='s64' type-id='49659421' filepath='include/asm-generic/int-ll64.h' line='22' column='1' id='9b7c55ef'/>
+ <class-decl name='atomic64_t' size-in-bits='64' is-struct='yes' naming-typedef-id='28ee064c' visibility='default' filepath='include/linux/types.h' line='176' column='1' id='2fef71f1'>
+ <data-member access='public' layout-offset-in-bits='0'>
+ <var-decl name='counter' type-id='9b7c55ef' visibility='default' filepath='include/linux/types.h' line='177' column='1'/>
+ </data-member>
+ </class-decl>
+ <typedef-decl name='atomic64_t' type-id='2fef71f1' filepath='include/linux/types.h' line='178' column='1' id='28ee064c'/>
+ <typedef-decl name='atomic_long_t' type-id='28ee064c' filepath='include/asm-generic/atomic-long.h' line='12' column='1' id='f22a8abb'/>
+ <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='infinite' id='a922812c'>
+ <subrange length='infinite' type-id='7ff19f0f' id='031f2035'/>
+ </array-type-def>
+ <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='2048' id='99d12d7b'>
+ <subrange length='32' type-id='7ff19f0f' id='ae5bde82'/>
+ </array-type-def>
+ <var-decl name='vm_numa_stat' type-id='a922812c' mangled-name='vm_numa_stat' visibility='default' filepath='mm/vmstat.c' line='164' column='1' elf-symbol-id='vm_node_stat'/>
+ <var-decl name='vm_node_stat' type-id='99d12d7b' mangled-name='vm_node_stat' visibility='default' filepath='mm/vmstat.c' line='165' column='1' elf-symbol-id='vm_node_stat'/>
+ </abi-instr>
+</abi-corpus>
diff --git a/testdata/abigail_bad_elf_dwarf_link_2.xml b/testdata/abigail_bad_elf_dwarf_link_2.xml
new file mode 100644
index 0000000..b3a8c81
--- /dev/null
+++ b/testdata/abigail_bad_elf_dwarf_link_2.xml
@@ -0,0 +1,26 @@
+<!-- XML with the zero size object aliasing issue, mangled-name matches elf-symbol-id -->
+<abi-corpus version='2.1' path='vmlinux' architecture='elf-arm-aarch64'>
+ <elf-variable-symbols>
+ <elf-symbol name='vm_node_stat' size='256' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes' crc='0xe5c4cf93'/>
+ </elf-variable-symbols>
+ <abi-instr address-size='64' path='mm/vmstat.c' language='LANG_C89'>
+ <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/>
+ <typedef-decl name='__s64' type-id='1eb56b1e' filepath='include/uapi/asm-generic/int-ll64.h' line='30' column='1' id='49659421'/>
+ <typedef-decl name='s64' type-id='49659421' filepath='include/asm-generic/int-ll64.h' line='22' column='1' id='9b7c55ef'/>
+ <class-decl name='atomic64_t' size-in-bits='64' is-struct='yes' naming-typedef-id='28ee064c' visibility='default' filepath='include/linux/types.h' line='176' column='1' id='2fef71f1'>
+ <data-member access='public' layout-offset-in-bits='0'>
+ <var-decl name='counter' type-id='9b7c55ef' visibility='default' filepath='include/linux/types.h' line='177' column='1'/>
+ </data-member>
+ </class-decl>
+ <typedef-decl name='atomic64_t' type-id='2fef71f1' filepath='include/linux/types.h' line='178' column='1' id='28ee064c'/>
+ <typedef-decl name='atomic_long_t' type-id='28ee064c' filepath='include/asm-generic/atomic-long.h' line='12' column='1' id='f22a8abb'/>
+ <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='infinite' id='a922812c'>
+ <subrange length='infinite' type-id='7ff19f0f' id='031f2035'/>
+ </array-type-def>
+ <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='2048' id='99d12d7b'>
+ <subrange length='32' type-id='7ff19f0f' id='ae5bde82'/>
+ </array-type-def>
+ <var-decl name='vm_numa_stat' type-id='a922812c' mangled-name='vm_node_stat' visibility='default' filepath='mm/vmstat.c' line='164' column='1' elf-symbol-id='vm_node_stat'/>
+ <var-decl name='vm_node_stat' type-id='99d12d7b' mangled-name='vm_node_stat' visibility='default' filepath='mm/vmstat.c' line='165' column='1' elf-symbol-id='vm_node_stat'/>
+ </abi-instr>
+</abi-corpus>
diff --git a/testdata/abigail_duplicate_data_members_1.xml b/testdata/abigail_duplicate_data_members_1.xml
index 9c287bc..e750ecb 100644
--- a/testdata/abigail_duplicate_data_members_1.xml
+++ b/testdata/abigail_duplicate_data_members_1.xml
@@ -15,6 +15,9 @@
<data-member access='public' layout-offset-in-bits='32'>
<var-decl name='' type-id='e7f43f72' visibility='default'/>
</data-member>
+ <data-member access='public' layout-offset-in-bits='32'>
+ <var-decl name='' type-id='e7f43f72' visibility='default'/>
+ </data-member>
</class-decl>
<class-decl name='__anonymous_struct__' size-in-bits='32' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f72'>
<data-member access='public' layout-offset-in-bits='0'>
diff --git a/testdata/crc_change_0.stg b/testdata/crc_change_0.stg
index 80057d5..d817a53 100644
--- a/testdata/crc_change_0.stg
+++ b/testdata/crc_change_0.stg
@@ -1,4 +1,4 @@
-version: 0x00000001
+version: 0x00000002
root_id: 0x84ea5130
primitive {
id: 0x6720d32f
diff --git a/testdata/crc_change_1.stg b/testdata/crc_change_1.stg
index fc73858..d4cc7b4 100644
--- a/testdata/crc_change_1.stg
+++ b/testdata/crc_change_1.stg
@@ -1,4 +1,4 @@
-version: 0x00000001
+version: 0x00000002
root_id: 0x84ea5130
primitive {
id: 0x6720d32f
diff --git a/testdata/enum_underlying_type_0.stg b/testdata/enum_underlying_type_0.stg
index a86a6bc..e66f6e6 100644
--- a/testdata/enum_underlying_type_0.stg
+++ b/testdata/enum_underlying_type_0.stg
@@ -1,6 +1,7 @@
-root_id: 0x25a8b173
+version: 0x00000002
+root_id: 0x84ea5130
primitive {
- id: 0x4585663f
+ id: 0xa08f335b
name: "bob"
encoding: UNSIGNED_INTEGER
bytesize: 0x00000004
@@ -8,7 +9,7 @@ primitive {
enumeration {
id: 0x64775884
definition {
- underlying_type_id: 0x4585663f
+ underlying_type_id: 0xa08f335b
enumerator {
name: "a"
value: 1
@@ -23,10 +24,7 @@ elf_symbol {
type_id: 0x64775884
full_name: "x"
}
-symbols {
- id: 0x25a8b173
- symbol {
- key: "x"
- value: 0x7709bd40
- }
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
}
diff --git a/testdata/enum_underlying_type_1.stg b/testdata/enum_underlying_type_1.stg
index e3c444b..e60714d 100644
--- a/testdata/enum_underlying_type_1.stg
+++ b/testdata/enum_underlying_type_1.stg
@@ -1,6 +1,7 @@
-root_id: 0x25a8b173
+version: 0x00000002
+root_id: 0x84ea5130
primitive {
- id: 0x4585663f
+ id: 0xba1f26f4
name: "alice"
encoding: SIGNED_INTEGER
bytesize: 0x00000002
@@ -8,7 +9,7 @@ primitive {
enumeration {
id: 0x64775884
definition {
- underlying_type_id: 0x4585663f
+ underlying_type_id: 0xba1f26f4
enumerator {
name: "a"
value: 1
@@ -23,10 +24,7 @@ elf_symbol {
type_id: 0x64775884
full_name: "x"
}
-symbols {
- id: 0x25a8b173
- symbol {
- key: "x"
- value: 0x7709bd40
- }
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
}
diff --git a/testdata/fidelity_diff_0.stg b/testdata/fidelity_diff_0.stg
index 061f4df..0228073 100644
--- a/testdata/fidelity_diff_0.stg
+++ b/testdata/fidelity_diff_0.stg
@@ -1,5 +1,7 @@
+version: 0x00000002
+root_id: 0x84ea5130
struct_union {
- id: 11
+ id: 0x4fb72e88
kind: STRUCT
name: "s1"
definition {
@@ -7,12 +9,12 @@ struct_union {
}
}
struct_union {
- id: 12
+ id: 0x66321df5
kind: STRUCT
name: "s2"
}
struct_union {
- id: 14
+ id: 0x23ff4eea
kind: UNION
name: "u1"
definition {
@@ -20,81 +22,69 @@ struct_union {
}
}
struct_union {
- id: 15
+ id: 0xbb5afcc9
kind: UNION
name: "u3"
}
enumeration {
- id: 16
+ id: 0xb5790be1
name: "e1"
definition {
+ underlying_type_id: 0x84ea5130
}
}
enumeration {
- id: 17
+ id: 0x39cb53cc
name: "e2"
}
elf_symbol {
- id: 1
+ id: 0x99f1fb6e
name: "symbol1"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 11
+ type_id: 0x4fb72e88
}
elf_symbol {
- id: 2
+ id: 0xd9b0a065
name: "symbol2"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 12
+ type_id: 0x66321df5
}
elf_symbol {
- id: 3
+ id: 0x1b7105d1
name: "symbol3"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
}
elf_symbol {
- id: 4
+ id: 0x5733f9fa
name: "symbol4"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 14
+ type_id: 0x23ff4eea
}
elf_symbol {
- id: 5
+ id: 0x96f0448d
name: "symbol5"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 15
+ type_id: 0xbb5afcc9
}
elf_symbol {
- id: 6
+ id: 0xd4b3326a
name: "symbol6"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 16
+ type_id: 0xb5790be1
}
elf_symbol {
- id: 7
+ id: 0x14739ab8
name: "symbol7"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 17
+ type_id: 0x39cb53cc
}
-symbols {
- symbol {key: "symbol1" value: 1}
- symbol {key: "symbol2" value: 2}
- symbol {key: "symbol3" value: 3}
- symbol {key: "symbol4" value: 4}
- symbol {key: "symbol5" value: 5}
- symbol {key: "symbol6" value: 6}
- symbol {key: "symbol7" value: 7}
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x99f1fb6e
+ symbol_id: 0xd9b0a065
+ symbol_id: 0x1b7105d1
+ symbol_id: 0x5733f9fa
+ symbol_id: 0x96f0448d
+ symbol_id: 0xd4b3326a
+ symbol_id: 0x14739ab8
}
diff --git a/testdata/fidelity_diff_1.stg b/testdata/fidelity_diff_1.stg
index c9b9a75..0d74c98 100644
--- a/testdata/fidelity_diff_1.stg
+++ b/testdata/fidelity_diff_1.stg
@@ -1,5 +1,7 @@
+version: 0x00000002
+root_id: 0x84ea5130
struct_union {
- id: 13
+ id: 0x82c2ca27
kind: STRUCT
name: "s3"
definition {
@@ -7,12 +9,12 @@ struct_union {
}
}
struct_union {
- id: 14
+ id: 0x7a129a94
kind: UNION
name: "u2"
}
struct_union {
- id: 15
+ id: 0x2099f798
kind: UNION
name: "u3"
definition {
@@ -20,75 +22,63 @@ struct_union {
}
}
enumeration {
- id: 16
+ id: 0xf80afce3
+ name: "e1"
+}
+enumeration {
+ id: 0x75396efe
name: "e2"
definition {
+ underlying_type_id: 0x84ea5130
}
}
-enumeration {
- id: 17
- name: "e1"
-}
elf_symbol {
- id: 1
+ id: 0x99f1fb6e
name: "symbol1"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
}
elf_symbol {
- id: 2
+ id: 0xd9b0a065
name: "symbol2"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
}
elf_symbol {
- id: 3
+ id: 0x1b7105d1
name: "symbol3"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 13
+ type_id: 0x82c2ca27
}
elf_symbol {
- id: 4
+ id: 0x5733f9fa
name: "symbol4"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 14
+ type_id: 0x7a129a94
}
elf_symbol {
- id: 5
+ id: 0x96f0448d
name: "symbol5"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 15
+ type_id: 0x2099f798
}
elf_symbol {
- id: 6
+ id: 0xd4b3326a
name: "symbol6"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 16
+ type_id: 0x75396efe
}
elf_symbol {
- id: 7
+ id: 0x14739ab8
name: "symbol7"
symbol_type: OBJECT
- binding: GLOBAL
- visibility: DEFAULT
- type_id: 17
+ type_id: 0xf80afce3
}
-symbols {
- symbol {key: "symbol1" value: 1}
- symbol {key: "symbol2" value: 2}
- symbol {key: "symbol3" value: 3}
- symbol {key: "symbol4" value: 4}
- symbol {key: "symbol5" value: 5}
- symbol {key: "symbol6" value: 6}
- symbol {key: "symbol7" value: 7}
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x99f1fb6e
+ symbol_id: 0xd9b0a065
+ symbol_id: 0x1b7105d1
+ symbol_id: 0x5733f9fa
+ symbol_id: 0x96f0448d
+ symbol_id: 0xd4b3326a
+ symbol_id: 0x14739ab8
}
diff --git a/testdata/fidelity_diff_report b/testdata/fidelity_diff_report
index 8a9d888..b21f3e8 100644
--- a/testdata/fidelity_diff_report
+++ b/testdata/fidelity_diff_report
@@ -1,5 +1,3 @@
-SEVERITY: WARN
-
2 type(s) changed from FULLY_DEFINED to ABSENT:
struct s1
union u1
diff --git a/testdata/interface_addition_0.stg b/testdata/interface_addition_0.stg
index 2071972..e91b30c 100644
--- a/testdata/interface_addition_0.stg
+++ b/testdata/interface_addition_0.stg
@@ -1,4 +1,4 @@
-version: 0x00000001
+version: 0x00000002
root_id: 0x84ea5130
interface {
id: 0x84ea5130
diff --git a/testdata/interface_addition_1.stg b/testdata/interface_addition_1.stg
index 1ad5ece..dc9363f 100644
--- a/testdata/interface_addition_1.stg
+++ b/testdata/interface_addition_1.stg
@@ -1,4 +1,4 @@
-version: 0x00000001
+version: 0x00000002
root_id: 0x84ea5130
primitive {
id: 0x6720d32f
diff --git a/testdata/member_size_0.stg b/testdata/member_size_0.stg
index 9b90b2e..838e97b 100644
--- a/testdata/member_size_0.stg
+++ b/testdata/member_size_0.stg
@@ -1,4 +1,5 @@
-root_id: 0x25a8b173
+version: 0x00000002
+root_id: 0x84ea5130
primitive {
id: 0x6720d32f
name: "int"
@@ -27,10 +28,7 @@ elf_symbol {
type_id: 0x0eecb660
full_name: "x"
}
-symbols {
- id: 0x25a8b173
- symbol {
- key: "x"
- value: 0x7709bd40
- }
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
}
diff --git a/testdata/member_size_1.stg b/testdata/member_size_1.stg
index d885a46..95bd0b7 100644
--- a/testdata/member_size_1.stg
+++ b/testdata/member_size_1.stg
@@ -1,4 +1,5 @@
-root_id: 0x25a8b173
+version: 0x00000002
+root_id: 0x84ea5130
primitive {
id: 0x6720d32f
name: "int"
@@ -6,17 +7,17 @@ primitive {
bytesize: 0x00000004
}
member {
- id: 0x5a03caec
+ id: 0xf4e7319a
name: "member"
type_id: 0x6720d32f
bitsize: 2
}
struct_union {
- id: 0x0eecb660
+ id: 0x255588bd
kind: STRUCT
definition {
bytesize: 4
- member_id: 0x5a03caec
+ member_id: 0xf4e7319a
}
}
elf_symbol {
@@ -24,13 +25,10 @@ elf_symbol {
name: "x"
is_defined: true
symbol_type: OBJECT
- type_id: 0x0eecb660
+ type_id: 0x255588bd
full_name: "x"
}
-symbols {
- id: 0x25a8b173
- symbol {
- key: "x"
- value: 0x7709bd40
- }
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
}
diff --git a/testdata/primitive_type_encoding_0.stg b/testdata/primitive_type_encoding_0.stg
index 0f53e1c..55403a8 100644
--- a/testdata/primitive_type_encoding_0.stg
+++ b/testdata/primitive_type_encoding_0.stg
@@ -1,6 +1,7 @@
-root_id: 0x25a8b173
+version: 0x00000002
+root_id: 0x84ea5130
primitive {
- id: 0x6720d32f
+ id: 0xa08f335b
name: "bob"
encoding: SIGNED_INTEGER
bytesize: 0x00000004
@@ -10,13 +11,10 @@ elf_symbol {
name: "x"
is_defined: true
symbol_type: OBJECT
- type_id: 0x6720d32f
+ type_id: 0xa08f335b
full_name: "x"
}
-symbols {
- id: 0x25a8b173
- symbol {
- key: "x"
- value: 0x7709bd40
- }
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
}
diff --git a/testdata/primitive_type_encoding_1.stg b/testdata/primitive_type_encoding_1.stg
index be543a9..365c3a1 100644
--- a/testdata/primitive_type_encoding_1.stg
+++ b/testdata/primitive_type_encoding_1.stg
@@ -1,6 +1,7 @@
-root_id: 0x25a8b173
+version: 0x00000002
+root_id: 0x84ea5130
primitive {
- id: 0x6720d32f
+ id: 0xa08f335b
name: "bob"
encoding: UNSIGNED_INTEGER
bytesize: 0x00000004
@@ -10,13 +11,10 @@ elf_symbol {
name: "x"
is_defined: true
symbol_type: OBJECT
- type_id: 0x6720d32f
+ type_id: 0xa08f335b
full_name: "x"
}
-symbols {
- id: 0x25a8b173
- symbol {
- key: "x"
- value: 0x7709bd40
- }
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
}
diff --git a/testdata/qualifier_0.stg b/testdata/qualifier_0.stg
index 0e9b75c..05328ef 100644
--- a/testdata/qualifier_0.stg
+++ b/testdata/qualifier_0.stg
@@ -1,4 +1,5 @@
-root_id: 0x25a8b173
+version: 0x00000002
+root_id: 0x84ea5130
qualified {
id: 0xc5d9d969
qualifier: CONST
@@ -18,10 +19,7 @@ elf_symbol {
type_id: 0xc5d9d969
full_name: "x"
}
-symbols {
- id: 0x25a8b173
- symbol {
- key: "x"
- value: 0x7709bd40
- }
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
}
diff --git a/testdata/qualifier_1.stg b/testdata/qualifier_1.stg
index ea808b4..cf7c30f 100644
--- a/testdata/qualifier_1.stg
+++ b/testdata/qualifier_1.stg
@@ -1,6 +1,7 @@
-root_id: 0x25a8b173
+version: 0x00000002
+root_id: 0x84ea5130
qualified {
- id: 0xc5d9d969
+ id: 0x8dde4646
qualifier: VOLATILE
qualified_type_id: 0x6720d32f
}
@@ -15,13 +16,10 @@ elf_symbol {
name: "x"
is_defined: true
symbol_type: OBJECT
- type_id: 0xc5d9d969
+ type_id: 0x8dde4646
full_name: "x"
}
-symbols {
- id: 0x25a8b173
- symbol {
- key: "x"
- value: 0x7709bd40
- }
+interface {
+ id: 0x84ea5130
+ symbol_id: 0x7709bd40
}
diff --git a/testdata/symbol_list b/testdata/symbol_list
index 70343e9..6d062d2 100644
--- a/testdata/symbol_list
+++ b/testdata/symbol_list
@@ -9,7 +9,7 @@ one
# next is with leading and trailing spaces
two
# indented comment
-[not_a_list]
+[other_section]
bad
[symbol_list)
bad
diff --git a/testdata/type_addition_0.stg b/testdata/type_addition_0.stg
index 2071972..e91b30c 100644
--- a/testdata/type_addition_0.stg
+++ b/testdata/type_addition_0.stg
@@ -1,4 +1,4 @@
-version: 0x00000001
+version: 0x00000002
root_id: 0x84ea5130
interface {
id: 0x84ea5130
diff --git a/testdata/type_addition_1.stg b/testdata/type_addition_1.stg
index e449dae..961f68f 100644
--- a/testdata/type_addition_1.stg
+++ b/testdata/type_addition_1.stg
@@ -1,11 +1,11 @@
-version: 0x00000001
+version: 0x00000002
root_id: 0x84ea5130
struct_union {
- id: 0xa8a0bc6a
+ id: 0xb1a9a2d5
kind: UNION
name: "U"
}
interface {
id: 0x84ea5130
- type_id: 0xa8a0bc6a
+ type_id: 0xb1a9a2d5
}
diff --git a/testdata/type_addition_2.stg b/testdata/type_addition_2.stg
new file mode 100644
index 0000000..21c3cd3
--- /dev/null
+++ b/testdata/type_addition_2.stg
@@ -0,0 +1,13 @@
+version: 0x00000002
+root_id: 0x84ea5130
+struct_union {
+ id: 0xfa4f7de5
+ kind: UNION
+ name: "U"
+ definition {
+ }
+}
+interface {
+ id: 0x84ea5130
+ type_id: 0xfa4f7de5
+}
diff --git a/testdata/type_definition_addition_small_diff b/testdata/type_definition_addition_small_diff
new file mode 100644
index 0000000..936317c
--- /dev/null
+++ b/testdata/type_definition_addition_small_diff
@@ -0,0 +1,3 @@
+type 'union U' changed
+ was only declared, is now fully defined
+
diff --git a/type_normalisation.cc b/type_normalisation.cc
index 72cc94b..50a59b1 100644
--- a/type_normalisation.cc
+++ b/type_normalisation.cc
@@ -88,9 +88,7 @@ struct FindQualifiedTypesAndFunctions {
}
}
- void operator()(const Void&, Id) {}
-
- void operator()(const Variadic&, Id) {}
+ void operator()(const Special&, Id) {}
void operator()(const PointerReference& x, Id) {
(*this)(x.pointee_type_id);
diff --git a/type_resolution.cc b/type_resolution.cc
index bcf5f49..b32d535 100644
--- a/type_resolution.cc
+++ b/type_resolution.cc
@@ -37,11 +37,13 @@ namespace {
struct NamedTypes {
NamedTypes(const Graph& graph, Metrics& metrics)
: graph(graph),
- seen(graph.Limit()),
+ seen(Id(0)),
nodes(metrics, "named_types.nodes"),
types(metrics, "named_types.types"),
definitions(metrics, "named_types.definitions"),
- declarations(metrics, "named_types.declarations") {}
+ declarations(metrics, "named_types.declarations") {
+ seen.Reserve(graph.Limit());
+ }
enum class Tag { STRUCT, UNION, ENUM, TYPEDEF };
using Type = std::pair<Tag, std::string>;
@@ -79,9 +81,7 @@ struct NamedTypes {
}
// Graph function implementation
- void operator()(const Void&, Id) {}
-
- void operator()(const Variadic&, Id) {}
+ void operator()(const Special&, Id) {}
void operator()(const PointerReference& x, Id) {
(*this)(x.pointee_type_id);
diff --git a/unification.cc b/unification.cc
index 4421267..b48c2ee 100644
--- a/unification.cc
+++ b/unification.cc
@@ -104,12 +104,9 @@ struct Unifier {
return result && it1 == end1 && it2 == end2;
}
- Winner operator()(const Void&, const Void&) {
- return Right;
- }
-
- Winner operator()(const Variadic&, const Variadic&) {
- return Right;
+ Winner operator()(const Special& x1, const Special& x2) {
+ return x1.kind == x2.kind
+ ? Right : Neither;
}
Winner operator()(const PointerReference& x1,
@@ -160,7 +157,6 @@ struct Unifier {
Winner operator()(const Method& x1, const Method& x2) {
return x1.mangled_name == x2.mangled_name
&& x1.name == x2.name
- && x1.kind == x2.kind
&& x1.vtable_offset == x2.vtable_offset
&& (*this)(x1.type_id, x2.type_id)
? Right : Neither;
diff --git a/unification.h b/unification.h
index 2e20241..3f6ee9a 100644
--- a/unification.h
+++ b/unification.h
@@ -34,9 +34,10 @@ namespace stg {
// destruction.
class Unification {
public:
- Unification(Graph& graph, Metrics& metrics)
+ Unification(Graph& graph, Id start, Metrics& metrics)
: graph_(graph),
- mapping_(graph.Limit()),
+ start_(start),
+ mapping_(start),
metrics_(metrics),
find_query_(metrics, "unification.find_query"),
find_halved_(metrics, "unification.find_halved"),
@@ -56,7 +57,7 @@ class Unification {
Update(id);
};
::stg::Substitute substitute(graph_, remap);
- graph_.ForEach([&](Id id) {
+ graph_.ForEach(start_, graph_.Limit(), [&](Id id) {
if (Find(id) != id) {
graph_.Remove(id);
++removed;
@@ -67,6 +68,10 @@ class Unification {
});
}
+ void Reserve(Id limit) {
+ mapping_.Reserve(limit);
+ }
+
bool Unify(Id id1, Id id2);
Id Find(Id id) {
@@ -111,6 +116,7 @@ class Unification {
private:
Graph& graph_;
+ Id start_;
DenseIdMapping mapping_;
Metrics& metrics_;
Counter find_query_;