diff options
83 files changed, 1899 insertions, 880 deletions
@@ -76,6 +76,7 @@ cc_library_host_static { "elf_reader.cc", "fidelity.cc", "file_descriptor.cc", + "filter.cc", "fingerprint.cc", "graph.cc", "input.cc", @@ -86,7 +87,6 @@ cc_library_host_static { "proto_writer.cc", "reporting.cc", "stable_hash.cc", - "symbol_filter.cc", "stg.proto", "type_normalisation.cc", "type_resolution.cc", diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..38410d0 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License v2.0 with LLVM Exceptions (the +# "License"); you may not use this file except in compliance with the +# License. You may obtain a copy of the License at +# +# https://llvm.org/LICENSE.txt +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Aleksei Vetrov + +cmake_minimum_required(VERSION 3.14) + +project( + stg + VERSION 0.0.1 + LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) + +add_compile_options(-fstrict-enums -Wall -Wextra) + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + # GCC has problems detecting "no-return" switches and destructors. + add_compile_options(-Wno-return-type) +endif() + +# Enable LTO for release builds +set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE) +set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO TRUE) + +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) + +find_package(LibElf REQUIRED) +find_package(LibDw REQUIRED) +find_package(LibXml2 REQUIRED) +find_package(Protobuf REQUIRED) + +set(COMMON_LIBRARIES + LibElf::LibElf + LibDw::LibDw + LibXml2::LibXml2 + protobuf::libprotobuf) + +if(NOT Jemalloc_DISABLE) + find_package(Jemalloc) + if(Jemalloc_FOUND) + list(APPEND COMMON_LIBRARIES Jemalloc::Jemalloc) + else() + message(WARNING "jemalloc significantly improves performance, but is not functionally required to build STG. +Use -DJemalloc_DISABLE=TRUE to disable jemalloc and suppress this warning.") + endif() +endif() + +protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS stg.proto) + +add_library(libstg OBJECT + abigail_reader.cc + btf_reader.cc + comparison.cc + deduplication.cc + dwarf_processor.cc + dwarf_wrappers.cc + elf_loader.cc + elf_reader.cc + fidelity.cc + file_descriptor.cc + filter.cc + fingerprint.cc + graph.cc + input.cc + metrics.cc + naming.cc + post_processing.cc + proto_reader.cc + proto_writer.cc + reporting.cc + stable_hash.cc + type_normalisation.cc + type_resolution.cc + unification.cc + ${PROTO_SRCS} + ${PROTO_HDRS}) +# Needed for generated .pb.h files +target_include_directories(libstg PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_link_libraries(libstg PRIVATE ${COMMON_LIBRARIES}) + +set(STG_EXECUTABLE_TARGETS stg stgdiff stginfo) + +foreach(TARGET IN LISTS STG_EXECUTABLE_TARGETS) + add_executable("${TARGET}" "${TARGET}.cc") + target_link_libraries("${TARGET}" PRIVATE libstg) +endforeach() + +# Installation and packaging + +include(GNUInstallDirs) + +install( + TARGETS ${STG_EXECUTABLE_TARGETS} + COMPONENT Binaries + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") @@ -1,14 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright 2022-2023 Google LLC +# +# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License"); +# you may not use this file except in compliance with the License. You may +# obtain a copy of the License at +# +# https://llvm.org/LICENSE.txt +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# +# Author: Vanessa Sochat +# Author: Aleksei Vetrov + ARG debian_version=stable-slim FROM debian:${debian_version} as builder # docker build -t stg . RUN apt-get update && \ - apt-get install -y build-essential \ - libelf-dev \ - linux-libc-dev \ - libxml2-dev + apt-get install -y \ + build-essential \ + pkg-config \ + cmake \ + libelf-dev \ + libdw-dev \ + libxml2-dev \ + libprotobuf-dev \ + protobuf-compiler \ + libjemalloc-dev WORKDIR /src COPY . /src -RUN make -# uncomment for multistage build -# FROM debian:${debian_version} -# COPY --from=builder /src/stgdiff /usr/local/bin/stgdiff +RUN mkdir -p build && \ + cd build && \ + cmake -DCMAKE_BUILD_TYPE=Release .. && \ + cmake --build . --parallel && \ + cmake --install . --strip +# second stage +FROM debian:${debian_version} +RUN apt-get update && \ + apt-get install -y \ + libc6 \ + libgcc-s1 \ + libstdc++6 \ + libdw1 \ + libelf1 \ + libjemalloc2 \ + libprotobuf32 \ + libxml2 && \ + rm -rf /var/lib/apt/lists/* +COPY --from=builder /usr/local /usr/local diff --git a/Makefile b/Makefile deleted file mode 100644 index 97e641b..0000000 --- a/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -LDLIBS := -lelf -lxml2 -LDFLAGS := -CXXFLAGS := -fstrict-enums -Wall -Wextra -I/usr/include -I/usr/include/libxml2 -std=c++17 - -SRCS := abigail_reader.cc btf_reader.cc elf_loader.cc elf_reader.cc \ - post_processing.cc reporting.cc stg.cc stgdiff.cc -HDRS := abigail_reader.h btf_reader.h crc.h elf_loader.h elf_reader.h \ - error.h id.h order.h post_processing.h reporting.h scc.h stg.h - -OBJS := $(SRCS:.cc=.o) -MAIN := stgdiff - -.PHONY: all - -all: $(MAIN) - -# Conservative header dependencies -$(OBJS): $(HDRS) - -$(MAIN): $(OBJS) - $(LINK.cc) $^ $(LDLIBS) -o $@ - -clean: - rm -f $(OBJS) $(MAIN) @@ -23,10 +23,12 @@ Instructions are included for local and Docker builds. ### Local Build -You can build as follows: +STG can be built with CMake as follows: ```bash -$ make +$ mkdir build && cd build +$ cmake .. +$ cmake --build . --parallel ``` ### Docker Build diff --git a/abigail_reader.cc b/abigail_reader.cc index 7082376..752af0e 100644 --- a/abigail_reader.cc +++ b/abigail_reader.cc @@ -43,6 +43,7 @@ #include <vector> #include <libxml/parser.h> +#include <libxml/tree.h> #include "error.h" #include "file_descriptor.h" #include "graph.h" @@ -118,8 +119,8 @@ std::string GetAttributeOrDie(xmlNodePtr node, const char* name) { } // Set an attribute value. -void SetAttribute(xmlNodePtr node, const char* name, const char* value) { - xmlSetProp(node, ToLibxml(name), ToLibxml(value)); +void SetAttribute(xmlNodePtr node, const char* name, const std::string &value) { + xmlSetProp(node, ToLibxml(name), ToLibxml(value.c_str())); } // Unset an attribute value. @@ -463,6 +464,59 @@ void StripReachabilityAttributes(xmlNodePtr node) { } } +// Fix bad DWARF -> ELF links caused by size zero symbol confusion. +// +// libabigail used to be confused by these sorts of symbols, resulting in +// declarations pointing at the wrong ELF symbols: +// +// 573623: ffffffc0122383c0 256 OBJECT GLOBAL DEFAULT 33 vm_node_stat +// 573960: ffffffc0122383c0 0 OBJECT GLOBAL DEFAULT 33 vm_numa_stat +void FixBadDwarfElfLinks(xmlNodePtr root) { + std::unordered_map<std::string, size_t> elf_links; + + // See which ELF symbol IDs have multiple declarations. + const std::function<void(xmlNodePtr)> count = [&](xmlNodePtr node) { + if (GetName(node) == "var-decl") { + const auto symbol_id = GetAttribute(node, "elf-symbol-id"); + if (symbol_id) { + ++elf_links[symbol_id.value()]; + } + } + + for (auto* child = Child(node); child; child = Next(child)) { + count(child); + } + }; + count(root); + + // Fix up likely bad links from DWARF declaration to ELF symbol. + const std::function<void(xmlNodePtr)> fix = [&](xmlNodePtr node) { + if (GetName(node) == "var-decl") { + const auto name = GetAttributeOrDie(node, "name"); + const auto mangled_name = GetAttribute(node, "mangled-name"); + const auto symbol_id = GetAttribute(node, "elf-symbol-id"); + if (mangled_name && symbol_id && name != symbol_id.value() + && elf_links[symbol_id.value()] > 1) { + if (mangled_name.value() == name) { + Warn() << "fixing up ELF symbol for '" << name + << "' (was '" << symbol_id.value() << "')"; + SetAttribute(node, "elf-symbol-id", name); + } else if (mangled_name.value() == symbol_id.value()) { + Warn() << "fixing up mangled name and ELF symbol for '" << name + << "' (was '" << symbol_id.value() << "')"; + SetAttribute(node, "mangled-name", name); + SetAttribute(node, "elf-symbol-id", name); + } + } + } + + for (auto* child = Child(node); child; child = Next(child)) { + fix(child); + } + }; + fix(root); +} + // Tidy anonymous types in various ways. // // 1. Normalise anonymous type names by dropping the name attribute. @@ -506,8 +560,8 @@ void TidyAnonymousTypes(xmlNodePtr node) { } } -// Remove duplicate data members. -void RemoveDuplicateDataMembers(xmlNodePtr root) { +// Remove duplicate members. +void RemoveDuplicateMembers(xmlNodePtr root) { std::vector<xmlNodePtr> types; // find all structs and unions @@ -524,28 +578,28 @@ void RemoveDuplicateDataMembers(xmlNodePtr root) { dfs(root); for (const auto& node : types) { - // filter data members - std::vector<xmlNodePtr> data_members; + // partition members by node name + std::map<std::string_view, std::vector<xmlNodePtr>> member_map; for (auto* child = Child(node); child; child = Next(child)) { - if (GetName(child) == "data-member") { - data_members.push_back(child); - } + member_map[GetName(child)].push_back(child); } - // remove identical duplicate data members - O(n^2) - for (size_t i = 0; i < data_members.size(); ++i) { - xmlNodePtr& i_node = data_members[i]; - bool duplicate = false; - for (size_t j = 0; j < i; ++j) { - const xmlNodePtr& j_node = data_members[j]; - if (j_node != nullptr && EqualTree(i_node, j_node)) { - duplicate = true; - break; + // for each kind of member... + for (auto& [name, members] : member_map) { + // ... remove identical duplicate members - O(n^2) + for (size_t i = 0; i < members.size(); ++i) { + xmlNodePtr& i_node = members[i]; + bool duplicate = false; + for (size_t j = 0; j < i; ++j) { + const xmlNodePtr& j_node = members[j]; + if (j_node != nullptr && EqualTree(i_node, j_node)) { + duplicate = true; + break; + } + } + if (duplicate) { + RemoveNode(i_node); + i_node = nullptr; } - } - if (duplicate) { - Warn() << "found duplicate data-member"; - RemoveNode(i_node); - i_node = nullptr; } } } @@ -702,13 +756,16 @@ namespace { // Transform XML elements to improve their semantics. void Tidy(xmlNodePtr root) { + // Fix bad ELF symbol links + FixBadDwarfElfLinks(root); + // Normalise anonymous type names. // Reanonymise anonymous types. // Discard naming typedef backlinks. TidyAnonymousTypes(root); - // Remove duplicate data members. - RemoveDuplicateDataMembers(root); + // Remove duplicate members. + RemoveDuplicateMembers(root); // Eliminate complete duplicates and extra fragments of types. // Report conflicting duplicate defintions. @@ -751,7 +808,7 @@ Id Abigail::GetEdge(xmlNodePtr element) { Id Abigail::GetVariadic() { if (!variadic_) { - variadic_ = {graph_.Add<Variadic>()}; + variadic_ = {graph_.Add<Special>(Special::Kind::VARIADIC)}; } return *variadic_; } @@ -1033,7 +1090,7 @@ void Abigail::ProcessTypeDecl(Id id, xmlNodePtr type_decl) { const auto bytes = bits / 8; if (name == "void") { - graph_.Set<Void>(id); + graph_.Set<Special>(id, Special::Kind::VOID); } else { // libabigail doesn't model encoding at all and we don't want to parse names // (which will not always work) in an attempt to reconstruct it. @@ -1079,7 +1136,7 @@ void Abigail::ProcessStructUnion(Id id, bool is_struct, } else if (child_name == "base-class") { base_classes.push_back(ProcessBaseClass(child)); } else if (child_name == "member-function") { - methods.push_back(ProcessMemberFunction(child)); + ProcessMemberFunction(methods, child); } else { Die() << "unrecognised " << kind << "-decl child element '" << child_name << "'"; @@ -1148,20 +1205,20 @@ std::optional<Id> Abigail::ProcessDataMember(bool is_struct, return {graph_.Add<Member>(name, type, offset, 0)}; } -Id Abigail::ProcessMemberFunction(xmlNodePtr method) { +void Abigail::ProcessMemberFunction(std::vector<Id>& methods, + xmlNodePtr method) { xmlNodePtr decl = GetOnlyChild(method); CheckName("function-decl", decl); - static const std::string missing = "{missing}"; - const auto mangled_name = ReadAttribute(decl, "mangled-name", missing); - const auto name = GetAttributeOrDie(decl, "name"); + // ProcessDecl creates symbol references so must be called unconditionally. const auto type = ProcessDecl(false, decl); const auto vtable_offset = ReadAttribute<uint64_t>(method, "vtable-offset"); - const auto kind = vtable_offset - ? Method::Kind::VIRTUAL - : ReadAttribute<bool>(method, "static", false) - ? Method::Kind::STATIC - : Method::Kind::NON_VIRTUAL; - return graph_.Add<Method>(mangled_name, name, kind, vtable_offset, type); + if (vtable_offset) { + static const std::string missing = "{missing}"; + const auto mangled_name = ReadAttribute(decl, "mangled-name", missing); + const auto name = GetAttributeOrDie(decl, "name"); + methods.push_back( + graph_.Add<Method>(mangled_name, name, vtable_offset.value(), type)); + } } void Abigail::ProcessMemberType(xmlNodePtr member_type) { diff --git a/abigail_reader.h b/abigail_reader.h index 90c2dc1..21ada8d 100644 --- a/abigail_reader.h +++ b/abigail_reader.h @@ -137,7 +137,7 @@ class Abigail { Id ProcessBaseClass(xmlNodePtr base_class); std::optional<Id> ProcessDataMember(bool is_struct, xmlNodePtr data_member); - Id ProcessMemberFunction(xmlNodePtr method); + void ProcessMemberFunction(std::vector<Id>& methods, xmlNodePtr method); void ProcessMemberType(xmlNodePtr member_type); Id BuildSymbol(const SymbolInfo& info, diff --git a/abigail_reader_test.cc b/abigail_reader_test.cc index e1b191d..930342f 100644 --- a/abigail_reader_test.cc +++ b/abigail_reader_test.cc @@ -161,6 +161,11 @@ struct TidyTestCase { TEST_CASE("Tidy") { const auto test = GENERATE( TidyTestCase( + {"bad DWARF ELF link", + {"abigail_bad_elf_dwarf_link_0.xml", + "abigail_bad_elf_dwarf_link_1.xml", + "abigail_bad_elf_dwarf_link_2.xml"}}), + TidyTestCase( {"anonymous type normalisation", {"abigail_anonymous_types_0.xml", "abigail_anonymous_types_1.xml", diff --git a/btf_reader.cc b/btf_reader.cc index 5fd3921..991d530 100644 --- a/btf_reader.cc +++ b/btf_reader.cc @@ -86,7 +86,7 @@ Structs::Structs(Graph& graph, const bool verbose) // Get the index of the void type, creating one if needed. Id Structs::GetVoid() { if (!void_) { - void_ = {graph_.Add<Void>()}; + void_ = {graph_.Add<Special>(Special::Kind::VOID)}; } return *void_; } @@ -94,7 +94,7 @@ Id Structs::GetVoid() { // Get the index of the variadic parameter type, creating one if needed. Id Structs::GetVariadic() { if (!variadic_) { - variadic_ = {graph_.Add<Variadic>()}; + variadic_ = {graph_.Add<Special>(Special::Kind::VARIADIC)}; } return *variadic_; } @@ -253,9 +253,9 @@ Id Structs::BuildTypes(MemoryRange memory) { std::cout << "Type section:\n"; } - // Alas, BTF overloads type id 0 to mean both Void (for everything but - // function parameters) and Variadic (for function parameters). We determine - // which is intended and create Void and Variadic types on demand. + // Alas, BTF overloads type id 0 to mean both void (for everything but + // function parameters) and variadic (for function parameters). We determine + // which is intended and create void and variadic types on demand. // The type section is parsed sequentially and each type's index is its id. uint32_t btf_index = 1; diff --git a/cmake/FindJemalloc.cmake b/cmake/FindJemalloc.cmake new file mode 100644 index 0000000..0873cc0 --- /dev/null +++ b/cmake/FindJemalloc.cmake @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License"); +# you may not use this file except in compliance with the License. You may +# obtain a copy of the License at +# +# https://llvm.org/LICENSE.txt +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# +# Author: Aleksei Vetrov + +#[=======================================================================[.rst: +FindJemalloc +--------- + +Finds the jemalloc library. + +Imported Targets +^^^^^^^^^^^^^^^^ + +This module provides the following imported targets, if found: + +``Jemalloc::Jemalloc`` + The Jemalloc library + +Result Variables +^^^^^^^^^^^^^^^^ + +This will define the following variables: + +``Jemalloc_FOUND`` + True if the system has the Jemalloc library. +``Jemalloc_VERSION`` + The version of the Jemalloc library which was found. +``Jemalloc_INCLUDE_DIRS`` + Include directories needed to use Jemalloc. +``Jemalloc_LIBRARIES`` + Libraries needed to link to Jemalloc. +``Jemalloc_DEFINITIONS`` + the compiler switches required for using Jemalloc + +Cache Variables +^^^^^^^^^^^^^^^ + +The following cache variables may also be set: + +``Jemalloc_INCLUDE_DIR`` + The directory containing ``jemalloc.h``. +``Jemalloc_LIBRARY`` + The path to the ``libjemalloc.so``. + +#]=======================================================================] + +find_package(PkgConfig) +pkg_check_modules(PC_Jemalloc QUIET jemalloc) + +find_library( + Jemalloc_LIBRARY + NAMES jemalloc + HINTS ${PC_Jemalloc_LIBDIR} ${PC_Jemalloc_LIBRARY_DIRS}) +# Try the value from user if the library is not found. +if(DEFINED Jemalloc_LIBRARIES AND NOT DEFINED Jemalloc_LIBRARY) + set(Jemalloc_LIBRARY ${Jemalloc_LIBRARIES}) +endif() +mark_as_advanced(Jemalloc_LIBRARY) + +find_path( + Jemalloc_INCLUDE_DIR + NAMES jemalloc.h + PATH_SUFFIXES jemalloc + HINTS ${PC_Jemalloc_INCLUDEDIR} ${PC_Jemalloc_INCLUDE_DIRS}) +# Try the value from user if the library is not found. +if(DEFINED Jemalloc_INCLUDE_DIRS AND NOT DEFINED Jemalloc_INCLUDE_DIR) + set(Jemalloc_INCLUDE_DIR ${Jemalloc_INCLUDE_DIRS}) +endif() +mark_as_advanced(Jemalloc_INCLUDE_DIR) + +set(Jemalloc_VERSION ${PC_Jemalloc_VERSION}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + Jemalloc + REQUIRED_VARS Jemalloc_LIBRARY Jemalloc_INCLUDE_DIR + VERSION_VAR Jemalloc_VERSION) + +if(Jemalloc_FOUND) + set(Jemalloc_LIBRARIES ${Jemalloc_LIBRARY}) + set(Jemalloc_INCLUDE_DIRS ${Jemalloc_INCLUDE_DIR}) + set(Jemalloc_DEFINITIONS ${PC_Jemalloc_CFLAGS_OTHER}) +endif() + +if(Jemalloc_FOUND AND NOT TARGET Jemalloc::Jemalloc) + add_library(Jemalloc::Jemalloc UNKNOWN IMPORTED) + set_target_properties( + Jemalloc::Jemalloc + PROPERTIES IMPORTED_LOCATION "${Jemalloc_LIBRARY}" + INTERFACE_COMPILE_OPTIONS "${PC_Jemalloc_CFLAGS_OTHER}" + INTERFACE_INCLUDE_DIRECTORIES "${Jemalloc_INCLUDE_DIR}") +endif() diff --git a/cmake/FindLibDw.cmake b/cmake/FindLibDw.cmake new file mode 100644 index 0000000..352d672 --- /dev/null +++ b/cmake/FindLibDw.cmake @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License"); +# you may not use this file except in compliance with the License. You may +# obtain a copy of the License at +# +# https://llvm.org/LICENSE.txt +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# +# Author: Aleksei Vetrov + +#[=======================================================================[.rst: +FindLibDw +--------- + +Finds the DWARF processing library (libdw). + +Imported Targets +^^^^^^^^^^^^^^^^ + +This module provides the following imported targets, if found: + +``LibDw::LibDw`` + The LibDw library + +Result Variables +^^^^^^^^^^^^^^^^ + +This will define the following variables: + +``LibDw_FOUND`` + True if the system has the LibDw library. +``LibDw_VERSION`` + The version of the LibDw library which was found. +``LibDw_INCLUDE_DIRS`` + Include directories needed to use LibDw. +``LibDw_LIBRARIES`` + Libraries needed to link to LibDw. +``LibDw_DEFINITIONS`` + the compiler switches required for using LibDw + +Cache Variables +^^^^^^^^^^^^^^^ + +The following cache variables may also be set: + +``LibDw_INCLUDE_DIR`` + The directory containing ``dwarf.h``. +``LibDw_LIBRARY`` + The path to the ``libdw.so``. + +#]=======================================================================] + +find_package(PkgConfig) +pkg_check_modules(PC_LibDw QUIET libdw) + +find_library( + LibDw_LIBRARY + NAMES dw + HINTS ${PC_LibDw_LIBDIR} ${PC_LibDw_LIBRARY_DIRS}) +# Try the value from user if the library is not found. +if(DEFINED LibDw_LIBRARIES AND NOT DEFINED LibDw_LIBRARY) + set(LibDw_LIBRARY ${LibDw_LIBRARIES}) +endif() +mark_as_advanced(LibDw_LIBRARY) + +find_path( + LibDw_INCLUDE_DIR + NAMES dwarf.h + HINTS ${PC_LibDw_INCLUDEDIR} ${PC_LibDw_INCLUDE_DIRS}) +# Try the value from user if the library is not found. +if(DEFINED LibDw_INCLUDE_DIRS AND NOT DEFINED LibDw_INCLUDE_DIR) + set(LibDw_INCLUDE_DIR ${LibDw_INCLUDE_DIRS}) +endif() +mark_as_advanced(LibDw_INCLUDE_DIR) + +set(LibDw_VERSION ${PC_LibDw_VERSION}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + LibDw + REQUIRED_VARS LibDw_LIBRARY LibDw_INCLUDE_DIR + VERSION_VAR LibDw_VERSION) + +if(LibDw_FOUND) + set(LibDw_LIBRARIES ${LibDw_LIBRARY}) + set(LibDw_INCLUDE_DIRS ${LibDw_INCLUDE_DIR}) + set(LibDw_DEFINITIONS ${PC_LibDw_CFLAGS_OTHER}) +endif() + +if(LibDw_FOUND AND NOT TARGET LibDw::LibDw) + add_library(LibDw::LibDw UNKNOWN IMPORTED) + set_target_properties( + LibDw::LibDw + PROPERTIES IMPORTED_LOCATION "${LibDw_LIBRARY}" + INTERFACE_COMPILE_OPTIONS "${PC_LibDw_CFLAGS_OTHER}" + INTERFACE_INCLUDE_DIRECTORIES "${LibDw_INCLUDE_DIR}") +endif() diff --git a/cmake/FindLibElf.cmake b/cmake/FindLibElf.cmake new file mode 100644 index 0000000..35872c9 --- /dev/null +++ b/cmake/FindLibElf.cmake @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License"); +# you may not use this file except in compliance with the License. You may +# obtain a copy of the License at +# +# https://llvm.org/LICENSE.txt +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# +# Author: Aleksei Vetrov + +#[=======================================================================[.rst: +FindLibElf +---------- + +Finds the ELF processing library (libelf). + +Imported Targets +^^^^^^^^^^^^^^^^ + +This module provides the following imported targets, if found: + +``LibElf::LibElf`` + The LibElf library + +Result Variables +^^^^^^^^^^^^^^^^ + +This will define the following variables: + +``LibElf_FOUND`` + True if the system has the LibElf library. +``LibElf_VERSION`` + The version of the LibElf library which was found. +``LibElf_INCLUDE_DIRS`` + Include directories needed to use LibElf. +``LibElf_LIBRARIES`` + Libraries needed to link to LibElf. +``LibElf_DEFINITIONS`` + the compiler switches required for using LibElf + +Cache Variables +^^^^^^^^^^^^^^^ + +The following cache variables may also be set: + +``LibElf_INCLUDE_DIR`` + The directory containing ``libelf.h``. +``LibElf_LIBRARY`` + The path to the ``libelf.so``. + +#]=======================================================================] + +find_package(PkgConfig) +pkg_check_modules(PC_LibElf QUIET libelf) + +find_library( + LibElf_LIBRARY + NAMES elf + HINTS ${PC_LibElf_LIBDIR} ${PC_LibElf_LIBRARY_DIRS}) +# Try the value from user if the library is not found. +if(DEFINED LibElf_LIBRARIES AND NOT DEFINED LibElf_LIBRARY) + set(LibElf_LIBRARY ${LibElf_LIBRARIES}) +endif() +mark_as_advanced(LibElf_LIBRARY) + +find_path( + LibElf_INCLUDE_DIR + NAMES libelf.h + HINTS ${PC_LibElf_INCLUDEDIR} ${PC_LibElf_INCLUDE_DIRS}) +# Try the value from user if the library is not found. +if(DEFINED LibElf_INCLUDE_DIRS AND NOT DEFINED LibElf_INCLUDE_DIR) + set(LibElf_INCLUDE_DIR ${LibElf_INCLUDE_DIRS}) +endif() +mark_as_advanced(LibElf_INCLUDE_DIR) + +set(LibElf_VERSION ${PC_LibElf_VERSION}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + LibElf + REQUIRED_VARS LibElf_LIBRARY LibElf_INCLUDE_DIR + VERSION_VAR LibElf_VERSION) + +if(LibElf_FOUND) + set(LibElf_LIBRARIES ${LibElf_LIBRARY}) + set(LibElf_INCLUDE_DIRS ${LibElf_INCLUDE_DIR}) + set(LibElf_DEFINITIONS ${PC_LibElf_CFLAGS_OTHER}) +endif() + +if(LibElf_FOUND AND NOT TARGET LibElf::LibElf) + add_library(LibElf::LibElf UNKNOWN IMPORTED) + set_target_properties( + LibElf::LibElf + PROPERTIES IMPORTED_LOCATION "${LibElf_LIBRARY}" + INTERFACE_COMPILE_OPTIONS "${PC_LibElf_CFLAGS_OTHER}" + INTERFACE_INCLUDE_DIRECTORIES "${LibElf_INCLUDE_DIR}") +endif() diff --git a/comparison.cc b/comparison.cc index 5d94350..210cc26 100644 --- a/comparison.cc +++ b/comparison.cc @@ -44,15 +44,16 @@ struct IgnoreDescriptor { Ignore::Value value; }; -static constexpr std::array<IgnoreDescriptor, 8> kIgnores{{ - {"type_declaration_status", Ignore::TYPE_DECLARATION_STATUS}, - {"symbol_type_presence", Ignore::SYMBOL_TYPE_PRESENCE }, - {"primitive_type_encoding", Ignore::PRIMITIVE_TYPE_ENCODING}, - {"member_size", Ignore::MEMBER_SIZE }, - {"enum_underlying_type", Ignore::ENUM_UNDERLYING_TYPE }, - {"qualifier", Ignore::QUALIFIER }, - {"interface_addition", Ignore::INTERFACE_ADDITION }, - {"linux_symbol_crc", Ignore::SYMBOL_CRC }, +static constexpr std::array<IgnoreDescriptor, 9> kIgnores{{ + {"type_declaration_status", Ignore::TYPE_DECLARATION_STATUS }, + {"symbol_type_presence", Ignore::SYMBOL_TYPE_PRESENCE }, + {"primitive_type_encoding", Ignore::PRIMITIVE_TYPE_ENCODING }, + {"member_size", Ignore::MEMBER_SIZE }, + {"enum_underlying_type", Ignore::ENUM_UNDERLYING_TYPE }, + {"qualifier", Ignore::QUALIFIER }, + {"linux_symbol_crc", Ignore::SYMBOL_CRC }, + {"interface_addition", Ignore::INTERFACE_ADDITION }, + {"type_definition_addition", Ignore::TYPE_DEFINITION_ADDITION }, }}; std::optional<Ignore::Value> ParseIgnore(std::string_view ignore) { @@ -227,12 +228,12 @@ Result Compare::Mismatch() { return Result().MarkIncomparable(); } -Result Compare::operator()(const Void&, const Void&) { - return {}; -} - -Result Compare::operator()(const Variadic&, const Variadic&) { - return {}; +Result Compare::operator()(const Special& x1, const Special& x2) { + Result result; + if (x1.kind != x2.kind) { + return result.MarkIncomparable(); + } + return result; } Result Compare::operator()(const PointerReference& x1, @@ -289,12 +290,13 @@ Result Compare::operator()(const Array& x1, const Array& x2) { return result; } +// return whether to continue comparing both definitions bool Compare::CompareDefined(bool defined1, bool defined2, Result& result) { - if (defined1 && defined2) { - return true; + if (defined1 == defined2) { + return defined1; } - const bool ignore_diff = ignore.Test(Ignore::TYPE_DECLARATION_STATUS); - if (!ignore_diff && defined1 != defined2) { + if (!ignore.Test(Ignore::TYPE_DECLARATION_STATUS) + && !(ignore.Test(Ignore::TYPE_DEFINITION_ADDITION) && defined2)) { std::ostringstream os; os << "was " << (defined1 ? "fully defined" : "only declared") << ", is now " << (defined2 ? "fully defined" : "only declared"); @@ -450,7 +452,6 @@ Result Compare::operator()(const Member& x1, const Member& x2) { Result Compare::operator()(const Method& x1, const Method& x2) { Result result; - result.MaybeAddNodeDiff("kind", x1.kind, x2.kind); result.MaybeAddNodeDiff("vtable offset", x1.vtable_offset, x2.vtable_offset); result.MaybeAddEdgeDiff("", (*this)(x1.type_id, x2.type_id)); return result; diff --git a/comparison.h b/comparison.h index fc66580..b73cdc0 100644 --- a/comparison.h +++ b/comparison.h @@ -45,14 +45,17 @@ namespace stg { struct Ignore { enum Value { + // noise reduction SYMBOL_TYPE_PRESENCE = 1<<0, TYPE_DECLARATION_STATUS = 1<<1, PRIMITIVE_TYPE_ENCODING = 1<<2, MEMBER_SIZE = 1<<3, ENUM_UNDERLYING_TYPE = 1<<4, QUALIFIER = 1<<5, - INTERFACE_ADDITION = 1<<6, - SYMBOL_CRC = 1<<7, + SYMBOL_CRC = 1<<6, + // ABI compatibility testing + INTERFACE_ADDITION = 1<<7, + TYPE_DEFINITION_ADDITION = 1<<8, }; using Bitset = std::underlying_type_t<Value>; @@ -271,8 +274,7 @@ struct Compare { bool CompareDefined(bool defined1, bool defined2, Result& result); Result Mismatch(); - Result operator()(const Void&, const Void&); - Result operator()(const Variadic&, const Variadic&); + Result operator()(const Special&, const Special&); Result operator()(const PointerReference&, const PointerReference&); Result operator()(const PointerToMember&, const PointerToMember&); Result operator()(const Typedef&, const Typedef&); @@ -14,12 +14,13 @@ stg [-i|--info] [-d|--keep-duplicates] [-t|--types] - [-S|--symbols <filter>] + [-F|--file-filter <filter>] + [-S|--symbols|--symbol-filter <filter>] [--skip-dwarf] [-a|--abi|-b|--btf|-e|--elf|-s|--stg] [file] ... [{-o|--output} {filename|-}] ... implicit defaults: --abi -symbol filter syntax: +filter syntax: <filter> ::= <term> | <expression> '|' <term> <term> ::= <factor> | <term> '&' <factor> <factor> ::= <atom> | '!' <factor> @@ -89,16 +90,41 @@ Symbols must be disjoint across all inputs. ### Types -Merging is not yet supported with type roots. +If duplicate type roots are found during merge, they are unified. If unification +fails, the merge fails. + +Unification is a process which replaces references to forward declarations of +types with references to full definitions, if that would result in equal types. ## Filter -If a symbol filter is supplied, symbols not matching the filter are dropped. +There are two types of filters that can be applied to STG output: + +1. `-F|--file-filter <filter>` + + Filter type definitions by source location. + + DWARF information includes type declaration source locations. If a struct, + union, class or enum is defined in a file whose name does not match the + filter, then its definition (layout, members etc.) is dropped, leaving only + a declaration. + + File filters are only applicable to ELF binary objects containing DWARF with + source location information; any other kind of input will be unaffected. + +2. `-S|--symbols|--symbol-filter <filter>` + + Filter ELF symbols by name (which may include a `@version` or `@@version` + suffix). + + If a symbol filter is supplied, symbols not matching the filter are dropped. + Symbol filtering is universal across all input formats as it happens after + input processing. The basic syntactical elements are: * `glob` - a glob pattern matching symbol names -* `:filename` - the name of a file containing a libabigail format symbol list +* `:filename` - the name of a file containing a libabigail format filter list Filter expressions can be combined with infix disjuction (`|`) and conjunction (`&`) operators and negated with the prefix (`!`) operator; these obey the usual @@ -107,6 +133,8 @@ Whitespace is not significant, except as a string delimiter. ### Examples +Symbol filters: + * `jiffies |panic` - keep just the symbols `jiffies` and `panic` * `str*` - keep symbols beginning with `str` such as `strncpy_from_user` * `!(*@* & ! *@@*`) - drop versioned symbols that are not the default versions @@ -114,6 +142,11 @@ Whitespace is not significant, except as a string delimiter. * `:include & !:exclude ` - keep symbols that are in the symbol list file `include` but not in the symbol list file `exclude` +File filters: + +* `!*.cc` - exclude all type definitions found in source files named `*.cc` +* `*.h` - include only type definitions found in source files named `*.h` + ## Deduplication ABI representations, particularly merged ones, almost always have some sets of diff --git a/doc/stgdiff.md b/doc/stgdiff.md index b3e7789..d0c89ab 100644 --- a/doc/stgdiff.md +++ b/doc/stgdiff.md @@ -20,7 +20,7 @@ stgdiff implicit defaults: --abi --format plain --exact (node equality) cannot be combined with --output output formats: plain flat small short viz -ignore options: type_declaration_status symbol_type_presence primitive_type_encoding member_size enum_underlying_type qualifier interface_addition linux_symbol_crc +ignore options: type_declaration_status symbol_type_presence primitive_type_encoding member_size enum_underlying_type qualifier linux_symbol_crc interface_addition type_definition_addition ``` ## Input @@ -114,17 +114,24 @@ in how much (DWARF) information they preserve. Ignore qualifiers during comparison. Both libabigail and STG interpret and adjust type qualifiers but sometimes do so differently. -* `interface_addition` - - Ignore interface additions during comparison. This can be useful for ABI - comparisons where symbol / type additions are allowed. - * `linux_symbol_crc` Ignore Linux kernel symbol CRC changes during comparison. This can be useful for ABI comparisons across different toolchains, where CRC changes are often large and not useful. +These two options can be used for ABI compatibility testing where the first ABI +is expected to be a subset of the second. + +* `interface_addition` + + Ignore interface (symbol and type root) additions during comparison. + +* `type_definition_addition` + + Ignore type definition additions during comparison. Any extra symbol and + type roots may reach extra definitions of existing types. + ### Fidelity Reporting * `-F|--fidelity` diff --git a/dwarf_processor.cc b/dwarf_processor.cc index 46f433f..bd106af 100644 --- a/dwarf_processor.cc +++ b/dwarf_processor.cc @@ -24,17 +24,19 @@ #include <algorithm> #include <cstddef> -#include <ios> -#include <iostream> +#include <cstdint> +#include <memory> #include <optional> #include <sstream> #include <string> +#include <string_view> #include <unordered_map> #include <utility> #include <vector> #include "dwarf_wrappers.h" #include "error.h" +#include "filter.h" #include "graph.h" #include "scope.h" @@ -140,8 +142,9 @@ size_t GetNumberOfElements(Entry& entry) { << "Non-zero DW_AT_lower_bound is not supported"; std::optional<size_t> upper_bound_optional = entry.MaybeGetUnsignedConstant(DW_AT_upper_bound); - std::optional<size_t> number_of_elements_optional = - entry.MaybeGetUnsignedConstant(DW_AT_count); + // Don't fail if DW_AT_count is not a constant and treat this as no count + // provided. This can happen if array has variable length. + std::optional<size_t> number_of_elements_optional = entry.MaybeGetCount(); if (upper_bound_optional && number_of_elements_optional) { Die() << "Both DW_AT_upper_bound and DW_AT_count given"; } else if (upper_bound_optional) { @@ -259,11 +262,13 @@ size_t GetDataBitOffset(Entry& entry, size_t bit_size, class Processor { public: Processor(Graph& graph, Id void_id, Id variadic_id, - bool is_little_endian_binary, Types& result) + bool is_little_endian_binary, + const std::unique_ptr<Filter>& file_filter, Types& result) : graph_(graph), void_id_(void_id), variadic_id_(variadic_id), is_little_endian_binary_(is_little_endian_binary), + file_filter_(file_filter), result_(result) {} void Process(Entry& entry) { @@ -303,6 +308,9 @@ class Processor { case DW_TAG_ptr_to_member_type: ProcessPointerToMember(entry); break; + case DW_TAG_unspecified_type: + ProcessUnspecifiedType(entry); + break; case DW_TAG_compile_unit: ProcessCompileUnit(entry); break; @@ -326,7 +334,10 @@ class Processor { ProcessReference<Qualified>(entry, Qualifier::ATOMIC); break; case DW_TAG_variable: - ProcessVariable(entry); + // Process only variables visible externally + if (entry.GetFlag(DW_AT_external)) { + ProcessVariable(entry); + } break; case DW_TAG_subroutine_type: // Standalone function type, for example, used in function pointers. @@ -337,8 +348,7 @@ class Processor { ProcessFunction(entry); break; case DW_TAG_namespace: - // TODO: handle scopes - ProcessAllChildren(entry); + ProcessNamespace(entry); break; default: @@ -390,6 +400,15 @@ class Processor { } void ProcessCompileUnit(Entry& entry) { + if (file_filter_ != nullptr) { + files_ = dwarf::Files(entry); + } + ProcessAllChildren(entry); + } + + void ProcessNamespace(Entry& entry) { + auto name = GetNameOrEmpty(entry); + const PushScopeName push_scope_name(scope_, "namespace", name); ProcessAllChildren(entry); } @@ -427,21 +446,37 @@ class Processor { pointee_type_id); } + void ProcessUnspecifiedType(Entry& entry) { + const std::string type_name = GetName(entry); + Check(type_name == "decltype(nullptr)") + << "Unsupported DW_TAG_unspecified_type: " << type_name; + AddProcessedNode<Special>(entry, Special::Kind::NULLPTR); + } + + bool ShouldKeepDefinition(Entry& entry, const std::string& name) const { + if (file_filter_ == nullptr) { + return true; + } + const auto file = files_.MaybeGetFile(entry, DW_AT_decl_file); + if (!file) { + // Built in types that do not have DW_AT_decl_file should be preserved. + static constexpr std::string_view kBuiltinPrefix = "__"; + // TODO: use std::string_view::starts_with + if (name.substr(0, kBuiltinPrefix.size()) == kBuiltinPrefix) { + return true; + } + Die() << "File filter is provided, but DWARF entry << " + << EntryToString(entry) << " << doesn't have DW_AT_decl_file"; + } + return (*file_filter_)(*file); + } + void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) { std::string name = GetNameOrEmpty(entry); const std::string full_name = name.empty() ? std::string() : scope_ + name; const PushScopeName push_scope_name(scope_, kind, name); - if (entry.GetFlag(DW_AT_declaration)) { - // It is expected to have only name and no children in declaration. - // However, it is not guaranteed and we should do something if we find an - // example. - CheckNoChildren(entry); - AddProcessedNode<StructUnion>(entry, kind, full_name); - return; - } - - auto byte_size = GetByteSize(entry); + std::vector<Id> base_classes; std::vector<Id> members; std::vector<Id> methods; @@ -450,15 +485,21 @@ class Processor { // All possible children of struct/class/union switch (child_tag) { case DW_TAG_member: - members.push_back(GetIdForEntry(child)); - ProcessMember(child); + if (child.GetFlag(DW_AT_external)) { + // static members are interpreted as variables and not included in + // members. + ProcessVariable(child); + } else { + members.push_back(GetIdForEntry(child)); + ProcessMember(child); + } break; case DW_TAG_subprogram: - methods.push_back(GetIdForEntry(child)); - ProcessMethod(child); + ProcessMethod(methods, child); break; case DW_TAG_inheritance: - // TODO: process base classes + base_classes.push_back(GetIdForEntry(child)); + ProcessBaseClass(child); break; case DW_TAG_structure_type: case DW_TAG_class_type: @@ -470,19 +511,30 @@ class Processor { break; case DW_TAG_template_type_parameter: case DW_TAG_template_value_parameter: + case DW_TAG_GNU_template_template_param: + case DW_TAG_GNU_template_parameter_pack: // We just skip these as neither GCC nor Clang seem to use them // properly (resulting in no references to such DIEs). break; default: - Die() << "Unexpected tag for child of struct/class/union: 0x" - << std::hex << child_tag; + Die() << "Unexpected tag for child of struct/class/union: " + << Hex(child_tag); } } - // TODO: support base classes + if (entry.GetFlag(DW_AT_declaration) || + !ShouldKeepDefinition(entry, name)) { + // Declaration may have partial information about members or method. + // We only need to parse children for information that will be needed in + // complete definition, but don't need to store them in incomplete node. + AddProcessedNode<StructUnion>(entry, kind, full_name); + return; + } + + const auto byte_size = GetByteSize(entry); + const Id id = AddProcessedNode<StructUnion>( - entry, kind, full_name, byte_size, - /* base_classes = */ std::vector<Id>{}, + entry, kind, full_name, byte_size, std::move(base_classes), std::move(methods), std::move(members)); if (!full_name.empty()) { AddNamedTypeNode(id); @@ -504,7 +556,7 @@ class Processor { GetDataBitOffset(entry, bit_size, is_little_endian_binary_), bit_size); } - void ProcessMethod(Entry& entry) { + void ProcessMethod(std::vector<Id>& methods, Entry& entry) { Subprogram subprogram = GetSubprogram(entry); auto id = graph_.Add<Function>(std::move(subprogram.node)); if (subprogram.external && subprogram.address) { @@ -518,22 +570,47 @@ class Processor { .address = *subprogram.address, .id = id}); } + const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality) + .value_or(DW_VIRTUALITY_none); + if (virtuality == DW_VIRTUALITY_virtual || + virtuality == DW_VIRTUALITY_pure_virtual) { + if (!subprogram.name_with_context.unscoped_name) { + Die() << "Method " << EntryToString(entry) << " should have name"; + } + if (subprogram.name_with_context.specification) { + Die() << "Method " << EntryToString(entry) + << " shouldn't have specification"; + } + const auto vtable_offset = entry.MaybeGetVtableOffset(); + if (!vtable_offset) { + Die() << "Virtual method " << EntryToString(entry) + << " should have offset"; + } + // TODO: proper handling of missing linkage name + methods.push_back(AddProcessedNode<Method>( + entry, subprogram.linkage_name.value_or("{missing}"), + *subprogram.name_with_context.unscoped_name, *vtable_offset, id)); + } + } - // TODO: support kind - const Method::Kind kind = Method::Kind::NON_VIRTUAL; - // TODO: support vtable_offset - if (!subprogram.name_with_context.unscoped_name) { - Die() << "Method " << EntryToString(entry) << " should have name"; + void ProcessBaseClass(Entry& entry) { + const auto type_id = GetIdForReferredType(GetReferredType(entry)); + const auto byte_offset = entry.MaybeGetMemberByteOffset(); + if (!byte_offset) { + Die() << "No offset found for base class " << EntryToString(entry); } - if (subprogram.name_with_context.specification) { - Die() << "Method " << EntryToString(entry) - << " shouldn't have specification"; + const auto bit_offset = *byte_offset * 8; + const auto virtuality = entry.MaybeGetUnsignedConstant(DW_AT_virtuality) + .value_or(DW_VIRTUALITY_none); + BaseClass::Inheritance inheritance; + if (virtuality == DW_VIRTUALITY_none) { + inheritance = BaseClass::Inheritance::NON_VIRTUAL; + } else if (virtuality == DW_VIRTUALITY_virtual) { + inheritance = BaseClass::Inheritance::VIRTUAL; + } else { + Die() << "Unexpected base class virtuality: " << virtuality; } - // TODO: proper handling of missing linkage name - AddProcessedNode<Method>(entry, - subprogram.linkage_name.value_or("{missing}"), - *subprogram.name_with_context.unscoped_name, kind, - /* vtable_offset = */ std::nullopt, id); + AddProcessedNode<BaseClass>(entry, type_id, bit_offset, inheritance); } void ProcessArray(Entry& entry) { @@ -562,7 +639,10 @@ class Processor { } void ProcessEnum(Entry& entry) { - std::string name = GetNameOrEmpty(entry); + const std::optional<std::string> name_optional = MaybeGetName(entry); + const std::string name = + name_optional.has_value() ? scope_ + *name_optional : ""; + if (entry.GetFlag(DW_AT_declaration)) { // It is expected to have only name and no children in declaration. // However, it is not guaranteed and we should do something if we find an @@ -589,6 +669,10 @@ class Processor { enumerators.emplace_back(enumerator_name, static_cast<int64_t>(*value_optional)); } + if (!ShouldKeepDefinition(entry, name)) { + AddProcessedNode<Enumeration>(entry, name); + return; + } const Id id = AddProcessedNode<Enumeration>(entry, name, underlying_type_id, std::move(enumerators)); if (!name.empty()) { @@ -676,24 +760,16 @@ class Processor { } void ProcessVariable(Entry& entry) { - // Skip: - // * anonymous variables (for example, anonymous union) - // * variables not visible outside of its enclosing compilation unit - if (!entry.GetFlag(DW_AT_external)) { - return; - } - std::optional<std::string> name_optional = MaybeGetName(entry); - if (!name_optional) { - return; - } + auto name_with_context = GetNameWithContext(entry); auto referred_type = GetReferredType(entry); auto referred_type_id = GetIdForEntry(referred_type); if (auto address = entry.MaybeGetAddress(DW_AT_location)) { // Only external variables with address are useful for ABI monitoring + const auto new_symbol_idx = result_.symbols.size(); result_.symbols.push_back(Types::Symbol{ - .name = *name_optional, + .name = GetScopedNameForSymbol(new_symbol_idx, name_with_context), .linkage_name = entry.MaybeGetString(DW_AT_linkage_name), .address = *address, .id = referred_type_id}); @@ -719,7 +795,7 @@ class Processor { Function node; NameWithContext name_with_context; std::optional<std::string> linkage_name; - std::optional<size_t> address; + std::optional<Address> address; bool external; }; @@ -729,35 +805,51 @@ class Processor { std::vector<Id> parameters; for (auto& child : entry.GetChildren()) { auto child_tag = child.GetTag(); - if (child_tag == DW_TAG_formal_parameter) { - auto child_type = GetReferredType(child); - auto child_type_id = GetIdForEntry(child_type); - parameters.push_back(child_type_id); - } else if (child_tag == DW_TAG_unspecified_parameters) { - // Note: C++ allows a single ... argument specification but C does not. - // However, "extern int foo();" (note lack of "void" in parameters) in C - // will produce the same DWARF as "extern int foo(...);" in C++. - CheckNoChildren(child); - parameters.push_back(variadic_id_); - } else if (child_tag == DW_TAG_enumeration_type || - child_tag == DW_TAG_label || - child_tag == DW_TAG_lexical_block || - child_tag == DW_TAG_structure_type || - child_tag == DW_TAG_class_type || - child_tag == DW_TAG_union_type || - child_tag == DW_TAG_typedef || - child_tag == DW_TAG_inlined_subroutine || - child_tag == DW_TAG_variable || - child_tag == DW_TAG_call_site || - child_tag == DW_TAG_GNU_call_site) { - Process(child); - } else if (child_tag == DW_TAG_template_type_parameter || - child_tag == DW_TAG_template_value_parameter) { - // We just skip these as neither GCC nor Clang seem to use them properly - // (resulting in no references to such DIEs). - } else { - Die() << "Unexpected tag for child of function: " << child_tag << ", " - << EntryToString(child); + switch (child_tag) { + case DW_TAG_formal_parameter: + parameters.push_back(GetIdForReferredType(GetReferredType(child))); + break; + case DW_TAG_unspecified_parameters: + // Note: C++ allows a single ... argument specification but C does + // not. However, "extern int foo();" (note lack of "void" in + // parameters) in C will produce the same DWARF as "extern int + // foo(...);" in C++. + CheckNoChildren(child); + parameters.push_back(variadic_id_); + break; + case DW_TAG_enumeration_type: + case DW_TAG_label: + case DW_TAG_lexical_block: + case DW_TAG_structure_type: + case DW_TAG_class_type: + case DW_TAG_union_type: + case DW_TAG_typedef: + case DW_TAG_inlined_subroutine: + case DW_TAG_variable: + case DW_TAG_call_site: + case DW_TAG_GNU_call_site: + // TODO: Do not leak local types outside this scope. + // TODO: It would be better to not process any + // information that is function local but there is a dangling + // reference Clang bug. + Process(child); + break; + case DW_TAG_imported_declaration: + case DW_TAG_imported_module: + // For now information there is useless for ABI monitoring, but we + // need to check that there is no missing information in descendants. + CheckNoChildren(child); + break; + case DW_TAG_template_type_parameter: + case DW_TAG_template_value_parameter: + case DW_TAG_GNU_template_template_param: + case DW_TAG_GNU_template_parameter_pack: + // We just skip these as neither GCC nor Clang seem to use them + // properly (resulting in no references to such DIEs). + break; + default: + Die() << "Unexpected tag for child of function: " << child_tag << ", " + << EntryToString(child); } } @@ -784,6 +876,11 @@ class Processor { return referred_type ? GetIdForEntry(*referred_type) : void_id_; } + // Wrapper for GetIdForEntry to allow lvalues. + Id GetIdForReferredType(Entry referred_type) { + return GetIdForEntry(referred_type); + } + // Populate Id from method above with processed Node. template <typename Node, typename... Args> Id AddProcessedNode(Entry& entry, Args&&... args) { @@ -800,21 +897,23 @@ class Processor { Id void_id_; Id variadic_id_; bool is_little_endian_binary_; + const std::unique_ptr<Filter>& file_filter_; Types& result_; std::unordered_map<Dwarf_Off, Id> id_map_; // Current scope. Scope scope_; std::vector<std::pair<Dwarf_Off, std::string>> scoped_names_; std::vector<std::pair<Dwarf_Off, size_t>> unresolved_symbol_specifications_; + dwarf::Files files_; }; Types ProcessEntries(std::vector<Entry> entries, bool is_little_endian_binary, - Graph& graph) { + const std::unique_ptr<Filter>& file_filter, Graph& graph) { Types result; - Id void_id = graph.Add<Void>(); - Id variadic_id = graph.Add<Variadic>(); + const Id void_id = graph.Add<Special>(Special::Kind::VOID); + const Id variadic_id = graph.Add<Special>(Special::Kind::VARIADIC); Processor processor(graph, void_id, variadic_id, is_little_endian_binary, - result); + file_filter, result); for (auto& entry : entries) { processor.Process(entry); } @@ -824,9 +923,10 @@ Types ProcessEntries(std::vector<Entry> entries, bool is_little_endian_binary, return result; } -Types Process(Handler& dwarf, bool is_little_endian_binary, Graph& graph) { +Types Process(Handler& dwarf, bool is_little_endian_binary, + const std::unique_ptr<Filter>& file_filter, Graph& graph) { return ProcessEntries(dwarf.GetCompilationUnits(), is_little_endian_binary, - graph); + file_filter, graph); } } // namespace dwarf diff --git a/dwarf_processor.h b/dwarf_processor.h index 7e398ae..7dc0ee5 100644 --- a/dwarf_processor.h +++ b/dwarf_processor.h @@ -26,6 +26,7 @@ #include <vector> #include "dwarf_wrappers.h" +#include "filter.h" #include "graph.h" namespace stg { @@ -35,7 +36,7 @@ struct Types { struct Symbol { std::string name; std::optional<std::string> linkage_name; - size_t address; + Address address; Id id; }; @@ -47,11 +48,12 @@ struct Types { // Process every compilation unit from DWARF and returns processed STG along // with information needed for matching to ELF symbols. -Types Process(Handler& dwarf, bool is_little_endian_binary, Graph& graph); +Types Process(Handler& dwarf, bool is_little_endian_binary, + const std::unique_ptr<Filter>& file_filter, Graph& graph); // Process every entry passed there. It should be used only for testing. Types ProcessEntries(std::vector<Entry> entries, bool is_little_endian_binary, - Graph& graph); + const std::unique_ptr<Filter>& file_filter, Graph& graph); } // namespace dwarf } // namespace stg diff --git a/dwarf_wrappers.cc b/dwarf_wrappers.cc index ed0c22a..fc08593 100644 --- a/dwarf_wrappers.cc +++ b/dwarf_wrappers.cc @@ -30,6 +30,7 @@ #include <ios> #include <memory> #include <optional> +#include <ostream> #include <string> #include <utility> #include <vector> @@ -39,6 +40,10 @@ namespace stg { namespace dwarf { +std::ostream& operator<<(std::ostream& os, const Address& address) { + return os << Hex(address.value) << (address.is_tls ? " (TLS)" : ""); +} + namespace { static const Dwfl_Callbacks kDwflCallbacks = { @@ -89,6 +94,51 @@ void CheckOrDwflError(bool condition, const char* caller) { } } +std::optional<uint64_t> MaybeGetUnsignedOperand(const Dwarf_Op& operand) { + switch (operand.atom) { + case DW_OP_addr: + case DW_OP_const1u: + case DW_OP_const2u: + case DW_OP_const4u: + case DW_OP_const8u: + case DW_OP_constu: + return operand.number; + case DW_OP_const1s: + case DW_OP_const2s: + case DW_OP_const4s: + case DW_OP_const8s: + case DW_OP_consts: + if (static_cast<int64_t>(operand.number) < 0) { + // Atom is not an unsigned constant + return std::nullopt; + } + return operand.number; + case DW_OP_lit0...DW_OP_lit31: + return operand.atom - DW_OP_lit0; + default: + return std::nullopt; + } +} + +struct Expression { + const Dwarf_Op& operator[](size_t i) const { + return atoms[i]; + } + + Dwarf_Op* atoms = nullptr; + size_t length = 0; +}; + +Expression GetExpression(Dwarf_Attribute& attribute) { + Expression result; + + Check(dwarf_getlocation(&attribute, &result.atoms, &result.length) == + kReturnOk) << "dwarf_getlocation returned error"; + Check(result.atoms != nullptr && result.length > 0) + << "dwarf_getlocation returned empty expression"; + return result; +} + } // namespace Handler::Handler(const std::string& path) : dwfl_(dwfl_begin(&kDwflCallbacks)) { @@ -211,8 +261,9 @@ std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) { } uint64_t value; - Check(dwarf_formudata(&dwarf_attribute.value(), &value) == kReturnOk) - << "dwarf_formudata returned error"; + if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) { + Die() << "dwarf_formudata returned error"; + } return value; } @@ -245,27 +296,33 @@ std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) { namespace { -std::optional<uint64_t> GetAddressFromLocation(Dwarf_Attribute& attribute) { - Dwarf_Op* expr = nullptr; - size_t expr_len = 0; - - Check(dwarf_getlocation(&attribute, &expr, &expr_len) == kReturnOk) - << "dwarf_getlocation returned error"; - Check(expr != nullptr && expr_len > 0) - << "dwarf_getlocation returned empty expression"; +std::optional<Address> GetAddressFromLocation(Dwarf_Attribute& attribute) { + const auto expression = GetExpression(attribute); Dwarf_Attribute result_attribute; - if (dwarf_getlocation_attr(&attribute, expr, &result_attribute) == + if (dwarf_getlocation_attr(&attribute, expression.atoms, &result_attribute) == kReturnOk) { - uint64_t addr; - Check(dwarf_formaddr(&result_attribute, &addr) == kReturnOk) + uint64_t address; + Check(dwarf_formaddr(&result_attribute, &address) == kReturnOk) << "dwarf_formaddr returned error"; - return addr; + return Address{.value = address, .is_tls = false}; } - if (expr_len == 1 && expr->atom == DW_OP_addr) { + if (expression.length == 1 && expression[0].atom == DW_OP_addr) { // DW_OP_addr is unsupported by dwarf_getlocation_attr, so we need to // manually extract the address from expression. - return expr->number; + return Address{.value = expression[0].number, .is_tls = false}; + } + // TLS operation has different encodings in Clang and GCC: + // * Clang 14 uses DW_OP_GNU_push_tls_address + // * GCC 12 uses DW_OP_form_tls_address + if (expression.length == 2 && + (expression[1].atom == DW_OP_GNU_push_tls_address || + expression[1].atom == DW_OP_form_tls_address)) { + // TLS symbols address may be incorrect because of unsupported + // relocations. Resetting it to zero the same way as it is done in + // elf::Reader::MaybeAddTypeInfo. + // TODO: match TLS variables by address + return Address{.value = 0, .is_tls = true}; } Die() << "Unsupported data location expression"; @@ -273,7 +330,7 @@ std::optional<uint64_t> GetAddressFromLocation(Dwarf_Attribute& attribute) { } // namespace -std::optional<uint64_t> Entry::MaybeGetAddress(uint32_t attribute) { +std::optional<Address> Entry::MaybeGetAddress(uint32_t attribute) { auto dwarf_attribute = GetAttribute(&die, attribute); if (!dwarf_attribute) { return {}; @@ -282,10 +339,11 @@ std::optional<uint64_t> Entry::MaybeGetAddress(uint32_t attribute) { return GetAddressFromLocation(*dwarf_attribute); } - uint64_t addr; - Check(dwarf_formaddr(&dwarf_attribute.value(), &addr) == kReturnOk) + Address address; + Check(dwarf_formaddr(&dwarf_attribute.value(), &address.value) == kReturnOk) << "dwarf_formaddr returned error"; - return addr; + address.is_tls = false; + return address; } std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() { @@ -300,8 +358,93 @@ std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() { return offset; } - // TODO: support location expressions - Die() << "dwarf_formudata returned error, " << std::hex << GetOffset(); + // Parse location expression + const auto expression = GetExpression(attribute.value()); + + // Parse virtual base classes offset, which looks like this: + // [0] = DW_OP_dup + // [1] = DW_OP_deref + // [2] = constant operand + // [3] = DW_OP_minus + // [4] = DW_OP_deref + // [5] = DW_OP_plus + // This form is not in the standard, but hardcoded in compilers: + // * https://github.com/llvm/llvm-project/blob/release/17.x/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp#L1611 + // * https://github.com/gcc-mirror/gcc/blob/releases/gcc-13/gcc/dwarf2out.cc#L20029 + if (expression.length == 6 && + expression[0].atom == DW_OP_dup && + expression[1].atom == DW_OP_deref && + expression[3].atom == DW_OP_minus && + expression[4].atom == DW_OP_deref && + expression[5].atom == DW_OP_plus) { + const auto byte_offset = MaybeGetUnsignedOperand(expression[2]); + if (byte_offset) { + return byte_offset; + } + } + + Die() << "Unsupported member offset expression, " << Hex(GetOffset()); +} + +std::optional<uint64_t> Entry::MaybeGetVtableOffset() { + auto attribute = GetAttribute(&die, DW_AT_vtable_elem_location); + if (!attribute) { + return {}; + } + + // Parse location expression + const Expression expression = GetExpression(attribute.value()); + + // We expect compilers to produce expression with one constant operand + if (expression.length == 1) { + const auto offset = MaybeGetUnsignedOperand(expression[0]); + if (offset) { + return offset; + } + } + + Die() << "Unsupported vtable offset expression, " << Hex(GetOffset()); +} + +std::optional<uint64_t> Entry::MaybeGetCount() { + auto dwarf_attribute = GetAttribute(&die, DW_AT_count); + if (!dwarf_attribute) { + return {}; + } + + uint64_t value; + if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) { + // All errors are interpreted as "value of DW_AT_count is not a constant" + // and ignored. There is no stable API in libdw for checking the exact error + // reason. + // TODO: implement clean solution for separating "not a + // constant" errors from other errors. + return {}; + } + return value; +} + +Files::Files(Entry& compilation_unit) { + if (dwarf_getsrcfiles(&compilation_unit.die, &files_, &files_count_) != + kReturnOk) { + Die() << "No source file information in DWARF"; + } +} + +std::optional<std::string> Files::MaybeGetFile(Entry& entry, + uint32_t attribute) const { + auto file_index = entry.MaybeGetUnsignedConstant(attribute); + if (!file_index) { + return std::nullopt; + } + Check(files_ != nullptr) << "dwarf::Files was not initialised"; + if (*file_index >= files_count_) { + Die() << "File index is greater than or equal files count (" << *file_index + << " >= " << files_count_ << ")"; + } + const char* result = dwarf_filesrc(files_, *file_index, nullptr, nullptr); + Check(result != nullptr) << "dwarf_filesrc returned error"; + return result; } } // namespace dwarf diff --git a/dwarf_wrappers.h b/dwarf_wrappers.h index 89f7705..90ae21d 100644 --- a/dwarf_wrappers.h +++ b/dwarf_wrappers.h @@ -28,12 +28,30 @@ #include <cstdint> #include <memory> #include <optional> +#include <ostream> #include <string> +#include <tuple> #include <vector> namespace stg { namespace dwarf { +struct Address { + // TODO: use auto operator<=> + bool operator<(const Address& other) const { + return std::tie(value, is_tls) < std::tie(other.value, other.is_tls); + } + + bool operator==(const Address& other) const { + return value == other.value && is_tls == other.is_tls; + } + + uint64_t value; + bool is_tls; +}; + +std::ostream& operator<<(std::ostream& os, const Address& address); + // C++ wrapper over Dwarf_Die, providing interface for its various properties. struct Entry { // All methods in libdw take Dwarf_Die by non-const pointer as libdw caches @@ -61,8 +79,12 @@ struct Entry { std::optional<uint64_t> MaybeGetUnsignedConstant(uint32_t attribute); bool GetFlag(uint32_t attribute); std::optional<Entry> MaybeGetReference(uint32_t attribute); - std::optional<uint64_t> MaybeGetAddress(uint32_t attribute); + std::optional<Address> MaybeGetAddress(uint32_t attribute); std::optional<uint64_t> MaybeGetMemberByteOffset(); + std::optional<uint64_t> MaybeGetVtableOffset(); + // Returns value of DW_AT_count if it is constant or nullptr if it is not + // defined or cannot be represented as constant. + std::optional<uint64_t> MaybeGetCount(); }; // C++ wrapper over libdw (DWARF library). @@ -92,6 +114,18 @@ class Handler { Dwarf* dwarf_ = nullptr; }; +class Files { + public: + Files() = default; + explicit Files(Entry& compilation_unit); + std::optional<std::string> MaybeGetFile(Entry& entry, + uint32_t attribute) const; + + private: + Dwarf_Files* files_ = nullptr; + size_t files_count_ = 0; +}; + } // namespace dwarf } // namespace stg diff --git a/elf_loader.cc b/elf_loader.cc index 0163304..5b9a864 100644 --- a/elf_loader.cc +++ b/elf_loader.cc @@ -29,6 +29,7 @@ #include <cstring> #include <functional> #include <iostream> +#include <limits> #include <ostream> #include <string> #include <string_view> @@ -143,6 +144,28 @@ std::string ElfSectionTypeToString(Elf64_Word elf_section_type) { } } +GElf_Half GetMachine(Elf* elf) { + GElf_Ehdr header; + Check(gelf_getehdr(elf, &header) != nullptr) << "could not get ELF header"; + return header.e_machine; +} + +void AdjustAddress(GElf_Half machine, SymbolTableEntry& entry) { + if (machine == EM_ARM) { + if (entry.symbol_type == SymbolTableEntry::SymbolType::FUNCTION + || entry.symbol_type == SymbolTableEntry::SymbolType::GNU_IFUNC) { + // Clear bit zero of ARM32 addresses as per "ELF for the Arm Architecture" + // section 5.5.3. https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + entry.value &= ~1; + } + } else if (machine == EM_AARCH64) { + // Copy bit 55 over bits 56 to 63 which may be tag information. + entry.value = entry.value & (1ULL << 55) + ? entry.value | (0xffULL << 56) + : entry.value & ~(0xffULL << 56); + } +} + std::vector<Elf_Scn*> GetSectionsIf( Elf* elf, const std::function<bool(const GElf_Shdr&)>& predicate) { std::vector<Elf_Scn*> result; @@ -260,6 +283,68 @@ Elf_Scn* GetSymbolTableSection(Elf* elf, bool is_linux_kernel_binary, << "'"; } + +constexpr std::string_view kCFISuffix = ".cfi"; + +bool IsCFISymbolName(std::string_view name) { + // Check if symbol name ends with ".cfi" + // TODO: use std::string_view::ends_with + return (name.size() >= kCFISuffix.size() && + name.substr(name.size() - kCFISuffix.size()) == kCFISuffix); +} + +} // namespace + +std::string_view UnwrapCFISymbolName(std::string_view cfi_name) { + Check(IsCFISymbolName(cfi_name)) + << "CFI symbol " << cfi_name << " doesn't end with .cfi"; + return cfi_name.substr(0, cfi_name.size() - kCFISuffix.size()); +} + +namespace { + +std::vector<SymbolTableEntry> GetSymbols( + Elf* elf, Elf_Scn* symbol_table_section, bool cfi) { + const auto machine = GetMachine(elf); + const auto [symbol_table_header, symbol_table_data] = + GetSectionInfo(symbol_table_section); + const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header); + + std::vector<SymbolTableEntry> result; + result.reserve(number_of_symbols); + + // GElf uses int for indexes in symbol table, prevent int overflow. + Check(number_of_symbols <= std::numeric_limits<int>::max()) + << "number of symbols exceeds INT_MAX"; + for (size_t i = 0; i < number_of_symbols; ++i) { + GElf_Sym symbol; + Check(gelf_getsym(symbol_table_data, static_cast<int>(i), &symbol) != + nullptr) + << "symbol (i = " << i << ") was not found"; + + const auto name = + GetString(elf, symbol_table_header.sh_link, symbol.st_name); + if (cfi != IsCFISymbolName(name)) { + continue; + } + SymbolTableEntry entry{ + .name = name, + .value = symbol.st_value, + .size = symbol.st_size, + .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)), + .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)), + .visibility = + ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)), + .section_index = symbol.st_shndx, + .value_type = ParseSymbolValueType(symbol.st_shndx), + }; + AdjustAddress(machine, entry); + result.push_back(entry); + } + + return result; +} + bool IsLinuxKernelBinary(Elf* elf) { // The Linux kernel itself has many specific sections that are sufficient to // classify a binary as kernel binary if present, `__ksymtab_strings` is one @@ -367,34 +452,20 @@ std::vector<SymbolTableEntry> ElfLoader::GetElfSymbols() const { Check(symbol_table_section != nullptr) << "failed to find symbol table section"; - const auto [symbol_table_header, symbol_table_data] = - GetSectionInfo(symbol_table_section); - const size_t number_of_symbols = GetNumberOfEntries(symbol_table_header); - - std::vector<SymbolTableEntry> result; - result.reserve(number_of_symbols); - - for (size_t i = 0; i < number_of_symbols; ++i) { - GElf_Sym symbol; - Check(gelf_getsym(symbol_table_data, i, &symbol) != nullptr) - << "symbol (i = " << i << ") was not found"; + return GetSymbols(elf_, symbol_table_section, /* cfi = */ false); +} - const auto name = - GetString(elf_, symbol_table_header.sh_link, symbol.st_name); - result.push_back(SymbolTableEntry{ - .name = name, - .value = symbol.st_value, - .size = symbol.st_size, - .symbol_type = ParseSymbolType(GELF_ST_TYPE(symbol.st_info)), - .binding = ParseSymbolBinding(GELF_ST_BIND(symbol.st_info)), - .visibility = - ParseSymbolVisibility(GELF_ST_VISIBILITY(symbol.st_other)), - .section_index = symbol.st_shndx, - .value_type = ParseSymbolValueType(symbol.st_shndx), - }); +std::vector<SymbolTableEntry> ElfLoader::GetCFISymbols() const { + // CFI symbols may be only in .symtab + Elf_Scn* symbol_table_section = MaybeGetSectionByType(elf_, SHT_SYMTAB); + if (symbol_table_section == nullptr) { + // It is possible for ET_DYN and ET_EXEC ELF binaries to not have .symtab, + // because it was trimmed away. We can't determine whether there were CFI + // symbols in the first place, so the best we can do is returning an empty + // list. + return {}; } - - return result; + return GetSymbols(elf_, symbol_table_section, /* cfi = */ true); } ElfSymbol::CRC ElfLoader::GetElfSymbolCRC( diff --git a/elf_loader.h b/elf_loader.h index 14faf6e..2679155 100644 --- a/elf_loader.h +++ b/elf_loader.h @@ -71,12 +71,15 @@ std::ostream& operator<<(std::ostream& os, SymbolTableEntry::SymbolType type); std::ostream& operator<<(std::ostream& os, SymbolTableEntry::ValueType type); +std::string_view UnwrapCFISymbolName(std::string_view cfi_name); + class ElfLoader final { public: explicit ElfLoader(Elf* elf, bool verbose = false); std::string_view GetBtfRawData() const; std::vector<SymbolTableEntry> GetElfSymbols() const; + std::vector<SymbolTableEntry> GetCFISymbols() const; ElfSymbol::CRC GetElfSymbolCRC(const SymbolTableEntry& symbol) const; std::string_view GetElfSymbolNamespace(const SymbolTableEntry& symbol) const; size_t GetAbsoluteAddress(const SymbolTableEntry& symbol) const; diff --git a/elf_reader.cc b/elf_reader.cc index d87a7f9..404474f 100644 --- a/elf_reader.cc +++ b/elf_reader.cc @@ -25,6 +25,7 @@ #include <ios> #include <iostream> #include <map> +#include <memory> #include <optional> #include <string> #include <string_view> @@ -35,6 +36,7 @@ #include "dwarf_wrappers.h" #include "elf_loader.h" #include "error.h" +#include "filter.h" #include "graph.h" #include "metrics.h" #include "reader_options.h" @@ -134,6 +136,19 @@ NamespacesMap GetNamespacesMap(const SymbolTable& symbols, return namespaces; } +AddressMap GetCFIAddressMap(const SymbolTable& symbols, const ElfLoader& elf) { + AddressMap name_to_address; + for (const auto& symbol : symbols) { + const std::string_view name_prefix = UnwrapCFISymbolName(symbol.name); + const size_t address = elf.GetAbsoluteAddress(symbol); + if (!name_to_address.emplace(name_prefix, address).second) { + Die() << "Multiple CFI symbols referring to symbol '" << name_prefix + << '\''; + } + } + return name_to_address; +} + bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol) { const auto symbol_type = symbol.symbol_type; // Reject symbols that are not functions or variables. @@ -180,34 +195,48 @@ namespace { class Reader { public: Reader(Graph& graph, const std::string& path, ReadOptions options, - Metrics& metrics) + const std::unique_ptr<Filter>& file_filter, Metrics& metrics) : graph_(graph), dwarf_(path), elf_(dwarf_.GetElf(), options.Test(ReadOptions::INFO)), options_(options), + file_filter_(file_filter), metrics_(metrics) {} Reader(Graph& graph, char* data, size_t size, ReadOptions options, - Metrics& metrics) + const std::unique_ptr<Filter>& file_filter, Metrics& metrics) : graph_(graph), dwarf_(data, size), elf_(dwarf_.GetElf(), options.Test(ReadOptions::INFO)), options_(options), + file_filter_(file_filter), metrics_(metrics) {} Id Read(); private: - using SymbolIndex = std::map<std::pair<size_t, std::string>, size_t>; + using SymbolIndex = + std::map<std::pair<dwarf::Address, std::string>, std::vector<size_t>>; Id BuildRoot(const std::vector<std::pair<ElfSymbol, size_t>>& symbols) { + // On destruction, the unification object will remove or rewrite each graph + // node for which it has a mapping. + // + // Graph rewriting is expensive so an important optimisation is to restrict + // the nodes in consideration to the ones allocated by the DWARF processor + // here and any symbol or type roots that follow. This is done by setting + // the starting node ID to be the current graph limit. + Unification unification(graph_, graph_.Limit(), metrics_); + dwarf::Types types; if (!options_.Test(ReadOptions::SKIP_DWARF)) { - types = dwarf::Process(dwarf_, elf_.IsLittleEndianBinary(), graph_); + types = dwarf::Process(dwarf_, elf_.IsLittleEndianBinary(), file_filter_, + graph_); } - // Unification rewrites the graph on destruction. - Unification unification(graph_, metrics_); + // A less important optimisation is avoiding copying the mapping array as it + // is populated. This is done by reserving space to the new graph limit. + unification.Reserve(graph_.Limit()); // fill address to id // @@ -225,18 +254,9 @@ class Reader { for (size_t i = 0; i < types.symbols.size(); ++i) { const auto& symbol = types.symbols[i]; - // TODO: support linkage_name to support C++ - auto [it, emplaced] = address_name_to_index.emplace( - std::make_pair(symbol.address, symbol.name), i); - if (!emplaced) { - const auto& other = types.symbols[it->second]; - // TODO: allow "compatible" duplicates, for example - // "void foo(int bar)" vs "void foo(const int bar)" - if (!IsEqual(unification, symbol, other)) { - Die() << "Duplicate DWARF symbol: address=" << Hex(symbol.address) - << ", name=" << symbol.name; - } - } + const auto& name = + symbol.linkage_name.has_value() ? *symbol.linkage_name : symbol.name; + address_name_to_index[std::make_pair(symbol.address, name)].push_back(i); } std::map<std::string, Id> symbols_map; @@ -244,7 +264,8 @@ class Reader { // TODO: add VersionInfoToString to SymbolKey name // TODO: check for uniqueness of SymbolKey in map after // support for version info - MaybeAddTypeInfo(address_name_to_index, types.symbols, address, symbol); + MaybeAddTypeInfo(address_name_to_index, types.symbols, address, symbol, + unification); symbols_map.emplace(VersionedSymbolName(symbol), graph_.Add<ElfSymbol>(symbol)); } @@ -307,11 +328,20 @@ class Reader { static void MaybeAddTypeInfo( const SymbolIndex& address_name_to_index, const std::vector<dwarf::Types::Symbol>& dwarf_symbols, - const size_t address, ElfSymbol& node) { + size_t address_value, ElfSymbol& node, Unification& unification) { + const bool is_tls = node.symbol_type == ElfSymbol::SymbolType::TLS; + if (is_tls) { + // TLS symbols address may be incorrect because of unsupported + // relocations. Resetting it to zero the same way as it is done in + // dwarf::Entry::GetAddressFromLocation. + // TODO: match TLS variables by address + address_value = 0; + } + const dwarf::Address address{.value = address_value, .is_tls = is_tls}; // try to find the first symbol with given address const auto start_it = address_name_to_index.lower_bound( std::make_pair(address, std::string())); - const dwarf::Types::Symbol* best_symbol = nullptr; + auto best_symbols_it = address_name_to_index.end(); bool matched_by_name = false; size_t candidates = 0; for (auto it = start_it; @@ -319,28 +349,45 @@ class Reader { ++it) { ++candidates; // We have at least matching addresses. - const auto& candidate = dwarf_symbols[it->second]; if (it->first.second == node.symbol_name) { // If we have also matching names we can stop looking further. matched_by_name = true; - best_symbol = &candidate; + best_symbols_it = it; break; } - if (best_symbol == nullptr) { + if (best_symbols_it == address_name_to_index.end()) { // Otherwise keep the first match. - best_symbol = &candidate; + best_symbols_it = it; } } - if (best_symbol != nullptr) { + if (best_symbols_it != address_name_to_index.end()) { + const auto& best_symbols = best_symbols_it->second; + Check(!best_symbols.empty()) << "best_symbols.empty()"; + const auto& best_symbol = dwarf_symbols[best_symbols[0]]; + for (size_t i = 1; i < best_symbols.size(); ++i) { + const auto& other = dwarf_symbols[best_symbols[i]]; + // TODO: allow "compatible" duplicates, for example + // "void foo(int bar)" vs "void foo(const int bar)" + if (!IsEqual(unification, best_symbol, other)) { + Die() << "Duplicate DWARF symbol: address=" + << best_symbol.address << ", name=" << best_symbol.name; + } + } + if (best_symbol.name.empty()) { + Die() << "DWARF symbol (address = " << best_symbol.address + << ", linkage_name = " + << best_symbol.linkage_name.value_or("{missing}") + << " should have a name"; + } // There may be multiple DWARF symbols with same address (zero-length // arrays), or ELF symbol has different name from DWARF symbol (aliases). // But if we have both situations at once, we can't match ELF to DWARF and // it should be fixed in analysed binary source code. Check(matched_by_name || candidates == 1) << "multiple candidates without matching names, best_symbol.name=" - << best_symbol->name; - node.type_id = best_symbol->id; - node.full_name = best_symbol->name; + << best_symbol.name; + node.type_id = best_symbol.id; + node.full_name = best_symbol.name; } } @@ -350,6 +397,7 @@ class Reader { dwarf::Handler dwarf_; elf::ElfLoader elf_; ReadOptions options_; + const std::unique_ptr<Filter>& file_filter_; Metrics& metrics_; }; @@ -370,6 +418,14 @@ Id Reader::Read() { namespaces = GetNamespacesMap(all_symbols, elf_); } + const auto cfi_address_map = GetCFIAddressMap(elf_.GetCFISymbols(), elf_); + if (options_.Test(ReadOptions::INFO) && !cfi_address_map.empty()) { + std::cout << "CFI symbols:\n"; + for (const auto& [name, address] : cfi_address_map) { + std::cout << " " << name << " -> " << Hex(address) << '\n'; + } + } + if (options_.Test(ReadOptions::INFO)) { std::cout << "Public functions and variables:\n"; } @@ -378,9 +434,12 @@ Id Reader::Read() { for (const auto& symbol : all_symbols) { if (IsPublicFunctionOrVariable(symbol) && (!is_linux_kernel || ksymtab_symbols.count(symbol.name))) { + const auto cfi_it = cfi_address_map.find(std::string(symbol.name)); + const size_t address = cfi_it != cfi_address_map.end() + ? cfi_it->second + : elf_.GetAbsoluteAddress(symbol); symbols.emplace_back( - SymbolTableEntryToElfSymbol(crc_values, namespaces, symbol), - elf_.GetAbsoluteAddress(symbol)); + SymbolTableEntryToElfSymbol(crc_values, namespaces, symbol), address); if (options_.Test(ReadOptions::INFO)) { std::cout << " " << symbol.binding << ' ' << symbol.symbol_type << " '" @@ -405,13 +464,14 @@ Id Reader::Read() { } // namespace internal Id Read(Graph& graph, const std::string& path, ReadOptions options, - Metrics& metrics) { - return internal::Reader(graph, path, options, metrics).Read(); + const std::unique_ptr<Filter>& file_filter, Metrics& metrics) { + return internal::Reader(graph, path, options, file_filter, metrics).Read(); } Id Read(Graph& graph, char* data, size_t size, ReadOptions options, - Metrics& metrics) { - return internal::Reader(graph, data, size, options, metrics).Read(); + const std::unique_ptr<Filter>& file_filter, Metrics& metrics) { + return internal::Reader(graph, data, size, options, file_filter, metrics) + .Read(); } } // namespace elf diff --git a/elf_reader.h b/elf_reader.h index d1f6680..12d5711 100644 --- a/elf_reader.h +++ b/elf_reader.h @@ -21,6 +21,7 @@ #define STG_ELF_READER_H_ #include <cstddef> +#include <memory> #include <string> #include <string_view> #include <unordered_map> @@ -28,6 +29,7 @@ #include <vector> #include "elf_loader.h" +#include "filter.h" #include "graph.h" #include "metrics.h" #include "reader_options.h" @@ -36,9 +38,9 @@ namespace stg { namespace elf { Id Read(Graph& graph, const std::string& path, ReadOptions options, - Metrics& metrics); + const std::unique_ptr<Filter>& file_filter, Metrics& metrics); Id Read(Graph& graph, char* data, size_t size, ReadOptions options, - Metrics& metrics); + const std::unique_ptr<Filter>& file_filter, Metrics& metrics); // For unit tests only namespace internal { @@ -47,6 +49,7 @@ using SymbolTable = std::vector<SymbolTableEntry>; using SymbolNameList = std::unordered_set<std::string_view>; using CRCValuesMap = std::unordered_map<std::string, ElfSymbol::CRC>; using NamespacesMap = std::unordered_map<std::string, std::string>; +using AddressMap = std::unordered_map<std::string, size_t>; ElfSymbol::SymbolType ConvertSymbolType( SymbolTableEntry::SymbolType symbol_type); @@ -54,6 +57,7 @@ SymbolNameList GetKsymtabSymbols(const SymbolTable& symbols); CRCValuesMap GetCRCValuesMap(const SymbolTable& symbols, const ElfLoader& elf); NamespacesMap GetNamespacesMap(const SymbolTable& symbols, const ElfLoader& elf); +AddressMap GetCFIAddressMap(const SymbolTable& symbols, const ElfLoader& elf); bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol); } // namespace internal diff --git a/elf_reader_test.cc b/elf_reader_test.cc index d550c26..58d69dc 100644 --- a/elf_reader_test.cc +++ b/elf_reader_test.cc @@ -17,17 +17,37 @@ // // Author: Aleksei Vetrov -#include "elf_reader.h" +#include <string_view> #include <catch2/catch.hpp> +#include "elf_loader.h" +#include "elf_reader.h" +#include "graph.h" namespace Test { +using SymbolTable = stg::elf::internal::SymbolTable; +using SymbolTableEntry = stg::elf::SymbolTableEntry; + +SymbolTableEntry MakeSymbol(std::string_view name) { + return { + .name = name, + .value = 0, + .size = 0, + .symbol_type = SymbolTableEntry::SymbolType::OBJECT, + .binding = SymbolTableEntry::Binding::GLOBAL, + .visibility = SymbolTableEntry::Visibility::DEFAULT, + .section_index = 0, + .value_type = SymbolTableEntry::ValueType::RELATIVE_TO_SECTION, + }; +} + + TEST_CASE("GetKsymtabSymbols") { - const stg::elf::internal::SymbolTable all_symbols = { - {.name = "foo"}, - {.name = "__ksymtab_foo"}, - {.name = "bar"}, + const SymbolTable all_symbols = { + MakeSymbol("foo"), + MakeSymbol("__ksymtab_foo"), + MakeSymbol("bar"), }; const auto ksymtab = stg::elf::internal::GetKsymtabSymbols(all_symbols); REQUIRE(ksymtab.size() == 1); @@ -104,12 +104,8 @@ struct Equals { return result && it1 == end1 && it2 == end2; } - bool operator()(const Void&, const Void&) { - return true; - } - - bool operator()(const Variadic&, const Variadic&) { - return true; + bool operator()(const Special& x1, const Special& x2) { + return x1.kind == x2.kind; } bool operator()(const PointerReference& x1, @@ -153,7 +149,6 @@ struct Equals { bool operator()(const Method& x1, const Method& x2) { return x1.mangled_name == x2.mangled_name && x1.name == x2.name - && x1.kind == x2.kind && x1.vtable_offset == x2.vtable_offset && (*this)(x1.type_id, x2.type_id); } diff --git a/fidelity.cc b/fidelity.cc index e0da3fd..89c3c8c 100644 --- a/fidelity.cc +++ b/fidelity.cc @@ -19,7 +19,6 @@ #include "fidelity.h" -#include <algorithm> #include <map> #include <ostream> #include <set> @@ -35,48 +34,16 @@ namespace stg { namespace { -const std::unordered_map<SymbolFidelityTransition, FidelityDiffSeverity> - kSymbolTransitionSeverity = { - {{SymbolFidelity::ABSENT, SymbolFidelity::UNTYPED}, - FidelityDiffSeverity::SKIP}, - {{SymbolFidelity::ABSENT, SymbolFidelity::TYPED}, - FidelityDiffSeverity::SKIP}, - {{SymbolFidelity::UNTYPED, SymbolFidelity::ABSENT}, - FidelityDiffSeverity::SKIP}, - {{SymbolFidelity::UNTYPED, SymbolFidelity::TYPED}, - FidelityDiffSeverity::INFO}, - {{SymbolFidelity::TYPED, SymbolFidelity::ABSENT}, - FidelityDiffSeverity::SKIP}, - {{SymbolFidelity::TYPED, SymbolFidelity::UNTYPED}, - FidelityDiffSeverity::WARN}, -}; - -const std::unordered_map<TypeFidelityTransition, FidelityDiffSeverity> - kTypeTransitionSeverity = { - {{TypeFidelity::ABSENT, TypeFidelity::DECLARATION_ONLY}, - FidelityDiffSeverity::WARN}, - {{TypeFidelity::ABSENT, TypeFidelity::FULLY_DEFINED}, - FidelityDiffSeverity::INFO}, - {{TypeFidelity::DECLARATION_ONLY, TypeFidelity::FULLY_DEFINED}, - FidelityDiffSeverity::INFO}, - {{TypeFidelity::DECLARATION_ONLY, TypeFidelity::ABSENT}, - FidelityDiffSeverity::WARN}, - {{TypeFidelity::FULLY_DEFINED, TypeFidelity::ABSENT}, - FidelityDiffSeverity::WARN}, - {{TypeFidelity::FULLY_DEFINED, TypeFidelity::DECLARATION_ONLY}, - FidelityDiffSeverity::WARN}, -}; - struct Fidelity { Fidelity(const Graph& graph, NameCache& name_cache) - : graph(graph), describe(graph, name_cache), seen(graph.Limit()) - {} + : graph(graph), describe(graph, name_cache), seen(Id(0)) { + seen.Reserve(graph.Limit()); + } void operator()(Id); void operator()(const std::vector<Id>&); void operator()(const std::map<std::string, Id>&); - void operator()(const Void&, Id); - void operator()(const Variadic&, Id); + void operator()(const Special&, Id); void operator()(const PointerReference&, Id); void operator()(const PointerToMember&, Id); void operator()(const Typedef&, Id); @@ -117,9 +84,7 @@ void Fidelity::operator()(const std::map<std::string, Id>& x) { } } -void Fidelity::operator()(const Void&, Id) {} - -void Fidelity::operator()(const Variadic&, Id) {} +void Fidelity::operator()(const Special&, Id) {} void Fidelity::operator()(const PointerReference& x, Id) { (*this)(x.pointee_type_id); @@ -214,16 +179,6 @@ std::set<std::string> GetKeys( return keys; } -FidelityDiffSeverity GetTransitionSeverity(SymbolFidelityTransition x) { - return x.first == x.second ? FidelityDiffSeverity::SKIP - : kSymbolTransitionSeverity.at(x); -} - -FidelityDiffSeverity GetTransitionSeverity(TypeFidelityTransition x) { - return x.first == x.second ? FidelityDiffSeverity::SKIP - : kTypeTransitionSeverity.at(x); -} - void InsertTransition(FidelityDiff& diff, SymbolFidelityTransition transition, const std::string& symbol) { diff.symbol_transitions[transition].push_back(symbol); @@ -243,11 +198,7 @@ void InsertTransitions(FidelityDiff& diff, auto it2 = x2.find(key); auto transition = std::make_pair(it1 == x1.end() ? T() : it1->second, it2 == x2.end() ? T() : it2->second); - auto transition_severity = GetTransitionSeverity(transition); - if (transition_severity != FidelityDiffSeverity::SKIP) { - diff.severity = std::max(diff.severity, transition_severity); - InsertTransition(diff, transition, key); - } + InsertTransition(diff, transition, key); } } @@ -283,17 +234,6 @@ std::ostream& operator<<(std::ostream& os, TypeFidelityTransition x) { return os << "type(s) changed from " << x.first << " to " << x.second; } -std::ostream& operator<<(std::ostream& os, FidelityDiffSeverity x) { - switch (x) { - case FidelityDiffSeverity::SKIP: - return os << "NONE"; - case FidelityDiffSeverity::INFO: - return os << "INFO"; - case FidelityDiffSeverity::WARN: - return os << "WARN"; - } -} - FidelityDiff GetFidelityTransitions(const Graph& graph, Id root1, Id root2) { NameCache name_cache; Fidelity fidelity1(graph, name_cache); @@ -25,6 +25,7 @@ #include <ostream> #include <string> #include <unordered_map> +#include <utility> #include <vector> #include "graph.h" @@ -43,12 +44,6 @@ enum class TypeFidelity { FULLY_DEFINED = 2, }; -enum class FidelityDiffSeverity { - SKIP = 0, - INFO = 1, - WARN = 2, -}; - using SymbolFidelityTransition = std::pair<SymbolFidelity, SymbolFidelity>; using TypeFidelityTransition = std::pair<TypeFidelity, TypeFidelity>; @@ -56,7 +51,6 @@ std::ostream& operator<<(std::ostream& os, SymbolFidelity x); std::ostream& operator<<(std::ostream& os, TypeFidelity x); std::ostream& operator<<(std::ostream& os, SymbolFidelityTransition x); std::ostream& operator<<(std::ostream& os, TypeFidelityTransition x); -std::ostream& operator<<(std::ostream& os, FidelityDiffSeverity x); } // namespace stg @@ -85,7 +79,6 @@ struct FidelityDiff { symbol_transitions; std::unordered_map<TypeFidelityTransition, std::vector<std::string>> type_transitions; - FidelityDiffSeverity severity = FidelityDiffSeverity::SKIP; }; FidelityDiff GetFidelityTransitions(const Graph& graph, Id root1, Id root2); diff --git a/symbol_filter.cc b/filter.cc index f1eedaa..d21dbcb 100644 --- a/symbol_filter.cc +++ b/filter.cc @@ -17,7 +17,7 @@ // // Author: Giuliano Procida -#include "symbol_filter.h" +#include "filter.h" #include <fnmatch.h> @@ -32,6 +32,7 @@ #include <sstream> #include <string> #include <string_view> +#include <unordered_set> #include <utility> #include "error.h" @@ -39,16 +40,15 @@ namespace stg { namespace { -SymbolSet ReadAbigail(const std::string& filename) { - static constexpr std::array<std::string_view, 2> section_suffices = { - "symbol_list", - "whitelist", - }; - SymbolSet symbols; +using Items = std::unordered_set<std::string>; + +Items ReadAbigail(const std::string& filename) { + static constexpr std::string_view kSectionSuffix = "list"; + Items items; std::ifstream file(filename); - Check(file.good()) << "error opening symbol file '" << filename << ": " + Check(file.good()) << "error opening filter file '" << filename << ": " << Error(errno); - bool in_symbol_section = false; + bool in_filter_section = false; std::string line; while (std::getline(file, line)) { size_t start = 0; @@ -69,100 +69,97 @@ SymbolSet ReadAbigail(const std::string& filename) { if (start == limit) { continue; } - // See if we are entering a symbol list section. + // See if we are entering a filter list section. if (line[start] == '[' && line[limit - 1] == ']') { std::string_view section(&line[start + 1], limit - start - 2); - bool found = false; - for (const auto& suffix : section_suffices) { - if (section.size() >= suffix.size() - && section.substr(section.size() - suffix.size()) == suffix) { - found = true; - break; - } - } - in_symbol_section = found; + // TODO: use std::string_view::ends_with + const auto section_size = section.size(); + const auto suffix_size = kSectionSuffix.size(); + in_filter_section = section_size >= suffix_size && + section.substr(section_size - suffix_size) == kSectionSuffix; continue; } - // Add symbol. - if (in_symbol_section) { - symbols.insert(std::string(&line[start], limit - start)); + // Add item. + if (in_filter_section) { + items.insert(std::string(&line[start], limit - start)); } } - Check(file.eof()) << "error reading symbol file '" << filename << ": " + Check(file.eof()) << "error reading filter file '" << filename << ": " << Error(errno); - return symbols; + return items; } -// Inverted symbol filter. -class NotFilter : public SymbolFilter { +// Inverted filter. +class NotFilter : public Filter { public: - NotFilter(std::unique_ptr<SymbolFilter> filter) + explicit NotFilter(std::unique_ptr<Filter> filter) : filter_(std::move(filter)) {} - bool operator()(const std::string& symbol) const final { - return !(*filter_)(symbol); + bool operator()(const std::string& item) const final { + return !(*filter_)(item); }; private: - const std::unique_ptr<SymbolFilter> filter_; + const std::unique_ptr<Filter> filter_; }; -// Conjunction of symbol filters. -class AndFilter : public SymbolFilter { +// Conjunction of filters. +class AndFilter : public Filter { public: - AndFilter(std::unique_ptr<SymbolFilter> filter1, - std::unique_ptr<SymbolFilter> filter2) + AndFilter(std::unique_ptr<Filter> filter1, + std::unique_ptr<Filter> filter2) : filter1_(std::move(filter1)), filter2_(std::move(filter2)) {} - bool operator()(const std::string& symbol) const final { - return (*filter1_)(symbol) && (*filter2_)(symbol); + bool operator()(const std::string& item) const final { + return (*filter1_)(item) && (*filter2_)(item); }; private: - const std::unique_ptr<SymbolFilter> filter1_; - const std::unique_ptr<SymbolFilter> filter2_; + const std::unique_ptr<Filter> filter1_; + const std::unique_ptr<Filter> filter2_; }; -// Disjunction of symbol filters. -class OrFilter : public SymbolFilter { +// Disjunction of filters. +class OrFilter : public Filter { public: - OrFilter(std::unique_ptr<SymbolFilter> filter1, - std::unique_ptr<SymbolFilter> filter2) + OrFilter(std::unique_ptr<Filter> filter1, + std::unique_ptr<Filter> filter2) : filter1_(std::move(filter1)), filter2_(std::move(filter2)) {} - bool operator()(const std::string& symbol) const final { - return (*filter1_)(symbol) || (*filter2_)(symbol); + bool operator()(const std::string& item) const final { + return (*filter1_)(item) || (*filter2_)(item); }; private: - const std::unique_ptr<SymbolFilter> filter1_; - const std::unique_ptr<SymbolFilter> filter2_; + const std::unique_ptr<Filter> filter1_; + const std::unique_ptr<Filter> filter2_; }; -// Glob symbol filter. -class GlobFilter : public SymbolFilter { +// Glob filter. +class GlobFilter : public Filter { public: - GlobFilter(const std::string& pattern) : pattern_(pattern) {} - bool operator()(const std::string& symbol) const final { - return fnmatch(pattern_.c_str(), symbol.c_str(), 0) == 0; + explicit GlobFilter(const std::string& pattern) : pattern_(pattern) {} + bool operator()(const std::string& item) const final { + return fnmatch(pattern_.c_str(), item.c_str(), 0) == 0; } private: const std::string pattern_; }; -// Literal symbol list symbol filter. -class SetFilter : public SymbolFilter { +// Literal list filter. +class SetFilter : public Filter { public: - SetFilter(SymbolSet&& symbols) : symbols_(std::move(symbols)) {} - bool operator()(const std::string& symbol) const final { - return symbols_.count(symbol); + explicit SetFilter(Items&& items) + : items_(std::move(items)) {} + bool operator()(const std::string& item) const final { + return items_.count(item) > 0; }; private: - const SymbolSet symbols_; + const Items items_; }; static const char* kTokenCharacters = ":!()&|"; -// Split a symbol filter expression into tokens. +// Split a filter expression into tokens. // // All tokens are just strings, but single characters from kTokenCharacters are // recognised as special syntax. Whitespace can be used between tokens and will @@ -186,7 +183,7 @@ std::queue<std::string> Tokenise(const std::string& filter) { } result.emplace(&*name, it - name); } else { - Die() << "unexpected character in symbol filter: '" << *it; + Die() << "unexpected character in filter: '" << *it; } } @@ -194,10 +191,10 @@ std::queue<std::string> Tokenise(const std::string& filter) { } // The failing parser. -std::unique_ptr<SymbolFilter> Fail( +std::unique_ptr<Filter> Fail( const std::string& message, std::queue<std::string>& tokens) { std::ostringstream os; - os << "syntax error in symbol expression: '" << message << "'; context:"; + os << "syntax error in filter expression: '" << message << "'; context:"; for (size_t i = 0; i < 3; ++i) { os << ' '; if (tokens.empty()) { @@ -210,12 +207,12 @@ std::unique_ptr<SymbolFilter> Fail( Die() << os.str(); } -std::unique_ptr<SymbolFilter> Expression(std::queue<std::string>& tokens); +std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens); -// Parse a symbol filter atom. -std::unique_ptr<SymbolFilter> Atom(std::queue<std::string>& tokens) { +// Parse a filter atom. +std::unique_ptr<Filter> Atom(std::queue<std::string>& tokens) { if (tokens.empty()) { - return Fail("expected a symbol expression", tokens); + return Fail("expected a filter expression", tokens); } auto token = tokens.front(); tokens.pop(); @@ -235,14 +232,14 @@ std::unique_ptr<SymbolFilter> Atom(std::queue<std::string>& tokens) { return std::make_unique<SetFilter>(ReadAbigail(token)); } else { if (std::strchr(kTokenCharacters, token[0])) { - return Fail("expected a symbol glob", tokens); + return Fail("expected a glob token", tokens); } return std::make_unique<GlobFilter>(token); } } -// Parse a symbol filter factor. -std::unique_ptr<SymbolFilter> Factor(std::queue<std::string>& tokens) { +// Parse a filter factor. +std::unique_ptr<Filter> Factor(std::queue<std::string>& tokens) { bool invert = false; while (!tokens.empty() && tokens.front() == "!") { tokens.pop(); @@ -255,8 +252,8 @@ std::unique_ptr<SymbolFilter> Factor(std::queue<std::string>& tokens) { return atom; } -// Parse a symbol filter term. -std::unique_ptr<SymbolFilter> Term(std::queue<std::string>& tokens) { +// Parse a filter term. +std::unique_ptr<Filter> Term(std::queue<std::string>& tokens) { auto factor = Factor(tokens); while (!tokens.empty() && tokens.front() == "&") { tokens.pop(); @@ -265,8 +262,8 @@ std::unique_ptr<SymbolFilter> Term(std::queue<std::string>& tokens) { return factor; } -// Parse a symbol filter expression. -std::unique_ptr<SymbolFilter> Expression(std::queue<std::string>& tokens) { +// Parse a filter expression. +std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens) { auto term = Term(tokens); while (!tokens.empty() && tokens.front() == "|") { tokens.pop(); @@ -277,19 +274,19 @@ std::unique_ptr<SymbolFilter> Expression(std::queue<std::string>& tokens) { } // namespace -// Tokenise and parse a symbol filter expression. -std::unique_ptr<SymbolFilter> MakeSymbolFilter(const std::string& filter) +// Tokenise and parse a filter expression. +std::unique_ptr<Filter> MakeFilter(const std::string& filter) { auto tokens = Tokenise(filter); auto result = Expression(tokens); if (!tokens.empty()) { - return Fail("unexpected junk at end of symbol filter", tokens); + return Fail("unexpected junk at end of filter", tokens); } return result; } -void SymbolFilterUsage(std::ostream& os) { - os << "symbol filter syntax:\n" +void FilterUsage(std::ostream& os) { + os << "filter syntax:\n" << " <filter> ::= <term> | <expression> '|' <term>\n" << " <term> ::= <factor> | <term> '&' <factor>\n" << " <factor> ::= <atom> | '!' <factor>\n" diff --git a/symbol_filter.h b/filter.h index 3db07e5..2612ba6 100644 --- a/symbol_filter.h +++ b/filter.h @@ -17,34 +17,28 @@ // // Author: Giuliano Procida -#ifndef STG_SYMBOL_FILTER_H_ -#define STG_SYMBOL_FILTER_H_ +#ifndef STG_FILTER_H_ +#define STG_FILTER_H_ -#include <iostream> #include <memory> #include <ostream> #include <string> -#include <unordered_set> - -#include "error.h" namespace stg { -using SymbolSet = std::unordered_set<std::string>; - -// Abstract base class for filtering symbols. -class SymbolFilter { +// Abstract base class for filtering. +class Filter { public: - virtual ~SymbolFilter() = default; + virtual ~Filter() = default; // Filter predicate evaluation. - virtual bool operator()(const std::string& symbol) const = 0; + virtual bool operator()(const std::string& item) const = 0; }; -// Tokenise and parse a symbol filter expression. -std::unique_ptr<SymbolFilter> MakeSymbolFilter(const std::string& filter); +// Tokenise and parse a filter expression. +std::unique_ptr<Filter> MakeFilter(const std::string& filter); -void SymbolFilterUsage(std::ostream& os); +void FilterUsage(std::ostream& os); } // namespace stg -#endif // STG_SYMBOL_FILTER_H_ +#endif // STG_FILTER_H_ diff --git a/symbol_filter_test.cc b/filter_test.cc index abdf79a..7666965 100644 --- a/symbol_filter_test.cc +++ b/filter_test.cc @@ -17,7 +17,7 @@ // // Author: Giuliano Procida -#include "symbol_filter.h" +#include "filter.h" #include <sstream> #include <string> @@ -54,7 +54,7 @@ TEST_CASE("bad syntax cases") { for (const auto& expression : cases) { std::ostringstream os; GIVEN("filter: " + expression) { - CHECK_THROWS(stg::MakeSymbolFilter(expression)); + CHECK_THROWS(stg::MakeFilter(expression)); } } } @@ -106,7 +106,7 @@ TEST_CASE("hand-curated cases") { for (const auto& [expression, ins, outs] : cases) { std::ostringstream os; GIVEN("filter: " + expression) { - auto filter = stg::MakeSymbolFilter(expression); + auto filter = stg::MakeFilter(expression); for (const auto& in : ins) { GIVEN("in: " + in) { CHECK((*filter)(in)); diff --git a/fingerprint.cc b/fingerprint.cc index 38851d8..5d87677 100644 --- a/fingerprint.cc +++ b/fingerprint.cc @@ -41,12 +41,15 @@ struct Hasher { non_trivial_scc_size(metrics, "fingerprint.non_trivial_scc_size") {} // Graph function implementation - HashValue operator()(const Void&) { - return hash('O'); - } - - HashValue operator()(const Variadic&) { - return hash('V'); + HashValue operator()(const Special& x) { + switch (x.kind) { + case Special::Kind::VOID: + return hash('O'); + case Special::Kind::VARIADIC: + return hash('V'); + case Special::Kind::NULLPTR: + return hash('L'); + } } HashValue operator()(const PointerReference& x) { diff --git a/fuzz/elf_reader_fuzzer.cc b/fuzz/elf_reader_fuzzer.cc index e4fc087..e2d8f55 100644 --- a/fuzz/elf_reader_fuzzer.cc +++ b/fuzz/elf_reader_fuzzer.cc @@ -34,7 +34,8 @@ extern "C" int LLVMFuzzerTestOneInput(char* data, size_t size) { std::vector<char> data_copy(data, data + size); stg::Graph graph; stg::Metrics metrics; - stg::elf::Read(graph, data_copy.data(), size, stg::ReadOptions(), metrics); + stg::elf::Read(graph, data_copy.data(), size, stg::ReadOptions(), nullptr, + metrics); } catch (const stg::Exception&) { // Pass as this is us catching invalid ELF properly. } @@ -44,17 +44,6 @@ std::ostream& operator<<(std::ostream& os, BaseClass::Inheritance inheritance) { } } -std::ostream& operator<<(std::ostream& os, Method::Kind kind) { - switch (kind) { - case Method::Kind::NON_VIRTUAL: - return os << "non-virtual"; - case Method::Kind::STATIC: - return os << "static"; - case Method::Kind::VIRTUAL: - return os << "virtual"; - } -} - namespace { std::string_view ToString(StructUnion::Kind kind) { @@ -83,10 +83,16 @@ struct hash<stg::Pair> { namespace stg { -struct Void { -}; +struct Special { + enum class Kind { + VOID, + VARIADIC, + NULLPTR, + }; + explicit Special(Kind kind) + : kind(kind) {} -struct Variadic { + Kind kind; }; struct PointerReference { @@ -175,21 +181,17 @@ struct BaseClass { std::ostream& operator<<(std::ostream& os, BaseClass::Inheritance inheritance); struct Method { - enum class Kind { NON_VIRTUAL, STATIC, VIRTUAL }; - Method(const std::string& mangled_name, const std::string& name, Kind kind, - const std::optional<uint64_t> vtable_offset, Id type_id) - : mangled_name(mangled_name), name(name), kind(kind), + Method(const std::string& mangled_name, const std::string& name, + uint64_t vtable_offset, Id type_id) + : mangled_name(mangled_name), name(name), vtable_offset(vtable_offset), type_id(type_id) {} std::string mangled_name; std::string name; - Kind kind; - std::optional<uint64_t> vtable_offset; + uint64_t vtable_offset; Id type_id; }; -std::ostream& operator<<(std::ostream& os, Method::Kind kind); - struct Member { Member(const std::string& name, Id type_id, uint64_t offset, uint64_t bitsize) : name(name), type_id(type_id), offset(offset), bitsize(bitsize) {} @@ -347,12 +349,9 @@ class Graph { if (reference.first != Which::ABSENT) { Die() << "node value already set: " << id; } - if constexpr (std::is_same_v<Node, Void>) { - reference = {Which::VOID, void_.size()}; - void_.emplace_back(std::forward<Args>(args)...); - } else if constexpr (std::is_same_v<Node, Variadic>) { - reference = {Which::VARIADIC, variadic_.size()}; - variadic_.emplace_back(std::forward<Args>(args)...); + if constexpr (std::is_same_v<Node, Special>) { + reference = {Which::SPECIAL, special_.size()}; + special_.emplace_back(std::forward<Args>(args)...); } else if constexpr (std::is_same_v<Node, PointerReference>) { reference = {Which::POINTER_REFERENCE, pointer_reference_.size()}; pointer_reference_.emplace_back(std::forward<Args>(args)...); @@ -435,9 +434,8 @@ class Graph { Result Apply(FunctionObject& function, Id id, Args&&... args); template <typename Function> - void ForEach(Function&& function) const { - const size_t limit = Limit().ix_; - for (size_t ix = 0; ix < limit; ++ix) { + void ForEach(Id start, Id limit, Function&& function) const { + for (size_t ix = start.ix_; ix < limit.ix_; ++ix) { const Id id(ix); if (Is(id)) { function(id); @@ -448,8 +446,7 @@ class Graph { private: enum class Which { ABSENT, - VOID, - VARIADIC, + SPECIAL, POINTER_REFERENCE, POINTER_TO_MEMBER, TYPEDEF, @@ -468,8 +465,7 @@ class Graph { std::vector<std::pair<Which, size_t>> indirection_; - std::vector<Void> void_; - std::vector<Variadic> variadic_; + std::vector<Special> special_; std::vector<PointerReference> pointer_reference_; std::vector<PointerToMember> pointer_to_member_; std::vector<Typedef> typedef_; @@ -492,10 +488,8 @@ Result Graph::Apply(FunctionObject& function, Id id, Args&&... args) const { switch (which) { case Which::ABSENT: Die() << "undefined node: " << id; - case Which::VOID: - return function(void_[ix], std::forward<Args>(args)...); - case Which::VARIADIC: - return function(variadic_[ix], std::forward<Args>(args)...); + case Which::SPECIAL: + return function(special_[ix], std::forward<Args>(args)...); case Which::POINTER_REFERENCE: return function(pointer_reference_[ix], std::forward<Args>(args)...); case Which::POINTER_TO_MEMBER: @@ -538,11 +532,8 @@ Result Graph::Apply2( switch (which1) { case Which::ABSENT: Die() << "undefined nodes: " << id1 << ", " << id2; - case Which::VOID: - return function(void_[ix1], void_[ix2], - std::forward<Args>(args)...); - case Which::VARIADIC: - return function(variadic_[ix1], variadic_[ix2], + case Which::SPECIAL: + return function(special_[ix1], special_[ix2], std::forward<Args>(args)...); case Which::POINTER_REFERENCE: return function(pointer_reference_[ix1], pointer_reference_[ix2], @@ -649,20 +640,28 @@ struct InterfaceKey { // key set limited to allocated Ids. class DenseIdSet { public: - explicit DenseIdSet(Id limit) : ids_(limit.ix_, false) {} + explicit DenseIdSet(Id start) : offset_(start.ix_) {} + void Reserve(Id limit) { + ids_.reserve(limit.ix_ - offset_); + } bool Insert(Id id) { const auto ix = id.ix_; - if (ix >= ids_.size()) { - ids_.resize(ix + 1); + if (ix < offset_) { + Die() << "DenseIdSet: out of range access to " << id; + } + const auto offset_ix = ix - offset_; + if (offset_ix >= ids_.size()) { + ids_.resize(offset_ix + 1, false); } - if (ids_[ix]) { + if (ids_[offset_ix]) { return false; } - ids_[ix] = true; + ids_[offset_ix] = true; return true; } private: + size_t offset_; std::vector<bool> ids_; }; @@ -670,24 +669,27 @@ class DenseIdSet { // but with constant time operations and key set limited to allocated Ids. class DenseIdMapping { public: - explicit DenseIdMapping(Id limit) { - ids_.reserve(limit.ix_); - Populate(limit.ix_); + explicit DenseIdMapping(Id start) : offset_(start.ix_) {} + void Reserve(Id limit) { + ids_.reserve(limit.ix_ - offset_); } Id& operator[](Id id) { const auto ix = id.ix_; + if (ix < offset_) { + Die() << "DenseIdMapping: out of range access to " << id; + } Populate(ix + 1); - return ids_[ix]; + return ids_[ix - offset_]; } private: void Populate(size_t size) { - const auto limit = ids_.size(); - for (size_t ix = limit; ix < size; ++ix) { + for (size_t ix = offset_ + ids_.size(); ix < size; ++ix) { ids_.emplace_back(ix); } } + size_t offset_; std::vector<Id> ids_; }; @@ -111,7 +111,7 @@ struct Hash { } // Hash std::string by constructing a std::string_view. - constexpr HashValue operator()(const std::string& x) const { + HashValue operator()(const std::string& x) const { return (*this)(std::string_view(x)); } @@ -19,9 +19,12 @@ #include "input.h" +#include <memory> + #include "abigail_reader.h" #include "btf_reader.h" #include "elf_reader.h" +#include "filter.h" #include "graph.h" #include "metrics.h" #include "proto_reader.h" @@ -30,7 +33,8 @@ namespace stg { Id Read(Graph& graph, InputFormat format, const char* input, - ReadOptions options, Metrics& metrics) { + ReadOptions options, const std::unique_ptr<Filter>& file_filter, + Metrics& metrics) { switch (format) { case InputFormat::ABI: { Time read(metrics, "read ABI"); @@ -42,7 +46,7 @@ Id Read(Graph& graph, InputFormat format, const char* input, } case InputFormat::ELF: { Time read(metrics, "read ELF"); - return elf::Read(graph, input, options, metrics); + return elf::Read(graph, input, options, file_filter, metrics); } case InputFormat::STG: { Time read(metrics, "read STG"); @@ -20,6 +20,9 @@ #ifndef STG_INPUT_H_ #define STG_INPUT_H_ +#include <memory> + +#include "filter.h" #include "graph.h" #include "metrics.h" #include "reader_options.h" @@ -29,7 +32,8 @@ namespace stg { enum class InputFormat { ABI, BTF, ELF, STG }; Id Read(Graph& graph, InputFormat format, const char* input, - ReadOptions options, Metrics& metrics); + ReadOptions options, const std::unique_ptr<Filter>& file_filter, + Metrics& metrics); } // namespace stg @@ -24,6 +24,8 @@ #include <sstream> #include <string> +#include "graph.h" + namespace stg { Name Name::Add(Side side, Precedence precedence, @@ -113,12 +115,15 @@ Name Describe::operator()(Id id) { return cached; } -Name Describe::operator()(const Void&) { - return Name{"void"}; -} - -Name Describe::operator()(const Variadic&) { - return Name{"..."}; +Name Describe::operator()(const Special& x) { + switch (x.kind) { + case Special::Kind::VOID: + return Name{"void"}; + case Special::Kind::VARIADIC: + return Name{"..."}; + case Special::Kind::NULLPTR: + return Name{"decltype(nullptr)"}; + } } Name Describe::operator()(const PointerReference& x) { @@ -58,8 +58,7 @@ using NameCache = std::unordered_map<Id, Name>; struct Describe { Describe(const Graph& graph, NameCache& names) : graph(graph), names(names) {} Name operator()(Id id); - Name operator()(const Void&); - Name operator()(const Variadic&); + Name operator()(const Special&); Name operator()(const PointerReference&); Name operator()(const PointerToMember&); Name operator()(const Typedef&); diff --git a/proto_reader.cc b/proto_reader.cc index b20813f..c7eacb7 100644 --- a/proto_reader.cc +++ b/proto_reader.cc @@ -55,6 +55,7 @@ struct Transformer { void AddNodes(const google::protobuf::RepeatedPtrField<ProtoType>&); void AddNode(const Void&); void AddNode(const Variadic&); + void AddNode(const Special&); void AddNode(const PointerReference&); void AddNode(const PointerToMember&); void AddNode(const Typedef&); @@ -77,11 +78,11 @@ struct Transformer { template <typename GetKey> std::map<std::string, Id> Transform(GetKey, const google::protobuf::RepeatedField<uint32_t>&); + stg::Special::Kind Transform(Special::Kind); stg::PointerReference::Kind Transform(PointerReference::Kind); stg::Qualifier Transform(Qualified::Qualifier); stg::Primitive::Encoding Transform(Primitive::Encoding); stg::BaseClass::Inheritance Transform(BaseClass::Inheritance); - stg::Method::Kind Transform(Method::Kind); stg::StructUnion::Kind Transform(StructUnion::Kind); stg::ElfSymbol::SymbolType Transform(ElfSymbol::SymbolType); stg::ElfSymbol::Binding Transform(ElfSymbol::Binding); @@ -98,8 +99,9 @@ struct Transformer { }; Id Transformer::Transform(const proto::STG& x) { - AddNodes(x.void_()); - AddNodes(x.variadic()); + AddNodes(x.void_()); // deprecated + AddNodes(x.variadic()); // deprecated + AddNodes(x.special()); AddNodes(x.pointer_reference()); AddNodes(x.pointer_to_member()); AddNodes(x.typedef_()); @@ -134,11 +136,15 @@ void Transformer::AddNodes(const google::protobuf::RepeatedPtrField<ProtoType>& } void Transformer::AddNode(const Void& x) { - AddNode<stg::Void>(GetId(x.id())); + AddNode<stg::Special>(GetId(x.id()), stg::Special::Kind::VOID); } void Transformer::AddNode(const Variadic& x) { - AddNode<stg::Variadic>(GetId(x.id())); + AddNode<stg::Special>(GetId(x.id()), stg::Special::Kind::VARIADIC); +} + +void Transformer::AddNode(const Special& x) { + AddNode<stg::Special>(GetId(x.id()), x.kind()); } void Transformer::AddNode(const PointerReference& x) { @@ -177,10 +183,8 @@ void Transformer::AddNode(const BaseClass& x) { } void Transformer::AddNode(const Method& x) { - const auto& vtable_offset = - Transform<uint64_t>(x.has_vtable_offset(), x.vtable_offset()); - AddNode<stg::Method>(GetId(x.id()), x.mangled_name(), x.name(), x.kind(), - vtable_offset, GetId(x.type_id())); + AddNode<stg::Method>(GetId(x.id()), x.mangled_name(), x.name(), + x.vtable_offset(), GetId(x.type_id())); } void Transformer::AddNode(const Member& x) { @@ -279,6 +283,19 @@ std::map<std::string, Id> Transformer::Transform( return result; } +stg::Special::Kind Transformer::Transform(Special::Kind x) { + switch (x) { + case Special::VOID: + return stg::Special::Kind::VOID; + case Special::VARIADIC: + return stg::Special::Kind::VARIADIC; + case Special::NULLPTR: + return stg::Special::Kind::NULLPTR; + default: + Die() << "unknown Special::Kind " << x; + } +} + stg::PointerReference::Kind Transformer::Transform(PointerReference::Kind x) { switch (x) { case PointerReference::POINTER: @@ -341,19 +358,6 @@ stg::BaseClass::Inheritance Transformer::Transform(BaseClass::Inheritance x) { } } -stg::Method::Kind Transformer::Transform(Method::Kind x) { - switch (x) { - case Method::NON_VIRTUAL: - return stg::Method::Kind::NON_VIRTUAL; - case Method::STATIC: - return stg::Method::Kind::STATIC; - case Method::VIRTUAL: - return stg::Method::Kind::VIRTUAL; - default: - Die() << "unknown Method::Kind " << x; - } -} - stg::StructUnion::Kind Transformer::Transform(StructUnion::Kind x) { switch (x) { case StructUnion::STRUCT: @@ -434,7 +438,7 @@ Type Transformer::Transform(const Type& x) { return x; } -const std::array<uint32_t, 2> kSupportedFormatVersions = {0, 1}; +const std::array<uint32_t, 3> kSupportedFormatVersions = {0, 1, 2}; void CheckFormatVersion(uint32_t version, std::optional<std::string> path) { Check(std::count(kSupportedFormatVersions.begin(), diff --git a/proto_reader.h b/proto_reader.h index f8f6551..7639bf1 100644 --- a/proto_reader.h +++ b/proto_reader.h @@ -20,16 +20,10 @@ #ifndef STG_PROTO_READER_H_ #define STG_PROTO_READER_H_ -#include <cstdint> -#include <istream> -#include <optional> #include <string> #include <string_view> -#include <unordered_map> -#include <vector> #include "graph.h" -#include "stg.pb.h" namespace stg { namespace proto { diff --git a/proto_writer.cc b/proto_writer.cc index 4110908..741064d 100644 --- a/proto_writer.cc +++ b/proto_writer.cc @@ -61,8 +61,7 @@ struct Transform { uint32_t operator()(Id); - void operator()(const stg::Void&, uint32_t); - void operator()(const stg::Variadic&, uint32_t); + void operator()(const stg::Special&, uint32_t); void operator()(const stg::PointerReference&, uint32_t); void operator()(const stg::PointerToMember&, uint32_t); void operator()(const stg::Typedef&, uint32_t); @@ -78,11 +77,11 @@ struct Transform { void operator()(const stg::ElfSymbol&, uint32_t); void operator()(const stg::Interface&, uint32_t); + Special::Kind operator()(stg::Special::Kind); PointerReference::Kind operator()(stg::PointerReference::Kind); Qualified::Qualifier operator()(stg::Qualifier); Primitive::Encoding operator()(stg::Primitive::Encoding); BaseClass::Inheritance operator()(stg::BaseClass::Inheritance); - Method::Kind operator()(stg::Method::Kind); StructUnion::Kind operator()(stg::StructUnion::Kind); ElfSymbol::SymbolType operator()(stg::ElfSymbol::SymbolType); ElfSymbol::Binding operator()(stg::ElfSymbol::Binding); @@ -116,13 +115,10 @@ uint32_t Transform<MapId>::operator()(Id id) { } template <typename MapId> -void Transform<MapId>::operator()(const stg::Void&, uint32_t id) { - stg.add_void_()->set_id(id); -} - -template <typename MapId> -void Transform<MapId>::operator()(const stg::Variadic&, uint32_t id) { - stg.add_variadic()->set_id(id); +void Transform<MapId>::operator()(const stg::Special& x, uint32_t id) { + auto& special = *stg.add_special(); + special.set_id(id); + special.set_kind((*this)(x.kind)); } template <typename MapId> @@ -191,10 +187,7 @@ void Transform<MapId>::operator()(const stg::Method& x, uint32_t id) { method.set_id(id); method.set_mangled_name(x.mangled_name); method.set_name(x.name); - method.set_kind((*this)(x.kind)); - if (x.vtable_offset) { - method.set_vtable_offset(*x.vtable_offset); - } + method.set_vtable_offset(x.vtable_offset); method.set_type_id((*this)(x.type_id)); } @@ -310,6 +303,19 @@ PointerReference::Kind Transform<MapId>::operator()( } template <typename MapId> +Special::Kind Transform<MapId>::operator()( + stg::Special::Kind x) { + switch (x) { + case stg::Special::Kind::VOID: + return Special::VOID; + case stg::Special::Kind::VARIADIC: + return Special::VARIADIC; + case stg::Special::Kind::NULLPTR: + return Special::NULLPTR; + } +} + +template <typename MapId> Qualified::Qualifier Transform<MapId>::operator()(stg::Qualifier x) { switch (x) { case stg::Qualifier::CONST: @@ -357,18 +363,6 @@ BaseClass::Inheritance Transform<MapId>::operator()( } template <typename MapId> -Method::Kind Transform<MapId>::operator()(stg::Method::Kind x) { - switch (x) { - case stg::Method::Kind::NON_VIRTUAL: - return Method::NON_VIRTUAL; - case stg::Method::Kind::STATIC: - return Method::STATIC; - case stg::Method::Kind::VIRTUAL: - return Method::VIRTUAL; - } -} - -template <typename MapId> StructUnion::Kind Transform<MapId>::operator()(stg::StructUnion::Kind x) { switch (x) { case stg::StructUnion::Kind::STRUCT: @@ -463,12 +457,14 @@ class HexPrinter : public google::protobuf::TextFormat::FastFieldValuePrinter { uint32_t value, google::protobuf::TextFormat::BaseTextGenerator* generator) const override { std::ostringstream os; - os << "0x" << std::hex << std::setw(8) << std::setfill('0') << value; + // 0x01234567 + os << std::showbase << std::hex << std::setfill('0') << std::internal + << std::setw(10) << value; generator->PrintString(os.str()); } }; -const uint32_t kWrittenFormatVersion = 1; +const uint32_t kWrittenFormatVersion = 2; } // namespace diff --git a/proto_writer.h b/proto_writer.h index 59e2b38..ac78111 100644 --- a/proto_writer.h +++ b/proto_writer.h @@ -23,7 +23,6 @@ #include <ostream> #include "graph.h" -#include "stg.pb.h" namespace stg { namespace proto { diff --git a/reporting.cc b/reporting.cc index 4813f59..5c3c02c 100644 --- a/reporting.cc +++ b/reporting.cc @@ -36,8 +36,8 @@ #include "comparison.h" #include "error.h" -#include "graph.h" #include "fidelity.h" +#include "graph.h" #include "post_processing.h" namespace stg { @@ -430,23 +430,26 @@ void Report(const Reporting& reporting, const Comparison& comparison, } } -void FidelityDiff(const stg::FidelityDiff& diff, std::ostream& output) { - auto print_bucket = [&diff, &output](auto&& from, auto&& to) { +bool FidelityDiff(const stg::FidelityDiff& diff, std::ostream& output) { + bool diffs_reported = false; + auto print_bucket = [&diff, &output, &diffs_reported](auto&& from, + auto&& to) { auto transition = std::make_pair(from, to); if constexpr (std::is_same_v<decltype(from), SymbolFidelity&&>) { auto it = diff.symbol_transitions.find(transition); if (it != diff.symbol_transitions.end()) { PrintFidelityReportBucket(transition, it->second, output); + diffs_reported = true; } } else if constexpr (std::is_same_v<decltype(from), TypeFidelity&&>) { auto it = diff.type_transitions.find(transition); if (it != diff.type_transitions.end()) { PrintFidelityReportBucket(transition, it->second, output); + diffs_reported = true; } } }; - output << "SEVERITY: " << diff.severity << "\n\n"; print_bucket(TypeFidelity::FULLY_DEFINED, TypeFidelity::ABSENT); print_bucket(TypeFidelity::DECLARATION_ONLY, TypeFidelity::ABSENT); print_bucket(TypeFidelity::FULLY_DEFINED, TypeFidelity::DECLARATION_ONLY); @@ -455,6 +458,7 @@ void FidelityDiff(const stg::FidelityDiff& diff, std::ostream& output) { print_bucket(TypeFidelity::DECLARATION_ONLY, TypeFidelity::FULLY_DEFINED); print_bucket(SymbolFidelity::UNTYPED, SymbolFidelity::TYPED); print_bucket(TypeFidelity::ABSENT, TypeFidelity::FULLY_DEFINED); + return diffs_reported; } } // namespace reporting diff --git a/reporting.h b/reporting.h index 8291536..edee19e 100644 --- a/reporting.h +++ b/reporting.h @@ -21,9 +21,9 @@ #define STG_REPORTING_H_ #include <cstddef> -#include <string_view> #include <optional> #include <ostream> +#include <string_view> #include "comparison.h" #include "fidelity.h" @@ -54,7 +54,7 @@ struct Reporting { void Report(const Reporting&, const Comparison&, std::ostream&); -void FidelityDiff(const stg::FidelityDiff&, std::ostream&); +bool FidelityDiff(const stg::FidelityDiff&, std::ostream&); } // namespace reporting } // namespace stg diff --git a/reporting_test.cc b/reporting_test.cc index a33dfb3..f41db1b 100644 --- a/reporting_test.cc +++ b/reporting_test.cc @@ -36,10 +36,18 @@ std::string filename_to_path(const std::string& f) { TEST_CASE("fidelity diff") { stg::FidelityDiff diff = { - .symbol_transitions = {{{SymbolFidelity::TYPED, SymbolFidelity::UNTYPED}, - {"symbol1", "symbol2"}}, - {{SymbolFidelity::UNTYPED, SymbolFidelity::TYPED}, - {"symbol3"}}}, + .symbol_transitions = + { + {{SymbolFidelity::TYPED, SymbolFidelity::UNTYPED}, + {"symbol1", "symbol2"}}, + {{SymbolFidelity::UNTYPED, SymbolFidelity::TYPED}, {"symbol3"}}, + {{SymbolFidelity::ABSENT, SymbolFidelity::UNTYPED}, + {"symbol4", "symbol5"}}, + {{SymbolFidelity::ABSENT, SymbolFidelity::TYPED}, {"symbol6"}}, + {{SymbolFidelity::TYPED, SymbolFidelity::ABSENT}, + {"symbol7", "symbol8"}}, + {{SymbolFidelity::UNTYPED, SymbolFidelity::ABSENT}, {"symbol9"}}, + }, .type_transitions = {{{TypeFidelity::FULLY_DEFINED, TypeFidelity::ABSENT}, {"struct s1", "union u1"}}, @@ -53,11 +61,10 @@ TEST_CASE("fidelity diff") { {"enum e2", "union u3"}}, {{TypeFidelity::ABSENT, TypeFidelity::FULLY_DEFINED}, {"struct s3"}}}, - .severity = FidelityDiffSeverity::WARN, }; std::ostringstream report; - reporting::FidelityDiff(diff, report); + CHECK(reporting::FidelityDiff(diff, report)); std::ifstream expected_report_file(filename_to_path("fidelity_diff_report")); std::ostringstream expected_report; diff --git a/stable_hash.cc b/stable_hash.cc index 88253f9..8621aed 100644 --- a/stable_hash.cc +++ b/stable_hash.cc @@ -68,12 +68,15 @@ HashValue StableHash::operator()(Id id) { return it->second; } -HashValue StableHash::operator()(const Void&) { - return hash_("void"); -} - -HashValue StableHash::operator()(const Variadic&) { - return hash_("variadic"); +HashValue StableHash::operator()(const Special& x) { + switch (x.kind) { + case Special::Kind::VOID: + return hash_("void"); + case Special::Kind::VARIADIC: + return hash_("variadic"); + case Special::Kind::NULLPTR: + return hash_("nullptr"); + } } HashValue StableHash::operator()(const PointerReference& x) { @@ -110,7 +113,7 @@ HashValue StableHash::operator()(const BaseClass& x) { } HashValue StableHash::operator()(const Method& x) { - return hash_(x.mangled_name, static_cast<uint32_t>(x.kind)); + return hash_(x.mangled_name); } HashValue StableHash::operator()(const Member& x) { diff --git a/stable_hash.h b/stable_hash.h index 4e76af0..1732610 100644 --- a/stable_hash.h +++ b/stable_hash.h @@ -35,8 +35,7 @@ class StableHash { explicit StableHash(const Graph& graph) : graph_(graph) {} HashValue operator()(Id); - HashValue operator()(const Void&); - HashValue operator()(const Variadic&); + HashValue operator()(const Special&); HashValue operator()(const PointerReference&); HashValue operator()(const PointerToMember&); HashValue operator()(const Typedef&); @@ -31,13 +31,13 @@ #include "deduplication.h" #include "error.h" +#include "filter.h" #include "fingerprint.h" #include "graph.h" #include "input.h" #include "metrics.h" #include "proto_writer.h" #include "reader_options.h" -#include "symbol_filter.h" #include "type_resolution.h" #include "unification.h" @@ -55,26 +55,33 @@ struct GetInterface { } }; -Id Merge(Graph& graph, const std::vector<Id>& roots) { +Id Merge(Graph& graph, const std::vector<Id>& roots, Metrics& metrics) { + // this rewrites the graph on destruction + Unification unification(graph, Id(0), metrics); + unification.Reserve(graph.Limit()); std::map<std::string, Id> symbols; + std::map<std::string, Id> types; GetInterface get; for (auto root : roots) { const auto& interface = graph.Apply<Interface&>(get, root); - // TODO: Implement merging interfaces with type roots. - if (!interface.types.empty()) { - Die() << "merging interfaces with type roots is not yet supported"; - } for (const auto& x : interface.symbols) { if (!symbols.insert(x).second) { Die() << "merge failed with duplicate symbol: " << x.first; } } + // TODO: test type roots merge + for (const auto& x : interface.types) { + const auto [it, inserted] = types.insert(x); + if (!inserted && !unification.Unify(x.second, it->second)) { + Die() << "merge failed with type conflict: " << x.first; + } + } graph.Remove(root); } - return graph.Add<Interface>(symbols); + return graph.Add<Interface>(symbols, types); } -void Filter(Graph& graph, Id root, const SymbolFilter& filter) { +void FilterSymbols(Graph& graph, Id root, const Filter& filter) { std::map<std::string, Id> symbols; GetInterface get; auto& interface = graph.Apply<Interface&>(get, root); @@ -109,7 +116,8 @@ int main(int argc, char* argv[]) { // Process arguments. bool opt_metrics = false; bool opt_keep_duplicates = false; - std::unique_ptr<stg::SymbolFilter> opt_symbols; + std::unique_ptr<stg::Filter> opt_file_filter; + std::unique_ptr<stg::Filter> opt_symbol_filter; stg::ReadOptions opt_read_options; stg::InputFormat opt_input_format = stg::InputFormat::ABI; std::vector<const char*> inputs; @@ -119,7 +127,9 @@ int main(int argc, char* argv[]) { {"info", no_argument, nullptr, 'i' }, {"keep-duplicates", no_argument, nullptr, 'd' }, {"types", no_argument, nullptr, 't' }, + {"file-filter", required_argument, nullptr, 'F' }, {"symbols", required_argument, nullptr, 'S' }, + {"symbol-filter", required_argument, nullptr, 'S' }, {"abi", no_argument, nullptr, 'a' }, {"btf", no_argument, nullptr, 'b' }, {"elf", no_argument, nullptr, 'e' }, @@ -134,17 +144,18 @@ int main(int argc, char* argv[]) { << " [-i|--info]\n" << " [-d|--keep-duplicates]\n" << " [-t|--types]\n" - << " [-S|--symbols <filter>]\n" + << " [-F|--file-filter <filter>]\n" + << " [-S|--symbols|--symbol-filter <filter>]\n" << " [--skip-dwarf]\n" << " [-a|--abi|-b|--btf|-e|--elf|-s|--stg] [file] ...\n" << " [{-o|--output} {filename|-}] ...\n" << "implicit defaults: --abi\n"; - stg::SymbolFilterUsage(std::cerr); + stg::FilterUsage(std::cerr); return 1; }; while (true) { int ix; - const int c = getopt_long(argc, argv, "-midtS:abeso:", opts, &ix); + const int c = getopt_long(argc, argv, "-midtS:F:abeso:", opts, &ix); if (c == -1) { break; } @@ -162,8 +173,11 @@ int main(int argc, char* argv[]) { case 't': opt_read_options.Set(stg::ReadOptions::TYPE_ROOTS); break; + case 'F': + opt_file_filter = stg::MakeFilter(argument); + break; case 'S': - opt_symbols = stg::MakeSymbolFilter(argument); + opt_symbol_filter = stg::MakeFilter(argument); break; case 'a': opt_input_format = stg::InputFormat::ABI; @@ -201,15 +215,18 @@ int main(int argc, char* argv[]) { roots.reserve(inputs.size()); for (auto input : inputs) { roots.push_back(stg::Read(graph, opt_input_format, input, - opt_read_options, metrics)); + opt_read_options, opt_file_filter, + metrics)); } - stg::Id root = roots.size() == 1 ? roots[0] : stg::Merge(graph, roots); - if (opt_symbols) { - stg::Filter(graph, root, *opt_symbols); + stg::Id root = + roots.size() == 1 ? roots[0] : stg::Merge(graph, roots, metrics); + if (opt_symbol_filter) { + stg::FilterSymbols(graph, root, *opt_symbol_filter); } if (!opt_keep_duplicates) { { - stg::Unification unification(graph, metrics); + stg::Unification unification(graph, stg::Id(0), metrics); + unification.Reserve(graph.Limit()); stg::ResolveTypes(graph, unification, {root}, metrics); unification.Update(root); } @@ -42,14 +42,28 @@ syntax = "proto3"; package stg.proto; +// deprecated message Void { fixed32 id = 1; } +// deprecated message Variadic { fixed32 id = 1; } +message Special { + enum Kind { + KIND_UNSPECIFIED = 0; + VOID = 1; + VARIADIC = 2; + NULLPTR = 3; + } + + fixed32 id = 1; + Kind kind = 2; +} + message PointerReference { enum Kind { KIND_UNSPECIFIED = 0; @@ -129,19 +143,11 @@ message BaseClass { } message Method { - enum Kind { - KIND_UNSPECIFIED = 0; - NON_VIRTUAL = 1; - STATIC = 2; - VIRTUAL = 3; - } - fixed32 id = 1; string mangled_name = 2; string name = 3; - Kind kind = 4; - optional uint64 vtable_offset = 5; - fixed32 type_id = 6; + uint64 vtable_offset = 4; + fixed32 type_id = 5; } message Member { @@ -252,19 +258,20 @@ message STG { fixed32 root_id = 2; repeated Void void = 3; repeated Variadic variadic = 4; - repeated PointerReference pointer_reference = 5; - repeated PointerToMember pointer_to_member = 6; - repeated Typedef typedef = 7; - repeated Qualified qualified = 8; - repeated Primitive primitive = 9; - repeated Array array = 10; - repeated BaseClass base_class = 11; - repeated Method method = 12; - repeated Member member = 13; - repeated StructUnion struct_union = 14; - repeated Enumeration enumeration = 15; - repeated Function function = 16; - repeated ElfSymbol elf_symbol = 17; - repeated Symbols symbols = 18; - repeated Interface interface = 19; + repeated Special special = 5; + repeated PointerReference pointer_reference = 6; + repeated PointerToMember pointer_to_member = 7; + repeated Typedef typedef = 8; + repeated Qualified qualified = 9; + repeated Primitive primitive = 10; + repeated Array array = 11; + repeated BaseClass base_class = 12; + repeated Method method = 13; + repeated Member member = 14; + repeated StructUnion struct_union = 15; + repeated Enumeration enumeration = 16; + repeated Function function = 17; + repeated ElfSymbol elf_symbol = 18; + repeated Symbols symbols = 19; + repeated Interface interface = 20; } @@ -54,7 +54,8 @@ std::vector<stg::Id> Read(const Inputs& inputs, stg::Graph& graph, stg::ReadOptions options, stg::Metrics& metrics) { std::vector<stg::Id> roots; for (const auto& [format, filename] : inputs) { - roots.push_back(stg::Read(graph, format, filename, options, metrics)); + roots.push_back(stg::Read(graph, format, filename, options, nullptr, + metrics)); } return roots; } @@ -62,15 +63,15 @@ std::vector<stg::Id> Read(const Inputs& inputs, stg::Graph& graph, int RunFidelity(const char* filename, const stg::Graph& graph, const std::vector<stg::Id>& roots) { std::ofstream output(filename); - auto fidelity_diff = stg::GetFidelityTransitions(graph, roots[0], roots[1]); - stg::reporting::FidelityDiff(fidelity_diff, output); + const auto fidelity_diff = + stg::GetFidelityTransitions(graph, roots[0], roots[1]); + const bool diffs_reported = + stg::reporting::FidelityDiff(fidelity_diff, output); output << std::flush; if (!output) { stg::Die() << "error writing to " << '\'' << filename << '\''; } - return fidelity_diff.severity == stg::FidelityDiffSeverity::WARN - ? kFidelityChange - : 0; + return diffs_reported ? kFidelityChange : 0; } int RunExact(const Inputs& inputs, stg::ReadOptions options, diff --git a/stgdiff_test.cc b/stgdiff_test.cc index 7ae2da2..72c63d8 100644 --- a/stgdiff_test.cc +++ b/stgdiff_test.cc @@ -25,6 +25,7 @@ #include <string> #include <catch2/catch.hpp> +#include "comparison.h" #include "graph.h" #include "input.h" #include "metrics.h" @@ -52,7 +53,7 @@ stg::Id Read(stg::Graph& graph, stg::InputFormat format, const std::string& input, stg::Metrics& metrics) { const stg::ReadOptions opt_read_options(stg::ReadOptions::SKIP_DWARF); return stg::Read(graph, format, filename_to_path(input).c_str(), - opt_read_options, metrics); + opt_read_options, nullptr, metrics); } TEST_CASE("ignore") { @@ -166,22 +167,31 @@ TEST_CASE("ignore") { "empty", true}), IgnoreTestCase( - {"interface addition", + {"CRC change", stg::InputFormat::STG, - "interface_addition_0.stg", + "crc_change_0.stg", stg::InputFormat::STG, - "interface_addition_1.stg", + "crc_change_1.stg", stg::Ignore(), - "interface_addition_small_diff", + "crc_change_small_diff", false}), IgnoreTestCase( - {"type addition", + {"CRC change ignored", stg::InputFormat::STG, - "type_addition_0.stg", + "crc_change_0.stg", stg::InputFormat::STG, - "type_addition_1.stg", + "crc_change_1.stg", + stg::Ignore(stg::Ignore::SYMBOL_CRC), + "empty", + true}), + IgnoreTestCase( + {"interface addition", + stg::InputFormat::STG, + "interface_addition_0.stg", + stg::InputFormat::STG, + "interface_addition_1.stg", stg::Ignore(), - "type_addition_small_diff", + "interface_addition_small_diff", false}), IgnoreTestCase( {"interface addition ignored", @@ -193,6 +203,15 @@ TEST_CASE("ignore") { "empty", true}), IgnoreTestCase( + {"type addition", + stg::InputFormat::STG, + "type_addition_0.stg", + stg::InputFormat::STG, + "type_addition_1.stg", + stg::Ignore(), + "type_addition_small_diff", + false}), + IgnoreTestCase( {"type addition ignored", stg::InputFormat::STG, "type_addition_0.stg", @@ -202,21 +221,21 @@ TEST_CASE("ignore") { "empty", true}), IgnoreTestCase( - {"CRC change", + {"type definition addition", stg::InputFormat::STG, - "crc_change_0.stg", + "type_addition_1.stg", stg::InputFormat::STG, - "crc_change_1.stg", + "type_addition_2.stg", stg::Ignore(), - "crc_change_small_diff", + "type_definition_addition_small_diff", false}), IgnoreTestCase( - {"CRC change ignored", + {"type definition addition ignored", stg::InputFormat::STG, - "crc_change_0.stg", + "type_addition_1.stg", stg::InputFormat::STG, - "crc_change_1.stg", - stg::Ignore(stg::Ignore::SYMBOL_CRC), + "type_addition_2.stg", + stg::Ignore(stg::Ignore::TYPE_DEFINITION_ADDITION), "empty", true}) ); @@ -83,7 +83,8 @@ int main(int argc, char* const argv[]) { try { stg::Graph graph; stg::Metrics metrics; - (void)stg::Read(graph, format, filename, opt_read_options, metrics); + (void)stg::Read(graph, format, filename, opt_read_options, nullptr, + metrics); } catch (const stg::Exception& e) { std::cerr << e.what() << '\n'; return 1; diff --git a/substitution.h b/substitution.h index ebebd4c..5d3ba55 100644 --- a/substitution.h +++ b/substitution.h @@ -62,9 +62,7 @@ struct Substitute { return graph.Apply<void>(*this, id); } - void operator()(Void&) {} - - void operator()(Variadic&) {} + void operator()(Special&) {} void operator()(PointerReference& x) { Update(x.pointee_type_id); diff --git a/testdata/abigail_bad_elf_dwarf_link_0.xml b/testdata/abigail_bad_elf_dwarf_link_0.xml new file mode 100644 index 0000000..3b0cb31 --- /dev/null +++ b/testdata/abigail_bad_elf_dwarf_link_0.xml @@ -0,0 +1,26 @@ +<!-- XML without the zero size object aliasing issue --> +<abi-corpus version='2.1' path='vmlinux' architecture='elf-arm-aarch64'> + <elf-variable-symbols> + <elf-symbol name='vm_node_stat' size='256' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes' crc='0xe5c4cf93'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mm/vmstat.c' language='LANG_C89'> + <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/> + <typedef-decl name='__s64' type-id='1eb56b1e' filepath='include/uapi/asm-generic/int-ll64.h' line='30' column='1' id='49659421'/> + <typedef-decl name='s64' type-id='49659421' filepath='include/asm-generic/int-ll64.h' line='22' column='1' id='9b7c55ef'/> + <class-decl name='atomic64_t' size-in-bits='64' is-struct='yes' naming-typedef-id='28ee064c' visibility='default' filepath='include/linux/types.h' line='176' column='1' id='2fef71f1'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='counter' type-id='9b7c55ef' visibility='default' filepath='include/linux/types.h' line='177' column='1'/> + </data-member> + </class-decl> + <typedef-decl name='atomic64_t' type-id='2fef71f1' filepath='include/linux/types.h' line='178' column='1' id='28ee064c'/> + <typedef-decl name='atomic_long_t' type-id='28ee064c' filepath='include/asm-generic/atomic-long.h' line='12' column='1' id='f22a8abb'/> + <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='infinite' id='a922812c'> + <subrange length='infinite' type-id='7ff19f0f' id='031f2035'/> + </array-type-def> + <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='2048' id='99d12d7b'> + <subrange length='32' type-id='7ff19f0f' id='ae5bde82'/> + </array-type-def> + <var-decl name='vm_numa_stat' type-id='a922812c' mangled-name='vm_numa_stat' visibility='default' filepath='mm/vmstat.c' line='164' column='1' elf-symbol-id='vm_numa_stat'/> + <var-decl name='vm_node_stat' type-id='99d12d7b' mangled-name='vm_node_stat' visibility='default' filepath='mm/vmstat.c' line='165' column='1' elf-symbol-id='vm_node_stat'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_bad_elf_dwarf_link_1.xml b/testdata/abigail_bad_elf_dwarf_link_1.xml new file mode 100644 index 0000000..b8ab134 --- /dev/null +++ b/testdata/abigail_bad_elf_dwarf_link_1.xml @@ -0,0 +1,26 @@ +<!-- XML with the zero size object aliasing issue, mangled-name matches name --> +<abi-corpus version='2.1' path='vmlinux' architecture='elf-arm-aarch64'> + <elf-variable-symbols> + <elf-symbol name='vm_node_stat' size='256' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes' crc='0xe5c4cf93'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mm/vmstat.c' language='LANG_C89'> + <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/> + <typedef-decl name='__s64' type-id='1eb56b1e' filepath='include/uapi/asm-generic/int-ll64.h' line='30' column='1' id='49659421'/> + <typedef-decl name='s64' type-id='49659421' filepath='include/asm-generic/int-ll64.h' line='22' column='1' id='9b7c55ef'/> + <class-decl name='atomic64_t' size-in-bits='64' is-struct='yes' naming-typedef-id='28ee064c' visibility='default' filepath='include/linux/types.h' line='176' column='1' id='2fef71f1'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='counter' type-id='9b7c55ef' visibility='default' filepath='include/linux/types.h' line='177' column='1'/> + </data-member> + </class-decl> + <typedef-decl name='atomic64_t' type-id='2fef71f1' filepath='include/linux/types.h' line='178' column='1' id='28ee064c'/> + <typedef-decl name='atomic_long_t' type-id='28ee064c' filepath='include/asm-generic/atomic-long.h' line='12' column='1' id='f22a8abb'/> + <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='infinite' id='a922812c'> + <subrange length='infinite' type-id='7ff19f0f' id='031f2035'/> + </array-type-def> + <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='2048' id='99d12d7b'> + <subrange length='32' type-id='7ff19f0f' id='ae5bde82'/> + </array-type-def> + <var-decl name='vm_numa_stat' type-id='a922812c' mangled-name='vm_numa_stat' visibility='default' filepath='mm/vmstat.c' line='164' column='1' elf-symbol-id='vm_node_stat'/> + <var-decl name='vm_node_stat' type-id='99d12d7b' mangled-name='vm_node_stat' visibility='default' filepath='mm/vmstat.c' line='165' column='1' elf-symbol-id='vm_node_stat'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_bad_elf_dwarf_link_2.xml b/testdata/abigail_bad_elf_dwarf_link_2.xml new file mode 100644 index 0000000..b3a8c81 --- /dev/null +++ b/testdata/abigail_bad_elf_dwarf_link_2.xml @@ -0,0 +1,26 @@ +<!-- XML with the zero size object aliasing issue, mangled-name matches elf-symbol-id --> +<abi-corpus version='2.1' path='vmlinux' architecture='elf-arm-aarch64'> + <elf-variable-symbols> + <elf-symbol name='vm_node_stat' size='256' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes' crc='0xe5c4cf93'/> + </elf-variable-symbols> + <abi-instr address-size='64' path='mm/vmstat.c' language='LANG_C89'> + <type-decl name='long long int' size-in-bits='64' id='1eb56b1e'/> + <typedef-decl name='__s64' type-id='1eb56b1e' filepath='include/uapi/asm-generic/int-ll64.h' line='30' column='1' id='49659421'/> + <typedef-decl name='s64' type-id='49659421' filepath='include/asm-generic/int-ll64.h' line='22' column='1' id='9b7c55ef'/> + <class-decl name='atomic64_t' size-in-bits='64' is-struct='yes' naming-typedef-id='28ee064c' visibility='default' filepath='include/linux/types.h' line='176' column='1' id='2fef71f1'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='counter' type-id='9b7c55ef' visibility='default' filepath='include/linux/types.h' line='177' column='1'/> + </data-member> + </class-decl> + <typedef-decl name='atomic64_t' type-id='2fef71f1' filepath='include/linux/types.h' line='178' column='1' id='28ee064c'/> + <typedef-decl name='atomic_long_t' type-id='28ee064c' filepath='include/asm-generic/atomic-long.h' line='12' column='1' id='f22a8abb'/> + <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='infinite' id='a922812c'> + <subrange length='infinite' type-id='7ff19f0f' id='031f2035'/> + </array-type-def> + <array-type-def dimensions='1' type-id='f22a8abb' size-in-bits='2048' id='99d12d7b'> + <subrange length='32' type-id='7ff19f0f' id='ae5bde82'/> + </array-type-def> + <var-decl name='vm_numa_stat' type-id='a922812c' mangled-name='vm_node_stat' visibility='default' filepath='mm/vmstat.c' line='164' column='1' elf-symbol-id='vm_node_stat'/> + <var-decl name='vm_node_stat' type-id='99d12d7b' mangled-name='vm_node_stat' visibility='default' filepath='mm/vmstat.c' line='165' column='1' elf-symbol-id='vm_node_stat'/> + </abi-instr> +</abi-corpus> diff --git a/testdata/abigail_duplicate_data_members_1.xml b/testdata/abigail_duplicate_data_members_1.xml index 9c287bc..e750ecb 100644 --- a/testdata/abigail_duplicate_data_members_1.xml +++ b/testdata/abigail_duplicate_data_members_1.xml @@ -15,6 +15,9 @@ <data-member access='public' layout-offset-in-bits='32'> <var-decl name='' type-id='e7f43f72' visibility='default'/> </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='' type-id='e7f43f72' visibility='default'/> + </data-member> </class-decl> <class-decl name='__anonymous_struct__' size-in-bits='32' is-struct='yes' is-anonymous='yes' visibility='default' id='e7f43f72'> <data-member access='public' layout-offset-in-bits='0'> diff --git a/testdata/crc_change_0.stg b/testdata/crc_change_0.stg index 80057d5..d817a53 100644 --- a/testdata/crc_change_0.stg +++ b/testdata/crc_change_0.stg @@ -1,4 +1,4 @@ -version: 0x00000001 +version: 0x00000002 root_id: 0x84ea5130 primitive { id: 0x6720d32f diff --git a/testdata/crc_change_1.stg b/testdata/crc_change_1.stg index fc73858..d4cc7b4 100644 --- a/testdata/crc_change_1.stg +++ b/testdata/crc_change_1.stg @@ -1,4 +1,4 @@ -version: 0x00000001 +version: 0x00000002 root_id: 0x84ea5130 primitive { id: 0x6720d32f diff --git a/testdata/enum_underlying_type_0.stg b/testdata/enum_underlying_type_0.stg index a86a6bc..e66f6e6 100644 --- a/testdata/enum_underlying_type_0.stg +++ b/testdata/enum_underlying_type_0.stg @@ -1,6 +1,7 @@ -root_id: 0x25a8b173 +version: 0x00000002 +root_id: 0x84ea5130 primitive { - id: 0x4585663f + id: 0xa08f335b name: "bob" encoding: UNSIGNED_INTEGER bytesize: 0x00000004 @@ -8,7 +9,7 @@ primitive { enumeration { id: 0x64775884 definition { - underlying_type_id: 0x4585663f + underlying_type_id: 0xa08f335b enumerator { name: "a" value: 1 @@ -23,10 +24,7 @@ elf_symbol { type_id: 0x64775884 full_name: "x" } -symbols { - id: 0x25a8b173 - symbol { - key: "x" - value: 0x7709bd40 - } +interface { + id: 0x84ea5130 + symbol_id: 0x7709bd40 } diff --git a/testdata/enum_underlying_type_1.stg b/testdata/enum_underlying_type_1.stg index e3c444b..e60714d 100644 --- a/testdata/enum_underlying_type_1.stg +++ b/testdata/enum_underlying_type_1.stg @@ -1,6 +1,7 @@ -root_id: 0x25a8b173 +version: 0x00000002 +root_id: 0x84ea5130 primitive { - id: 0x4585663f + id: 0xba1f26f4 name: "alice" encoding: SIGNED_INTEGER bytesize: 0x00000002 @@ -8,7 +9,7 @@ primitive { enumeration { id: 0x64775884 definition { - underlying_type_id: 0x4585663f + underlying_type_id: 0xba1f26f4 enumerator { name: "a" value: 1 @@ -23,10 +24,7 @@ elf_symbol { type_id: 0x64775884 full_name: "x" } -symbols { - id: 0x25a8b173 - symbol { - key: "x" - value: 0x7709bd40 - } +interface { + id: 0x84ea5130 + symbol_id: 0x7709bd40 } diff --git a/testdata/fidelity_diff_0.stg b/testdata/fidelity_diff_0.stg index 061f4df..0228073 100644 --- a/testdata/fidelity_diff_0.stg +++ b/testdata/fidelity_diff_0.stg @@ -1,5 +1,7 @@ +version: 0x00000002 +root_id: 0x84ea5130 struct_union { - id: 11 + id: 0x4fb72e88 kind: STRUCT name: "s1" definition { @@ -7,12 +9,12 @@ struct_union { } } struct_union { - id: 12 + id: 0x66321df5 kind: STRUCT name: "s2" } struct_union { - id: 14 + id: 0x23ff4eea kind: UNION name: "u1" definition { @@ -20,81 +22,69 @@ struct_union { } } struct_union { - id: 15 + id: 0xbb5afcc9 kind: UNION name: "u3" } enumeration { - id: 16 + id: 0xb5790be1 name: "e1" definition { + underlying_type_id: 0x84ea5130 } } enumeration { - id: 17 + id: 0x39cb53cc name: "e2" } elf_symbol { - id: 1 + id: 0x99f1fb6e name: "symbol1" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 11 + type_id: 0x4fb72e88 } elf_symbol { - id: 2 + id: 0xd9b0a065 name: "symbol2" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 12 + type_id: 0x66321df5 } elf_symbol { - id: 3 + id: 0x1b7105d1 name: "symbol3" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT } elf_symbol { - id: 4 + id: 0x5733f9fa name: "symbol4" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 14 + type_id: 0x23ff4eea } elf_symbol { - id: 5 + id: 0x96f0448d name: "symbol5" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 15 + type_id: 0xbb5afcc9 } elf_symbol { - id: 6 + id: 0xd4b3326a name: "symbol6" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 16 + type_id: 0xb5790be1 } elf_symbol { - id: 7 + id: 0x14739ab8 name: "symbol7" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 17 + type_id: 0x39cb53cc } -symbols { - symbol {key: "symbol1" value: 1} - symbol {key: "symbol2" value: 2} - symbol {key: "symbol3" value: 3} - symbol {key: "symbol4" value: 4} - symbol {key: "symbol5" value: 5} - symbol {key: "symbol6" value: 6} - symbol {key: "symbol7" value: 7} +interface { + id: 0x84ea5130 + symbol_id: 0x99f1fb6e + symbol_id: 0xd9b0a065 + symbol_id: 0x1b7105d1 + symbol_id: 0x5733f9fa + symbol_id: 0x96f0448d + symbol_id: 0xd4b3326a + symbol_id: 0x14739ab8 } diff --git a/testdata/fidelity_diff_1.stg b/testdata/fidelity_diff_1.stg index c9b9a75..0d74c98 100644 --- a/testdata/fidelity_diff_1.stg +++ b/testdata/fidelity_diff_1.stg @@ -1,5 +1,7 @@ +version: 0x00000002 +root_id: 0x84ea5130 struct_union { - id: 13 + id: 0x82c2ca27 kind: STRUCT name: "s3" definition { @@ -7,12 +9,12 @@ struct_union { } } struct_union { - id: 14 + id: 0x7a129a94 kind: UNION name: "u2" } struct_union { - id: 15 + id: 0x2099f798 kind: UNION name: "u3" definition { @@ -20,75 +22,63 @@ struct_union { } } enumeration { - id: 16 + id: 0xf80afce3 + name: "e1" +} +enumeration { + id: 0x75396efe name: "e2" definition { + underlying_type_id: 0x84ea5130 } } -enumeration { - id: 17 - name: "e1" -} elf_symbol { - id: 1 + id: 0x99f1fb6e name: "symbol1" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT } elf_symbol { - id: 2 + id: 0xd9b0a065 name: "symbol2" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT } elf_symbol { - id: 3 + id: 0x1b7105d1 name: "symbol3" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 13 + type_id: 0x82c2ca27 } elf_symbol { - id: 4 + id: 0x5733f9fa name: "symbol4" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 14 + type_id: 0x7a129a94 } elf_symbol { - id: 5 + id: 0x96f0448d name: "symbol5" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 15 + type_id: 0x2099f798 } elf_symbol { - id: 6 + id: 0xd4b3326a name: "symbol6" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 16 + type_id: 0x75396efe } elf_symbol { - id: 7 + id: 0x14739ab8 name: "symbol7" symbol_type: OBJECT - binding: GLOBAL - visibility: DEFAULT - type_id: 17 + type_id: 0xf80afce3 } -symbols { - symbol {key: "symbol1" value: 1} - symbol {key: "symbol2" value: 2} - symbol {key: "symbol3" value: 3} - symbol {key: "symbol4" value: 4} - symbol {key: "symbol5" value: 5} - symbol {key: "symbol6" value: 6} - symbol {key: "symbol7" value: 7} +interface { + id: 0x84ea5130 + symbol_id: 0x99f1fb6e + symbol_id: 0xd9b0a065 + symbol_id: 0x1b7105d1 + symbol_id: 0x5733f9fa + symbol_id: 0x96f0448d + symbol_id: 0xd4b3326a + symbol_id: 0x14739ab8 } diff --git a/testdata/fidelity_diff_report b/testdata/fidelity_diff_report index 8a9d888..b21f3e8 100644 --- a/testdata/fidelity_diff_report +++ b/testdata/fidelity_diff_report @@ -1,5 +1,3 @@ -SEVERITY: WARN - 2 type(s) changed from FULLY_DEFINED to ABSENT: struct s1 union u1 diff --git a/testdata/interface_addition_0.stg b/testdata/interface_addition_0.stg index 2071972..e91b30c 100644 --- a/testdata/interface_addition_0.stg +++ b/testdata/interface_addition_0.stg @@ -1,4 +1,4 @@ -version: 0x00000001 +version: 0x00000002 root_id: 0x84ea5130 interface { id: 0x84ea5130 diff --git a/testdata/interface_addition_1.stg b/testdata/interface_addition_1.stg index 1ad5ece..dc9363f 100644 --- a/testdata/interface_addition_1.stg +++ b/testdata/interface_addition_1.stg @@ -1,4 +1,4 @@ -version: 0x00000001 +version: 0x00000002 root_id: 0x84ea5130 primitive { id: 0x6720d32f diff --git a/testdata/member_size_0.stg b/testdata/member_size_0.stg index 9b90b2e..838e97b 100644 --- a/testdata/member_size_0.stg +++ b/testdata/member_size_0.stg @@ -1,4 +1,5 @@ -root_id: 0x25a8b173 +version: 0x00000002 +root_id: 0x84ea5130 primitive { id: 0x6720d32f name: "int" @@ -27,10 +28,7 @@ elf_symbol { type_id: 0x0eecb660 full_name: "x" } -symbols { - id: 0x25a8b173 - symbol { - key: "x" - value: 0x7709bd40 - } +interface { + id: 0x84ea5130 + symbol_id: 0x7709bd40 } diff --git a/testdata/member_size_1.stg b/testdata/member_size_1.stg index d885a46..95bd0b7 100644 --- a/testdata/member_size_1.stg +++ b/testdata/member_size_1.stg @@ -1,4 +1,5 @@ -root_id: 0x25a8b173 +version: 0x00000002 +root_id: 0x84ea5130 primitive { id: 0x6720d32f name: "int" @@ -6,17 +7,17 @@ primitive { bytesize: 0x00000004 } member { - id: 0x5a03caec + id: 0xf4e7319a name: "member" type_id: 0x6720d32f bitsize: 2 } struct_union { - id: 0x0eecb660 + id: 0x255588bd kind: STRUCT definition { bytesize: 4 - member_id: 0x5a03caec + member_id: 0xf4e7319a } } elf_symbol { @@ -24,13 +25,10 @@ elf_symbol { name: "x" is_defined: true symbol_type: OBJECT - type_id: 0x0eecb660 + type_id: 0x255588bd full_name: "x" } -symbols { - id: 0x25a8b173 - symbol { - key: "x" - value: 0x7709bd40 - } +interface { + id: 0x84ea5130 + symbol_id: 0x7709bd40 } diff --git a/testdata/primitive_type_encoding_0.stg b/testdata/primitive_type_encoding_0.stg index 0f53e1c..55403a8 100644 --- a/testdata/primitive_type_encoding_0.stg +++ b/testdata/primitive_type_encoding_0.stg @@ -1,6 +1,7 @@ -root_id: 0x25a8b173 +version: 0x00000002 +root_id: 0x84ea5130 primitive { - id: 0x6720d32f + id: 0xa08f335b name: "bob" encoding: SIGNED_INTEGER bytesize: 0x00000004 @@ -10,13 +11,10 @@ elf_symbol { name: "x" is_defined: true symbol_type: OBJECT - type_id: 0x6720d32f + type_id: 0xa08f335b full_name: "x" } -symbols { - id: 0x25a8b173 - symbol { - key: "x" - value: 0x7709bd40 - } +interface { + id: 0x84ea5130 + symbol_id: 0x7709bd40 } diff --git a/testdata/primitive_type_encoding_1.stg b/testdata/primitive_type_encoding_1.stg index be543a9..365c3a1 100644 --- a/testdata/primitive_type_encoding_1.stg +++ b/testdata/primitive_type_encoding_1.stg @@ -1,6 +1,7 @@ -root_id: 0x25a8b173 +version: 0x00000002 +root_id: 0x84ea5130 primitive { - id: 0x6720d32f + id: 0xa08f335b name: "bob" encoding: UNSIGNED_INTEGER bytesize: 0x00000004 @@ -10,13 +11,10 @@ elf_symbol { name: "x" is_defined: true symbol_type: OBJECT - type_id: 0x6720d32f + type_id: 0xa08f335b full_name: "x" } -symbols { - id: 0x25a8b173 - symbol { - key: "x" - value: 0x7709bd40 - } +interface { + id: 0x84ea5130 + symbol_id: 0x7709bd40 } diff --git a/testdata/qualifier_0.stg b/testdata/qualifier_0.stg index 0e9b75c..05328ef 100644 --- a/testdata/qualifier_0.stg +++ b/testdata/qualifier_0.stg @@ -1,4 +1,5 @@ -root_id: 0x25a8b173 +version: 0x00000002 +root_id: 0x84ea5130 qualified { id: 0xc5d9d969 qualifier: CONST @@ -18,10 +19,7 @@ elf_symbol { type_id: 0xc5d9d969 full_name: "x" } -symbols { - id: 0x25a8b173 - symbol { - key: "x" - value: 0x7709bd40 - } +interface { + id: 0x84ea5130 + symbol_id: 0x7709bd40 } diff --git a/testdata/qualifier_1.stg b/testdata/qualifier_1.stg index ea808b4..cf7c30f 100644 --- a/testdata/qualifier_1.stg +++ b/testdata/qualifier_1.stg @@ -1,6 +1,7 @@ -root_id: 0x25a8b173 +version: 0x00000002 +root_id: 0x84ea5130 qualified { - id: 0xc5d9d969 + id: 0x8dde4646 qualifier: VOLATILE qualified_type_id: 0x6720d32f } @@ -15,13 +16,10 @@ elf_symbol { name: "x" is_defined: true symbol_type: OBJECT - type_id: 0xc5d9d969 + type_id: 0x8dde4646 full_name: "x" } -symbols { - id: 0x25a8b173 - symbol { - key: "x" - value: 0x7709bd40 - } +interface { + id: 0x84ea5130 + symbol_id: 0x7709bd40 } diff --git a/testdata/symbol_list b/testdata/symbol_list index 70343e9..6d062d2 100644 --- a/testdata/symbol_list +++ b/testdata/symbol_list @@ -9,7 +9,7 @@ one # next is with leading and trailing spaces two # indented comment -[not_a_list] +[other_section] bad [symbol_list) bad diff --git a/testdata/type_addition_0.stg b/testdata/type_addition_0.stg index 2071972..e91b30c 100644 --- a/testdata/type_addition_0.stg +++ b/testdata/type_addition_0.stg @@ -1,4 +1,4 @@ -version: 0x00000001 +version: 0x00000002 root_id: 0x84ea5130 interface { id: 0x84ea5130 diff --git a/testdata/type_addition_1.stg b/testdata/type_addition_1.stg index e449dae..961f68f 100644 --- a/testdata/type_addition_1.stg +++ b/testdata/type_addition_1.stg @@ -1,11 +1,11 @@ -version: 0x00000001 +version: 0x00000002 root_id: 0x84ea5130 struct_union { - id: 0xa8a0bc6a + id: 0xb1a9a2d5 kind: UNION name: "U" } interface { id: 0x84ea5130 - type_id: 0xa8a0bc6a + type_id: 0xb1a9a2d5 } diff --git a/testdata/type_addition_2.stg b/testdata/type_addition_2.stg new file mode 100644 index 0000000..21c3cd3 --- /dev/null +++ b/testdata/type_addition_2.stg @@ -0,0 +1,13 @@ +version: 0x00000002 +root_id: 0x84ea5130 +struct_union { + id: 0xfa4f7de5 + kind: UNION + name: "U" + definition { + } +} +interface { + id: 0x84ea5130 + type_id: 0xfa4f7de5 +} diff --git a/testdata/type_definition_addition_small_diff b/testdata/type_definition_addition_small_diff new file mode 100644 index 0000000..936317c --- /dev/null +++ b/testdata/type_definition_addition_small_diff @@ -0,0 +1,3 @@ +type 'union U' changed + was only declared, is now fully defined + diff --git a/type_normalisation.cc b/type_normalisation.cc index 72cc94b..50a59b1 100644 --- a/type_normalisation.cc +++ b/type_normalisation.cc @@ -88,9 +88,7 @@ struct FindQualifiedTypesAndFunctions { } } - void operator()(const Void&, Id) {} - - void operator()(const Variadic&, Id) {} + void operator()(const Special&, Id) {} void operator()(const PointerReference& x, Id) { (*this)(x.pointee_type_id); diff --git a/type_resolution.cc b/type_resolution.cc index bcf5f49..b32d535 100644 --- a/type_resolution.cc +++ b/type_resolution.cc @@ -37,11 +37,13 @@ namespace { struct NamedTypes { NamedTypes(const Graph& graph, Metrics& metrics) : graph(graph), - seen(graph.Limit()), + seen(Id(0)), nodes(metrics, "named_types.nodes"), types(metrics, "named_types.types"), definitions(metrics, "named_types.definitions"), - declarations(metrics, "named_types.declarations") {} + declarations(metrics, "named_types.declarations") { + seen.Reserve(graph.Limit()); + } enum class Tag { STRUCT, UNION, ENUM, TYPEDEF }; using Type = std::pair<Tag, std::string>; @@ -79,9 +81,7 @@ struct NamedTypes { } // Graph function implementation - void operator()(const Void&, Id) {} - - void operator()(const Variadic&, Id) {} + void operator()(const Special&, Id) {} void operator()(const PointerReference& x, Id) { (*this)(x.pointee_type_id); diff --git a/unification.cc b/unification.cc index 4421267..b48c2ee 100644 --- a/unification.cc +++ b/unification.cc @@ -104,12 +104,9 @@ struct Unifier { return result && it1 == end1 && it2 == end2; } - Winner operator()(const Void&, const Void&) { - return Right; - } - - Winner operator()(const Variadic&, const Variadic&) { - return Right; + Winner operator()(const Special& x1, const Special& x2) { + return x1.kind == x2.kind + ? Right : Neither; } Winner operator()(const PointerReference& x1, @@ -160,7 +157,6 @@ struct Unifier { Winner operator()(const Method& x1, const Method& x2) { return x1.mangled_name == x2.mangled_name && x1.name == x2.name - && x1.kind == x2.kind && x1.vtable_offset == x2.vtable_offset && (*this)(x1.type_id, x2.type_id) ? Right : Neither; diff --git a/unification.h b/unification.h index 2e20241..3f6ee9a 100644 --- a/unification.h +++ b/unification.h @@ -34,9 +34,10 @@ namespace stg { // destruction. class Unification { public: - Unification(Graph& graph, Metrics& metrics) + Unification(Graph& graph, Id start, Metrics& metrics) : graph_(graph), - mapping_(graph.Limit()), + start_(start), + mapping_(start), metrics_(metrics), find_query_(metrics, "unification.find_query"), find_halved_(metrics, "unification.find_halved"), @@ -56,7 +57,7 @@ class Unification { Update(id); }; ::stg::Substitute substitute(graph_, remap); - graph_.ForEach([&](Id id) { + graph_.ForEach(start_, graph_.Limit(), [&](Id id) { if (Find(id) != id) { graph_.Remove(id); ++removed; @@ -67,6 +68,10 @@ class Unification { }); } + void Reserve(Id limit) { + mapping_.Reserve(limit); + } + bool Unify(Id id1, Id id2); Id Find(Id id) { @@ -111,6 +116,7 @@ class Unification { private: Graph& graph_; + Id start_; DenseIdMapping mapping_; Metrics& metrics_; Counter find_query_; |