diff options
author | Shawn Willden <swillden@google.com> | 2020-12-15 00:02:24 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2020-12-15 00:02:24 +0000 |
commit | 2c21661224d5643de379b5fc243caf22ded8aa84 (patch) | |
tree | 29614bcd70bb06afbe966510ddb8049a165d4024 | |
parent | e86a87a22a056f01d0255c6f30f2c05463f78e39 (diff) | |
parent | cb173a1f06c9c37bda2e58183b6513cb405b4410 (diff) | |
download | libcppbor-2c21661224d5643de379b5fc243caf22ded8aa84.tar.gz |
Improve Map canonicalization and add Map iterators. am: 03990c2489 am: be17dfffad am: cb173a1f06
Original change: https://android-review.googlesource.com/c/platform/external/libcppbor/+/1515383
MUST ONLY BE SUBMITTED BY AUTOMERGER
Change-Id: Ibd29b6884faa5bd1b7d77d272c51a27405975846
-rw-r--r-- | Android.bp | 18 | ||||
-rw-r--r-- | include/cppbor/cppbor.h | 118 | ||||
-rw-r--r-- | src/cppbor.cpp | 99 | ||||
-rw-r--r-- | src/cppbor_parse.cpp | 15 | ||||
-rw-r--r-- | tests/cppbor_test.cpp | 170 |
5 files changed, 327 insertions, 93 deletions
@@ -12,8 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. +cc_defaults { + name: "libcppbor_defaults", + cflags: [ + "-Wall", + "-Wextra", + "-Werror", + ], +} + cc_library { name: "libcppbor_external", + defaults: [ + "libcppbor_defaults", + ], vendor_available: true, host_supported: true, srcs: [ @@ -31,6 +43,9 @@ cc_library { cc_test { name: "cppbor_test_external", + defaults: [ + "libcppbor_defaults", + ], srcs: [ "tests/cppbor_test.cpp" ], @@ -46,6 +61,9 @@ cc_test { cc_test_host { name: "cppbor_host_test_external", + defaults: [ + "libcppbor_defaults", + ], srcs: [ "tests/cppbor_test.cpp" ], diff --git a/include/cppbor/cppbor.h b/include/cppbor/cppbor.h index 146974e..c33960b 100644 --- a/include/cppbor/cppbor.h +++ b/include/cppbor/cppbor.h @@ -508,6 +508,8 @@ class Map : public Item { public: static constexpr MajorType kMajorType = MAP; + using entry_type = std::pair<std::unique_ptr<Item>, std::unique_ptr<Item>>; + Map() = default; Map(const Map& other) = delete; Map(Map&&) = default; @@ -536,60 +538,80 @@ class Map : public Item { bool isCompound() const override { return true; } - virtual size_t size() const { - assertInvariant(); - return mEntries.size() / 2; - } + virtual size_t size() const { return mEntries.size(); } size_t encodedSize() const override { - return std::accumulate(mEntries.begin(), mEntries.end(), headerSize(size()), - [](size_t sum, auto& entry) { return sum + entry->encodedSize(); }); + return std::accumulate( + mEntries.begin(), mEntries.end(), headerSize(size()), [](size_t sum, auto& entry) { + return sum + entry.first->encodedSize() + entry.second->encodedSize(); + }); } using Item::encode; // Make base versions visible. uint8_t* encode(uint8_t* pos, const uint8_t* end) const override; void encode(EncodeCallback encodeCallback) const override; + /** + * Find and return the value associated with `key`, if any. + * + * If the searched-for `key` is not present, returns `nullptr`. + * + * Note that if the map is canonicalized (sorted), Map::get() peforms a binary search. If your + * map is large and you're searching in it many times, it may be worthwhile to canonicalize it + * to make Map::get() faster. Any use of a method that might modify the map disables the + * speedup. + */ template <typename Key, typename Enable> const std::unique_ptr<Item>& get(Key key) const; - std::pair<const std::unique_ptr<Item>&, const std::unique_ptr<Item>&> operator[]( - size_t index) const { - assertInvariant(); - return {mEntries[index * 2], mEntries[index * 2 + 1]}; - } - - std::pair<std::unique_ptr<Item>&, std::unique_ptr<Item>&> operator[](size_t index) { - assertInvariant(); - return {mEntries[index * 2], mEntries[index * 2 + 1]}; + // Note that use of non-const operator[] marks the map as not canonicalized. + auto& operator[](size_t index) { + mCanonicalized = false; + return mEntries[index]; } + const auto& operator[](size_t index) const { return mEntries[index]; } MajorType type() const override { return kMajorType; } Map* asMap() override { return this; } - // Sorts the map in canonical order, as defined in RFC 7049. Use this before encoding if you - // want canonicalization; cppbor does not canonicalize by default, though the integer encodings - // are always canonical and cppbor does not support indefinite-length encodings, so map order - // canonicalization is the only thing that needs to be done. - // - // Note that this canonicalization algorithm moves the map contents twice, so it isn't - // particularly efficient. Avoid using it unnecessarily on large maps. It does nothing for empty - // or single-entry maps, though, so it's recommended to always call it when you need a canonical - // map, even if the map is known to have less than two entries. That way if a maintainer later - // adds another item canonicalization will be preserved. - Map& canonicalize() &; - Map&& canonicalize() && { - canonicalize(); + /** + * Sorts the map in canonical order, as defined in RFC 7049. Use this before encoding if you + * want canonicalization; cppbor does not canonicalize by default, though the integer encodings + * are always canonical and cppbor does not support indefinite-length encodings, so map order + * canonicalization is the only thing that needs to be done. + * + * @param recurse If set to true, canonicalize() will also walk the contents of the map and + * canonicalize any contained maps as well. + */ + Map& canonicalize(bool recurse = false) &; + Map&& canonicalize(bool recurse = false) && { + canonicalize(recurse); return std::move(*this); } + bool isCanonical() { return mCanonicalized; } + std::unique_ptr<Item> clone() const override; + auto begin() { + mCanonicalized = false; + return mEntries.begin(); + } + auto begin() const { return mEntries.begin(); } + auto end() { + mCanonicalized = false; + return mEntries.end(); + } + auto end() const { return mEntries.end(); } + + // Returns true if a < b, per CBOR map key canonicalization rules. + static bool keyLess(const Item* a, const Item* b); + protected: - std::vector<std::unique_ptr<Item>> mEntries; + std::vector<entry_type> mEntries; private: - void assertInvariant() const; + bool mCanonicalized = false; }; class Semantic : public Item { @@ -872,6 +894,14 @@ std::unique_ptr<Item> makeItem(T v) { return std::unique_ptr<Item>(p); } +inline void map_helper(Map& /* map */) {} + +template <typename Key, typename Value, typename... Rest> +inline void map_helper(Map& map, Key&& key, Value&& value, Rest&&... rest) { + map.add(std::forward<Key>(key), std::forward<Value>(value)); + map_helper(map, std::forward<Rest>(rest)...); +} + } // namespace details template <typename... Args, @@ -899,14 +929,15 @@ template <typename... Args, /* Prevent use as copy ctor */ typename = std::enable_if_t<(sizeof...(Args)) != 1>> Map::Map(Args&&... args) { static_assert((sizeof...(Args)) % 2 == 0, "Map must have an even number of entries"); - mEntries.reserve(sizeof...(args)); - (mEntries.push_back(details::makeItem(std::forward<Args>(args))), ...); + mEntries.reserve(sizeof...(args) / 2); + details::map_helper(*this, std::forward<Args>(args)...); } template <typename Key, typename Value> Map& Map::add(Key&& key, Value&& value) & { - mEntries.push_back(details::makeItem(std::forward<Key>(key))); - mEntries.push_back(details::makeItem(std::forward<Value>(value))); + mEntries.push_back({details::makeItem(std::forward<Key>(key)), + details::makeItem(std::forward<Value>(value))}); + mCanonicalized = false; return *this; } @@ -922,14 +953,21 @@ template <typename Key, typename = std::enable_if_t<std::is_integral_v<Key> || std::is_enum_v<Key> || details::is_text_type_v<Key>::value>> const std::unique_ptr<Item>& Map::get(Key key) const { - assertInvariant(); auto keyItem = details::makeItem(key); - for (size_t i = 0; i < mEntries.size(); i += 2) { - if (*keyItem == *mEntries[i]) { - return mEntries[i + 1]; - } + + if (mCanonicalized) { + // It's sorted, so binary-search it. + auto found = std::lower_bound(begin(), end(), keyItem.get(), + [](const entry_type& entry, const Item* key) { + return keyLess(entry.first.get(), key); + }); + return (found == end() || *found->first != *keyItem) ? kEmptyItemPtr : found->second; + } else { + // Unsorted, do a linear search. + auto found = std::find_if( + begin(), end(), [&](const entry_type& entry) { return *entry.first == *keyItem; }); + return found == end() ? kEmptyItemPtr : found->second; } - return kEmptyItemPtr; } template <typename T> diff --git a/src/cppbor.cpp b/src/cppbor.cpp index 632dd0b..dc34985 100644 --- a/src/cppbor.cpp +++ b/src/cppbor.cpp @@ -67,8 +67,7 @@ bool cborAreAllElementsNonCompound(const Item* compoundItem) { } } else { const Map* map = compoundItem->asMap(); - for (size_t n = 0; n < map->size(); n++) { - auto [keyEntry, valueEntry] = (*map)[n]; + for (auto& [keyEntry, valueEntry] : *map) { switch (keyEntry->type()) { case ARRAY: case MAP: @@ -182,11 +181,9 @@ bool prettyPrintInternal(const Item* item, string& out, size_t indent, size_t ma out.append("{}"); } else { out.append("{\n" + indentString); - for (size_t n = 0; n < map->size(); n++) { + for (auto& [map_key, map_value] : *map) { out.append(" "); - auto [map_key, map_value] = (*map)[n]; - if (!prettyPrintInternal(map_key.get(), out, indent + 2, maxBStrSize, mapKeysToNotPrint)) { return false; @@ -400,17 +397,20 @@ std::unique_ptr<Item> Array::clone() const { bool Map::operator==(const Map& other) const& { return size() == other.size() - // Can't use vector::operator== because the contents are pointers. std::equal lets us - // provide a predicate that does the dereferencing. - && std::equal(mEntries.begin(), mEntries.end(), other.mEntries.begin(), - [](auto& a, auto& b) -> bool { return *a == *b; }); + // Can't use vector::operator== because the contents are pairs of pointers. std::equal + // lets us provide a predicate that does the dereferencing. + && std::equal(begin(), end(), other.begin(), [](auto& a, auto& b) { + return *a.first == *b.first && *a.second == *b.second; + }); } uint8_t* Map::encode(uint8_t* pos, const uint8_t* end) const { pos = encodeHeader(size(), pos, end); if (!pos) return nullptr; for (auto& entry : mEntries) { - pos = entry->encode(pos, end); + pos = entry.first->encode(pos, end); + if (!pos) return nullptr; + pos = entry.second->encode(pos, end); if (!pos) return nullptr; } return pos; @@ -419,71 +419,76 @@ uint8_t* Map::encode(uint8_t* pos, const uint8_t* end) const { void Map::encode(EncodeCallback encodeCallback) const { encodeHeader(size(), encodeCallback); for (auto& entry : mEntries) { - entry->encode(encodeCallback); + entry.first->encode(encodeCallback); + entry.second->encode(encodeCallback); } } -void Map::assertInvariant() const { - CHECK(mEntries.size() % 2 == 0); -} - -bool mapKeyLess(const std::pair<std::unique_ptr<Item>, std::unique_ptr<Item>>& a, - const std::pair<std::unique_ptr<Item>, std::unique_ptr<Item>>& b) { - auto& keyA = a.first; - auto& keyB = b.first; - +bool Map::keyLess(const Item* a, const Item* b) { // CBOR map canonicalization rules are: // 1. If two keys have different lengths, the shorter one sorts earlier. - if (keyA->encodedSize() < keyB->encodedSize()) return true; - if (keyA->encodedSize() > keyB->encodedSize()) return false; + if (a->encodedSize() < b->encodedSize()) return true; + if (a->encodedSize() > b->encodedSize()) return false; // 2. If two keys have the same length, the one with the lower value in (byte-wise) lexical // order sorts earlier. This requires encoding both items. - auto encodedA = keyA->encode(); - auto encodedB = keyB->encode(); + auto encodedA = a->encode(); + auto encodedB = b->encode(); return std::lexicographical_compare(encodedA.begin(), encodedA.end(), // encodedB.begin(), encodedB.end()); } -Map& Map::canonicalize() & { - assertInvariant(); +void recursivelyCanonicalize(std::unique_ptr<Item>& item) { + switch (item->type()) { + case UINT: + case NINT: + case BSTR: + case TSTR: + case SIMPLE: + return; - if (size() < 2) { - // Empty or single-entry map; no need to reorder. - return *this; - } + case ARRAY: + std::for_each(item->asArray()->begin(), item->asArray()->end(), + recursivelyCanonicalize); + return; - // The entries of a Map are stored in a flat vector. We can't easily apply - // std::sort on that, so instead we move all of the entries into a vector of - // std::pair, sort that, then move all of the entries back into the original - // flat vector. - vector<std::pair<std::unique_ptr<Item>, std::unique_ptr<Item>>> temp; - temp.reserve(size()); + case MAP: + item->asMap()->canonicalize(true /* recurse */); + return; - for (size_t i = 0; i < mEntries.size() - 1; i += 2) { - temp.push_back({std::move(mEntries[i]), std::move(mEntries[i + 1])}); + case SEMANTIC: + recursivelyCanonicalize(item->asSemantic()->child()); + return; } +} - std::sort(temp.begin(), temp.end(), mapKeyLess); +Map& Map::canonicalize(bool recurse) & { + if (recurse) { + for (auto& entry : mEntries) { + recursivelyCanonicalize(entry.first); + recursivelyCanonicalize(entry.second); + } + } - mEntries.resize(0); - mEntries.reserve(temp.size() * 2); // Should be a NOP since capacity should be unchanged. - for (auto& entry : temp) { - mEntries.push_back(std::move(entry.first)); - mEntries.push_back(std::move(entry.second)); + if (size() < 2 || mCanonicalized) { + // Trivially or already canonical; do nothing. + return *this; } + std::sort(begin(), end(), + [](auto& a, auto& b) { return keyLess(a.first.get(), b.first.get()); }); + mCanonicalized = true; return *this; } std::unique_ptr<Item> Map::clone() const { - assertInvariant(); auto res = std::make_unique<Map>(); - for (size_t i = 0; i < mEntries.size(); i += 2) { - res->add(mEntries[i]->clone(), mEntries[i + 1]->clone()); + for (auto& [key, value] : *this) { + res->add(key->clone(), value->clone()); } + res->mCanonicalized = mCanonicalized; return res; } diff --git a/src/cppbor_parse.cpp b/src/cppbor_parse.cpp index 488f8c7..2736b71 100644 --- a/src/cppbor_parse.cpp +++ b/src/cppbor_parse.cpp @@ -114,7 +114,7 @@ class IncompleteItem { class IncompleteArray : public Array, public IncompleteItem { public: - IncompleteArray(size_t size) : mSize(size) {} + explicit IncompleteArray(size_t size) : mSize(size) {} // We return the "complete" size, rather than the actual size. size_t size() const override { return mSize; } @@ -130,23 +130,28 @@ class IncompleteArray : public Array, public IncompleteItem { class IncompleteMap : public Map, public IncompleteItem { public: - IncompleteMap(size_t size) : mSize(size) {} + explicit IncompleteMap(size_t size) : mSize(size) {} // We return the "complete" size, rather than the actual size. size_t size() const override { return mSize; } void add(std::unique_ptr<Item> item) override { - mEntries.reserve(mSize * 2); - mEntries.push_back(std::move(item)); + if (mKeyHeldForAdding) { + mEntries.reserve(mSize); + mEntries.push_back({std::move(mKeyHeldForAdding), std::move(item)}); + } else { + mKeyHeldForAdding = std::move(item); + } } private: + std::unique_ptr<Item> mKeyHeldForAdding; size_t mSize; }; class IncompleteSemantic : public Semantic, public IncompleteItem { public: - IncompleteSemantic(uint64_t value) : Semantic(value) {} + explicit IncompleteSemantic(uint64_t value) : Semantic(value) {} // We return the "complete" size, rather than the actual size. size_t size() const override { return 1; } diff --git a/tests/cppbor_test.cpp b/tests/cppbor_test.cpp index 0a8000b..ed4b94e 100644 --- a/tests/cppbor_test.cpp +++ b/tests/cppbor_test.cpp @@ -829,7 +829,7 @@ TEST(CloningTest, Map) { EXPECT_EQ(clone->type(), MAP); EXPECT_NE(clone->asMap(), nullptr); EXPECT_EQ(item, *clone->asMap()); - auto [key, value] = item[0]; + auto& [key, value] = item[0]; std::move(key); std::move(value); std::move(item); @@ -913,6 +913,122 @@ TEST(MapCanonicalizationTest, CanonicalizationTest) { "}"); } +TEST(MapCanonicalizationTest, DecanonicalizationTest) { + Map map; + map.add("hello", 1) + .add("h", 1) + .add(1, 1) + .add(-4, 1) + .add(-5, 1) + .add(2, 1) + .add("hellp", 1) + .add(254, 1) + .add(27, 1); + + EXPECT_FALSE(map.isCanonical()); + map.canonicalize(); + EXPECT_TRUE(map.isCanonical()); + + /* + * Any operation that could potentially mutate the contents of the map should mark it as + * non-canonical. This includes getting non-const iterators or using the non-const [] operator. + */ + + map.begin(); + EXPECT_FALSE(map.isCanonical()); + + map.canonicalize(); + EXPECT_TRUE(map.isCanonical()); + + map.end(); // Non-const map.end() invalidates canonicalization. + EXPECT_FALSE(map.isCanonical()); + + map.canonicalize(); + EXPECT_TRUE(map.isCanonical()); + + map[0]; // Non-const map.operator[]() invalidates canonicalization. + EXPECT_FALSE(map.isCanonical()); +} + +TEST(MapCanonicalizationTest, RecursiveTest) { + auto map = Map() // + .add("hello", 1) + .add("h", 1) + .add(1, 1) + .add(-4, Array( // + 2, 1, + Map() // + .add("b", 1) + .add(Map() // + .add("hello", "goodbye") + .add(1, 9) + .add(0, 3), + Map() // + .add("b", 1) + .add("a", 2)))) + .add(-5, 1) + .add(2, 1) + .add("hellp", 1) + .add(254, 1) + .add(27, 1); + + EXPECT_EQ(prettyPrint(&map), + "{\n" + " 'hello' : 1,\n" + " 'h' : 1,\n" + " 1 : 1,\n" + " -4 : [\n" + " 2,\n" + " 1,\n" + " {\n" + " 'b' : 1,\n" + " {\n" + " 'hello' : 'goodbye',\n" + " 1 : 9,\n" + " 0 : 3,\n" + " } : {\n" + " 'b' : 1,\n" + " 'a' : 2,\n" + " },\n" + " },\n" + " ],\n" + " -5 : 1,\n" + " 2 : 1,\n" + " 'hellp' : 1,\n" + " 254 : 1,\n" + " 27 : 1,\n" + "}"); + + map.canonicalize(true /* recurse */); + + EXPECT_EQ(prettyPrint(&map), + "{\n" + " 1 : 1,\n" + " 2 : 1,\n" + " -4 : [\n" + " 2,\n" + " 1,\n" + " {\n" + " 'b' : 1,\n" + " {\n" + " 0 : 3,\n" + " 1 : 9,\n" + " 'hello' : 'goodbye',\n" + " } : {\n" + " 'a' : 2,\n" + " 'b' : 1,\n" + " },\n" + " },\n" + " ],\n" + " -5 : 1,\n" + " 27 : 1,\n" + " 254 : 1,\n" + " 'h' : 1,\n" + " 'hello' : 1,\n" + " 'hellp' : 1,\n" + "}"); +} + class MockParseClient : public ParseClient { public: MOCK_METHOD4(item, ParseClient*(std::unique_ptr<Item>& item, const uint8_t* hdrBegin, @@ -1557,6 +1673,58 @@ TEST(ArrayIterationTest, BidirectionalTest) { EXPECT_EQ(++iter, array.end()); } +TEST(MapIterationTest, EmptyMap) { + Map map; + + EXPECT_EQ(map.begin(), map.end()); +} + +TEST(MapIterationTest, ForwardTest) { + Map map(1, 2, 3, "hello", -4, 5); + + auto iter = map.begin(); + ASSERT_NE(iter, map.end()); + EXPECT_EQ(*iter->first, Uint(1)); + EXPECT_EQ(*iter->second, Uint(2)); + + ASSERT_NE(++iter, map.end()); + EXPECT_EQ(*iter->first, Uint(3)); + EXPECT_EQ(*(iter++)->second, Tstr("hello")); + + ASSERT_NE(iter, map.end()); + EXPECT_EQ(*iter->first, Nint(-4)); + EXPECT_EQ(*(iter++)->second, Uint(5)); + + EXPECT_EQ(iter, map.end()); +} + +TEST(MapIterationTest, BidirectionalTest) { + Map map(1, 2, 3, "hello", -4, 5); + + auto iter = map.begin(); + ASSERT_NE(iter, map.end()); + EXPECT_EQ(*iter->first, Uint(1)); + EXPECT_EQ(*iter->second, Uint(2)); + + ASSERT_NE(++iter, map.end()); + EXPECT_EQ(*iter->first, Uint(3)); + EXPECT_EQ(*(iter--)->second, Tstr("hello")); + + ASSERT_NE(iter, map.end()); + EXPECT_EQ(*iter->first, Uint(1)); + EXPECT_EQ(*(iter++)->second, Uint(2)); + + ASSERT_NE(iter, map.end()); + EXPECT_EQ(*iter->first, Uint(3)); + EXPECT_EQ(*iter->second, Tstr("hello")); + + ASSERT_NE(++iter, map.end()); + EXPECT_EQ(*iter->first, Nint(-4)); + EXPECT_EQ(*(iter++)->second, Uint(5)); + + EXPECT_EQ(iter, map.end()); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); |