diff options
author | Xin Li <delphij@google.com> | 2023-08-14 15:37:49 -0700 |
---|---|---|
committer | Xin Li <delphij@google.com> | 2023-08-14 15:37:49 -0700 |
commit | 278c52dc5965a73a985cc5ec9d801e05c5521981 (patch) | |
tree | e6bc18176b3a70a107c41e1802ae6335640689d0 | |
parent | 3ca31188d70c72e2879f70fdf52d95e37e1fefea (diff) | |
parent | 26e7ab7854539b9a7ccc23f3cf0c9cbc38ca8c37 (diff) | |
download | image_io-tmp_amf_298295554.tar.gz |
Merge Android U (ab/10368041)tmp_amf_298295554
Bug: 291102124
Merged-In: I2087549f00883c6ce2a3d9833cff5fde547afaf1
Change-Id: Icc1029a8dbfdd2ba801781465f51295a4c38627c
42 files changed, 2557 insertions, 26 deletions
@@ -35,15 +35,17 @@ cc_defaults { } cc_library_headers { - name: "libimage_io-headers", - host_supported: true, - export_include_dirs: ["includes"], + name: "libimage_io-headers", + host_supported: true, + vendor_available: true, + export_include_dirs: ["includes"], } cc_library { name: "libimage_io", host_supported: true, + vendor_available: true, defaults: ["libimage_io-defaults"], header_libs: ["libimage_io-headers"], export_include_dirs: ["includes"], diff --git a/includes/image_io/base/byte_data.h b/includes/image_io/base/byte_data.h index 7bfc97e..bd49517 100644 --- a/includes/image_io/base/byte_data.h +++ b/includes/image_io/base/byte_data.h @@ -126,6 +126,14 @@ class ByteData { return hex_string; } + /// @param value The UInt16 value to convert to a four digit hex string. + /// @return The big endian hex string equivalent of the value. + static std::string UInt162BigEndianHex(UInt16 value) { + std::string hex_string = Byte2Hex((value >> 8) & 0xFF); + hex_string += Byte2Hex(value & 0xFF); + return hex_string; + } + private: Type type_; std::string value_; diff --git a/includes/image_io/base/byte_pointer_data_destination.h b/includes/image_io/base/byte_pointer_data_destination.h new file mode 100644 index 0000000..0442537 --- /dev/null +++ b/includes/image_io/base/byte_pointer_data_destination.h @@ -0,0 +1,44 @@ +#ifndef IMAGE_IO_BASE_BYTE_POINTER_DATA_DESTINATION_H_ // NOLINT +#define IMAGE_IO_BASE_BYTE_POINTER_DATA_DESTINATION_H_ // NOLINT + +#include "image_io/base/data_destination.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataDestination that writes its output to a byte buffer, the pointer to +/// which is supplied by the client along with a size of that buffer. +class BytePointerDataDestination : public DataDestination { + public: + /// Constructs a BytePointerDataDestination using the given buffer and size. + /// @param bytes The buffer to receive the bytes. + /// @param size The size of the buffer to receive the bytes. 
+ BytePointerDataDestination(Byte* bytes, size_t size) + : bytes_(bytes), size_(size), bytes_transferred_(0) {} + BytePointerDataDestination(const BytePointerDataDestination&) = delete; + BytePointerDataDestination& operator=(const BytePointerDataDestination&) = + delete; + + /// @return The number of bytes written to the bytes buffer. + size_t GetBytesTransferred() const override { return bytes_transferred_; } + + void StartTransfer() override; + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override; + void FinishTransfer() override; + + private: + /// The bytes buffer to receive the data. + Byte* bytes_; + + /// The size of the bytes buffer. + size_t size_; + + /// The number of bytes written so far. + size_t bytes_transferred_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_BYTE_POINTER_DATA_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/base/cout_message_writer.h b/includes/image_io/base/cout_message_writer.h index a124ff6..60e0ef3 100644 --- a/includes/image_io/base/cout_message_writer.h +++ b/includes/image_io/base/cout_message_writer.h @@ -8,7 +8,7 @@ namespace photos_editing_formats { namespace image_io { -/// This subclass of MessageWriter writes messages to cout. +/// This subclass of MessageWriter writes messages to std::cout. class CoutMessageWriter : public MessageWriter { public: void WriteMessage(const Message& message) override { diff --git a/includes/image_io/base/data_line_map.h b/includes/image_io/base/data_line_map.h index d934410..f7514e0 100644 --- a/includes/image_io/base/data_line_map.h +++ b/includes/image_io/base/data_line_map.h @@ -26,7 +26,10 @@ class DataLineMap { DataLineMap() : last_line_incomplete_(false) {} /// Returns the number of data lines in the map. 
- size_t GetDataLineCount() const; + size_t GetDataLineCount() const { return data_lines_.size(); } + + /// Returns the data lines + const std::vector<DataLine> GetDataLines() const { return data_lines_; } /// Returns the data line assocated with the location, or one the number of /// which is zero and the range of which is invalid. diff --git a/includes/image_io/base/data_line_map_builder_destination.h b/includes/image_io/base/data_line_map_builder_destination.h new file mode 100644 index 0000000..a6c0195 --- /dev/null +++ b/includes/image_io/base/data_line_map_builder_destination.h @@ -0,0 +1,51 @@ +#ifndef IMAGE_IO_BASE_DATA_LINE_MAP_BUILDER_DESTINATION_H_ // NOLINT +#define IMAGE_IO_BASE_DATA_LINE_MAP_BUILDER_DESTINATION_H_ // NOLINT + +#include "image_io/base/data_destination.h" +#include "image_io/base/data_line_map.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class to build a data line map of the string implied by the transfer +/// range of a data segment before passing the transfer off to an optional +/// next data destination. +class DataLineMapBuilderDestination : public DataDestination { + public: + /// @param data_line_map The data line map to build. + /// @param next_destination An optional next transfer data destination + DataLineMapBuilderDestination(DataLineMap* data_line_map, + DataDestination* next_destination) + : data_line_map_(data_line_map), + next_destination_(next_destination), + bytes_transferred_(0) {} + void StartTransfer() override { + if (next_destination_ != nullptr) { + next_destination_->StartTransfer(); + } + } + void FinishTransfer() override { + if (next_destination_ != nullptr) { + next_destination_->FinishTransfer(); + } + } + TransferStatus Transfer(const DataRange& transfer_range, + const DataSegment& data_segment) override { + bytes_transferred_ += transfer_range.GetLength(); + data_line_map_->FindDataLines(transfer_range, data_segment); + return next_destination_ != nullptr + ? 
next_destination_->Transfer(transfer_range, data_segment) + : kTransferOk; + } + size_t GetBytesTransferred() const override { return bytes_transferred_; } + + private: + DataLineMap* data_line_map_; + DataDestination* next_destination_; + size_t bytes_transferred_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_DATA_LINE_MAP_BUILDER_DESTINATION_H_ // NOLINT diff --git a/includes/image_io/base/data_scanner.h b/includes/image_io/base/data_scanner.h index 0d1af26..67beae9 100644 --- a/includes/image_io/base/data_scanner.h +++ b/includes/image_io/base/data_scanner.h @@ -59,6 +59,15 @@ class DataScanner { /// @return The set of whitespace characters: " \t\n\r". static std::string GetWhitespaceChars(); + /// @return The characters used for base64 encoding and optionally the pad + /// char at the end of the string. + /// @param include_pad_char Whether to include the base64 pad char at the end + /// of the string. + static std::string GetBase64Chars(bool include_pad_char); + + /// @return The character used to pad base64 encoded strings. + static std::string GetBase64PadChar(); + /// @param literal The literal to use for the scanner. /// @return A kLiteral type scanner. static DataScanner CreateLiteralScanner(const std::string& literal); @@ -85,12 +94,26 @@ class DataScanner { /// @return A kOptionalWhitespace type scanner; static DataScanner CreateOptionalWhitespaceScanner(); + /// A function like strspn that accepts the length of string to scan. If the + /// return value, ret, is not slen, then s[ret] is not in scanset. + /// @param s The string to scan + /// @param slen The length of the string to scan + /// @param scanset The set of characters to scan/skip over. + /// @return The number of scanned characters in s that were in scanset. + static size_t ScanChars(const char* s, size_t slen, const char* scanset); + /// @return The type of the scanner. 
Type GetType() const { return type_; } - /// @return A description of the scanner, based on the type. + /// @return A description of the scanner, or one that is based on the type. std::string GetDescription() const; + /// @param description The description to use for the scanner instead of an internal one + /// that is based on the type of scanner. + void SetDescription(const std::string& description) { + description_ = description; + } + /// @return The literal value of a kLiteral or kThroughLiteral type scanner, /// or an empty string otherwise. std::string GetLiteral() const; @@ -121,6 +144,10 @@ class DataScanner { /// Reset the scanner state to the value it had when it was first constructed. void Reset(); + /// @param delta_length The byte count to use to extend the token range end. + /// @return The new length of the token range. + size_t ExtendTokenLength(size_t delta_length); + private: explicit DataScanner(Type type) : DataScanner(type, "") {} DataScanner(Type type, const std::string& literal_or_sentinels) @@ -129,10 +156,6 @@ class DataScanner { scan_call_count_(0), type_(type) {} - /// @param delta_length The byte count to use to extend the token range end. - /// @return The new length of the token range. - size_t ExtendTokenLength(size_t delta_length); - /// The worker functions for scanning each type of literal. /// @param cbytes The pointer value to the buffer at the context's location. /// @param bytes_available The number of bytes available for the scan. @@ -169,6 +192,9 @@ class DataScanner { /// The string used for kLiteral, kThroughLiteral and kSentinel type scanners. std::string literal_or_sentinels_; + /// The custom description of the scanner. + std::string description_; + /// The token range built by one or calls to the Scan() function. 
DataRange token_range_; diff --git a/includes/image_io/base/data_segment.h b/includes/image_io/base/data_segment.h index ec6d584..2673cab 100644 --- a/includes/image_io/base/data_segment.h +++ b/includes/image_io/base/data_segment.h @@ -1,6 +1,7 @@ #ifndef IMAGE_IO_BASE_DATA_SEGMENT_H_ // NOLINT #define IMAGE_IO_BASE_DATA_SEGMENT_H_ // NOLINT +#include <cstring> #include <memory> #include "image_io/base/data_range.h" @@ -97,7 +98,7 @@ class DataSegment { /// @return The location of the start of the string, or the segment's end /// location value. size_t Find(size_t start_location, const char* str) const { - return Find(start_location, str, strlen(str)); + return Find(start_location, str, std::strlen(str)); } /// Finds the location of the string in the data segment. Although a data diff --git a/includes/image_io/base/image_metadata.h b/includes/image_io/base/image_metadata.h new file mode 100644 index 0000000..f2da692 --- /dev/null +++ b/includes/image_io/base/image_metadata.h @@ -0,0 +1,122 @@ +#ifndef IMAGE_IO_BASE_IMAGE_METADATA_H_ // NOLINT +#define IMAGE_IO_BASE_IMAGE_METADATA_H_ // NOLINT + +#include "image_io/base/types.h" + +namespace photos_editing_formats { +namespace image_io { + +/// An enum that represents orientation values for images. The values of this +/// enum correspond exactly to what is defined in the Exif spec: +/// https://cs.corp.google.com/piper///depot/google3/third_party/libexif/ +enum class Orientation { + kNone = 0, + kRotate0 = 1, + kMirrorRotate0 = 2, + kRotate180 = 3, + kMirrorRotate180 = 4, + kMirrorRotate270 = 5, + kRotate90 = 6, + kMirrorRotate90 = 7, + kRotate270 = 8 +}; + +/// @param value The value to check the Orientation validity of. +/// @return Whether the value if cast to an Orientation is legal. 
+inline bool IsLegalOrientation(UInt32 value) { + return value <= static_cast<UInt32>(Orientation::kRotate270); +} + +/// @param value The value to check +/// @return Whether the orientation represents a rotation of 90 or 270 relative +/// to the y=0 line such that the width/height of an image should be swapped. +inline bool HasVerticalRotation(Orientation value) { + return value == Orientation::kMirrorRotate90 || + value == Orientation::kMirrorRotate270 || + value == Orientation::kRotate90 || value == Orientation::kRotate270; +} + +/// A class to hold metadata typically found in an image file. +/// The //photos/editing/formats/image_io:jpeg library has a class to decode +/// the data in an Exif segment of a JPEG file and initialize this object. +class ImageMetadata { + public: + ImageMetadata() { Clear(); } + bool operator!=(const ImageMetadata& rhs) const { return !(*this == rhs); } + bool operator==(const ImageMetadata& rhs) const { + return width_ == rhs.width_ && height_ == rhs.height_ && + orientation_ == rhs.orientation_; + } + + /// Clears the values of the metadata, returning them to their startup values. + void Clear() { + width_ = -1; + height_ = -1; + orientation_ = Orientation::kNone; + } + + /// @param orientation The orientation to use for the metadata. + void SetOrientation(Orientation orientation) { orientation_ = orientation; } + + /// @param width The width to use for the metadata. + void SetWidth(UInt32 width) { width_ = width; } + + /// @param height The height to use for the metadata. + void SetHeight(UInt32 height) { height_ = height; } + + /// @return Whether the metadata has a width value. + bool HasWidth() const { return width_ >= 0; } + + /// @return Whether the metadata has a height value. + bool HasHeight() const { return height_ >= 0; } + + /// @return Whether the metadata has a width or height value depending on the + /// orientation. + bool HasTransformedWidth() const { + return HasVerticalRotation(orientation_) ? 
HasHeight() : HasWidth(); + } + + /// @return Whether the metadata has a width or height value depending on the + /// orientation. + bool HasTransformedHeight() const { + return HasVerticalRotation(orientation_) ? HasWidth() : HasHeight(); + } + + /// @return Whether the metadata has an orientation value. + bool HasOrientation() const { return orientation_ != Orientation::kNone; } + + /// @return The metadata's orientation value, or Orientation::kNone + Orientation GetOrientation() const { return orientation_; } + + /// @return The metadata's width value or 0 if none. Use the @f HasWidth() to + /// determine if a zero value represents a specified or unspecified value. + UInt32 GetWidth() const { + return HasWidth() ? static_cast<UInt32>(width_) : 0; + } + + /// @return The metadata's height value or 0 if none. Use the @f HasHeight() + /// to determine if a zero value represents a specified or unspecified value. + UInt32 GetHeight() const { + return HasHeight() ? static_cast<UInt32>(height_) : 0; + } + + /// @return The metadata's width or height depending on the orientation. + UInt32 GetTransformedWidth() const { + return HasVerticalRotation(orientation_) ? GetHeight() : GetWidth(); + } + + /// @return The metadata's width or height depending on the orientation. + UInt32 GetTransformedHeight() const { + return HasVerticalRotation(orientation_) ? 
GetWidth() : GetHeight(); + } + + private: + Int64 width_; + Int64 height_; + Orientation orientation_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_IMAGE_METADATA_H_ // NOLINT diff --git a/includes/image_io/base/message_handler.h b/includes/image_io/base/message_handler.h index b60a593..cf2f37b 100644 --- a/includes/image_io/base/message_handler.h +++ b/includes/image_io/base/message_handler.h @@ -2,6 +2,7 @@ #define IMAGE_IO_BASE_MESSAGE_HANDLER_H_ // NOLINT #include <memory> +#include <string> #include <vector> #include "image_io/base/message.h" diff --git a/includes/image_io/base/message_stats.h b/includes/image_io/base/message_stats.h index 6b338f0..b61cebd 100644 --- a/includes/image_io/base/message_stats.h +++ b/includes/image_io/base/message_stats.h @@ -10,6 +10,12 @@ namespace image_io { struct MessageStats { MessageStats() { Clear(); } void Clear() { error_count = warning_count = status_count = 0; } + bool operator!=(const MessageStats& rhs) const { return !(*this == rhs); } + bool operator==(const MessageStats& rhs) const { + return error_count == rhs.error_count && + warning_count == rhs.warning_count && + status_count == rhs.status_count; + } size_t error_count; size_t warning_count; size_t status_count; diff --git a/includes/image_io/base/string_ref_data_source.h b/includes/image_io/base/string_ref_data_source.h new file mode 100644 index 0000000..b6188f7 --- /dev/null +++ b/includes/image_io/base/string_ref_data_source.h @@ -0,0 +1,32 @@ +#ifndef IMAGE_IO_BASE_STRING_REF_DATA_SOURCE_H_ // NOLINT +#define IMAGE_IO_BASE_STRING_REF_DATA_SOURCE_H_ // NOLINT + +#include <string> + +#include "image_io/base/data_segment_data_source.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A DataSource that reads bytes from a string held by ref. 
The underlying +/// string must have a lifetime that exceeds the lifetime of this data source, +/// and the string contents must not change while the data source is referencing +/// it. +class StringRefDataSource : public DataSegmentDataSource { + public: + /// Constructs a StringRefDataSource using the given string. + /// @param string_ref The string to read from. + explicit StringRefDataSource(const std::string& string_ref); + + /// Returns the string being used as the data source. + const std::string& GetStringRef() const { return string_ref_; } + + private: + /// The string to read from. + const std::string& string_ref_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_BASE_STRING_REF_DATA_SOURCE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_action.h b/includes/image_io/xml/xml_action.h new file mode 100644 index 0000000..dce6c7d --- /dev/null +++ b/includes/image_io/xml/xml_action.h @@ -0,0 +1,57 @@ +#ifndef IMAGE_IO_XML_XML_ACTION_H_ // NOLINT +#define IMAGE_IO_XML_XML_ACTION_H_ // NOLINT + +#include <functional> + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_handler_context.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlActionContext; +class XmlTerminal; + +/// The definition for an action function associated with an XmlTerminal. +/// If the action does not need to change the result of the terminal, it can +/// simply return the value from XmlActionContext::GetResult(). +using XmlAction = + std::function<DataMatchResult(const XmlActionContext& context)>; + +/// The data context passed from an XmlTerminal to its action function. 
+class XmlActionContext : public XmlHandlerContext { + public: + XmlActionContext(const XmlHandlerContext& context, XmlTerminal* terminal, + const DataMatchResult& result) + : XmlHandlerContext(context), terminal_(terminal), result_(result) {} + XmlActionContext(size_t location, const DataRange& range, + const DataSegment& segment, const DataLineMap& data_line_map, + XmlHandler* handler, XmlTerminal* terminal, + const DataMatchResult& result) + : XmlHandlerContext(location, range, segment, data_line_map, handler), + terminal_(terminal), + result_(result) {} + + /// @return The terminal associated with the context. + XmlTerminal* GetTerminal() const { return terminal_; } + + /// @return The result associated with the context. + const DataMatchResult& GetResult() const { return result_; } + + /// @param bytes_consumed The value to set in the returned result. + /// @return A result based on the context's action, but with its bytes + /// consumed value set to the given value. + DataMatchResult GetResultWithBytesConsumed(size_t bytes_consumed) const { + auto result = result_; + return result.SetBytesConsumed(bytes_consumed); + } + + private: + XmlTerminal* terminal_; + DataMatchResult result_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ACTION_H_ // NOLINT diff --git a/includes/image_io/xml/xml_attribute_rule.h b/includes/image_io/xml/xml_attribute_rule.h new file mode 100644 index 0000000..564af07 --- /dev/null +++ b/includes/image_io/xml/xml_attribute_rule.h @@ -0,0 +1,33 @@ +#ifndef IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlAttributeRule parses the following syntax: +/// S? Name S? = S? 'Value' +/// S? Name S? = S? 
"Value" +class XmlAttributeRule : public XmlRule { + public: + XmlAttributeRule(); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's AttributeName() function. + /// @param context The action context from the name terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleName(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's AttributeValue() function. + /// @param context The action context from the quoted string terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleValue(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ATTRIBUTE_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_cdata_and_comment_rules.h b/includes/image_io/xml/xml_cdata_and_comment_rules.h new file mode 100644 index 0000000..0cc2e50 --- /dev/null +++ b/includes/image_io/xml/xml_cdata_and_comment_rules.h @@ -0,0 +1,69 @@ +#ifndef IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT +#define IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlCdataRule parses the following syntax "<![CDATA[ ... ]]>". +/// As mentioned in the comments for the XmlHandler::Cdata() function, the token +/// value that is passed to the handler never includes the leading "<![CDATA[" +/// syntax and always includes the trailing "]]>" syntax. This considerably +/// simplifies the parsing task. The alternate start point constructor is used +/// by the XmlCdataOrCommentRule. +class XmlCdataRule : public XmlRule { + public: + XmlCdataRule(); + explicit XmlCdataRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Cdata() function. 
+ /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleCdataValue(const XmlActionContext& context); +}; + +/// The XmlCommentRule parses the following syntax "<!-- ... -->". +/// As mentioned in the comments for the XmlHandler::Comment() function, the +/// token value that is passed to the handler never includes the leading "<!--" +/// syntax and always includes the trailing "-->" syntax. This considerably +/// simplifies the parsing task. The alternate start point constructor is used +/// by the XmlCdataOrCommentRule. +class XmlCommentRule : public XmlRule { + public: + XmlCommentRule(); + explicit XmlCommentRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Comment() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleCommentValue(const XmlActionContext& context); +}; + +/// This rule will use chain delegation to start either the XmlCdataRule or the +/// XmlCommentRule, depending on the text being parsed. The syntax for XML is +/// pretty poor here - the parser needs to look ahead two characters from the < +/// character to determine what to do. The alternate start point constructor is +/// used by the XmlElementContentRule. +class XmlCdataOrCommentRule : public XmlRule { + public: + XmlCdataOrCommentRule(); + explicit XmlCdataOrCommentRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and creates the + /// XmlCdataRule or XmlCommentRule to chain to depending on what character + /// follows the exclamation point of the "<!" syntax. + /// @param context The action context from the rule's terminal. + /// @return The result value from the action context. 
+ DataMatchResult HandlePostBangChar(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_CDATA_AND_COMMENT_RULES_H_ // NOLINT diff --git a/includes/image_io/xml/xml_element_rules.h b/includes/image_io/xml/xml_element_rules.h new file mode 100644 index 0000000..f40f370 --- /dev/null +++ b/includes/image_io/xml/xml_element_rules.h @@ -0,0 +1,92 @@ +#ifndef IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT +#define IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlElementRule parses the following syntax: +/// Element ::= EmptyElemTag | STag content ETag +/// EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' +/// STag ::= '<' Name (S Attribute)* S? '>' +/// ETag ::= '</' Name S? '>' +/// The Attribute syntax is parsed by XmlAttributeRule, which this rule +/// delegates to as a child rule. The EmptyElemTag type syntax is handled by +/// this rule. The STag part of the syntax is handled by this rule, but the +/// element contents and the ETag syntax is handled by the XmlElementContentRule +/// that is chained to by this rule. +class XmlElementRule : public XmlRule { + public: + XmlElementRule(); + explicit XmlElementRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's StartElement() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleName(const XmlActionContext& context); + + /// Handles the book keeping after parsing the whitespace following the name + /// of the element, basically looking ahead to see if an XmlAttributeRule has + /// to be delegated to as a child rule, or if the element ends. + /// @param context The action context from the rule's terminal. + /// @return The result value action context. 
+ DataMatchResult HandlePostWhitespaceChar(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's FinishElement() function in response to the final literal in + /// the EmptyElemTag type syntax. As written in the comment for the XmlHandler + /// FinishElement() function, the token context passed to the handler in this + /// case will have an invalid range and a XmlPortion value of kNone - i.e., + /// the element name is not available for this form of the element syntax. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleEmptyElemTagEnd(const XmlActionContext& context); + + /// Handles the book keeping after parsing the final ">" literal of the STag + /// syntax of the rule, creating an XmlElementContentRule for use as a chained + /// to rule. + /// @param context The action context from the rule's terminal. + /// @return The result value action context. + DataMatchResult HandleSTagEnd(const XmlActionContext& context); +}; + +/// The XmlElementContentRule parses the following syntax: +/// (c? Element | PI | CDATA | Comment )+ ETag +/// The "c?" syntax represents the character data passed to the XmlHandler's +/// ElementContent() function. The syntax for Element, PI, CDATA and Comment +/// all cause a child rule to be created and delegated to. The ETag syntax will +/// cause this element to be finished with a DataMatchResult type of kFull. +class XmlElementContentRule : public XmlRule { + public: + XmlElementContentRule(); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's ElementContent() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. 
+ DataMatchResult HandleContent(const XmlActionContext& context); + + /// Handles the book keeping after parsing the element's content characters, + /// and the first character literal ("<") of the Element, PI, CDATA or Comment + /// syntax, creating an appropriate child rule to delegate the processing to. + /// @param context The action context from the rule's terminal. + /// @return The result value action context. + DataMatchResult HandlePostOpenChar(const XmlActionContext& context); + + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's FinishElement() function. No check is done by the rule to verify + /// that the element name matches the one that was passed to the handler's + /// StartElement. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandleEndTag(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_ELEMENT_RULES_H_ // NOLINT diff --git a/includes/image_io/xml/xml_handler.h b/includes/image_io/xml/xml_handler.h new file mode 100644 index 0000000..cdf3d30 --- /dev/null +++ b/includes/image_io/xml/xml_handler.h @@ -0,0 +1,107 @@ +#ifndef IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT +#define IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The handler that is called by XmlRule instances as they parse XML syntax +/// and produce tokens defined in the XmlTokenContext. Each handler function +/// may be called multiple times with different XmlPortion values. The first +/// time the XmlPortion::kBegin bit will be set. The last time, XmlPortion::kEnd +/// will be set. In between, XmlPortion::kMiddle will be set. If the entire +/// token value is available for the handler, all three bits will be set. 
+/// The implementation of each function in this base class returns the +/// DataMatchResult value that the context provides. The function overrides in +/// subclasses can return the same context value, or a copy that is modified +/// with a different result type, message and "can continue" flag. +class XmlHandler { + public: + virtual ~XmlHandler() = default; + + /// This function is called to start an XML element. Once started, any of + /// the other handler functions may be called. + /// @param context The token context used to specify the element name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult StartElement(const XmlTokenContext& context); + + /// This function is called to finish an XML element. Each call to this + /// function should be paired with a call to a StartElement function. + /// @param context The token context used to obtain the match result for + /// returning. For this function, the context might not have a valid token + /// value: the XmlPortion will always be kNone and the token range invalid. + /// This is the case if the syntax parsed is an empty element like this: + /// "<SomeElement [Attribute=Name]... />". For non empty elements with syntax: + /// "<SomeElement>...</SomeElement>", the value will be the element name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult FinishElement(const XmlTokenContext& context); + + /// This function is called to define an attribute name. This function will + /// never be called unless an element has been started with a prior call to + /// the StartElement() function. + /// @param context The token context used to specify the attribute name. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. 
+ virtual DataMatchResult AttributeName(const XmlTokenContext& context); + + /// This function is called to define an attribute value. The token value + /// passed to this function always includes the quote marks at the begin and + /// end of the token value. The quote marks always match and may be either a + /// single quote (') or a double quote ("). Sometimes attribute values can be + /// very long, so implementations of this function should use care if they + /// retain the value as a string for later processing. This function will + /// never be called unless an element has been started with a prior call to + /// the StartElement() and AttributeName() functions. + /// @param context The token context used to specify the attribute value. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult AttributeValue(const XmlTokenContext& context); + + /// This function is called to define a block of characters in the body of + /// an element. This function may be called multiple times for a given + /// element. Handlers that are interested in the character content for an + /// element should concatenate the token values from all calls to obtain the + /// full value for the element. + /// @param context The token context used to specify the content value. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult ElementContent(const XmlTokenContext& context); + + /// This function is called to inform the handler of a comment. A comment in + /// XML has the syntax "<!--...-->". In order to simplify the XML parsing + /// task, the tokens passed to this function never include the leading "<!--" + /// characters, but always include the trailing "-->". + /// @param context The token context used to specify the comment. 
+ /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult Comment(const XmlTokenContext& context); + + /// This function is called to inform the handler of a CDATA block. A CDATA block + /// in XML has the syntax "<![CDATA[...]]>". In order to simplify the XML + /// parsing task, the tokens passed to this function never include the leading + /// "<![CDATA[" characters, but always include the trailing "]]". + /// @param context The token context used to specify the CDATA block. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed. + virtual DataMatchResult Cdata(const XmlTokenContext& context); + + /// This function is called to define a processing instruction. Processing + /// instructions have an XML syntax "<?...?>". In order to simplify the XML + /// parsing task, no parsing of the processing instruction is done: handlers + /// that need the contents parsed are on their own. Also, again to simplify + /// the XML parsing task, the tokens passed to this function never include the + /// leading "<?" characters, but always include the trailing "?>". + /// @param context The token context used to specify the processing data. + /// @return The match result from the context, or one that is modified to + /// contain an error message if needed.
+ virtual DataMatchResult Pi(const XmlTokenContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_HANDLER_H_ // NOLINT diff --git a/includes/image_io/xml/xml_handler_context.h b/includes/image_io/xml/xml_handler_context.h new file mode 100644 index 0000000..5595118 --- /dev/null +++ b/includes/image_io/xml/xml_handler_context.h @@ -0,0 +1,31 @@ +#ifndef IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT +#define IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT + +#include "image_io/base/data_context.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlHandler; + +class XmlHandlerContext : public DataContext { + public: + XmlHandlerContext(const DataContext& context, XmlHandler* handler) + : DataContext(context), handler_(handler) {} + + XmlHandlerContext(size_t location, const DataRange& range, + const DataSegment& segment, + const DataLineMap& data_line_map, XmlHandler* handler) + : DataContext(location, range, segment, data_line_map), + handler_(handler) {} + + XmlHandler* GetHandler() const { return handler_; } + + private: + XmlHandler* handler_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_HANDLER_CONTEXT_H_ // NOLINT diff --git a/includes/image_io/xml/xml_pi_rule.h b/includes/image_io/xml/xml_pi_rule.h new file mode 100644 index 0000000..674a3fa --- /dev/null +++ b/includes/image_io/xml/xml_pi_rule.h @@ -0,0 +1,32 @@ +#ifndef IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT + +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// The XmlPiRule parses the processing information syntax: "<?...?>". This +/// syntax is considerably simplified from the official XML specification. As +/// documented in the comments for the XmlHandler Pi() function, The leading +/// "<?" 
syntax is never sent to the handler, while the trailing "?>" literal +/// is always sent as part of the processing content token. This approach makes +/// it much easier to parse XML syntax. The alternate start point constructor +/// is used by the XmlElementContentRule. +class XmlPiRule : public XmlRule { + public: + XmlPiRule(); + explicit XmlPiRule(StartPoint start_point); + + private: + /// Builds an XmlTokenContext from the XmlActionContext and calls the + /// handler's Pi() function. + /// @param context The action context from the rule's terminal. + /// @return The result value from the handler's function. + DataMatchResult HandlePiValue(const XmlActionContext& context); +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_PI_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_portion.h b/includes/image_io/xml/xml_portion.h new file mode 100644 index 0000000..673c958 --- /dev/null +++ b/includes/image_io/xml/xml_portion.h @@ -0,0 +1,48 @@ +#ifndef IMAGE_IO_XML_XML_PORTION_H_ // NOLINT +#define IMAGE_IO_XML_XML_PORTION_H_ // NOLINT + +namespace photos_editing_formats { +namespace image_io { + +/// A bit-type enum for indicating what part of an entity is defined: the +/// begin, middle and/or end. Bitwise "and" and "or" operators are defined to +/// combine and test values. +enum class XmlPortion { + kNone = 0, + kBegin = 1, + kMiddle = 2, + kEnd = 4, +}; + +/// @return The value that results from the bitwise "and" of given portions. +inline XmlPortion operator&(XmlPortion lhs, XmlPortion rhs) { + int lhs_value = static_cast<int>(lhs); + int rhs_value = static_cast<int>(rhs); + return static_cast<XmlPortion>(lhs_value & rhs_value); +} + +/// @return The value that results from the bitwise "or" of given portions.
+inline XmlPortion operator|(XmlPortion lhs, XmlPortion rhs) { + int lhs_value = static_cast<int>(lhs); + int rhs_value = static_cast<int>(rhs); + return static_cast<XmlPortion>(lhs_value | rhs_value); +} + +/// @param value The value to use for the test. +/// @param mask The mask to use for the test. +/// @return Whether any of the bits in the mask are set in the value. +inline bool ContainsAny(XmlPortion value, XmlPortion mask) { + return (value & mask) != XmlPortion::kNone; +} + +/// @param value The value to use for the test. +/// @param mask The mask to use for the test. +/// @return Whether all of the bits in the mask are set in the value. +inline bool ContainsAll(XmlPortion value, XmlPortion mask) { + return (value & mask) == mask; +} + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_PORTION_H_ // NOLINT diff --git a/includes/image_io/xml/xml_reader.h b/includes/image_io/xml/xml_reader.h new file mode 100644 index 0000000..905d072 --- /dev/null +++ b/includes/image_io/xml/xml_reader.h @@ -0,0 +1,137 @@ +#ifndef IMAGE_IO_XML_XML_READER_H_ // NOLINT +#define IMAGE_IO_XML_XML_READER_H_ // NOLINT + +#include <memory> +#include <string> +#include <vector> + +#include "image_io/base/data_line_map.h" +#include "image_io/base/data_match_result.h" +#include "image_io/base/message_handler.h" +#include "image_io/xml/xml_handler_context.h" +#include "image_io/xml/xml_rule.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A class for reading and parsing the text of a data segment, resulting in the +/// functions of an XmlHandler to be called. This reader's Parse() function can +/// be called multiple times for text that spans multiple data segments. Errors +/// are reported to the message handler as they are encountered. In general, +/// there will be three types of errors: internal (programming), syntax, and +/// value errors. 
Internal errors can come from anywhere in this code base; +/// Only one such error is permitted per StartParse/Parse... sequence. Syntax +/// errors are usually issued by XmlRule instances; like internal errors, only +/// one such error is tolerated per StartParse/Parse... sequence. XmlHandler +/// functions may issue value errors; multiple such value errors are tolerated. +class XmlReader { + public: + XmlReader(XmlHandler* handler, MessageHandler* message_handler) + : handler_(handler), + message_handler_(message_handler), + data_line_map_(&internal_data_line_map_), + bytes_parsed_(0), + has_internal_or_syntax_error_(false), + has_errors_(false) {} + + /// An externally initialized data line map can be used for error messages + /// instead of the internally built map. Otherwise the internal map will be + /// used. + /// @param data_line_map The pre-initialized data line map to use. + void SetDataLineMap(const DataLineMap* data_line_map) { + data_line_map_ = data_line_map; + } + + /// Sets up the reader for parsing data segment text using the given XmlRule. + /// @param rule The top level rule to use when parsing the data segment text. + /// @return Whether the reader was set up properly. + bool StartParse(std::unique_ptr<XmlRule> rule); + + /// Parses the text portion of the data segment starting at a location. This + /// function may be called multiple times for text that spans multiple data + /// segments. + /// @param start_location The location at which to start reading/parsing. + /// This location must be contained in the range parameter. + /// @param range The portion of the data segment to parse. This range value + /// must be contained in the range returned by DataSegment::GetRange() + /// @param segment The segment containing the text to parse. + /// @return Whether the reading/parsing was successful. + bool Parse(size_t start_location, const DataRange& range, + const DataSegment& segment); + + /// Parses the string value.
This is an alternate way to parse XML syntax. + /// Internally, this function uses the string to create a data segment and + /// calls the Parse(start_location, range, segment) function. The range is + /// computed like this: [GetBytesParsed(), GetBytesParsed() + value.length()). + /// @param value The string value containing XML syntax to parse. + /// @return Whether the reading/parsing was successful. + bool Parse(const std::string& value); + + /// Finishes up the reading/parsing process. The rule passed to StartParse() + /// must have consumed all the text of the segments and be "done", otherwise + /// this function will issue a kPrematureEndOfDataError type error message. + /// @return Whether the reading/parsing operation was completed successfully. + bool FinishParse(); + + /// @return The total number of bytes of text that have been read/parsed. + size_t GetBytesParsed() const { return bytes_parsed_; } + + /// @return Whether errors have been encountered in reading/parsing the text. + /// This value may be different from the value returned by the Parse() and + /// FinishParse() functions. Those functions take into account only internal + /// and syntax type errors. This value includes all other types of errors. + bool HasErrors() const { return has_errors_; } + + /// @return The handler that handles the output of the parsing operations. + XmlHandler* GetHandler() const { return handler_; } + + private: + /// Sets up the context's name list that is used when creating error messages. + /// @param context The context to set up. + void InitializeContextNameList(XmlHandlerContext* context); + + /// If the result has a message, reports it; otherwise does nothing. + /// @param result The result value for an XmlRule::Parse function. + void ReportMessageIfNeeded(const DataMatchResult& result); + + /// Reports the message indicated in the result to the message handler and + /// updates the boolean data members indicating errors.
+ /// @param result The result value for an XmlRule::Parse function. + /// @param context The context for generating an error message if needed. + void ReportError(const DataMatchResult& result, const DataContext& context); + + /// Reports the message to the message handler and updates the data boolean + /// data members indicating errors. + /// @param message The message to send to the message handler. + void ReportError(const Message& message); + + /// The reader's handler. + XmlHandler* handler_; + + /// An optional message handler to write messages to. + MessageHandler* message_handler_; + + /// A possibly externally initialized data line map used for error messages. + const DataLineMap* data_line_map_; + + /// An internal data line map used for error message creation if an externally + /// defined map is not provided. + DataLineMap internal_data_line_map_; + + /// The pending and active rules. + std::vector<std::unique_ptr<XmlRule>> rule_stack_; + + /// The total number of bytes that have been parsed. + size_t bytes_parsed_; + + /// Whether an internal or syntax error has occurred. + bool has_internal_or_syntax_error_; + + /// Whether any type of error has occurred. 
+ bool has_errors_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_READER_H_ // NOLINT diff --git a/includes/image_io/xml/xml_rule.h b/includes/image_io/xml/xml_rule.h new file mode 100644 index 0000000..f564d1b --- /dev/null +++ b/includes/image_io/xml/xml_rule.h @@ -0,0 +1,186 @@ +#ifndef IMAGE_IO_XML_XML_RULE_H_ // NOLINT +#define IMAGE_IO_XML_XML_RULE_H_ // NOLINT + +#include <memory> +#include <string> +#include <vector> + +#include "image_io/base/data_match_result.h" +#include "image_io/xml/xml_handler_context.h" +#include "image_io/xml/xml_terminal.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A rule represents a sequence of terminals to match text from a DataSource, +/// and the state needed to keep track of the parsing operation in case the text +/// is split across multiple DataSegments. XmlRules collaborate with an instance +/// of XmlHandler to process the token values the terminals produce. +/// +/// Terminals are added in the constructors of the rule subclasses, and are +/// not typically accessed directly from the clients of an XmlRule. Instead, +/// XmlRule clients normally just call the rule's Parse function and take action +/// based on the DataMatchResult value that is returned. The functions of the +/// XmlHandler are called internally by the rule's terminals as they parse the +/// text in the data segment. +/// +/// Normally, the terminals are parsed by the Parse() function in a sequential +/// manner until they are exhausted. At which time the Parse function returns +/// with a DataMatchResult that has a type equal to kFull. If the DataSegment +/// runs out of data before the end of the final terminal, the result type will +/// be kPartialOutOfData. Of course if any of the terminals' scanners detect an +/// error the result type will be kError. +/// +/// Rules may decide to delegate the parsing process to another rule. There are +/// two types of delegation: +/// 1.
Rule chaining - in this case a rule decides that another rule should +/// be used instead to continue the parsing process. This situation is +/// indicated when the result type is kFull and the rule's HasNextRule() +/// function returns true. The chained-to rule is obtained by calling the +/// rule's ReleaseNextRule() function. The current rule can be discarded. +/// 2. Child rules - in this case a "parent" rule decides that the next set of +/// syntax should be parsed by another "child" rule, and after that rule +/// completes, the parsing task should be returned to the parent rule. This +/// situation is indicated when the result type is kPartial and the rule's +/// HasNextRule() returns true. The child rule is obtained by calling the +/// rule's ReleaseNextRule() function. The current parent rule should be placed +/// on a stack until the child rule is done, and then the child discarded and +/// the parent rule used for the next Parse operation. +/// The action functions associated with a terminal are typically used to create +/// the next rule and set the result type and thus initiate the delegation +/// process. When the XmlRule::Parse function detects a delegation has been +/// requested, it returns to its caller so that the caller can handle the +/// delegation in the appropriate fashion. For an example, see the XmlReader's +/// Parse() function. +/// +/// In addition to delegation the action functions associated with a terminal +/// can change the order of the terminals processed from a strictly sequential +/// order to whatever the rule so desires. This is done by calling the rule's +/// SetTerminalIndex() function. Terminals can be identified by name using the +/// GetTerminalIndexFromName() function if the rule's terminals were +/// constructed with names. If the terminal index of a rule is set to a +/// terminal that has already been used, the terminal's scanners state must be +/// reset in order for it to parse successfully again.
Sometimes the entire +/// rule is "restarted" in which case the ResetTerminalScanners() function can +/// be called to reset the scanners of all the rule's terminals. +/// +/// Finally, because of the look-ahead needs of the XML grammar, some rules +/// support alternate "starting points", allowing them to skip some set of +/// initial terminals when the rule's Parse() function is called. Rules that +/// support this feature will have a constructor with a StartPoint parameter. +class XmlRule { + public: + /// For rules that support alternate starting points, this enum provides the + /// values at which a rule's Parse() function can begin. + enum StartPoint { + /// Start parsing at the first terminal position. + kFirstStartPoint, + + /// Start parsing at a second (alternative) position. + kSecondStartPoint, + }; + + virtual ~XmlRule() = default; + explicit XmlRule(const std::string& name); + + /// @return The name of the rule. + const std::string& GetName() const { return name_; } + + /// Parse the text indicated in the context's data segment and range and call + /// the context's XmlHandler functions as needed. The implementation of this + /// function makes use of the terminals contained by the rule, but it is + /// declared virtual so that subclasses can customize as needed. + /// @param context The context describing the text to parse and the handler + /// to call. + /// @return A result that indicates the type of match that occurred, the number + /// of bytes consumed and an error message if needed. + virtual DataMatchResult Parse(XmlHandlerContext context); + + /// Some rules are written such that there are optional tokens at the end, + /// and thus may be active on the XmlReader's rule stack when the end of the + /// text is reached. This function determines whether it is permissible to finish + /// the parsing process even though this rule is active. Unless overridden, + /// this function returns false.
+ /// @param error_text A string pointer that will be used in the error message + /// that the caller produces if this function returns false. If left unset, + /// and the function returns false the caller is expected to use its own text. + /// @return Whether its ok for this rule to be active at the end of parsing. + virtual bool IsPermissibleToFinish(std::string* error_text) const; + + /// Adds a literal terminal to the rule. + /// @param literal The literal value to scan for. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddLiteralTerminal(const std::string& literal); + + /// Adds a name terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddNameTerminal(); + + /// Adds a quoted string terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddQuotedStringTerminal(); + + /// Adds a sentinel terminal to the rule. + /// @param sentinels The sentinel values to scan for. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddSentinelTerminal(const std::string& sentinels); + + /// Adds a scan through literal terminal to the rule. + /// @param literal The literal value to scan through. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddThroughLiteralTerminal(const std::string& literal); + + /// Adds a whitespace terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddWhitespaceTerminal(); + + /// Adds an optional whitespace terminal to the rule. + /// @return The terminal, enabling direct calls to WithName()/WithAction(). + XmlTerminal& AddOptionalWhitespaceTerminal(); + + /// @return The number of terminals in the rule. + size_t GetTerminalCount() const { return terminals_.size(); } + + /// @return The index of the terminal currently parsing text. 
+ size_t GetTerminalIndex() const { return terminal_index_; } + + /// @param name The name of the terminal to look for. + /// @return The index of the terminal with the given name, or the value + /// returned by the rule's GetTerminalCount() if not found. + size_t GetTerminalIndexFromName(const std::string name) const; + + /// @param terminal_index The index of the terminal that should next be used + /// for parsing the input text. + void SetTerminalIndex(size_t terminal_index); + + /// @return The terminal currently parsing text, or nullptr if there is none. + XmlTerminal* GetCurrentTerminal(); + + /// @param index The index of the terminal to get. + /// @return The terminal at the given index, or nullptr if index is invalid. + XmlTerminal* GetTerminal(size_t index); + + /// Resets the scanner's state of all the terminals in the rule. + void ResetTerminalScanners(); + + /// @return Whether the rule has a next rule for delegation. + bool HasNextRule() const; + + /// @return Returns the next rule to the caller. If there is no next rule, + /// the get function of the returned unique_ptr will return nullptr. + std::unique_ptr<XmlRule> ReleaseNextRule(); + + /// @param next_rule The new rule to use for delegation purposes. 
+ void SetNextRule(std::unique_ptr<XmlRule> next_rule); + + private: + std::string name_; + std::vector<XmlTerminal> terminals_; + std::unique_ptr<XmlRule> next_rule_; + size_t terminal_index_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_RULE_H_ // NOLINT diff --git a/includes/image_io/xml/xml_terminal.h b/includes/image_io/xml/xml_terminal.h new file mode 100644 index 0000000..3e6ebca --- /dev/null +++ b/includes/image_io/xml/xml_terminal.h @@ -0,0 +1,74 @@ +#ifndef IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT +#define IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT + +#include <string> + +#include "image_io/base/data_scanner.h" +#include "image_io/xml/xml_action.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +/// A terminal represents a part of a rule that uses a DataScanner to match +/// zero or more characters from a DataSource. A terminal can also have a name +/// that can be used in error messages and also used to identify it in a +/// rule. A terminal can also have an action function associated with it that it +/// can use to validate the token produced by the terminal/scanner, and do +/// further processing with the token. Finally, the terminal's action function +/// can manipulate the DataMatchResult that was produced by the terminal's +/// scanner and accessible via the action function's XmlActionContext param. +class XmlTerminal { + public: + explicit XmlTerminal(const DataScanner& scanner) : scanner_(scanner) {} + + /// Sets the name of the terminal. Looks best with an XmlRule::AddTerminal + /// function: AddWhitespaceTerminal().WithName("SomeName"); + /// @param name The name to give to the terminal. + /// @return A reference to the terminal. + XmlTerminal& WithName(const std::string& name) { + name_ = name; + return *this; + } + + /// Sets the description of the terminal's scanner used for errors.
+ /// Looks best with an XmlRule::AddTerminal function: + /// AddWhitespaceTerminal().WithDescription("intra element whitespace") + /// @param description The description to give to the terminal's scanner. + /// @return A reference to the terminal. + XmlTerminal& WithDescription(const std::string& description) { + scanner_.SetDescription(description); + return *this; + } + + /// Sets the action of the terminal. Looks best with an XmlRule::AddTerminal + /// function: AddWhitespaceTerminal().WithAction(SomeAction); + /// @param action The action to give to the terminal. + /// @return A reference to the terminal. + XmlTerminal& WithAction(const XmlAction& action) { + action_ = action; + return *this; + } + + /// @return The terminal's scanner. + DataScanner* GetScanner() { return &scanner_; } + + /// @return The terminal's name. + const std::string& GetName() const { return name_; } + + /// @return The terminal's scanner's description. + std::string GetDescription() const { return scanner_.GetDescription(); } + + /// @return The terminal's action function. 
+ const XmlAction& GetAction() const { return action_; } + + private: + DataScanner scanner_; + XmlAction action_; + std::string name_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_TERMINAL_H_ // NOLINT diff --git a/includes/image_io/xml/xml_token_context.h b/includes/image_io/xml/xml_token_context.h new file mode 100644 index 0000000..3c0fcbd --- /dev/null +++ b/includes/image_io/xml/xml_token_context.h @@ -0,0 +1,82 @@ +#ifndef IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT +#define IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT + +#include <string> +#include <vector> + +#include "image_io/base/data_context.h" +#include "image_io/base/data_match_result.h" +#include "image_io/base/data_range.h" +#include "image_io/xml/xml_portion.h" + +namespace photos_editing_formats { +namespace image_io { + +class XmlActionContext; + +/// A token context is passed from the action of an XmlTerminal to an XmlHandler +/// associated with the XmlActionContext used to call the action function. +class XmlTokenContext : public DataContext { + public: + explicit XmlTokenContext(const XmlActionContext& context); + XmlTokenContext(size_t location, const DataRange& range, + const DataSegment& segment, const DataLineMap& data_line_map, + const DataMatchResult& result, const DataRange& token_range, + const XmlPortion& token_portion); + + /// @return The result associated with the context. + const DataMatchResult& GetResult() const { return result_; } + + /// @return The token range for the token. Note that the token range may not + /// be a subrange of the context's GetRange() or even the context's segment's + /// data range. Such would be the case when a token's value is split across + /// two or more data segments. + const DataRange& GetTokenRange() const { return token_range_; } + + /// @return The portion of the token that this context represents. This + /// portion value can be the bitwise or of any of the XmlPortion bit values. 
+ const XmlPortion& GetTokenPortion() const { return token_portion_; } + + /// Builds the string value of the token. If the context's token portion has + /// the XmlPortion::kBegin bit set, the string value is first cleared. Then + /// the string is extracted from the context's data source and appended onto + /// the value. Remember that some token values (especially attribute values) + /// can be quite long so care should be exercised when obtaining values with + /// this function. + /// @param value The value of the token being built. + /// @param trim_first_and_last_chars Whether to remove the first and last + /// characters of the token. This is nice to use when the token value is a + /// quoted string and the value itself is wanted without the quote marks. + /// @return Whether the token value is complete (i.e., the context's portion + /// had the XmlPortion::kEnd bit set). + bool BuildTokenValue(std::string* value, + bool trim_first_and_last_chars = false) const; + + /// Builds the complete range of the token, which may need to be represented + /// by multiple disjoint ranges. If the token portion indicates all portions + /// of the token are present, then this simply clears the vector and pushes + /// a copy of the value returned by the GetTokenRange() into it. Otherwise, it + /// does the heavy lifting to build the vector of ranges. + /// @param value_ranges The vector of ranges of the token being built. + /// @param trim_first_and_last_chars Whether to remove the first and last + /// characters of the token. This is nice to use when the token value is a + /// quoted string. If this parameter is true, the effect is to increase the + /// begin value of the first range by 1 and decrease the last range's end by 1. + /// @return Whether the token range value is complete (i.e., the context's + /// portion had the XmlPortion::kEnd bit set).
+ bool BuildTokenValueRanges(std::vector<DataRange>* value_ranges, + bool trim_first_and_last_chars = false) const; + + static XmlPortion ComputeTokenPortion(size_t token_scan_count, + DataMatchResult::Type result_type); + + private: + DataMatchResult result_; + DataRange token_range_; + XmlPortion token_portion_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_TOKEN_CONTEXT_H_ // NOLINT diff --git a/includes/image_io/xml/xml_writer.h b/includes/image_io/xml/xml_writer.h new file mode 100644 index 0000000..28270b7 --- /dev/null +++ b/includes/image_io/xml/xml_writer.h @@ -0,0 +1,205 @@ +#ifndef IMAGE_IO_XML_XML_WRITER_H_ // NOLINT +#define IMAGE_IO_XML_XML_WRITER_H_ // NOLINT + +#include <sstream> +#include <string> +#include <vector> + +namespace photos_editing_formats { +namespace image_io { + +/// A very simple writer for XML that frees client code from worries about XML +/// formatting and bracket issues. +/// +/// The intended sequence of operations this writer supports is as follows: +/// 1. Start writing an element. +/// 2. Write any and all attribute names and values to that element. +/// 3. Write any content, or add a child element by starting to write another +/// element (i.e., go to step 1). The "context" of the current element you +/// are writing is saved on a stack. Once you start writing content or +/// child elements you cannot add attribute names and values and expect to +/// see them as such in the resulting XML. +/// 4. When you are done with the element, finish writing it. The element +/// context stack is popped and you continue where you left off. +/// +/// When writing element content and attribute values no XML escaping of any +/// kind is done. If you need to do that, do it yourself. +class XmlWriter { + public: + /// @param os The stream to which the XML is written. + explicit XmlWriter(std::ostream& os); + + /// @return The number of elements that have been written.
+ size_t GetElementCount() const { return element_count_; } + + /// @return The depth of the element stack. + size_t GetElementDepth() const { return element_data_.size(); } + + /// @return The quote mark used when writing attribute values. The default + /// value set up by the constructor is the double quote ("). + char GetQuoteMark() const { return quote_mark_; } + + /// @param quote_mark The new quote mark to use when writing attribute values. + void SetQuoteMark(char quote_mark) { quote_mark_ = quote_mark; } + + /// @return The leading indent written before the current element. + const std::string& GetIndent() const { return indent_; } + + /// Once you are done writing your elements, you can call this function to + /// finish writing of all open elements. After this call, the string contained + /// in the ostream you passed to the constructor is fully formed XML. + void FinishWriting() { FinishWritingElementsToDepth(0); } + + /// @return Whether the writing of XML can be considered done. + bool IsDone() const { return indent_.empty(); } + + /// Writes an xmlns attribute to the currently open element. + /// @param prefix The prefix you intend to use for elements/attributes. + /// @param uri The uri of the namespace. + void WriteXmlns(const std::string& prefix, const std::string& uri); + + /// Starts writing a new child element of the current element. Immediately + /// after this function you can add attributes to the element using one of the + /// WriteAttributeNameAndValue() functions. + /// @param element_name The name of the element to write. + /// @return The number of open elements on the stack at the start of this + /// function. You can use this value with the FinishWritingElementsToDepth() + /// function to finish writing this element and any open descendants. 
+ size_t StartWritingElement(const std::string& element_name); + + /// Finishes writing the element and returns the "context" to the previously + /// open element so that you can continue adding child elements (via a call to + /// StartWritingElement()) or content (via a call to WriteContent()). + void FinishWritingElement(); + + /// Finishes writing any elements that exist in the stack of open elements + /// above the depth value parameter. + /// @param depth The depth above which to finish writing open elements. + void FinishWritingElementsToDepth(size_t depth); + + /// Starts writing the elements in the vector, leaving the last open for you + /// to add attributes or other elements to. + /// @param element_names The array of element names to start writing. + /// @return The number of open elements on the stack at the start of this + /// function. You can use this value with the FinishWritingElementsToDepth() + /// function to finish writing this element and any open descendants. + size_t StartWritingElements(const std::vector<std::string>& element_names); + + /// A template method function that allows you to start an element, add the + /// value as its content and then finish writing the element. This is useful + /// if you are writing property values as elements. + /// @param element_name The name of the element to write. + /// @param value The value that is converted to a string and written as the + /// element's content. + template <class T> + void WriteElementAndContent(const std::string& element_name, const T& value) { + std::stringstream ss; + ss << value; + WriteElementAndContent(element_name, ss.str()); + } + + /// Starts writing an element with the given name, adds the string value as + /// its content and then finishes writing the element. This is useful + /// if you are writing property values as elements. + /// @param element_name The name of the element to write. + /// @param content The value to use as the element's content. 
+ void WriteElementAndContent(const std::string& element_name, + const std::string& content); + + /// Writes the string as the currently open element's content. Note that if + /// you add child elements to the open element, the content you will see when + /// you read your element will have the whitespace due to the indent string. + /// @param content The content to write to the currently open element. + void WriteContent(const std::string& content); + + /// A template method function that allows you to add an attribute name and + /// value to a just-opened element. Attributes must be added to an element + /// before adding content or child elements. + /// @param name The name of the attribute to add. + /// @param value The value of the attribute. This value is converted to a + /// string and enclosed in the quote marks from the GetQuoteMark() function. + template <class T> + void WriteAttributeNameAndValue(const std::string& name, const T& value) { + std::stringstream ss; + ss << GetQuoteMark() << value << GetQuoteMark(); + WriteAttributeNameAndValue(name, ss.str(), false); + } + + /// Adds an attribute name and value to a just-opened element. Attributes must + /// be added to an element before adding content or child elements. + /// @param name The name of the attribute to add. + /// @param value The value of the attribute. + /// @param add_quote_marks Whether quote marks should be added before and + /// after the value. If this value is false, it is assumed that the client + /// code has added them before calling this function. + void WriteAttributeNameAndValue(const std::string& name, + const std::string& value, + bool add_quote_marks = true); + + /// Adds an attribute name and equal sign to the just-opened element. + /// Attributes must be added to an element before adding content or child + /// elements. Clients that use this function must call WriteAttributeValue() + /// with appropriate values to define a legally quoted value. 
This function + /// is useful for writing attributes with extremely long values that might not + /// be efficient to store as a single string value. + /// @param name The name of the attribute to add. + void WriteAttributeName(const std::string& name); + + /// Writes the attribute value with optional quote marks on either side. This + /// function may be repeatedly called with appropriate values for the leading + /// and trailing quote mark flags to write extremely long attribute values. + /// @param add_leading_quote_mark Whether to add a leading quote mark. + /// @param value The (probably partial) value to write. + /// @param add_trailing_quote_mark Whether to add a trailing quote mark. + void WriteAttributeValue(bool add_leading_quote_mark, + const std::string& value, + bool add_trailing_quote_mark); + + /// Writes a comment to the xml stream. Note that writing a comment is like + /// adding a child node/element to the current element. If the current element + /// is still open for names/values, it will be closed before writing it - i.e. + /// you can't add attributes to an element after calling this function. + /// @param comment The text of the comment to write. + void WriteComment(const std::string& comment); + + private: + /// The data that is known about each element on the stack. + struct ElementData { + ElementData(const std::string& name_) + : name(name_), + has_attributes(false), + has_content(false), + has_children(false) {} + std::string name; + bool has_attributes; + bool has_content; + bool has_children; + }; + + /// Determines if the start element syntax of the current element needs to + /// be closed with a bracket so that content or child elements or comments + /// can be added to the element. + /// @param with_trailing_newline Whether a newline is added after the bracket. + /// @return Whether the element's start syntax was closed with a bracket. 
+ bool MaybeWriteCloseBracket(bool with_trailing_newline); + + /// The stream to which everything is written. + std::ostream& os_; + + /// The indent to write before elements and attribute names/values. + std::string indent_; + + /// The currently open elements being written. + std::vector<ElementData> element_data_; + + /// The number of elements that have been written. + size_t element_count_; + + /// The quote mark to use around attribute values by default. + char quote_mark_; +}; + +} // namespace image_io +} // namespace photos_editing_formats + +#endif // IMAGE_IO_XML_XML_WRITER_H_ // NOLINT diff --git a/src/base/byte_buffer.cc b/src/base/byte_buffer.cc index 55fbc2f..ac4d387 100644 --- a/src/base/byte_buffer.cc +++ b/src/base/byte_buffer.cc @@ -1,5 +1,6 @@ #include "image_io/base/byte_buffer.h" +#include <cstring> #include <utility> namespace photos_editing_formats { @@ -24,7 +25,7 @@ static size_t WriteBytes(const ByteData& byte_data, Byte* pos) { } } } else { - memcpy(pos, byte_data.GetValue().c_str(), byte_count); + std::memcpy(pos, byte_data.GetValue().c_str(), byte_count); } return byte_count; } diff --git a/src/base/byte_pointer_data_destination.cc b/src/base/byte_pointer_data_destination.cc new file mode 100644 index 0000000..9c8e572 --- /dev/null +++ b/src/base/byte_pointer_data_destination.cc @@ -0,0 +1,32 @@ +#include "image_io/base/byte_pointer_data_destination.h" + +#include <algorithm> +#include <cstring> + +#include "image_io/base/data_range.h" +#include "image_io/base/data_segment.h" + +namespace photos_editing_formats { +namespace image_io { + +void BytePointerDataDestination::StartTransfer() {} + +DataDestination::TransferStatus BytePointerDataDestination::Transfer( + const DataRange& transfer_range, const DataSegment& data_segment) { + if (transfer_range.IsValid()) { + size_t size_remaining = size_ - bytes_transferred_; + size_t bytes_to_copy = std::min(size_remaining, transfer_range.GetLength()); + const Byte* buffer = 
data_segment.GetBuffer(transfer_range.GetBegin()); + if (buffer) { + std::memcpy(bytes_ + bytes_transferred_, buffer, bytes_to_copy); + bytes_transferred_ += bytes_to_copy; + return bytes_transferred_ == size_ ? kTransferDone : kTransferOk; + } + } + return kTransferError; +} + +void BytePointerDataDestination::FinishTransfer() {} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/base/data_context.cc b/src/base/data_context.cc index 3d58cd2..fac3533 100644 --- a/src/base/data_context.cc +++ b/src/base/data_context.cc @@ -1,8 +1,10 @@ #include "image_io/base/data_context.h" +#include <algorithm> #include <cctype> #include <iomanip> #include <sstream> +#include <string> #include "image_io/base/byte_data.h" diff --git a/src/base/data_line_map.cc b/src/base/data_line_map.cc index 06ecfd9..2b3abe6 100644 --- a/src/base/data_line_map.cc +++ b/src/base/data_line_map.cc @@ -5,8 +5,6 @@ namespace photos_editing_formats { namespace image_io { -size_t DataLineMap::GetDataLineCount() const { return data_lines_.size(); } - DataLine DataLineMap::GetDataLine(size_t location) const { if (data_lines_.empty()) { return DataLine(); diff --git a/src/base/data_scanner.cc b/src/base/data_scanner.cc index e6677a5..36128af 100644 --- a/src/base/data_scanner.cc +++ b/src/base/data_scanner.cc @@ -1,11 +1,18 @@ #include "image_io/base/data_scanner.h" +#include <algorithm> + namespace photos_editing_formats { namespace image_io { +using std::string; + namespace { const char kWhitespaceChars[] = " \t\n\r"; +const char kBase64PadChar = '='; +const char kBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /// This function is like strspn but does not assume a null-terminated string. 
size_t memspn(const char* s, size_t slen, const char* accept) { @@ -49,9 +56,17 @@ size_t ScanWhitespaceChars(const char* s, size_t slen) { } // namespace -std::string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; } +string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; } + +string DataScanner::GetBase64Chars(bool include_pad_char) { + string chars(kBase64Chars); + if (include_pad_char) chars += kBase64PadChar; + return chars; +} + +string DataScanner::GetBase64PadChar() { return string(1, kBase64PadChar); } -DataScanner DataScanner::CreateLiteralScanner(const std::string& literal) { +DataScanner DataScanner::CreateLiteralScanner(const string& literal) { return DataScanner(DataScanner::kLiteral, literal); } @@ -63,12 +78,11 @@ DataScanner DataScanner::CreateQuotedStringScanner() { return DataScanner(DataScanner::kQuotedString); } -DataScanner DataScanner::CreateSentinelScanner(const std::string& sentinels) { +DataScanner DataScanner::CreateSentinelScanner(const string& sentinels) { return DataScanner(DataScanner::kSentinel, sentinels); } -DataScanner DataScanner::CreateThroughLiteralScanner( - const std::string& literal) { +DataScanner DataScanner::CreateThroughLiteralScanner(const string& literal) { return DataScanner(DataScanner::kThroughLiteral, literal); } @@ -80,6 +94,10 @@ DataScanner DataScanner::CreateOptionalWhitespaceScanner() { return DataScanner(DataScanner::kOptionalWhitespace); } +size_t DataScanner::ScanChars(const char* s, size_t slen, const char* scanset) { + return memspn(s, slen, scanset); +} + size_t DataScanner::ExtendTokenLength(size_t delta_length) { token_range_ = DataRange(token_range_.GetBegin(), token_range_.GetEnd() + delta_length); @@ -87,7 +105,7 @@ size_t DataScanner::ExtendTokenLength(size_t delta_length) { } void DataScanner::SetInternalError(const DataContext& context, - const std::string& error_description, + const string& error_description, DataMatchResult* result) { 
result->SetType(DataMatchResult::kError); result->SetMessage( @@ -96,7 +114,7 @@ void DataScanner::SetInternalError(const DataContext& context, } void DataScanner::SetSyntaxError(const DataContext& context, - const std::string& error_description, + const string& error_description, DataMatchResult* result) { result->SetType(DataMatchResult::kError); result->SetMessage(Message::kSyntaxError, @@ -215,7 +233,7 @@ DataMatchResult DataScanner::ScanSentinel(const char* cbytes, } } if (result.GetBytesConsumed() == 0) { - SetSyntaxError(context, "Expected sentinal character", &result); + SetSyntaxError(context, "Unexpected character encountered", &result); } return result; } @@ -341,8 +359,11 @@ void DataScanner::Reset() { ResetTokenRange(); } -std::string DataScanner::GetDescription() const { - std::string description; +string DataScanner::GetDescription() const { + if (!description_.empty()) { + return description_; + } + string description; switch (type_) { case kLiteral: description = "Literal:'"; @@ -375,12 +396,12 @@ std::string DataScanner::GetDescription() const { return description; } -std::string DataScanner::GetLiteral() const { +string DataScanner::GetLiteral() const { return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_ : ""; } -std::string DataScanner::GetSentenels() const { +string DataScanner::GetSentenels() const { return type_ == kSentinel ? 
literal_or_sentinels_ : ""; } diff --git a/src/base/data_segment.cc b/src/base/data_segment.cc index 95b4cc7..a3e4a9a 100644 --- a/src/base/data_segment.cc +++ b/src/base/data_segment.cc @@ -1,5 +1,6 @@ #include "image_io/base/data_segment.h" +#include <algorithm> #include <cstring> namespace photos_editing_formats { diff --git a/src/base/istream_ref_data_source.cc b/src/base/istream_ref_data_source.cc index 5e3d126..6bd298c 100644 --- a/src/base/istream_ref_data_source.cc +++ b/src/base/istream_ref_data_source.cc @@ -1,5 +1,7 @@ #include "image_io/base/istream_ref_data_source.h" +#include <algorithm> + #include "image_io/base/data_destination.h" #include "image_io/base/data_segment.h" diff --git a/src/base/string_ref_data_source.cc b/src/base/string_ref_data_source.cc new file mode 100644 index 0000000..cdf7bf4 --- /dev/null +++ b/src/base/string_ref_data_source.cc @@ -0,0 +1,25 @@ +#include "image_io/base/string_ref_data_source.h" + +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +namespace { + +/// @param str The string from which to create a DataSegment. +/// @return A DataSegment the byte pointer of which is taken from the str. 
+std::shared_ptr<DataSegment> CreateDataSegment(const std::string &str) { + Byte *bytes = reinterpret_cast<Byte *>(const_cast<char *>(str.c_str())); + return DataSegment::Create(DataRange(0, str.length()), bytes, + DataSegment::BufferDispositionPolicy::kDontDelete); +} + +} // namespace + +StringRefDataSource::StringRefDataSource(const std::string &string_ref) + : DataSegmentDataSource(CreateDataSegment(string_ref)), + string_ref_(string_ref) {} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_attribute_rule.cc b/src/xml/xml_attribute_rule.cc new file mode 100644 index 0000000..955e60c --- /dev/null +++ b/src/xml/xml_attribute_rule.cc @@ -0,0 +1,32 @@ +#include "image_io/xml/xml_attribute_rule.h" + +#include "image_io/xml/xml_handler.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +XmlAttributeRule::XmlAttributeRule() : XmlRule("Attribute") { + // S? Name S? = S? 'Value' + AddOptionalWhitespaceTerminal(); + AddNameTerminal().WithAction( + [&](const XmlActionContext& context) { return HandleName(context); }); + AddOptionalWhitespaceTerminal(); + AddLiteralTerminal("="); + AddOptionalWhitespaceTerminal(); + AddQuotedStringTerminal().WithAction( + [&](const XmlActionContext& context) { return HandleValue(context); }); +} + +DataMatchResult XmlAttributeRule::HandleName(const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->AttributeName(token_context); +} + +DataMatchResult XmlAttributeRule::HandleValue(const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->AttributeValue(token_context); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_cdata_and_comment_rules.cc b/src/xml/xml_cdata_and_comment_rules.cc new file mode 100644 index 0000000..d3a4d50 --- /dev/null +++ b/src/xml/xml_cdata_and_comment_rules.cc @@ -0,0 +1,83 
@@ +#include "image_io/xml/xml_cdata_and_comment_rules.h" + +#include <utility> + +#include "image_io/xml/xml_handler.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +XmlCdataRule::XmlCdataRule() : XmlCdataRule(kFirstStartPoint) {} + +XmlCdataRule::XmlCdataRule(StartPoint start_point) : XmlRule("CDATA") { + // <![CDATA[ ... ]]> + AddLiteralTerminal("<!"); + AddLiteralTerminal("[CDATA["); + AddThroughLiteralTerminal("]]>").WithAction( + [&](const XmlActionContext& context) { + return HandleCdataValue(context); + }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlCdataRule::HandleCdataValue( + const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->Cdata(token_context); +} + +XmlCommentRule::XmlCommentRule() : XmlCommentRule(kFirstStartPoint) {} + +XmlCommentRule::XmlCommentRule(StartPoint start_point) : XmlRule("Comment") { + // <!-- ... --> + AddLiteralTerminal("<!"); + AddLiteralTerminal("--"); + AddThroughLiteralTerminal("-->").WithAction( + [&](const XmlActionContext& context) { + return HandleCommentValue(context); + }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlCommentRule::HandleCommentValue( + const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->Comment(token_context); +} + +XmlCdataOrCommentRule::XmlCdataOrCommentRule() + : XmlCdataOrCommentRule(kFirstStartPoint) {} + +XmlCdataOrCommentRule::XmlCdataOrCommentRule(StartPoint start_point) + : XmlRule("CdataOrComment") { + // <![CDATA[ ... ]]> or <!-- ... --> + // So after the initial "<!" literal can come a "[" or a "-". 
+ AddLiteralTerminal("<!"); + AddSentinelTerminal("[-").WithAction([&](const XmlActionContext& context) { + return HandlePostBangChar(context); + }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlCdataOrCommentRule::HandlePostBangChar( + const XmlActionContext& context) { + char sentinel = context.GetTerminal()->GetScanner()->GetSentinel(); + if (sentinel == '[') { + std::unique_ptr<XmlRule> rule(new XmlCdataRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } else if (sentinel == '-') { + std::unique_ptr<XmlRule> rule(new XmlCommentRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } + return context.GetResultWithBytesConsumed(0); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_element_rules.cc b/src/xml/xml_element_rules.cc new file mode 100644 index 0000000..53feb87 --- /dev/null +++ b/src/xml/xml_element_rules.cc @@ -0,0 +1,182 @@ +#include "image_io/xml/xml_element_rules.h" + +#include <utility> + +#include "image_io/xml/xml_attribute_rule.h" +#include "image_io/xml/xml_cdata_and_comment_rules.h" +#include "image_io/xml/xml_handler.h" +#include "image_io/xml/xml_pi_rule.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +namespace { + +/// Some names of terminals used by these rules. +const char kWhitespace[] = "Whitespace"; +const char kEmptyElementEnd[] = "EmptyElementEnd"; +const char kElementEnd[] = "ElementEnd"; +const char kElementSentinalDescription[] = + "The start of an attribute name or the end of the element ('>' or '/>')"; + +/// A shortcut for referring to all XmlPortion bits. +const XmlPortion kAllPortions = + XmlPortion::kBegin | XmlPortion::kMiddle | XmlPortion::kEnd; + +/// @param context The action context passed to an action handler. +/// @param token_range The token range to use when building the token context. 
+/// @param portion The token portion to use when building the token context. +/// @return A token context for use in calling an XmlHandler function. +XmlTokenContext GetTokenContext(const XmlActionContext& context, + const DataRange& token_range, + XmlPortion portion) { + return XmlTokenContext(context.GetLocation(), context.GetRange(), + context.GetSegment(), context.GetDataLineMap(), + context.GetResult(), token_range, portion); +} + +} // namespace + +XmlElementRule::XmlElementRule() : XmlElementRule(kFirstStartPoint) {} + +XmlElementRule::XmlElementRule(XmlRule::StartPoint start_point) + : XmlRule("Element") { + AddLiteralTerminal("<"); + AddNameTerminal().WithAction( + [&](const XmlActionContext& context) { return HandleName(context); }); + AddOptionalWhitespaceTerminal().WithName(kWhitespace); + AddSentinelTerminal("~/>") + .WithDescription(kElementSentinalDescription) + .WithAction([&](const XmlActionContext& context) { + return HandlePostWhitespaceChar(context); + }); + AddLiteralTerminal("/>") + .WithName(kEmptyElementEnd) + .WithAction([&](const XmlActionContext& context) { + return HandleEmptyElemTagEnd(context); + }); + AddLiteralTerminal(">") + .WithName(kElementEnd) + .WithAction([&](const XmlActionContext& context) { + return HandleSTagEnd(context); + }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlElementRule::HandleName(const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->StartElement(token_context); +} + +DataMatchResult XmlElementRule::HandlePostWhitespaceChar( + const XmlActionContext& context) { + DataMatchResult result = context.GetResultWithBytesConsumed(0); + char sentinel = context.GetTerminal()->GetScanner()->GetSentinel(); + if (sentinel == '/') { + size_t index = GetTerminalIndexFromName(kEmptyElementEnd); + SetTerminalIndex(index); + } else if (sentinel == '>') { + size_t index = GetTerminalIndexFromName(kElementEnd); + 
SetTerminalIndex(index); + } else if (sentinel == '~') { + std::unique_ptr<XmlRule> rule(new XmlAttributeRule); + SetNextRule(std::move(rule)); + ResetTerminalScanners(); + size_t index = GetTerminalIndexFromName(kWhitespace); + SetTerminalIndex(index); + result.SetType(DataMatchResult::kPartial); + } + return result; +} + +DataMatchResult XmlElementRule::HandleEmptyElemTagEnd( + const XmlActionContext& context) { + SetTerminalIndex(GetTerminalCount()); + return context.GetHandler()->FinishElement( + GetTokenContext(context, DataRange(), XmlPortion::kNone)); +} + +DataMatchResult XmlElementRule::HandleSTagEnd(const XmlActionContext& context) { + DataMatchResult result = context.GetResult(); + std::unique_ptr<XmlRule> rule(new XmlElementContentRule); + SetNextRule(std::move(rule)); + return result; +} + +XmlElementContentRule::XmlElementContentRule() : XmlRule("ElementContent") { + // ElementContent until + // <N... Element + // <?N ... ?> PI + // <!-- ... --> Comment + // <![CDATA[ ... ]]> CDATA + // </Nws> Element Etag + // &...; EntityRef or CharRef (Don't care about this) + AddThroughLiteralTerminal("<").WithAction( + [&](const XmlActionContext& context) { return HandleContent(context); }); + AddSentinelTerminal("~?!/").WithAction([&](const XmlActionContext& context) { + return HandlePostOpenChar(context); + }); + AddNameTerminal().WithAction( + [&](const XmlActionContext& context) { return HandleEndTag(context); }); + AddLiteralTerminal(">"); +} + +DataMatchResult XmlElementContentRule::HandleContent( + const XmlActionContext& context) { + const auto& range = context.GetTerminal()->GetScanner()->GetTokenRange(); + if (range.IsValid()) { + size_t end = context.GetResult().GetType() == DataMatchResult::kFull + ? 
range.GetEnd() - 1 + : range.GetEnd(); + DataRange token_range(range.GetBegin(), end); + if (token_range.GetLength() > 0) { + XmlTokenContext token_context = + GetTokenContext(context, token_range, kAllPortions); + DataMatchResult result = + context.GetHandler()->ElementContent(token_context); + context.GetTerminal()->GetScanner()->ResetTokenRange(); + return result; + } + } + context.GetTerminal()->GetScanner()->ResetTokenRange(); + return context.GetResult(); +} + +DataMatchResult XmlElementContentRule::HandlePostOpenChar( + const XmlActionContext& context) { + DataMatchResult result = context.GetResult(); + char sentinel = context.GetTerminal()->GetScanner()->GetSentinel(); + if (sentinel == '~') { + result.SetBytesConsumed(0); + result.SetType(DataMatchResult::kPartial); + std::unique_ptr<XmlRule> rule(new XmlElementRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } else if (sentinel == '?') { + result.SetType(DataMatchResult::kPartial); + std::unique_ptr<XmlRule> rule(new XmlPiRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } else if (sentinel == '!') { + result.SetType(DataMatchResult::kPartial); + std::unique_ptr<XmlRule> rule(new XmlCdataOrCommentRule(kSecondStartPoint)); + SetNextRule(std::move(rule)); + } else if (sentinel == '/') { + // Do nothing so that the next terminals (the 'name>' part of '</name>') + // will be activated and scanned. 
+ return context.GetResult(); + } + ResetTerminalScanners(); + SetTerminalIndex(0); + return result; +} + +DataMatchResult XmlElementContentRule::HandleEndTag( + const XmlActionContext& context) { + XmlTokenContext token_context(context); + return context.GetHandler()->FinishElement(token_context); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_handler.cc b/src/xml/xml_handler.cc new file mode 100644 index 0000000..591d43c --- /dev/null +++ b/src/xml/xml_handler.cc @@ -0,0 +1,39 @@ +#include "image_io/xml/xml_handler.h" + +namespace photos_editing_formats { +namespace image_io { + +DataMatchResult XmlHandler::AttributeName(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::AttributeValue(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::StartElement(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::FinishElement(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::ElementContent(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::Comment(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::Cdata(const XmlTokenContext& context) { + return context.GetResult(); +} + +DataMatchResult XmlHandler::Pi(const XmlTokenContext& context) { + return context.GetResult(); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_pi_rule.cc b/src/xml/xml_pi_rule.cc new file mode 100644 index 0000000..071b8fd --- /dev/null +++ b/src/xml/xml_pi_rule.cc @@ -0,0 +1,28 @@ +#include "image_io/xml/xml_pi_rule.h" + +#include "image_io/xml/xml_handler.h" +#include "image_io/xml/xml_token_context.h" + +namespace photos_editing_formats { +namespace image_io { + +XmlPiRule::XmlPiRule() : XmlPiRule(kFirstStartPoint) {} + 
+XmlPiRule::XmlPiRule(XmlRule::StartPoint start_point) : XmlRule("PI") { + // <? ... ?> + AddLiteralTerminal("<?"); + AddThroughLiteralTerminal("?>").WithAction( + [&](const XmlActionContext& context) { return HandlePiValue(context); }); + if (start_point == kSecondStartPoint) { + SetTerminalIndex(1); + } +} + +DataMatchResult XmlPiRule::HandlePiValue(const XmlActionContext& context) { + XmlTokenContext token_context(context); + DataMatchResult result = context.GetHandler()->Pi(token_context); + return result; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_reader.cc b/src/xml/xml_reader.cc new file mode 100644 index 0000000..467cb82 --- /dev/null +++ b/src/xml/xml_reader.cc @@ -0,0 +1,189 @@ +#include "image_io/xml/xml_reader.h" + +#include <iomanip> +#include <sstream> +#include <string> +#include <utility> + +#include "image_io/base/message.h" +#include "image_io/base/message_handler.h" + +namespace photos_editing_formats { +namespace image_io { + +namespace { + +/// The reader name used for error messages. 
+const char kReaderName[] = "XmlReader"; + +} // namespace + +bool XmlReader::StartParse(std::unique_ptr<XmlRule> rule) { + bytes_parsed_ = 0; + rule_stack_.clear(); + if (!rule) { + std::string text = std::string(kReaderName) + ":StartParse:NoTopLevelRule"; + Message message(Message::kInternalError, 0, text); + ReportError(message); + return false; + } + rule_stack_.push_back(std::move(rule)); + has_internal_or_syntax_error_ = false; + has_errors_ = false; + return true; +} + +bool XmlReader::FinishParse() { + if (has_internal_or_syntax_error_) { + return false; + } + std::string error_text; + if (rule_stack_.empty() || + (rule_stack_.size() == 1 && + rule_stack_.back()->IsPermissibleToFinish(&error_text))) { + return true; + } + std::stringstream ss; + ss << kReaderName << ":"; + if (error_text.empty()) { + ss << "While parsing text with rule:"; + ss << rule_stack_.back()->GetName(); + XmlTerminal* terminal = rule_stack_.back()->GetCurrentTerminal(); + if (terminal) { + if (!terminal->GetName().empty()) { + ss << ":" << terminal->GetName(); + } + ss << ":" << terminal->GetScanner()->GetDescription(); + } + } else { + ss << error_text; + } + Message message(Message::kPrematureEndOfDataError, 0, ss.str()); + has_internal_or_syntax_error_ = true; + ReportError(message); + return false; +} + +bool XmlReader::Parse(const std::string& value) { + size_t location = GetBytesParsed(); + DataRange range(location, location + value.length()); + const Byte* bytes = reinterpret_cast<const Byte*>(value.c_str()); + auto segment = DataSegment::Create(range, bytes, DataSegment::kDontDelete); + return Parse(location, range, *segment); +} + +bool XmlReader::Parse(size_t start_location, const DataRange& range, + const DataSegment& segment) { + if (has_internal_or_syntax_error_) { + return false; + } + XmlHandlerContext context(start_location, range, segment, *data_line_map_, + handler_); + InitializeContextNameList(&context); + if (!context.IsValidLocationAndRange()) { + 
DataMatchResult result; + result.SetMessage(Message::kInternalError, + context.GetInvalidLocationAndRangeErrorText()); + ReportError(result, context); + return false; + } + if (rule_stack_.empty()) { + DataMatchResult result; + result.SetMessage(Message::kInternalError, "NoActiveRule"); + ReportError(result, context); + return false; + } + if (data_line_map_ == &internal_data_line_map_) { + internal_data_line_map_.FindDataLines(range, segment); + } + size_t bytes_remaining = range.GetEnd() - start_location; + while (bytes_remaining > 0 && !rule_stack_.empty() && + !has_internal_or_syntax_error_) { + auto& rule = rule_stack_.back(); + InitializeContextNameList(&context); + DataMatchResult result = rule->Parse(context); + switch (result.GetType()) { + case DataMatchResult::kError: + case DataMatchResult::kNone: + ReportError(result, context); + break; + case DataMatchResult::kPartial: + ReportMessageIfNeeded(result); + bytes_parsed_ += result.GetBytesConsumed(); + bytes_remaining -= result.GetBytesConsumed(); + context.IncrementLocation(result.GetBytesConsumed()); + if (rule->HasNextRule()) { + // Delegation by child rule: push the next. + rule_stack_.push_back(rule->ReleaseNextRule()); + } + break; + case DataMatchResult::kPartialOutOfData: + ReportMessageIfNeeded(result); + bytes_parsed_ += result.GetBytesConsumed(); + return true; + case DataMatchResult::kFull: + ReportMessageIfNeeded(result); + bytes_parsed_ += result.GetBytesConsumed(); + bytes_remaining -= result.GetBytesConsumed(); + context.IncrementLocation(result.GetBytesConsumed()); + if (rule->HasNextRule()) { + // Delegation by chaining: pop the current rule and push the next. 
+ auto next_rule = rule->ReleaseNextRule(); + rule_stack_.pop_back(); + rule_stack_.push_back(std::move(next_rule)); + } else { + rule_stack_.pop_back(); + } + break; + } + } + if (bytes_remaining > 0 && rule_stack_.empty()) { + InitializeContextNameList(&context); + std::string text = context.GetErrorText("NoActiveRule", ""); + Message message(Message::kSyntaxError, 0, text); + ReportError(message); + return false; + } + return !has_internal_or_syntax_error_; +} + +void XmlReader::InitializeContextNameList(XmlHandlerContext* context) { + auto name_list = context->GetNameList(); + name_list.clear(); + name_list.push_back(kReaderName); + if (!rule_stack_.empty()) { + name_list.push_back(rule_stack_.back()->GetName()); + } +} + +void XmlReader::ReportMessageIfNeeded(const DataMatchResult& result) { + if (result.HasMessage()) { + ReportError(result.GetMessage()); + } +} + +void XmlReader::ReportError(const DataMatchResult& result, + const DataContext& context) { + if (!result.HasMessage()) { + Message message(Message::kInternalError, 0, + context.GetErrorText("Rule had error but no message", "")); + ReportError(message); + } + ReportError(result.GetMessage()); +} + +void XmlReader::ReportError(const Message& message) { + if (message_handler_) { + message_handler_->ReportMessage(message); + } + if (message.GetType() == Message::kInternalError || + message.GetType() == Message::kSyntaxError) { + has_internal_or_syntax_error_ = true; + } + if (message.IsError()) { + has_errors_ = true; + } +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_rule.cc b/src/xml/xml_rule.cc new file mode 100644 index 0000000..793381c --- /dev/null +++ b/src/xml/xml_rule.cc @@ -0,0 +1,187 @@ +#include "image_io/xml/xml_rule.h" + +#include <string> +#include <utility> + +#include "image_io/base/data_scanner.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::string; +using std::unique_ptr; + +namespace { + +/// A scanner is 
reentrant if it ran out of data. In these cases, the next data +/// segment sent into the rule for parsing may be non-contiguous with the +/// previous one. If that is the case, update the scanner's token length to +/// account for the missing bytes. (Scanner token ranges represent a bounding +/// box around the token value - in these cases the actual token value is really +/// a vector of ranges. Client handlers are responsible for dealing with that +/// reality, not the scanner or rule). +/// @param scanner The current possibly reentrant scanner. +/// @param context_range The new data range that is to be parsed. +void MaybeUpdateTokenLengthForReentrantScanner(DataScanner* scanner, + const DataRange& context_range) { + const auto& token_range = scanner->GetTokenRange(); + if (scanner->GetScanCallCount() > 0 && token_range.IsValid() && + context_range.GetBegin() > token_range.GetEnd()) { + size_t skipped_byte_count = context_range.GetBegin() - token_range.GetEnd(); + scanner->ExtendTokenLength(skipped_byte_count); + } +} + +} // namespace + +XmlRule::XmlRule(const std::string& name) : name_(name), terminal_index_(0) {} + +XmlTerminal& XmlRule::AddLiteralTerminal(const std::string& literal) { + terminals_.emplace_back(DataScanner::CreateLiteralScanner(literal)); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddNameTerminal() { + terminals_.emplace_back(DataScanner::CreateNameScanner()); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddQuotedStringTerminal() { + terminals_.emplace_back(DataScanner::CreateQuotedStringScanner()); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddSentinelTerminal(const std::string& sentinels) { + terminals_.emplace_back(DataScanner::CreateSentinelScanner(sentinels)); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddThroughLiteralTerminal(const std::string& literal) { + terminals_.emplace_back(DataScanner::CreateThroughLiteralScanner(literal)); + return terminals_.back(); +} + +XmlTerminal& 
XmlRule::AddWhitespaceTerminal() { + terminals_.emplace_back(DataScanner::CreateWhitespaceScanner()); + return terminals_.back(); +} + +XmlTerminal& XmlRule::AddOptionalWhitespaceTerminal() { + terminals_.emplace_back(DataScanner::CreateOptionalWhitespaceScanner()); + return terminals_.back(); +} + +size_t XmlRule::GetTerminalIndexFromName(const std::string name) const { + if (!name.empty()) { + for (size_t index = 0; index < terminals_.size(); ++index) { + if (terminals_[index].GetName() == name) { + return index; + } + } + } + return terminals_.size(); +} + +void XmlRule::SetTerminalIndex(size_t terminal_index) { + terminal_index_ = terminal_index; +} + +XmlTerminal* XmlRule::GetCurrentTerminal() { + return terminal_index_ < terminals_.size() ? &terminals_[terminal_index_] + : nullptr; +} + +XmlTerminal* XmlRule::GetTerminal(size_t index) { + return index < terminals_.size() ? &terminals_[index] : nullptr; +} + +void XmlRule::ResetTerminalScanners() { + for (auto& terminal : terminals_) { + terminal.GetScanner()->Reset(); + } +} + +bool XmlRule::IsPermissibleToFinish(std::string*) const { + return false; +} + +DataMatchResult XmlRule::Parse(XmlHandlerContext context) { + DataMatchResult result; + if (!context.IsValidLocationAndRange()) { + result.SetType(DataMatchResult::kError); + result.SetMessage(Message::kInternalError, + context.GetInvalidLocationAndRangeErrorText()); + return result; + } + bool force_parse_return = false; + size_t bytes_available = context.GetBytesAvailable(); + size_t current_terminal_index = GetTerminalIndex(); + if (current_terminal_index < terminals_.size()) { + MaybeUpdateTokenLengthForReentrantScanner( + terminals_[current_terminal_index].GetScanner(), context.GetRange()); + } + while (!force_parse_return && current_terminal_index < terminals_.size() && + bytes_available > 0) { + SetTerminalIndex(current_terminal_index); + auto& terminal = terminals_[current_terminal_index]; + DataMatchResult scanner_result = 
terminal.GetScanner()->Scan(context); + if (terminal.GetAction() && + (scanner_result.GetType() == DataMatchResult::kFull || + scanner_result.GetType() == DataMatchResult::kPartialOutOfData)) { + XmlActionContext action_context(context, &terminal, scanner_result); + scanner_result = terminal.GetAction()(action_context); + } + result.SetType(scanner_result.GetType()); + result.IncrementBytesConsumed(scanner_result.GetBytesConsumed()); + context.IncrementLocation(scanner_result.GetBytesConsumed()); + bytes_available -= scanner_result.GetBytesConsumed(); + switch (scanner_result.GetType()) { + case DataMatchResult::kError: + result.SetMessage(scanner_result.GetMessage()); + force_parse_return = true; + break; + case DataMatchResult::kNone: + result.SetType(DataMatchResult::kError); + result.SetMessage( + Message::kInternalError, + context.GetErrorText("Invalid scanner match result", + terminal.GetScanner()->GetDescription())); + force_parse_return = true; + break; + case DataMatchResult::kPartial: + case DataMatchResult::kPartialOutOfData: + if (scanner_result.HasMessage()) { + result.SetMessage(scanner_result.GetMessage()); + } + force_parse_return = true; + break; + case DataMatchResult::kFull: + if (scanner_result.HasMessage() && !result.HasMessage()) { + result.SetMessage(scanner_result.GetMessage()); + } + current_terminal_index = current_terminal_index == GetTerminalIndex() + ? 
current_terminal_index + 1 + : GetTerminalIndex(); + SetTerminalIndex(current_terminal_index); + if (current_terminal_index < GetTerminalCount()) { + result.SetType(DataMatchResult::kPartial); + } + force_parse_return = HasNextRule(); + break; + } + } + return result; +} + +bool XmlRule::HasNextRule() const { return next_rule_ != nullptr; } + +std::unique_ptr<XmlRule> XmlRule::ReleaseNextRule() { + return std::move(next_rule_); +} + +void XmlRule::SetNextRule(std::unique_ptr<XmlRule> next_rule) { + next_rule_ = std::move(next_rule); +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_token_context.cc b/src/xml/xml_token_context.cc new file mode 100644 index 0000000..4ffea3f --- /dev/null +++ b/src/xml/xml_token_context.cc @@ -0,0 +1,119 @@ +#include "image_io/xml/xml_token_context.h" + +#include <string> + +#include "image_io/xml/xml_action.h" +#include "image_io/xml/xml_terminal.h" + +namespace photos_editing_formats { +namespace image_io { + +using std::vector; + +namespace { + +const XmlPortion kAllPortions = + XmlPortion::kBegin | XmlPortion::kMiddle | XmlPortion::kEnd; + +XmlPortion GetPortion(const XmlActionContext& context) { + return XmlTokenContext::ComputeTokenPortion( + context.GetTerminal()->GetScanner()->GetScanCallCount(), + context.GetResult().GetType()); +} + +} // namespace + +XmlTokenContext::XmlTokenContext(const XmlActionContext& context) + : DataContext(context), + result_(context.GetResult()), + token_range_(context.GetTerminal()->GetScanner()->GetTokenRange()), + token_portion_(GetPortion(context)) {} + +XmlTokenContext::XmlTokenContext(size_t location, const DataRange& range, + const DataSegment& segment, + const DataLineMap& data_line_map, + const DataMatchResult& result, + const DataRange& token_range, + const XmlPortion& token_portion) + : DataContext(location, range, segment, data_line_map), + result_(result), + token_range_(token_range), + token_portion_(token_portion) {} + +bool 
XmlTokenContext::BuildTokenValue(std::string* value, + bool trim_first_and_last_chars) const { + bool contains_end = ContainsAny(token_portion_, XmlPortion::kEnd); + size_t end_delta = trim_first_and_last_chars && contains_end ? 1 : 0; + size_t begin_delta = 0; + if (ContainsAny(token_portion_, XmlPortion::kBegin)) { + begin_delta = trim_first_and_last_chars ? 1 : 0; + value->clear(); + } + if (ContainsAny(token_portion_, kAllPortions)) { + const auto& segment = GetSegment(); + DataRange range_with_deltas(token_range_.GetBegin() + begin_delta, + token_range_.GetEnd() - end_delta); + auto clipped_range = GetRange().GetIntersection(range_with_deltas); + if (clipped_range.IsValid()) { + const char* cbytes = reinterpret_cast<const char*>( + segment.GetBuffer(clipped_range.GetBegin())); + value->append(cbytes, clipped_range.GetLength()); + } + } + return contains_end; +} + +bool XmlTokenContext::BuildTokenValueRanges( + vector<DataRange>* value_ranges, bool trim_first_and_last_chars) const { + size_t delta = trim_first_and_last_chars ? 
1 : 0; + auto clipped_range = GetRange().GetIntersection(token_range_); + if (ContainsAny(token_portion_, XmlPortion::kBegin)) { + value_ranges->clear(); + if (clipped_range.IsValid()) { + value_ranges->push_back( + DataRange(clipped_range.GetBegin() + delta, clipped_range.GetEnd())); + } + + } else if (ContainsAny(token_portion_, kAllPortions)) { + if (clipped_range.IsValid()) { + if (!value_ranges->empty() && + value_ranges->back().GetEnd() == clipped_range.GetBegin()) { + value_ranges->back() = + DataRange(value_ranges->back().GetBegin(), clipped_range.GetEnd()); + } else { + value_ranges->push_back(clipped_range); + } + } + } + bool has_end = ContainsAny(token_portion_, XmlPortion::kEnd); + if (has_end && !value_ranges->empty() && clipped_range.IsValid() && + trim_first_and_last_chars) { + auto& back_range = value_ranges->back(); + back_range = DataRange(back_range.GetBegin(), back_range.GetEnd() - delta); + } + return has_end; +} + +XmlPortion XmlTokenContext::ComputeTokenPortion( + size_t token_scan_count, DataMatchResult::Type result_type) { + const bool first_scan = token_scan_count == 1; + const bool subsequent_scan = token_scan_count > 1; + const bool full_match = result_type == DataMatchResult::kFull; + const bool partial_match = + result_type == DataMatchResult::kPartialOutOfData || + result_type == DataMatchResult::kPartial; + XmlPortion portion = XmlPortion::kNone; + if (first_scan && full_match) { + portion = kAllPortions; + } else if (first_scan && partial_match) { + portion = XmlPortion::kBegin | XmlPortion::kMiddle; + } else if (subsequent_scan && full_match) { + portion = XmlPortion::kMiddle | XmlPortion::kEnd; + } else if (subsequent_scan && partial_match) { + portion = XmlPortion::kMiddle; + } + return portion; +} + +} // namespace image_io +} // namespace photos_editing_formats diff --git a/src/xml/xml_writer.cc b/src/xml/xml_writer.cc new file mode 100644 index 0000000..e280b66 --- /dev/null +++ b/src/xml/xml_writer.cc @@ -0,0 +1,141 @@ 
+#include "image_io/xml/xml_writer.h" + +#include <iomanip> +#include <string> + +namespace photos_editing_formats { +namespace image_io { + +using std::ostream; +using std::string; +using std::vector; + +namespace { + +const char kXmlnsColon[] = "xmlns:"; + +} // namespace + +XmlWriter::XmlWriter(std::ostream& os) + : os_(os), element_count_(0), quote_mark_('"') {} + +void XmlWriter::WriteXmlns(const string& prefix, const string& uri) { + string name = string(kXmlnsColon) + prefix; + WriteAttributeNameAndValue(name, uri, true); +} + +size_t XmlWriter::StartWritingElement(const string& element_name) { + MaybeWriteCloseBracket(true); + size_t current_depth = element_data_.size(); + if (current_depth > 0) { + element_data_.back().has_children = true; + } + element_data_.emplace_back(element_name); + os_ << indent_ << "<" << element_name; + indent_ += " "; + element_count_ += 1; + return current_depth; +} + +void XmlWriter::FinishWritingElement() { + if (!element_data_.empty()) { + if (indent_.size() >= 2) { + indent_.resize(indent_.size() - 2); + } + auto& data = element_data_.back(); + if (!data.has_content && !data.has_children) { + if (!data.has_attributes || data.has_children) { + os_ << indent_; + } + os_ << "/>" << std::endl; + } else { + if (!data.has_content) { + os_ << indent_; + } + os_ << "</" << data.name << ">" << std::endl; + } + element_data_.pop_back(); + } +} + +void XmlWriter::FinishWritingElementsToDepth(size_t depth) { + if (!element_data_.empty()) { + for (size_t index = element_data_.size(); index > depth; --index) { + FinishWritingElement(); + } + } +} + +size_t XmlWriter::StartWritingElements(const vector<string>& element_names) { + size_t current_depth = element_data_.size(); + for (const auto& element_name : element_names) { + StartWritingElement(element_name); + } + return current_depth; +} + +void XmlWriter::WriteElementAndContent(const string& element_name, + const string& content) { + StartWritingElement(element_name); + 
WriteContent(content); + FinishWritingElement(); +} + +void XmlWriter::WriteContent(const string& content) { + MaybeWriteCloseBracket(false); + if (!element_data_.empty()) { + auto& data = element_data_.back(); + data.has_content = true; + os_ << content; + } +} + +void XmlWriter::WriteAttributeNameAndValue(const string& name, + const string& value, + bool add_quote_marks) { + WriteAttributeName(name); + WriteAttributeValue(add_quote_marks, value, add_quote_marks); +} + +void XmlWriter::WriteAttributeName(const string& name) { + if (!element_data_.empty()) { + os_ << std::endl << indent_ << name << "="; + element_data_.back().has_attributes = true; + } +} + +void XmlWriter::WriteAttributeValue(bool add_leading_quote_mark, + const string& value, + bool add_trailing_quote_mark) { + if (!element_data_.empty()) { + if (add_leading_quote_mark) os_ << quote_mark_; + os_ << value; + if (add_trailing_quote_mark) os_ << quote_mark_; + } +} + +void XmlWriter::WriteComment(const std::string& comment) { + MaybeWriteCloseBracket(true); + os_ << indent_ << "<!-- " << comment << " -->" << std::endl; + if (!element_data_.empty()) { + auto& data = element_data_.back(); + data.has_children = true; + } +} + +bool XmlWriter::MaybeWriteCloseBracket(bool with_trailing_newline) { + if (!element_data_.empty()) { + auto& data = element_data_.back(); + if (!data.has_content && !data.has_children) { + os_ << ">"; + if (with_trailing_newline) { + os_ << std::endl; + } + return true; + } + } + return false; +} + +} // namespace image_io +} // namespace photos_editing_formats |