text_format.h 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: jschorr@google.com (Joseph Schorr)
  31. // Based on original Protocol Buffers design by
  32. // Sanjay Ghemawat, Jeff Dean, and others.
  33. //
  34. // Utilities for printing and parsing protocol messages in a human-readable,
  35. // text-based format.
  36. #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
  37. #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
  38. #include <map>
  39. #include <memory>
  40. #include <string>
  41. #include <vector>
  42. #include <google/protobuf/stubs/common.h>
  43. #include <google/protobuf/descriptor.h>
  44. #include <google/protobuf/message.h>
  45. #include <google/protobuf/message_lite.h>
  46. #include <google/protobuf/port.h>
  47. #include <google/protobuf/port_def.inc>
  48. #ifdef SWIG
  49. #error "You cannot SWIG proto headers"
  50. #endif
  51. namespace google {
  52. namespace protobuf {
  53. namespace io {
      // Forward declaration only: this header refers to ErrorCollector solely
      // through pointers (Parser::RecordErrorsTo and Parser::error_collector_),
      // so the full definition is not required here.
  54. class ErrorCollector; // tokenizer.h
  55. } // namespace io
  56. // This class implements protocol buffer text format, colloquially known as text
  57. // proto. Printing and parsing protocol messages in text format is useful for
  58. // debugging and human editing of messages.
  59. //
  60. // This class is really a namespace that contains only static methods.
  61. class PROTOBUF_EXPORT TextFormat {
  62. public:
  63. // Writes a textual representation of `message` to the stream `output`.
  64. // Returns false if printing fails.
  65. static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
  66. // Prints the fields in an UnknownFieldSet. They are printed by tag number
  67. // only. Embedded messages are heuristically identified by attempting to
  68. // parse them. Returns false if printing fails.
  69. static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
  70. io::ZeroCopyOutputStream* output);
  71. // Like Print(), but writes directly to the string `*output`.
  72. // Note: `*output` is cleared prior to printing, and is left empty if
  73. // printing fails. Returns false if printing fails.
  74. static bool PrintToString(const Message& message, std::string* output);
  75. // Like PrintUnknownFields(), but writes directly to the string `*output`.
  76. // Returns false if printing fails.
  77. static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
  78. std::string* output);
  79. // Writes a textual representation of the value of `field` on `message`
  80. // into `*output`. For non-repeated fields, an index of -1 must be
  81. // supplied. Note that this method will print the default value for a
  82. // field if it is not set. Unlike the functions above it returns nothing.
  83. static void PrintFieldValueToString(const Message& message,
  84. const FieldDescriptor* field, int index,
  85. std::string* output);
      // Abstract text sink handed to the printer classes declared in this
      // header. Subclasses must implement Print(); the indentation hooks have
      // do-nothing defaults.
  86. class PROTOBUF_EXPORT BaseTextGenerator {
  87. public:
  88. virtual ~BaseTextGenerator();
      // Indentation hooks. The base implementations are empty.
  89. virtual void Indent() {}
  90. virtual void Outdent() {}
  91. // Returns the current indentation size in characters. The base
      // implementation always returns 0.
  92. virtual size_t GetCurrentIndentationSize() const { return 0; }
  93. // Print `size` characters starting at `text` to the output stream.
  94. virtual void Print(const char* text, size_t size) = 0;
      // Convenience wrapper: forwards the string's data() and size() to
      // Print(), so the length is passed explicitly.
  95. void PrintString(const std::string& str) { Print(str.data(), str.size()); }
      // Convenience wrapper for character-array literals: the array length n
      // is deduced at compile time, so no runtime length computation is needed.
  96. template <size_t n>
  97. void PrintLiteral(const char (&text)[n]) {
  98. Print(text, n - 1); // n includes the terminating zero character.
  99. }
  100. };
  101. // The default printer that converts scalar values from fields into their
  102. // string representation.
  103. // Derive from FastFieldValuePrinter to print fields in a different way,
  104. // and register the subclass with a Printer (see
       // Printer::SetDefaultFieldValuePrinter and
       // Printer::RegisterFieldValuePrinter).
  105. class PROTOBUF_EXPORT FastFieldValuePrinter {
  106. public:
  107. FastFieldValuePrinter();
  108. virtual ~FastFieldValuePrinter();
       // Scalar hooks, one per value type. Each is const and receives the
       // value together with the generator to write to.
  109. virtual void PrintBool(bool val, BaseTextGenerator* generator) const;
  110. virtual void PrintInt32(int32_t val, BaseTextGenerator* generator) const;
  111. virtual void PrintUInt32(uint32_t val, BaseTextGenerator* generator) const;
  112. virtual void PrintInt64(int64_t val, BaseTextGenerator* generator) const;
  113. virtual void PrintUInt64(uint64_t val, BaseTextGenerator* generator) const;
  114. virtual void PrintFloat(float val, BaseTextGenerator* generator) const;
  115. virtual void PrintDouble(double val, BaseTextGenerator* generator) const;
  116. virtual void PrintString(const std::string& val,
  117. BaseTextGenerator* generator) const;
  118. virtual void PrintBytes(const std::string& val,
  119. BaseTextGenerator* generator) const;
       // Receives both the numeric enum value and its name.
  120. virtual void PrintEnum(int32_t val, const std::string& name,
  121. BaseTextGenerator* generator) const;
       // Two overloads for printing a field name: this one additionally
       // receives field_index and field_count.
       // NOTE(review): the exact meaning of field_index/field_count is not
       // visible in this header -- confirm against text_format.cc.
  122. virtual void PrintFieldName(const Message& message, int field_index,
  123. int field_count, const Reflection* reflection,
  124. const FieldDescriptor* field,
  125. BaseTextGenerator* generator) const;
  126. virtual void PrintFieldName(const Message& message,
  127. const Reflection* reflection,
  128. const FieldDescriptor* field,
  129. BaseTextGenerator* generator) const;
       // Hook for the start of a sub-message; PrintMessageEnd below is its
       // counterpart and takes the same arguments.
  130. virtual void PrintMessageStart(const Message& message, int field_index,
  131. int field_count, bool single_line_mode,
  132. BaseTextGenerator* generator) const;
  133. // Allows overriding the logic for printing the content of a message.
  134. // Return false to use the default printing logic. Note that it is legal for
  135. // this function to print something and then return false to use the default
  136. // content printing (although at that point it would behave similarly to
  137. // PrintMessageStart).
  138. virtual bool PrintMessageContent(const Message& message, int field_index,
  139. int field_count, bool single_line_mode,
  140. BaseTextGenerator* generator) const;
  141. virtual void PrintMessageEnd(const Message& message, int field_index,
  142. int field_count, bool single_line_mode,
  143. BaseTextGenerator* generator) const;
  144. private:
       // Macro defined outside this file; presumably disables copy
       // construction and assignment -- confirm in the stubs headers.
  145. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FastFieldValuePrinter);
  146. };
  147. // Deprecated: please use FastFieldValuePrinter instead.
       //
       // Older printer interface whose hooks return the text as a std::string
       // instead of writing to a BaseTextGenerator. It has no
       // PrintMessageContent hook and only one PrintFieldName overload.
  148. class PROTOBUF_EXPORT FieldValuePrinter {
  149. public:
  150. FieldValuePrinter();
  151. virtual ~FieldValuePrinter();
       // Scalar hooks, one per value type; each returns the printed text.
  152. virtual std::string PrintBool(bool val) const;
  153. virtual std::string PrintInt32(int32_t val) const;
  154. virtual std::string PrintUInt32(uint32_t val) const;
  155. virtual std::string PrintInt64(int64_t val) const;
  156. virtual std::string PrintUInt64(uint64_t val) const;
  157. virtual std::string PrintFloat(float val) const;
  158. virtual std::string PrintDouble(double val) const;
  159. virtual std::string PrintString(const std::string& val) const;
  160. virtual std::string PrintBytes(const std::string& val) const;
       // Receives both the numeric enum value and its name.
  161. virtual std::string PrintEnum(int32_t val, const std::string& name) const;
  162. virtual std::string PrintFieldName(const Message& message,
  163. const Reflection* reflection,
  164. const FieldDescriptor* field) const;
  165. virtual std::string PrintMessageStart(const Message& message,
  166. int field_index, int field_count,
  167. bool single_line_mode) const;
  168. virtual std::string PrintMessageEnd(const Message& message, int field_index,
  169. int field_count,
  170. bool single_line_mode) const;
  171. private:
       // Embedded FastFieldValuePrinter. NOTE(review): presumably the default
       // implementations above forward to it; the definitions are not in this
       // header -- confirm in text_format.cc.
  172. FastFieldValuePrinter delegate_;
  173. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
  174. };
       // Interface for printing a whole message in a custom way. Instances are
       // registered per message Descriptor through
       // Printer::RegisterMessagePrinter.
  175. class PROTOBUF_EXPORT MessagePrinter {
  176. public:
  177. MessagePrinter() {}
  178. virtual ~MessagePrinter() {}
       // Writes `message` to `generator`. `single_line_mode` is passed through
       // so the implementation can adapt its layout. Must be implemented by
       // subclasses.
  179. virtual void Print(const Message& message, bool single_line_mode,
  180. BaseTextGenerator* generator) const = 0;
  181. private:
  182. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessagePrinter);
  183. };
  184. // Interface that Printers or Parsers can use to find extensions, or types
  185. // referenced in Any messages. All lookups are const virtual methods, so
       // subclasses can override any subset of them.
  186. class PROTOBUF_EXPORT Finder {
  187. public:
  188. virtual ~Finder();
  189. // Try to find an extension of *message by fully-qualified field
  190. // name. Returns NULL if no extension is known for this name.
  191. // The base implementation uses the extensions already known by the message.
  192. virtual const FieldDescriptor* FindExtension(Message* message,
  193. const std::string& name) const;
  194. // Similar to FindExtension, but uses a Descriptor and the extension number
  195. // instead of using a Message and the name when doing the look up.
  196. virtual const FieldDescriptor* FindExtensionByNumber(
  197. const Descriptor* descriptor, int number) const;
  198. // Find the message type for an Any proto.
  199. // Returns NULL if no message is known for this name.
  200. // The base implementation only accepts prefixes of type.googleprod.com/ or
  201. // type.googleapis.com/, and searches the DescriptorPool of the parent
  202. // message.
  203. virtual const Descriptor* FindAnyType(const Message& message,
  204. const std::string& prefix,
  205. const std::string& name) const;
  206. // Find the message factory for the given extension field. This can be used
  207. // to generalize the Parser to add extension fields to a message in the same
  208. // way as the "input" message for the Parser.
  209. virtual MessageFactory* FindExtensionFactory(
  210. const FieldDescriptor* field) const;
  211. };
  212. // Class for users who require more fine-grained control over how
  213. // a protobuffer message is printed out. Configure it through the setters
       // below, then call one of the const Print* methods.
  214. class PROTOBUF_EXPORT Printer {
  215. public:
       // Defined outside this header. The option members below have no
       // in-class initializers, so presumably this constructor sets their
       // defaults -- confirm in text_format.cc.
  216. Printer();
  217. // Like TextFormat::Print
  218. bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
  219. // Like TextFormat::PrintUnknownFields
  220. bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
  221. io::ZeroCopyOutputStream* output) const;
  222. // Like TextFormat::PrintToString
  223. bool PrintToString(const Message& message, std::string* output) const;
  224. // Like TextFormat::PrintUnknownFieldsToString
  225. bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
  226. std::string* output) const;
  227. // Like TextFormat::PrintFieldValueToString
  228. void PrintFieldValueToString(const Message& message,
  229. const FieldDescriptor* field, int index,
  230. std::string* output) const;
  231. // Adjust the initial indent level of all output. Each indent level is
  232. // equal to two spaces.
  233. void SetInitialIndentLevel(int indent_level) {
  234. initial_indent_level_ = indent_level;
  235. }
  236. // If printing in single line mode, then the entire message will be output
  237. // on a single line with no line breaks.
  238. void SetSingleLineMode(bool single_line_mode) {
  239. single_line_mode_ = single_line_mode;
  240. }
       // Returns the value last stored by SetSingleLineMode().
  241. bool IsInSingleLineMode() const { return single_line_mode_; }
  242. // If use_field_number is true, uses field number instead of field name.
  243. void SetUseFieldNumber(bool use_field_number) {
  244. use_field_number_ = use_field_number;
  245. }
  246. // Set true to print repeated primitives in a format like:
  247. // field_name: [1, 2, 3, 4]
  248. // instead of printing each value on its own line. Short format applies
  249. // only to primitive values -- i.e. everything except strings and
  250. // sub-messages/groups.
  251. void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
  252. use_short_repeated_primitives_ = use_short_repeated_primitives;
  253. }
  254. // Set true to output UTF-8 instead of ASCII. The only difference
  255. // is that bytes >= 0x80 in string fields will not be escaped,
  256. // because they are assumed to be part of UTF-8 multi-byte
  257. // sequences. This will change the default FastFieldValuePrinter.
  258. void SetUseUtf8StringEscaping(bool as_utf8);
  259. // Set the default FastFieldValuePrinter that is used for all fields that
  260. // don't have a field-specific printer registered.
  261. // Takes ownership of the printer.
  262. void SetDefaultFieldValuePrinter(const FastFieldValuePrinter* printer);
       // Deprecated overload accepting the older FieldValuePrinter interface.
  263. PROTOBUF_DEPRECATED_MSG("Please use FastFieldValuePrinter")
  264. void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
  265. // Sets whether we want to hide unknown fields or not.
  266. // Usually unknown fields are printed in a generic way that includes the
  267. // tag number of the field instead of field name. However, sometimes it
  268. // is useful to be able to print the message without unknown fields (e.g.
  269. // for the python protobuf version to maintain consistency between its pure
  270. // python and c++ implementations).
  271. void SetHideUnknownFields(bool hide) { hide_unknown_fields_ = hide; }
  272. // If print_message_fields_in_index_order is true, fields of a proto message
  273. // will be printed using the order defined in source code instead of the
  274. // field number, extensions will be printed at the end of the message
  275. // and their relative order is determined by the extension number.
  276. // By default, use the field number order.
  277. void SetPrintMessageFieldsInIndexOrder(
  278. bool print_message_fields_in_index_order) {
  279. print_message_fields_in_index_order_ =
  280. print_message_fields_in_index_order;
  281. }
  282. // If expand==true, expand google.protobuf.Any payloads. The output
  283. // will be of form
  284. // [type_url] { <value_printed_in_text> }
  285. //
  286. // If expand==false, print Any using the default printer. The output will
  287. // look like
  288. // type_url: "<type_url>" value: "serialized_content"
  289. void SetExpandAny(bool expand) { expand_any_ = expand; }
  290. // Set the Finder this printer uses to look up message types for Any
       // payloads. The pointer is stored as-is in a raw `const Finder*`.
       // NOTE(review): nothing here transfers ownership, so the caller
       // presumably must keep the Finder alive -- confirm.
  291. void SetFinder(const Finder* finder) { finder_ = finder; }
  292. // If non-zero, we truncate all string fields that are longer than
  293. // this threshold. This is useful when the proto message has very long
  294. // strings, e.g., dump of encoded image file.
  295. //
  296. // NOTE(hfgong): Setting a non-zero value breaks round-trip safe
  297. // property of TextFormat::Printer. That is, from the printed message, we
  298. // cannot fully recover the original string field any more.
  299. void SetTruncateStringFieldLongerThan(
  300. const int64_t truncate_string_field_longer_than) {
  301. truncate_string_field_longer_than_ = truncate_string_field_longer_than;
  302. }
  303. // Register a custom field-specific FastFieldValuePrinter for fields
  304. // with a particular FieldDescriptor.
  305. // Returns "true" if the registration succeeded, or "false", if there is
  306. // already a printer for that FieldDescriptor.
  307. // Takes ownership of the printer on successful registration.
  308. bool RegisterFieldValuePrinter(const FieldDescriptor* field,
  309. const FastFieldValuePrinter* printer);
       // Deprecated overload accepting the older FieldValuePrinter interface.
  310. PROTOBUF_DEPRECATED_MSG("Please use FastFieldValuePrinter")
  311. bool RegisterFieldValuePrinter(const FieldDescriptor* field,
  312. const FieldValuePrinter* printer);
  313. // Register a custom message-specific MessagePrinter for messages with a
  314. // particular Descriptor.
  315. // Returns "true" if the registration succeeded, or "false" if there is
  316. // already a printer for that Descriptor.
       // NOTE(review): registered printers are stored in a map of
       // std::unique_ptr (custom_message_printers_), so ownership presumably
       // transfers on success, as for field printers -- confirm.
  317. bool RegisterMessagePrinter(const Descriptor* descriptor,
  318. const MessagePrinter* printer);
  319. private:
  320. // Forward declaration of an internal class used to print the text
  321. // output to the OutputStream (see text_format.cc for implementation).
  322. class TextGenerator;
  323. // Forward declaration of an internal class used to print field values for
  324. // DebugString APIs (see text_format.cc for implementation).
  325. class DebugStringFieldValuePrinter;
  326. // Forward declaration of an internal class used to print UTF-8 escaped
  327. // strings (see text_format.cc for implementation).
  328. class FastFieldValuePrinterUtf8Escaping;
       // String constant defined outside this header; its value and use are
       // not visible here.
  329. static const char* const kDoNotParse;
  330. // Internal Print method, used for writing to the OutputStream via
  331. // the TextGenerator class.
  332. void Print(const Message& message, TextGenerator* generator) const;
  333. // Print a single field.
  334. void PrintField(const Message& message, const Reflection* reflection,
  335. const FieldDescriptor* field,
  336. TextGenerator* generator) const;
  337. // Print a repeated primitive field in short form.
  338. void PrintShortRepeatedField(const Message& message,
  339. const Reflection* reflection,
  340. const FieldDescriptor* field,
  341. TextGenerator* generator) const;
  342. // Print the name of a field -- i.e. everything that comes before the
  343. // ':' for a single name/value pair.
  344. void PrintFieldName(const Message& message, int field_index,
  345. int field_count, const Reflection* reflection,
  346. const FieldDescriptor* field,
  347. TextGenerator* generator) const;
  348. // Outputs a textual representation of the value of the field supplied on
  349. // the message supplied or the default value if not set.
  350. void PrintFieldValue(const Message& message, const Reflection* reflection,
  351. const FieldDescriptor* field, int index,
  352. TextGenerator* generator) const;
  353. // Print the fields in an UnknownFieldSet. They are printed by tag number
  354. // only. Embedded messages are heuristically identified by attempting to
  355. // parse them (subject to the recursion budget).
  356. void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
  357. TextGenerator* generator,
  358. int recursion_budget) const;
       // Any-specific printing helper. NOTE(review): presumably the bool
       // reports whether the Any payload was printed in expanded form --
       // confirm in text_format.cc.
  359. bool PrintAny(const Message& message, TextGenerator* generator) const;
       // Returns the printer registered for `field` in custom_printers_, or
       // the default field value printer when none is registered. The
       // returned pointer remains owned by this Printer.
  360. const FastFieldValuePrinter* GetFieldPrinter(
  361. const FieldDescriptor* field) const {
  362. auto it = custom_printers_.find(field);
  363. return it == custom_printers_.end() ? default_field_value_printer_.get()
  364. : it->second.get();
  365. }
       // Option state written by the setters above.
  366. int initial_indent_level_;
  367. bool single_line_mode_;
  368. bool use_field_number_;
  369. bool use_short_repeated_primitives_;
       // No setter for this flag is declared in this header.
  370. bool insert_silent_marker_;
  371. bool hide_unknown_fields_;
  372. bool print_message_fields_in_index_order_;
  373. bool expand_any_;
  374. int64_t truncate_string_field_longer_than_;
       // Printers owned by this object (held through std::unique_ptr).
  375. std::unique_ptr<const FastFieldValuePrinter> default_field_value_printer_;
  376. typedef std::map<const FieldDescriptor*,
  377. std::unique_ptr<const FastFieldValuePrinter>>
  378. CustomPrinterMap;
  379. CustomPrinterMap custom_printers_;
  380. typedef std::map<const Descriptor*, std::unique_ptr<const MessagePrinter>>
  381. CustomMessagePrinterMap;
  382. CustomMessagePrinterMap custom_message_printers_;
       // Raw pointer assigned by SetFinder().
  383. const Finder* finder_;
  384. };
  385. // Parses a text-format protocol message from the given input stream to
  386. // the given message object. This function parses the human-readable format
  387. // written by Print(). Returns true on success. The message is cleared first,
  388. // even if the function fails -- see Merge() to avoid this behavior.
  389. //
  390. // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
  391. //
  392. // One use for this function is parsing handwritten strings in test code.
  393. // Another use is to parse the output from google::protobuf::Message::DebugString()
  394. // (or ShortDebugString()), because these functions output using
  395. // google::protobuf::TextFormat::Print().
  396. //
  397. // If you would like to read a protocol buffer serialized in the
  398. // (non-human-readable) binary wire format, see
  399. // google::protobuf::MessageLite::ParseFromString().
  400. static bool Parse(io::ZeroCopyInputStream* input, Message* output);
  401. // Like Parse(), but reads directly from a string.
  402. static bool ParseFromString(ConstStringParam input, Message* output);
  403. // Like Parse(), but the data is merged into the given message, as if
  404. // using Message::MergeFrom().
  405. static bool Merge(io::ZeroCopyInputStream* input, Message* output);
  406. // Like Merge(), but reads directly from a string.
  407. static bool MergeFromString(ConstStringParam input, Message* output);
  408. // Parses the given text as a single field value and stores it into the
  409. // given field of the given message. If the field is a repeated field,
  410. // the new value will be added to the end of that field.
  411. static bool ParseFieldValueFromString(const std::string& input,
  412. const FieldDescriptor* field,
  413. Message* message);
  414. // A location in the parsed text, as a (line, column) pair.
  415. struct ParseLocation {
  416. int line;
  417. int column;
       // Default-constructed locations hold -1 in both fields; the
       // ParseInfoTree accessors document line == -1 as "no location".
  418. ParseLocation() : line(-1), column(-1) {}
       // Builds a location from explicit line and column values.
  419. ParseLocation(int line_param, int column_param)
  420. : line(line_param), column(column_param) {}
  421. };
  422. // A range of locations in the parsed text, including `start` and excluding
  423. // `end`.
  424. struct ParseLocationRange {
  425. ParseLocation start;
  426. ParseLocation end;
       // Both endpoints are default-constructed, i.e. line and column are -1.
  427. ParseLocationRange() : start(), end() {}
       // Builds a range from explicit endpoints (copied by value).
  428. ParseLocationRange(ParseLocation start_param, ParseLocation end_param)
  429. : start(start_param), end(end_param) {}
  430. };
  431. // Data structure which is populated with the locations of each field
  432. // value parsed from the text. Pass one to Parser::WriteLocationsTo() to
       // have it filled in.
  433. class PROTOBUF_EXPORT ParseInfoTree {
  434. public:
  435. ParseInfoTree() = default;
       // Not copyable.
  436. ParseInfoTree(const ParseInfoTree&) = delete;
  437. ParseInfoTree& operator=(const ParseInfoTree&) = delete;
  438. // Returns the parse location range for index-th value of the field in
  439. // the parsed text. If none exists, returns a location with start and end
  440. // line -1. Index should be -1 for not-repeated fields.
  441. ParseLocationRange GetLocationRange(const FieldDescriptor* field,
  442. int index) const;
  443. // Returns the starting parse location for index-th value of the field in
  444. // the parsed text. If none exists, returns a location with line = -1. Index
  445. // should be -1 for not-repeated fields. Equivalent to taking `.start` of
       // GetLocationRange(field, index).
  446. ParseLocation GetLocation(const FieldDescriptor* field, int index) const {
  447. return GetLocationRange(field, index).start;
  448. }
  449. // Returns the parse info tree for the given field, which must be a message
  450. // type. The nested information tree is owned by the root tree and will be
  451. // deleted when it is deleted.
       // NOTE(review): `index` presumably follows the same convention as
       // GetLocationRange (-1 for non-repeated fields) -- confirm.
  452. ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
  453. int index) const;
  454. private:
  455. // Allow the text format parser to record information into the tree.
  456. friend class TextFormat;
  457. // Records the starting and ending locations of a single value for a field.
  458. void RecordLocation(const FieldDescriptor* field, ParseLocationRange range);
  459. // Creates and records a nested tree for a nested message field.
  460. ParseInfoTree* CreateNested(const FieldDescriptor* field);
  461. // Maps a field descriptor to the parse location ranges recorded for it,
       // one vector entry per recorded value.
  462. typedef std::map<const FieldDescriptor*, std::vector<ParseLocationRange>>
  463. LocationMap;
  464. // Maps a field descriptor to the nested parse info trees recorded for
  465. // it. The trees are held through std::unique_ptr.
  466. typedef std::map<const FieldDescriptor*,
  467. std::vector<std::unique_ptr<ParseInfoTree>>>
  468. NestedMap;
  469. LocationMap locations_;
  470. NestedMap nested_;
  471. };
  472. // For more control over parsing, use this class. Configure it through the
       // setters below, then call one of the Parse*/Merge* methods.
  473. class PROTOBUF_EXPORT Parser {
  474. public:
       // Constructor and destructor are defined outside this header. The
       // option members below have no in-class initializers, so presumably
       // the constructor sets their defaults -- confirm in text_format.cc.
  475. Parser();
  476. ~Parser();
  477. // Like TextFormat::Parse().
  478. bool Parse(io::ZeroCopyInputStream* input, Message* output);
  479. // Like TextFormat::ParseFromString().
  480. bool ParseFromString(ConstStringParam input, Message* output);
  481. // Like TextFormat::Merge().
  482. bool Merge(io::ZeroCopyInputStream* input, Message* output);
  483. // Like TextFormat::MergeFromString().
  484. bool MergeFromString(ConstStringParam input, Message* output);
  485. // Set where to report parse errors. If NULL (the default), errors will
  486. // be printed to stderr. The pointer is stored as-is.
  487. void RecordErrorsTo(io::ErrorCollector* error_collector) {
  488. error_collector_ = error_collector;
  489. }
  490. // Set how parser finds extensions. If NULL (the default), the
  491. // parser will use the standard Reflection object associated with
  492. // the message being parsed.
  493. void SetFinder(const Finder* finder) { finder_ = finder; }
  494. // Sets where location information about the parse will be written. If NULL
  495. // (the default), then no location will be written.
  496. void WriteLocationsTo(ParseInfoTree* tree) { parse_info_tree_ = tree; }
  497. // Normally parsing fails if, after parsing, output->IsInitialized()
  498. // returns false. Call AllowPartialMessage(true) to skip this check.
  499. void AllowPartialMessage(bool allow) { allow_partial_ = allow; }
  500. // Allow field names to be matched case-insensitively.
  501. // This is not advisable if there are fields that only differ in case, or
  502. // if you want to enforce writing in the canonical form.
  503. // This is 'false' by default.
  504. void AllowCaseInsensitiveField(bool allow) {
  505. allow_case_insensitive_field_ = allow;
  506. }
  507. // Like TextFormat::ParseFieldValueFromString
  508. bool ParseFieldValueFromString(const std::string& input,
  509. const FieldDescriptor* field,
  510. Message* output);
  511. // When an unknown extension is met, parsing will fail if this option is
  512. // set to false (the default). If true, unknown extensions will be ignored
  513. // and a warning message will be generated.
  514. // Beware! Setting this option true may hide some errors (e.g. spelling
  515. // error on extension name). This allows data loss; unlike binary format,
  516. // text format cannot preserve unknown extensions. Avoid using this option
  517. // if possible.
  518. void AllowUnknownExtension(bool allow) { allow_unknown_extension_ = allow; }
  519. // When an unknown field is met, parsing will fail if this option is set
  520. // to false (the default). If true, unknown fields will be ignored and
  521. // a warning message will be generated.
  522. // Beware! Setting this option true may hide some errors (e.g. spelling
  523. // error on field name). This allows data loss; unlike binary format, text
  524. // format cannot preserve unknown fields. Avoid using this option
  525. // if possible.
  526. void AllowUnknownField(bool allow) { allow_unknown_field_ = allow; }
       // Stores the flag in allow_field_number_.
       // NOTE(review): presumably lets the input identify fields by number
       // instead of by name; the parsing code is not in this header -- confirm.
  527. void AllowFieldNumber(bool allow) { allow_field_number_ = allow; }
  528. // Sets maximum recursion depth which parser can use. This is effectively
  529. // the maximum allowed nesting of proto messages.
  530. void SetRecursionLimit(int limit) { recursion_limit_ = limit; }
  531. private:
  532. // Forward declaration of an internal class used to parse text
  533. // representations (see text_format.cc for implementation).
  534. class ParserImpl;
  535. // Like TextFormat::Merge(). The provided implementation is used
  536. // to do the parsing.
  537. bool MergeUsingImpl(io::ZeroCopyInputStream* input, Message* output,
  538. ParserImpl* parser_impl);
       // Raw pointers assigned by RecordErrorsTo(), SetFinder() and
       // WriteLocationsTo() respectively.
  539. io::ErrorCollector* error_collector_;
  540. const Finder* finder_;
  541. ParseInfoTree* parse_info_tree_;
       // Option flags. allow_unknown_enum_, allow_relaxed_whitespace_ and
       // allow_singular_overwrites_ have no public setter declared in this
       // header.
  542. bool allow_partial_;
  543. bool allow_case_insensitive_field_;
  544. bool allow_unknown_field_;
  545. bool allow_unknown_extension_;
  546. bool allow_unknown_enum_;
  547. bool allow_field_number_;
  548. bool allow_relaxed_whitespace_;
  549. bool allow_singular_overwrites_;
       // Assigned by SetRecursionLimit().
  550. int recursion_limit_;
  551. };
  552. private:
  553. // Hack: ParseInfoTree declares TextFormat as a friend which should extend
  554. // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
  555. // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
  556. // helpers for ParserImpl to call methods of ParseInfoTree. Both helpers
       // are defined inline after the class body and forward to the
       // same-named private ParseInfoTree methods.
  557. static inline void RecordLocation(ParseInfoTree* info_tree,
  558. const FieldDescriptor* field,
  559. ParseLocationRange location);
  560. static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
  561. const FieldDescriptor* field);
  562. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
  563. };
       // Forwards to the private ParseInfoTree::RecordLocation(), which
       // TextFormat may call because ParseInfoTree declares it a friend.
       // `info_tree` is dereferenced without a null check, so it must be
       // non-null.
  564. inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
  565. const FieldDescriptor* field,
  566. ParseLocationRange location) {
  567. info_tree->RecordLocation(field, location);
  568. }
       // Forwards to the private ParseInfoTree::CreateNested() and returns its
       // result. `info_tree` is dereferenced without a null check, so it must
       // be non-null.
  569. inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
  570. ParseInfoTree* info_tree, const FieldDescriptor* field) {
  571. return info_tree->CreateNested(field);
  572. }
  573. } // namespace protobuf
  574. } // namespace google
  575. #include <google/protobuf/port_undef.inc>
  576. #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__