// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__

#include <cstdint>
#include <cstring>
#include <string>

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/arena.h>
#include <google/protobuf/arenastring.h>
#include <google/protobuf/implicit_weak_message.h>
#include <google/protobuf/inlined_string_field.h>
#include <google/protobuf/metadata_lite.h>
#include <google/protobuf/port.h>
#include <google/protobuf/repeated_field.h>
#include <google/protobuf/wire_format_lite.h>
#include <google/protobuf/stubs/strutil.h>

#include <google/protobuf/port_def.inc>
namespace google {
namespace protobuf {

class UnknownFieldSet;
class DescriptorPool;
class MessageFactory;

namespace internal {

// Template code below needs to know about the existence of these functions.
PROTOBUF_EXPORT void WriteVarint(uint32_t num, uint64_t val, std::string* s);
PROTOBUF_EXPORT void WriteLengthDelimited(uint32_t num, StringPiece val,
                                          std::string* s);
// Inline because it is just forwarding to s->WriteVarint.
inline void WriteVarint(uint32_t num, uint64_t val, UnknownFieldSet* s);
inline void WriteLengthDelimited(uint32_t num, StringPiece val,
                                 UnknownFieldSet* s);
// The basic abstraction the parser is designed for is a slight modification
// of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
// stream as a series of buffers that concatenate to the full stream.
// Pictorially a ZCIS presents a stream in chunks like so
// [---------------------------------------------------------------]
// [---------------------]           chunk 1
//                      [----------------------------]     chunk 2
//                                          chunk 3 [--------------]
//
// Here the '-'s represent the bytes which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly, with the extra property that each chunk has kSlopBytes past its
// end that overlap with the first kSlopBytes of the next chunk, or, if there
// is no next chunk, it is at least still valid to read those bytes. Again,
// pictorially, we now have
//
// [---------------------------------------------------------------]
// [-------------------....] chunk 1
//                     [------------------------....] chunk 2
//                                          chunk 3 [------------------..**]
//                                                       chunk 4 [--****]
// Here '-' means the bytes of the stream or chunk and '.' means bytes past
// the chunk that match up with the start of the next chunk. Above, each chunk
// has 4 '.' after the chunk. In case these 'overflow' bytes represent bytes
// past the stream, indicated by '*' above, their values are unspecified. It
// is still legal to read them (i.e. it should not segfault). Reading past the
// end should be detected by the user and indicated as an error.
//
// The reason for this admittedly unconventional invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important
// ways. Firstly it alleviates having to perform bounds checks if a piece of
// code is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wireformat is such that reading a key/value pair
// always takes less than 16 bytes. This removes the need to switch to the
// next buffer in the middle of reading primitive values. Hence there is no
// need to store and load the current position.
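//
// Illustrative sketch (not part of the API): because of the kSlopBytes
// overlap, a parser positioned at `ptr` inside a chunk may read a fixed
// 8-byte value without a bounds check, e.g.
//
//   uint64_t value = UnalignedLoad<uint64_t>(ptr);  // safe: 8 <= kSlopBytes
//   ptr += 8;
//
// The parse loop only checks ptr against limit_end_ / buffer_end_ once per
// field, relying on the invariant that kSlopBytes bytes past buffer_end_ are
// always readable.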
class PROTOBUF_EXPORT EpsCopyInputStream {
 public:
  enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };

  explicit EpsCopyInputStream(bool enable_aliasing)
      : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}

  void BackUp(const char* ptr) {
    GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
    int count;
    if (next_chunk_ == buffer_) {
      count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
    } else {
      count = size_ + static_cast<int>(buffer_end_ - ptr);
    }
    if (count > 0) StreamBackUp(count);
  }
  // If the return value is negative it's an error.
  PROTOBUF_MUST_USE_RESULT int PushLimit(const char* ptr, int limit) {
    GOOGLE_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes);
    // This add is safe due to the invariant above, because
    // ptr - buffer_end_ <= kSlopBytes.
    limit += static_cast<int>(ptr - buffer_end_);
    limit_end_ = buffer_end_ + (std::min)(0, limit);
    auto old_limit = limit_;
    limit_ = limit;
    return old_limit - limit;
  }

  PROTOBUF_MUST_USE_RESULT bool PopLimit(int delta) {
    if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
    limit_ = limit_ + delta;
    // TODO(gerbens) We could remove this line and hoist the code to
    // DoneFallback. Study the perf/bin-size effects.
    limit_end_ = buffer_end_ + (std::min)(0, limit_);
    return true;
  }
  PROTOBUF_MUST_USE_RESULT const char* Skip(const char* ptr, int size) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      return ptr + size;
    }
    return SkipFallback(ptr, size);
  }
  PROTOBUF_MUST_USE_RESULT const char* ReadString(const char* ptr, int size,
                                                  std::string* s) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      s->assign(ptr, size);
      return ptr + size;
    }
    return ReadStringFallback(ptr, size, s);
  }
  PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, int size,
                                                    std::string* s) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      s->append(ptr, size);
      return ptr + size;
    }
    return AppendStringFallback(ptr, size, s);
  }
  // Implemented in arenastring.cc.
  PROTOBUF_MUST_USE_RESULT const char* ReadArenaString(const char* ptr,
                                                       ArenaStringPtr* s,
                                                       Arena* arena);
  template <typename Tag, typename T>
  PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(
      const char* ptr, Tag expected_tag, RepeatedField<T>* out);
  template <typename T>
  PROTOBUF_MUST_USE_RESULT const char* ReadPackedFixed(const char* ptr,
                                                       int size,
                                                       RepeatedField<T>* out);
  template <typename Add>
  PROTOBUF_MUST_USE_RESULT const char* ReadPackedVarint(const char* ptr,
                                                        Add add);

  uint32_t LastTag() const { return last_tag_minus_1_ + 1; }
  bool ConsumeEndGroup(uint32_t start_tag) {
    bool res = last_tag_minus_1_ == start_tag;
    last_tag_minus_1_ = 0;
    return res;
  }
  bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
  bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
  void SetLastTag(uint32_t tag) { last_tag_minus_1_ = tag - 1; }
  void SetEndOfStream() { last_tag_minus_1_ = 1; }
  bool IsExceedingLimit(const char* ptr) {
    return ptr > limit_end_ &&
           (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
  }
  int BytesUntilLimit(const char* ptr) const {
    return limit_ + static_cast<int>(buffer_end_ - ptr);
  }
  // Returns true if more data is available; if it returns false, one has to
  // call Done for further checks.
  bool DataAvailable(const char* ptr) { return ptr < limit_end_; }
 protected:
  // Returns true if a limit (either an explicit limit or the end of stream)
  // is reached. It aligns *ptr across buffer seams.
  // If the limit is exceeded, it returns true and *ptr is set to null.
  bool DoneWithCheck(const char** ptr, int d) {
    GOOGLE_DCHECK(*ptr);
    if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
    int overrun = static_cast<int>(*ptr - buffer_end_);
    GOOGLE_DCHECK_LE(overrun, kSlopBytes);  // Guaranteed by parse loop.
    if (overrun ==
        limit_) {  // No need to flip buffers if we ended on a limit.
      // If we actually overran the buffer and next_chunk_ is null, it means
      // the stream ended and we passed the stream end.
      if (overrun > 0 && next_chunk_ == nullptr) *ptr = nullptr;
      return true;
    }
    auto res = DoneFallback(overrun, d);
    *ptr = res.first;
    return res.second;
  }
  const char* InitFrom(StringPiece flat) {
    overall_limit_ = 0;
    if (flat.size() > kSlopBytes) {
      limit_ = kSlopBytes;
      limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
      next_chunk_ = buffer_;
      if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
      return flat.data();
    } else {
      std::memcpy(buffer_, flat.data(), flat.size());
      limit_ = 0;
      limit_end_ = buffer_end_ = buffer_ + flat.size();
      next_chunk_ = nullptr;
      if (aliasing_ == kOnPatch) {
        aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
                    reinterpret_cast<std::uintptr_t>(buffer_);
      }
      return buffer_;
    }
  }

  const char* InitFrom(io::ZeroCopyInputStream* zcis);

  const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
    if (limit == -1) return InitFrom(zcis);
    overall_limit_ = limit;
    auto res = InitFrom(zcis);
    limit_ = limit - static_cast<int>(buffer_end_ - res);
    limit_end_ = buffer_end_ + (std::min)(0, limit_);
    return res;
  }
 private:
  const char* limit_end_;  // buffer_end_ + min(limit_, 0)
  const char* buffer_end_;
  const char* next_chunk_;
  int size_;
  int limit_;  // relative to buffer_end_
  io::ZeroCopyInputStream* zcis_ = nullptr;
  char buffer_[2 * kSlopBytes] = {};
  enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
  std::uintptr_t aliasing_ = kNoAliasing;
  // This variable is used to communicate how the parse ended, in order to
  // completely verify the parsed data. A wire-format parse can end because of
  // one of the following conditions:
  // 1) A parse can end on a pushed limit.
  // 2) A parse can end on End Of Stream (EOS).
  // 3) A parse can end on a 0 tag (only valid for a toplevel message).
  // 4) A parse can end on an end-group tag.
  // This variable should always be set to 0, which indicates case 1. If the
  // parse terminated due to EOS (case 2), it's set to 1. In case the parse
  // ended due to a terminating tag (cases 3 and 4) it's set to (tag - 1).
  // This var doesn't really belong in EpsCopyInputStream and should be part
  // of the ParseContext, but case 2 is most easily and optimally implemented
  // in DoneFallback.
  uint32_t last_tag_minus_1_ = 0;
  int overall_limit_ = INT_MAX;  // Overall limit independent of pushed limits.
  // Pretty random large number that seems like a safe allocation on most
  // systems. TODO(gerbens) do we need to set this as a build flag?
  enum { kSafeStringSize = 50000000 };
  // Advances to the next buffer chunk and returns a pointer to the same
  // logical place in the stream as set by overrun. Overrun indicates the
  // position in the slop region where the parse was left
  // (0 <= overrun <= kSlopBytes). Returns true if at a limit, at which point
  // the returned pointer may be null if there was an error. The invariant of
  // this function is that it's guaranteed that kSlopBytes bytes can be
  // accessed from the returned ptr. This function might advance more than one
  // buffer in the underlying ZeroCopyInputStream.
  std::pair<const char*, bool> DoneFallback(int overrun, int depth);
  // Advances to the next buffer; at most one call to Next() on the underlying
  // ZeroCopyInputStream is made. This function DOES NOT match the returned
  // pointer to where in the slop region the parse ends, hence no overrun
  // parameter. This is useful for string operations where you always copy
  // to the end of the buffer (including the slop region).
  const char* Next();
  // overrun is the location in the slop region where the stream currently is
  // (0 <= overrun <= kSlopBytes). This prevents flipping to the next buffer
  // of the ZeroCopyInputStream in case the parse will end in the last
  // kSlopBytes of the current buffer. depth is the current depth of nested
  // groups (or negative if the use case does not need careful tracking).
  inline const char* NextBuffer(int overrun, int depth);
  const char* SkipFallback(const char* ptr, int size);
  const char* AppendStringFallback(const char* ptr, int size,
                                   std::string* str);
  const char* ReadStringFallback(const char* ptr, int size, std::string* str);
  bool StreamNext(const void** data) {
    bool res = zcis_->Next(data, &size_);
    if (res) overall_limit_ -= size_;
    return res;
  }
  void StreamBackUp(int count) {
    zcis_->BackUp(count);
    overall_limit_ += count;
  }
  template <typename A>
  const char* AppendSize(const char* ptr, int size, const A& append) {
    int chunk_size = buffer_end_ + kSlopBytes - ptr;
    do {
      GOOGLE_DCHECK(size > chunk_size);
      if (next_chunk_ == nullptr) return nullptr;
      append(ptr, chunk_size);
      ptr += chunk_size;
      size -= chunk_size;
      // TODO(gerbens) Next calls NextBuffer which generates buffers with
      // overlap and thus incurs the cost of copying the slop regions. This is
      // not necessary for reading strings; we should just fetch the next
      // buffers directly.
      if (limit_ <= kSlopBytes) return nullptr;
      ptr = Next();
      if (ptr == nullptr) return nullptr;  // passed the limit
      ptr += kSlopBytes;
      chunk_size = buffer_end_ + kSlopBytes - ptr;
    } while (size > chunk_size);
    append(ptr, size);
    return ptr + size;
  }
  // AppendUntilEnd appends data until a limit (either a PushLimit or the end
  // of the stream) is reached. Normal payloads come from length-delimited
  // fields, which have an explicit size. Reading until a limit only happens
  // when the string takes the place of a protobuf, i.e.
  // RawMessage/StringRawMessage, lazy fields and implicit weak messages. We
  // keep these methods private and friend them.
  template <typename A>
  const char* AppendUntilEnd(const char* ptr, const A& append) {
    if (ptr - buffer_end_ > limit_) return nullptr;
    while (limit_ > kSlopBytes) {
      size_t chunk_size = buffer_end_ + kSlopBytes - ptr;
      append(ptr, chunk_size);
      ptr = Next();
      if (ptr == nullptr) return limit_end_;
      ptr += kSlopBytes;
    }
    auto end = buffer_end_ + limit_;
    GOOGLE_DCHECK(end >= ptr);
    append(ptr, end - ptr);
    return end;
  }

  PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr,
                                                    std::string* str) {
    return AppendUntilEnd(
        ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
  }
  friend class ImplicitWeakMessage;
};
// ParseContext holds all data that is global to the entire parse. Most
// importantly it contains the input stream, but it also tracks the recursion
// depth and stores the end-group tag, in case a parser ended on an end-group
// tag, to verify matching start/end-group tags.
class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
 public:
  struct Data {
    const DescriptorPool* pool = nullptr;
    MessageFactory* factory = nullptr;
    Arena* arena = nullptr;
  };

  template <typename... T>
  ParseContext(int depth, bool aliasing, const char** start, T&&... args)
      : EpsCopyInputStream(aliasing), depth_(depth) {
    *start = InitFrom(std::forward<T>(args)...);
  }
  void TrackCorrectEnding() { group_depth_ = 0; }

  bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); }

  int depth() const { return depth_; }

  Data& data() { return data_; }
  const Data& data() const { return data_; }

  const char* ParseMessage(MessageLite* msg, const char* ptr);
  // This overload supports those few cases where ParseMessage is called
  // on a class that is not actually a proto message.
  // TODO(jorg): Eliminate this use case.
  template <typename T,
            typename std::enable_if<!std::is_base_of<MessageLite, T>::value,
                                    bool>::type = true>
  PROTOBUF_MUST_USE_RESULT const char* ParseMessage(T* msg, const char* ptr);

  template <typename T>
  PROTOBUF_MUST_USE_RESULT PROTOBUF_NDEBUG_INLINE const char* ParseGroup(
      T* msg, const char* ptr, uint32_t tag) {
    if (--depth_ < 0) return nullptr;
    group_depth_++;
    ptr = msg->_InternalParse(ptr, this);
    group_depth_--;
    depth_++;
    if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
    return ptr;
  }

 private:
  // Out-of-line routine to save space in ParseContext::ParseMessage<T>
  //   int old;
  //   ptr = ReadSizeAndPushLimitAndDepth(ptr, &old)
  // is equivalent to:
  //   int size = ReadSize(&ptr);
  //   if (!ptr) return nullptr;
  //   int old = PushLimit(ptr, size);
  //   if (--depth_ < 0) return nullptr;
  PROTOBUF_MUST_USE_RESULT const char* ReadSizeAndPushLimitAndDepth(
      const char* ptr, int* old_limit);

  // The context keeps an internal stack to keep track of the recursive
  // part of the parse state.
  // Current depth of the active parser, depth counts down.
  // This is used to limit recursion depth (to prevent overflow on malicious
  // data), but is also used to index into stack_ to store the current state.
  int depth_;
  // Unfortunately necessary for the fringe case of ending on a 0 or an
  // end-group tag in the last kSlopBytes of a ZeroCopyInputStream chunk.
  int group_depth_ = INT_MIN;
  Data data_;
};
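
// Illustrative usage sketch (not part of the API; `data`, `size`, `msg` and
// the recursion limit of 100 are placeholder assumptions): parsing a message
// from a flat buffer with ParseContext.
//
//   const char* ptr;
//   internal::ParseContext ctx(/*depth=*/100, /*aliasing=*/false, &ptr,
//                              StringPiece(data, size));
//   ptr = msg._InternalParse(ptr, &ctx);
//   bool ok = ptr != nullptr && ctx.EndedAtEndOfStream();
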
template <uint32_t tag>
bool ExpectTag(const char* ptr) {
  if (tag < 128) {
    return *ptr == static_cast<char>(tag);
  } else {
    static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes");
    char buf[2] = {static_cast<char>(tag | 0x80), static_cast<char>(tag >> 7)};
    return std::memcmp(ptr, buf, 2) == 0;
  }
}
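
// For reference (standard wire-format encoding, not defined in this file): a
// tag is (field_number << 3) | wire_type, varint-encoded. For example, field
// 1 with WIRETYPE_VARINT encodes as the single byte 0x08, so ExpectTag<8>(ptr)
// tests whether the next field is field 1 with varint wire type.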
template <int>
struct EndianHelper;

template <>
struct EndianHelper<1> {
  static uint8_t Load(const void* p) {
    return *static_cast<const uint8_t*>(p);
  }
};

template <>
struct EndianHelper<2> {
  static uint16_t Load(const void* p) {
    uint16_t tmp;
    std::memcpy(&tmp, p, 2);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_16(tmp);
#endif
    return tmp;
  }
};

template <>
struct EndianHelper<4> {
  static uint32_t Load(const void* p) {
    uint32_t tmp;
    std::memcpy(&tmp, p, 4);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_32(tmp);
#endif
    return tmp;
  }
};

template <>
struct EndianHelper<8> {
  static uint64_t Load(const void* p) {
    uint64_t tmp;
    std::memcpy(&tmp, p, 8);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_64(tmp);
#endif
    return tmp;
  }
};

template <typename T>
T UnalignedLoad(const char* p) {
  auto tmp = EndianHelper<sizeof(T)>::Load(p);
  T res;
  memcpy(&res, &tmp, sizeof(T));
  return res;
}
PROTOBUF_EXPORT
std::pair<const char*, uint32_t> VarintParseSlow32(const char* p,
                                                   uint32_t res);
PROTOBUF_EXPORT
std::pair<const char*, uint64_t> VarintParseSlow64(const char* p,
                                                   uint32_t res);

inline const char* VarintParseSlow(const char* p, uint32_t res,
                                   uint32_t* out) {
  auto tmp = VarintParseSlow32(p, res);
  *out = tmp.second;
  return tmp.first;
}

inline const char* VarintParseSlow(const char* p, uint32_t res,
                                   uint64_t* out) {
  auto tmp = VarintParseSlow64(p, res);
  *out = tmp.second;
  return tmp.first;
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* VarintParse(const char* p, T* out) {
  auto ptr = reinterpret_cast<const uint8_t*>(p);
  uint32_t res = ptr[0];
  if (!(res & 0x80)) {
    *out = res;
    return p + 1;
  }
  uint32_t byte = ptr[1];
  res += (byte - 1) << 7;
  if (!(byte & 0x80)) {
    *out = res;
    return p + 2;
  }
  return VarintParseSlow(p, res, out);
}
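
// A worked example of the (byte - 1) << 7 step above (explanatory only): for
// the varint 150, encoded as the bytes 0x96 0x01,
//   res  = 0x96 = 150   (still includes the continuation bit 0x80 = 128)
//   res += (0x01 - 1) << 7 = 0
// giving res = 150. The "- 1" shifted left by 7 subtracts exactly the 128
// contributed by the previous byte's continuation bit, so no separate masking
// step is needed.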
// Used for tags; could read up to 5 bytes, which must be available.
// Caller must ensure it's safe to call.
PROTOBUF_EXPORT
std::pair<const char*, uint32_t> ReadTagFallback(const char* p, uint32_t res);

// Same as VarintParse but accepts at most 5 bytes.
inline const char* ReadTag(const char* p, uint32_t* out,
                           uint32_t /*max_tag*/ = 0) {
  uint32_t res = static_cast<uint8_t>(p[0]);
  if (res < 128) {
    *out = res;
    return p + 1;
  }
  uint32_t second = static_cast<uint8_t>(p[1]);
  res += (second - 1) << 7;
  if (second < 128) {
    *out = res;
    return p + 2;
  }
  auto tmp = ReadTagFallback(p, res);
  *out = tmp.second;
  return tmp.first;
}
// Decodes 2 consecutive bytes of a varint and returns the value, shifted left
// by 1. It simultaneously updates *ptr to *ptr + 1 or *ptr + 2, depending on
// whether the first byte's continuation bit is set.
// If bit 15 of the return value is set (equivalent to the continuation bits
// of both bytes being set) the varint continues, otherwise the parse is done.
// On x86 this compiles to:
//   movsx eax, dil
//   add edi, eax
//   adc [rsi], 1
//   add eax, eax
//   and eax, edi
inline uint32_t DecodeTwoBytes(const char** ptr) {
  uint32_t value = UnalignedLoad<uint16_t>(*ptr);
  // Sign extend the low byte continuation bit.
  uint32_t x = static_cast<int8_t>(value);
  // This add is an amazing operation, it cancels the low byte continuation
  // bit from value, transferring it to the carry. Simultaneously it also
  // shifts the 7 LSB left by one tightly against the high byte varint bits.
  // Hence value now contains the unpacked value shifted left by 1.
  value += x;
  // Use the carry to update the ptr appropriately.
  *ptr += value < x ? 2 : 1;
  return value & (x + x);  // Mask out the high byte iff no continuation.
}
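
// Worked example (explanatory only): for the varint 150, bytes 0x96 0x01,
//   value = 0x0196 = 406          (little-endian 16-bit load)
//   x     = sign-extended 0x96 = 0xFFFFFF96 (-106)
//   value += x  ->  300, and value < x, so *ptr advances by 2
//   x + x = 0xFFFFFF2C, and 300 & 0xFFFFFF2C = 300 = 150 << 1
// Bit 15 of the result is clear, so the varint is complete and the caller
// recovers 150 by shifting right by 1.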
// More efficient varint parsing for big varints.
inline const char* ParseBigVarint(const char* p, uint64_t* out) {
  auto pnew = p;
  auto tmp = DecodeTwoBytes(&pnew);
  uint64_t res = tmp >> 1;
  if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= 0)) {
    *out = res;
    return pnew;
  }
  for (std::uint32_t i = 1; i < 5; i++) {
    pnew = p + 2 * i;
    tmp = DecodeTwoBytes(&pnew);
    res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * i - 1);
    if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= 0)) {
      *out = res;
      return pnew;
    }
  }
  return nullptr;
}
PROTOBUF_EXPORT
std::pair<const char*, int32_t> ReadSizeFallback(const char* p,
                                                 uint32_t first);

// Used for sizes; could read up to 5 bytes, which must be available.
// Additionally it makes sure the unsigned value fits in an int32_t; otherwise
// the returned pointer (*pp) is set to nullptr. Caller must ensure it's safe
// to call.
inline uint32_t ReadSize(const char** pp) {
  auto p = *pp;
  uint32_t res = static_cast<uint8_t>(p[0]);
  if (res < 128) {
    *pp = p + 1;
    return res;
  }
  auto x = ReadSizeFallback(p, res);
  *pp = x.first;
  return x.second;
}
// Some convenience functions to simplify the generated parse loop code.
// Returning the value and updating the buffer pointer allows for nicer
// function composition. We rely on the compiler to inline these.
// Also, in debug builds, having locally scoped variables tends to generate
// stack frames that scale as O(num fields).
inline uint64_t ReadVarint64(const char** p) {
  uint64_t tmp;
  *p = VarintParse(*p, &tmp);
  return tmp;
}
inline uint32_t ReadVarint32(const char** p) {
  uint32_t tmp;
  *p = VarintParse(*p, &tmp);
  return tmp;
}
inline int64_t ReadVarintZigZag64(const char** p) {
  uint64_t tmp;
  *p = VarintParse(*p, &tmp);
  return WireFormatLite::ZigZagDecode64(tmp);
}
inline int32_t ReadVarintZigZag32(const char** p) {
  uint64_t tmp;
  *p = VarintParse(*p, &tmp);
  return WireFormatLite::ZigZagDecode32(static_cast<uint32_t>(tmp));
}
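
// For reference (standard ZigZag coding, implemented in wire_format_lite.h):
// ZigZag maps signed to unsigned so small negative numbers stay small on the
// wire: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ... Decoding is (n >> 1) ^ -(n & 1),
// so e.g. ReadVarintZigZag32 on a varint payload of 3 yields -2.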
template <typename T, typename std::enable_if<
                          !std::is_base_of<MessageLite, T>::value, bool>::type>
PROTOBUF_MUST_USE_RESULT const char* ParseContext::ParseMessage(
    T* msg, const char* ptr) {
  int old;
  ptr = ReadSizeAndPushLimitAndDepth(ptr, &old);
  ptr = ptr ? msg->_InternalParse(ptr, this) : nullptr;
  depth_++;
  if (!PopLimit(old)) return nullptr;
  return ptr;
}
template <typename Tag, typename T>
const char* EpsCopyInputStream::ReadRepeatedFixed(const char* ptr,
                                                  Tag expected_tag,
                                                  RepeatedField<T>* out) {
  do {
    out->Add(UnalignedLoad<T>(ptr));
    ptr += sizeof(T);
    if (PROTOBUF_PREDICT_FALSE(ptr >= limit_end_)) return ptr;
  } while (UnalignedLoad<Tag>(ptr) == expected_tag && (ptr += sizeof(Tag)));
  return ptr;
}

template <typename T>
const char* EpsCopyInputStream::ReadPackedFixed(const char* ptr, int size,
                                                RepeatedField<T>* out) {
  int nbytes = buffer_end_ + kSlopBytes - ptr;
  while (size > nbytes) {
    int num = nbytes / sizeof(T);
    int old_entries = out->size();
    out->Reserve(old_entries + num);
    int block_size = num * sizeof(T);
    auto dst = out->AddNAlreadyReserved(num);
#ifdef PROTOBUF_LITTLE_ENDIAN
    std::memcpy(dst, ptr, block_size);
#else
    for (int i = 0; i < num; i++)
      dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
#endif
    size -= block_size;
    if (limit_ <= kSlopBytes) return nullptr;
    ptr = Next();
    if (ptr == nullptr) return nullptr;
    ptr += kSlopBytes - (nbytes - block_size);
    nbytes = buffer_end_ + kSlopBytes - ptr;
  }
  int num = size / sizeof(T);
  int old_entries = out->size();
  out->Reserve(old_entries + num);
  int block_size = num * sizeof(T);
  auto dst = out->AddNAlreadyReserved(num);
#ifdef PROTOBUF_LITTLE_ENDIAN
  std::memcpy(dst, ptr, block_size);
#else
  for (int i = 0; i < num; i++) dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
#endif
  ptr += block_size;
  if (size != block_size) return nullptr;
  return ptr;
}
template <typename Add>
const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) {
  while (ptr < end) {
    uint64_t varint;
    ptr = VarintParse(ptr, &varint);
    if (ptr == nullptr) return nullptr;
    add(varint);
  }
  return ptr;
}

template <typename Add>
const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
  int size = ReadSize(&ptr);
  if (ptr == nullptr) return nullptr;
  int chunk_size = buffer_end_ - ptr;
  while (size > chunk_size) {
    ptr = ReadPackedVarintArray(ptr, buffer_end_, add);
    if (ptr == nullptr) return nullptr;
    int overrun = ptr - buffer_end_;
    GOOGLE_DCHECK(overrun >= 0 && overrun <= kSlopBytes);
    if (size - chunk_size <= kSlopBytes) {
      // The current buffer contains all the information needed; we don't need
      // to flip buffers. However, we must parse from a buffer with enough
      // space so we are not prone to a buffer overflow.
      char buf[kSlopBytes + 10] = {};
      std::memcpy(buf, buffer_end_, kSlopBytes);
      GOOGLE_CHECK_LE(size - chunk_size, kSlopBytes);
      auto end = buf + (size - chunk_size);
      auto res = ReadPackedVarintArray(buf + overrun, end, add);
      if (res == nullptr || res != end) return nullptr;
      return buffer_end_ + (res - buf);
    }
    size -= overrun + chunk_size;
    GOOGLE_DCHECK_GT(size, 0);
    // We must flip buffers.
    if (limit_ <= kSlopBytes) return nullptr;
    ptr = Next();
    if (ptr == nullptr) return nullptr;
    ptr += overrun;
    chunk_size = buffer_end_ - ptr;
  }
  auto end = ptr + size;
  ptr = ReadPackedVarintArray(ptr, end, add);
  return end == ptr ? ptr : nullptr;
}
// Helper for verification of UTF-8.
PROTOBUF_EXPORT
bool VerifyUTF8(StringPiece s, const char* field_name);

inline bool VerifyUTF8(const std::string* s, const char* field_name) {
  return VerifyUTF8(*s, field_name);
}

// All the string parsers, with or without UTF-8 checking and for all CTypes.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParser(
    std::string* s, const char* ptr, ParseContext* ctx);

// Uncomment any of the following lines to debug which parse function is
// failing.
#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                 \
    /* ::raise(SIGINT); */                            \
    /* GOOGLE_LOG(ERROR) << "Parse failure"; */       \
    return ret;                                       \
  }

#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)
template <typename T>
PROTOBUF_MUST_USE_RESULT const char* FieldParser(uint64_t tag, T& field_parser,
                                                 const char* ptr,
                                                 ParseContext* ctx) {
  uint32_t number = tag >> 3;
  GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
  using WireType = internal::WireFormatLite::WireType;
  switch (tag & 7) {
    case WireType::WIRETYPE_VARINT: {
      uint64_t value;
      ptr = VarintParse(ptr, &value);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      field_parser.AddVarint(number, value);
      break;
    }
    case WireType::WIRETYPE_FIXED64: {
      uint64_t value = UnalignedLoad<uint64_t>(ptr);
      ptr += 8;
      field_parser.AddFixed64(number, value);
      break;
    }
    case WireType::WIRETYPE_LENGTH_DELIMITED: {
      ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      break;
    }
    case WireType::WIRETYPE_START_GROUP: {
      ptr = field_parser.ParseGroup(number, ptr, ctx);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      break;
    }
    case WireType::WIRETYPE_END_GROUP: {
      GOOGLE_LOG(FATAL) << "Can't happen";
      break;
    }
    case WireType::WIRETYPE_FIXED32: {
      uint32_t value = UnalignedLoad<uint32_t>(ptr);
      ptr += 4;
      field_parser.AddFixed32(number, value);
      break;
    }
    default:
      return nullptr;
  }
  return ptr;
}
template <typename T>
PROTOBUF_MUST_USE_RESULT const char* WireFormatParser(T& field_parser,
                                                      const char* ptr,
                                                      ParseContext* ctx) {
  while (!ctx->Done(&ptr)) {
    uint32_t tag;
    ptr = ReadTag(ptr, &tag);
    GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
    if (tag == 0 || (tag & 7) == 4) {
      ctx->SetLastTag(tag);
      return ptr;
    }
    ptr = FieldParser(tag, field_parser, ptr, ctx);
    GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
  }
  return ptr;
}
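
// Minimal sketch of a field_parser type accepted by FieldParser and
// WireFormatParser above (hypothetical names; any type with these five
// members works). It simply counts fields, delegating length-delimited and
// group payloads to helpers declared in this header:
//
//   struct CountingFieldParser {
//     int fields = 0;
//     void AddVarint(uint32_t number, uint64_t value) { fields++; }
//     void AddFixed64(uint32_t number, uint64_t value) { fields++; }
//     void AddFixed32(uint32_t number, uint32_t value) { fields++; }
//     const char* ParseLengthDelimited(uint32_t number, const char* ptr,
//                                      ParseContext* ctx) {
//       std::string payload;
//       fields++;
//       return InlineGreedyStringParser(&payload, ptr, ctx);
//     }
//     const char* ParseGroup(uint32_t number, const char* ptr,
//                            ParseContext* ctx) {
//       std::string unknown;
//       fields++;
//       return UnknownGroupLiteParse(&unknown, ptr, ctx);  // declared below
//     }
//   };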
// The packed parsers parse repeated numeric primitives directly into the
// corresponding field.

// These are packed varints:
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx);

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx, bool (*is_valid)(int),
    InternalMetadata* metadata, int field_num) {
  return ctx->ReadPackedVarint(
      ptr, [object, is_valid, metadata, field_num](uint64_t val) {
        if (is_valid(val)) {
          static_cast<RepeatedField<int>*>(object)->Add(val);
        } else {
          WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
        }
      });
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* PackedEnumParserArg(
    void* object, const char* ptr, ParseContext* ctx,
    bool (*is_valid)(const void*, int), const void* data,
    InternalMetadata* metadata, int field_num) {
  return ctx->ReadPackedVarint(
      ptr, [object, is_valid, data, metadata, field_num](uint64_t val) {
        if (is_valid(data, val)) {
          static_cast<RepeatedField<int>*>(object)->Add(val);
        } else {
          WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
        }
      });
}

PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedBoolParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFloatParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedDoubleParser(
    void* object, const char* ptr, ParseContext* ctx);
// This is the only recursive parser.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownGroupLiteParse(
    std::string* unknown, const char* ptr, ParseContext* ctx);
// This is a helper for UnknownGroupLiteParse, but it is actually also useful
// in the generated code. It uses overloading on std::string* vs
// UnknownFieldSet* to make the generated code isomorphic between full and
// lite.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse(
    uint32_t tag, std::string* unknown, const char* ptr, ParseContext* ctx);
}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__