1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34//
35// This file contains the CodedInputStream and CodedOutputStream classes,
36// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37// and allow you to read or write individual pieces of data in various
38// formats.  In particular, these implement the varint encoding for
39// integers, a simple variable-length encoding in which smaller numbers
40// take fewer bytes.
41//
42// Typically these classes will only be used internally by the protocol
43// buffer library in order to encode and decode protocol buffers.  Clients
44// of the library only need to know about these classes if they wish to write
45// custom message parsing or serialization procedures.
46//
47// CodedOutputStream example:
48//   // Write some data to "myfile".  First we write a 4-byte "magic number"
49//   // to identify the file type, then write a length-delimited string.  The
50//   // string is composed of a varint giving the length followed by the raw
51//   // bytes.
52//   int fd = open("myfile", O_WRONLY);
53//   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55//
56//   int magic_number = 1234;
57//   char text[] = "Hello world!";
58//   coded_output->WriteLittleEndian32(magic_number);
59//   coded_output->WriteVarint32(strlen(text));
60//   coded_output->WriteRaw(text, strlen(text));
61//
62//   delete coded_output;
63//   delete raw_output;
64//   close(fd);
65//
66// CodedInputStream example:
67//   // Read a file created by the above code.
68//   int fd = open("myfile", O_RDONLY);
69//   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
70//   CodedInputStream* coded_input = new CodedInputStream(raw_input);
71//
72//   coded_input->ReadLittleEndian32(&magic_number);
73//   if (magic_number != 1234) {
74//     cerr << "File not in expected format." << endl;
75//     return;
76//   }
77//
78//   uint32 size;
79//   coded_input->ReadVarint32(&size);
80//
81//   char* text = new char[size + 1];
82//   coded_input->ReadRaw(text, size);
83//   text[size] = '\0';
84//
85//   delete coded_input;
86//   delete raw_input;
87//   close(fd);
88//
89//   cout << "Text is: " << text << endl;
90//   delete [] text;
91//
92// For those who are interested, varint encoding is defined as follows:
93//
94// The encoding operates on unsigned integers of up to 64 bits in length.
95// Each byte of the encoded value has the format:
96// * bits 0-6: Seven bits of the number being encoded.
97// * bit 7: Zero if this is the last byte in the encoding (in which
98//   case all remaining bits of the number are zero) or 1 if
99//   more bytes follow.
100// The first byte contains the least-significant 7 bits of the number, the
101// second byte (if present) contains the next-least-significant 7 bits,
102// and so on.  So, the binary number 1011000101011 would be encoded in two
103// bytes as "10101011 00101100".
104//
105// In theory, varint could be used to encode integers of any length.
106// However, for practicality we set a limit at 64 bits.  The maximum encoded
107// length of a number is thus 10 bytes.
108
109#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111
112#include <string>
113#ifdef _MSC_VER
114  #if defined(_M_IX86) && \
115      !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
116    #define PROTOBUF_LITTLE_ENDIAN 1
117  #endif
118  #if _MSC_VER >= 1300
119    // If MSVC has "/RTCc" set, it will complain about truncating casts at
120    // runtime.  This file contains some intentional truncating casts.
121    #pragma runtime_checks("c", off)
122  #endif
123#else
124  #include <sys/param.h>   // __BYTE_ORDER
125  #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && \
126      !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
127    #define PROTOBUF_LITTLE_ENDIAN 1
128  #endif
129#endif
130#include <google/protobuf/stubs/common.h>
131
132
133namespace google {
134namespace protobuf {
135
136class DescriptorPool;
137class MessageFactory;
138
139namespace io {
140
141// Defined in this file.
142class CodedInputStream;
143class CodedOutputStream;
144
145// Defined in other files.
146class ZeroCopyInputStream;           // zero_copy_stream.h
147class ZeroCopyOutputStream;          // zero_copy_stream.h
148
149// Class which reads and decodes binary data which is composed of varint-
150// encoded integers and fixed-width pieces.  Wraps a ZeroCopyInputStream.
151// Most users will not need to deal with CodedInputStream.
152//
153// Most methods of CodedInputStream that return a bool return false if an
154// underlying I/O error occurs or if the data is malformed.  Once such a
155// failure occurs, the CodedInputStream is broken and is no longer useful.
156class LIBPROTOBUF_EXPORT CodedInputStream {
157 public:
158  // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
159  explicit CodedInputStream(ZeroCopyInputStream* input);
160
161  // Create a CodedInputStream that reads from the given flat array.  This is
162  // faster than using an ArrayInputStream.  PushLimit(size) is implied by
163  // this constructor.
164  explicit CodedInputStream(const uint8* buffer, int size);
165
166  // Destroy the CodedInputStream and position the underlying
167  // ZeroCopyInputStream at the first unread byte.  If an error occurred while
168  // reading (causing a method to return false), then the exact position of
169  // the input stream may be anywhere between the last value that was read
170  // successfully and the stream's byte limit.
171  ~CodedInputStream();
172
173  // Return true if this CodedInputStream reads from a flat array instead of
174  // a ZeroCopyInputStream.
175  inline bool IsFlat() const;
176
177  // Skips a number of bytes.  Returns false if an underlying read error
178  // occurs.
179  bool Skip(int count);
180
181  // Sets *data to point directly at the unread part of the CodedInputStream's
182  // underlying buffer, and *size to the size of that buffer, but does not
183  // advance the stream's current position.  This will always either produce
184  // a non-empty buffer or return false.  If the caller consumes any of
185  // this data, it should then call Skip() to skip over the consumed bytes.
186  // This may be useful for implementing external fast parsing routines for
187  // types of data not covered by the CodedInputStream interface.
188  bool GetDirectBufferPointer(const void** data, int* size);
189
190  // Like GetDirectBufferPointer, but this method is inlined, and does not
191  // attempt to Refresh() if the buffer is currently empty.  Because it
192  // returns void, the caller has to inspect *size to detect an empty buffer.
192  inline void GetDirectBufferPointerInline(const void** data,
193                                           int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
194
195  // Read raw bytes, copying them into the given buffer.  The caller supplies
  // the destination; NOTE(review): presumably it must hold at least "size"
  // bytes -- confirm against the implementation.
196  bool ReadRaw(void* buffer, int size);
197
198  // Like ReadRaw, but reads into a string.
199  //
200  // Implementation Note:  ReadString() grows the string gradually as it
201  // reads in the data, rather than allocating the entire requested size
202  // upfront.  This prevents denial-of-service attacks in which a client
203  // could claim that a string is going to be INT_MAX bytes long in order to
204  // crash the server because it can't allocate this much space at once.
205  bool ReadString(string* buffer, int size);
206  // Like ReadString() above, with inlined optimizations.  This should only
207  // be used by the protobuf implementation.
208  inline bool InternalReadStringInline(string* buffer,
209                                       int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
210
211
212  // Read a 32-bit little-endian integer.
213  bool ReadLittleEndian32(uint32* value);
214  // Read a 64-bit little-endian integer.
215  bool ReadLittleEndian64(uint64* value);
216
217  // These methods read from an externally provided buffer. The caller is
218  // responsible for ensuring that the buffer has sufficient space.
  // NOTE(review): the returned pointer is presumably the position just past
  // the bytes that were consumed -- confirm against the implementation.
219  // Read a 32-bit little-endian integer.
220  static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
221                                                   uint32* value);
222  // Read a 64-bit little-endian integer.
223  static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
224                                                   uint64* value);
225
226  // Read an unsigned integer with Varint encoding, truncating to 32 bits.
227  // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
228  // it to uint32, but may be more efficient.
229  bool ReadVarint32(uint32* value);
230  // Read an unsigned integer with Varint encoding.
231  bool ReadVarint64(uint64* value);
232
233  // Read a tag.  This calls ReadVarint32() and returns the result, or returns
234  // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
235  // the last tag value, which can be checked with LastTagWas().
236  // Always inline because this is only called in one place per parse loop
237  // but it is called for every iteration of said loop, so it should be fast.
238  // GCC doesn't want to inline this by default.
239  uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
240
241  // Usually returns true if calling ReadVarint32() now would produce the given
242  // value.  Will always return false if ReadVarint32() would not return the
243  // given value.  If ExpectTag() returns true, it also advances past
244  // the varint.  For best performance, use a compile-time constant as the
245  // parameter.
246  // Always inline because this collapses to a small number of instructions
247  // when given a constant parameter, but GCC doesn't want to inline by default.
248  bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
249
250  // Like ExpectTag(), except this reads from the specified buffer. The caller
251  // is responsible for ensuring that the buffer is large enough to read a
252  // varint of the expected size. For best performance, use a compile-time
253  // constant as the expected tag parameter.
254  //
255  // Returns a pointer beyond the expected tag if it was found, or NULL if it
256  // was not.
257  static const uint8* ExpectTagFromArray(
258      const uint8* buffer,
259      uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
260
261  // Usually returns true if no more bytes can be read.  Always returns false
262  // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
263  // call to LastTagWas() will act as if ReadTag() had been called and returned
264  // zero, and ConsumedEntireMessage() will return true.
265  bool ExpectAtEnd();
266
267  // If the last call to ReadTag() returned the given value, returns true.
268  // Otherwise, returns false.
269  //
270  // This is needed because parsers for some types of embedded messages
271  // (with field type TYPE_GROUP) don't actually know that they've reached the
272  // end of a message until they see an ENDGROUP tag, which was actually part
273  // of the enclosing message.  The enclosing message would like to check that
274  // tag to make sure it had the right number, so it calls LastTagWas() on
275  // return from the embedded parser to check.
276  bool LastTagWas(uint32 expected);
277
278  // When parsing a message (but NOT a group), this method must be called
279  // immediately after MergeFromCodedStream() returns (if it returns true)
280  // to further verify that the message ended in a legitimate way.  For
281  // example, this verifies that parsing did not end on an end-group tag.
282  // It also checks for some cases where, due to optimizations,
283  // MergeFromCodedStream() can incorrectly return true.
284  bool ConsumedEntireMessage();
285
286  // Limits ----------------------------------------------------------
287  // Limits are used when parsing length-delimited embedded messages.
288  // After the message's length is read, PushLimit() is used to prevent
289  // the CodedInputStream from reading beyond that length.  Once the
290  // embedded message has been parsed, PopLimit() is called to undo the
291  // limit.
292
293  // Opaque type used with PushLimit() and PopLimit().  Do not modify
294  // values of this type yourself.  The only reason that this isn't a
295  // struct with private internals is for efficiency.
296  typedef int Limit;
297
298  // Places a limit on the number of bytes that the stream may read,
299  // starting from the current position.  Once the stream hits this limit,
300  // it will act like the end of the input has been reached until PopLimit()
301  // is called.
302  //
303  // As the names imply, the stream conceptually has a stack of limits.  The
304  // shortest limit on the stack is always enforced, even if it is not the
305  // top limit.
306  //
307  // The value returned by PushLimit() is opaque to the caller, and must
308  // be passed unchanged to the corresponding call to PopLimit().
309  Limit PushLimit(int byte_limit);
310
311  // Pops the last limit pushed by PushLimit().  The input must be the value
312  // returned by that call to PushLimit().
313  void PopLimit(Limit limit);
314
315  // Returns the number of bytes left until the nearest limit on the
316  // stack is hit, or -1 if no limits are in place.
317  int BytesUntilLimit() const;
318
319  // Returns current position relative to the beginning of the input stream.
320  int CurrentPosition() const;
321
322  // Total Bytes Limit -----------------------------------------------
323  // To prevent malicious users from sending excessively large messages
324  // and causing integer overflows or memory exhaustion, CodedInputStream
325  // imposes a hard limit on the total number of bytes it will read.
326
327  // Sets the maximum number of bytes that this CodedInputStream will read
328  // before refusing to continue.  To prevent integer overflows in the
329  // protocol buffers implementation, as well as to prevent servers from
330  // allocating enormous amounts of memory to hold parsed messages, the
331  // maximum message length should be limited to the shortest length that
332  // will not harm usability.  The theoretical shortest message that could
333  // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
334  // should set shorter limits if possible.  If warning_threshold is not -1,
335  // a warning will be printed to stderr after warning_threshold bytes are
336  // read.  For backwards compatibility all negative values get squashed to -1,
337  // as other negative values might have special internal meanings.
338  // An error will always be printed to stderr if the limit is reached.
339  //
340  // This is unrelated to PushLimit()/PopLimit().
341  //
342  // Hint:  If you are reading this because your program is printing a
343  //   warning about dangerously large protocol messages, you may be
344  //   confused about what to do next.  The best option is to change your
345  //   design such that excessively large messages are not necessary.
346  //   For example, try to design file formats to consist of many small
347  //   messages rather than a single large one.  If this is infeasible,
348  //   you will need to increase the limit.  Chances are, though, that
349  //   your code never constructs a CodedInputStream on which the limit
350  //   can be set.  You probably parse messages by calling things like
351  //   Message::ParseFromString().  In this case, you will need to change
352  //   your code to instead construct some sort of ZeroCopyInputStream
353  //   (e.g. an ArrayInputStream), construct a CodedInputStream around
354  //   that, then call Message::ParseFromCodedStream() instead.  Then
355  //   you can adjust the limit.  Yes, it's more work, but you're doing
356  //   something unusual.
357  void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
358
359  // Recursion Limit -------------------------------------------------
360  // To prevent corrupt or malicious messages from causing stack overflows,
361  // we must keep track of the depth of recursion when parsing embedded
362  // messages and groups.  CodedInputStream keeps track of this because it
363  // is the only object that is passed down the stack during parsing.
364
365  // Sets the maximum recursion depth.  The default is 100.
366  void SetRecursionLimit(int limit);
367
368
369  // Increments the current recursion depth.  Returns true if the depth is
370  // under the limit, false if it has gone over.
371  bool IncrementRecursionDepth();
372
373  // Decrements the recursion depth.
374  void DecrementRecursionDepth();
375
376  // Extension Registry ----------------------------------------------
377  // ADVANCED USAGE:  99.9% of people can ignore this section.
378  //
379  // By default, when parsing extensions, the parser looks for extension
380  // definitions in the pool which owns the outer message's Descriptor.
381  // However, you may call SetExtensionRegistry() to provide an alternative
382  // pool instead.  This makes it possible, for example, to parse a message
383  // using a generated class, but represent some extensions using
384  // DynamicMessage.
385
386  // Set the pool used to look up extensions.  Most users do not need to call
387  // this as the correct pool will be chosen automatically.
388  //
389  // WARNING:  It is very easy to misuse this.  Carefully read the requirements
390  //   below.  Do not use this unless you are sure you need it.  Almost no one
391  //   does.
392  //
393  // Let's say you are parsing a message into message object m, and you want
394  // to take advantage of SetExtensionRegistry().  You must follow these
395  // requirements:
396  //
397  // The given DescriptorPool must contain m->GetDescriptor().  It is not
398  // sufficient for it to simply contain a descriptor that has the same name
399  // and content -- it must be the *exact object*.  In other words:
400  //   assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
401  //          m->GetDescriptor());
402  // There are two ways to satisfy this requirement:
403  // 1) Use m->GetDescriptor()->pool() as the pool.  This is generally useless
404  //    because this is the pool that would be used anyway if you didn't call
405  //    SetExtensionRegistry() at all.
406  // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
407  //    "underlay".  Read the documentation for DescriptorPool for more
408  //    information about underlays.
409  //
410  // You must also provide a MessageFactory.  This factory will be used to
411  // construct Message objects representing extensions.  The factory's
412  // GetPrototype() MUST return non-NULL for any Descriptor which can be found
413  // through the provided pool.
414  //
415  // If the provided factory might return instances of protocol-compiler-
416  // generated (i.e. compiled-in) types, or if the outer message object m is
417  // a generated type, then the given factory MUST have this property:  If
418  // GetPrototype() is given a Descriptor which resides in
419  // DescriptorPool::generated_pool(), the factory MUST return the same
420  // prototype which MessageFactory::generated_factory() would return.  That
421  // is, given a descriptor for a generated type, the factory must return an
422  // instance of the generated class (NOT DynamicMessage).  However, when
423  // given a descriptor for a type that is NOT in generated_pool, the factory
424  // is free to return any implementation.
425  //
426  // The reason for this requirement is that generated sub-objects may be
427  // accessed via the standard (non-reflection) extension accessor methods,
428  // and these methods will down-cast the object to the generated class type.
429  // If the object is not actually of that type, the results would be undefined.
430  // On the other hand, if an extension is not compiled in, then there is no
431  // way the code could end up accessing it via the standard accessors -- the
432  // only way to access the extension is via reflection.  When using reflection,
433  // DynamicMessage and generated messages are indistinguishable, so it's fine
434  // if these objects are represented using DynamicMessage.
435  //
436  // Using DynamicMessageFactory on which you have called
437  // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
438  // above requirement.
439  //
440  // If either pool or factory is NULL, both must be NULL.
441  //
442  // Note that this feature is ignored when parsing "lite" messages as they do
443  // not have descriptors.
444  void SetExtensionRegistry(const DescriptorPool* pool,
445                            MessageFactory* factory);
446
447  // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
448  // has been provided.
449  const DescriptorPool* GetExtensionPool();
450
451  // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
452  // factory has been provided.
453  MessageFactory* GetExtensionFactory();
454
455 private:
  // NOTE(review): presumably makes the class non-copyable; the macro is not
  // defined in this file -- confirm its expansion where it is defined.
456  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
457
  // The stream being wrapped.  NOTE(review): its value when the flat-array
  // constructor is used is not visible here -- confirm in the implementation.
458  ZeroCopyInputStream* input_;
  // NOTE(review): presumably points at the next unread byte of the current
  // buffer -- confirm.
459  const uint8* buffer_;
460  const uint8* buffer_end_;     // pointer to the end of the buffer.
461  int total_bytes_read_;  // total bytes read from input_, including
462                          // the current buffer
463
464  // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
465  // so that we can BackUp() on destruction.
466  int overflow_bytes_;
467
468  // LastTagWas() stuff.
469  uint32 last_tag_;         // result of last ReadTag().
470
471  // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
472  // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
473  // reach the end of a message and attempt to read another tag.
474  bool legitimate_message_end_;
475
476  // See EnableAliasing().
  // NOTE(review): no EnableAliasing() is declared in this class, so the
  // reference above may be stale -- confirm how this flag is set and used.
477  bool aliasing_enabled_;
478
479  // Limits
  // NOTE(review): "position" in the trailing comment below presumably means
  // the value of current_limit_ itself -- confirm the actual no-limit
  // sentinel in the implementation.
480  Limit current_limit_;   // if position = -1, no limit is applied
481
482  // For simplicity, if the current buffer crosses a limit (either a normal
483  // limit created by PushLimit() or the total bytes limit), buffer_size_
484  // only tracks the number of bytes before that limit.  This field
485  // contains the number of bytes after it.  Note that this implies that if
486  // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
487  // hit a limit.  However, if both are zero, it doesn't necessarily mean
488  // we aren't at a limit -- the buffer may have ended exactly at the limit.
  // NOTE(review): buffer_size_ is not a member of this class; the text above
  // presumably refers to the value returned by BufferSize() -- confirm.
489  int buffer_size_after_limit_;
490
491  // Maximum number of bytes to read, period.  This is unrelated to
492  // current_limit_.  Set using SetTotalBytesLimit().
493  int total_bytes_limit_;
494
495  // If positive/0: Limit for bytes read after which a warning due to size
496  // should be logged.
497  // If -1: Printing of warning disabled. Can be set by client.
498  // If -2: Internal: Limit has been reached, print full size when destructing.
499  int total_bytes_warning_threshold_;
500
501  // Current recursion depth, controlled by IncrementRecursionDepth() and
502  // DecrementRecursionDepth().
503  int recursion_depth_;
504  // Recursion depth limit, set by SetRecursionLimit().
505  int recursion_limit_;
506
507  // See SetExtensionRegistry().
508  const DescriptorPool* extension_pool_;
509  MessageFactory* extension_factory_;
510
511  // Private member functions.
512
513  // Advance the buffer by a given number of bytes.
514  void Advance(int amount);
515
516  // Back up input_ to the current buffer position.
517  void BackUpInputToCurrentPosition();
518
519  // Recomputes the value of buffer_size_after_limit_.  Must be called after
520  // current_limit_ or total_bytes_limit_ changes.
521  void RecomputeBufferLimits();
522
523  // Writes an error message saying that we hit total_bytes_limit_.
524  void PrintTotalBytesLimitError();
525
526  // Called when the buffer runs out to request more data.  Implies an
527  // Advance(BufferSize()).
528  bool Refresh();
529
530  // When parsing varints, we optimize for the common case of small values, and
531  // then optimize for the case when the varint fits within the current buffer
532  // piece. The Fallback method is used when we can't use the one-byte
533  // optimization. The Slow method is yet another fallback when the buffer is
534  // not large enough. Making the slow path out-of-line speeds up the common
535  // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
536  // message crosses multiple buffers.
537  bool ReadVarint32Fallback(uint32* value);
538  bool ReadVarint64Fallback(uint64* value);
539  bool ReadVarint32Slow(uint32* value);
540  bool ReadVarint64Slow(uint64* value);
541  bool ReadLittleEndian32Fallback(uint32* value);
542  bool ReadLittleEndian64Fallback(uint64* value);
543  // Fallback/slow methods for reading tags. These do not update last_tag_,
544  // but will set legitimate_message_end_ if we are at the end of the input
545  // stream.
546  uint32 ReadTagFallback();
547  uint32 ReadTagSlow();
  // Out-of-line counterpart of the string-reading methods.  NOTE(review):
  // presumably taken when the inlined fast path cannot satisfy the read from
  // the current buffer -- confirm.
548  bool ReadStringFallback(string* buffer, int size);
549
550  // Return the size of the buffer.
551  int BufferSize() const;
552
  // Default total bytes limit; see SetTotalBytesLimit().
553  static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
554
  // NOTE(review): presumably the initial value of
  // total_bytes_warning_threshold_ -- confirm in the constructors.
555  static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
556
557  static int default_recursion_limit_;  // 100 by default.
558};
559
560// Class which encodes and writes binary data which is composed of varint-
561// encoded integers and fixed-width pieces.  Wraps a ZeroCopyOutputStream.
562// Most users will not need to deal with CodedOutputStream.
563//
564// Most methods of CodedOutputStream which return a bool return false if an
565// underlying I/O error occurs.  Once such a failure occurs, the
566// CodedOutputStream is broken and is no longer useful. The Write* methods do
567// not return the stream status, but will invalidate the stream if an error
568// occurs. The client can probe HadError() to determine the status.
569//
570// Note that every method of CodedOutputStream which writes some data has
571// a corresponding static "ToArray" version. These versions write directly
572// to the provided buffer, returning a pointer past the last written byte.
573// They require that the buffer has sufficient capacity for the encoded data.
574// This allows an optimization where we check if an output stream has enough
575// space for an entire message before we start writing and, if there is, we
576// call only the ToArray methods to avoid doing bound checks for each
577// individual value.
578// i.e., in the example above:
579//
580//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
581//   int magic_number = 1234;
582//   char text[] = "Hello world!";
583//
584//   int coded_size = sizeof(magic_number) +
585//                    CodedOutputStream::VarintSize32(strlen(text)) +
586//                    strlen(text);
587//
588//   uint8* buffer =
589//       coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
590//   if (buffer != NULL) {
591//     // The output stream has enough space in the buffer: write directly to
592//     // the array.
593//     buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
594//                                                            buffer);
595//     buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
596//     buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
597//   } else {
598//     // Make bound-checked writes, which will ask the underlying stream for
599//     // more space as needed.
600//     coded_output->WriteLittleEndian32(magic_number);
601//     coded_output->WriteVarint32(strlen(text));
602//     coded_output->WriteRaw(text, strlen(text));
603//   }
604//
605//   delete coded_output;
606class LIBPROTOBUF_EXPORT CodedOutputStream {
607 public:
608  // Create a CodedOutputStream that writes to the given ZeroCopyOutputStream.
609  explicit CodedOutputStream(ZeroCopyOutputStream* output);
610
611  // Destroy the CodedOutputStream and position the underlying
612  // ZeroCopyOutputStream immediately after the last byte written.
613  ~CodedOutputStream();
614
615  // Skips a number of bytes, leaving the bytes unmodified in the underlying
616  // buffer.  Returns false if an underlying write error occurs.  This is
617  // mainly useful with GetDirectBufferPointer().
618  bool Skip(int count);
619
620  // Sets *data to point directly at the unwritten part of the
621  // CodedOutputStream's underlying buffer, and *size to the size of that
622  // buffer, but does not advance the stream's current position.  This will
623  // always either produce a non-empty buffer or return false.  If the caller
624  // writes any data to this buffer, it should then call Skip() to skip over
625  // the consumed bytes.  This may be useful for implementing external fast
626  // serialization routines for types of data not covered by the
627  // CodedOutputStream interface.
628  bool GetDirectBufferPointer(void** data, int* size);
629
630  // If there are at least "size" bytes available in the current buffer,
631  // returns a pointer directly into the buffer and advances over these bytes.
632  // The caller may then write directly into this buffer (e.g. using the
633  // *ToArray static methods) rather than go through CodedOutputStream.  If
634  // there are not enough bytes available, returns NULL.  The return pointer is
635  // invalidated as soon as any other non-const method of CodedOutputStream
636  // is called.
637  inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
638
  // Writes "size" raw bytes, copying them from the given buffer.
  void WriteRaw(const void* buffer, int size);
  // Like WriteRaw(), but writes directly to the target array.
  // This is _not_ inlined, as the compiler often optimizes memcpy into inline
  // copy loops. Since this gets called by every field with string or bytes
  // type, inlining may lead to a significant amount of code bloat, with only a
  // minor performance gain.
  static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
647
  // Equivalent to WriteRaw(str.data(), str.size()).
  void WriteString(const string& str);
  // Like WriteString(), but writes directly to the target array.  Equivalent
  // to WriteRawToArray(str.data(), str.size(), target).
  static uint8* WriteStringToArray(const string& str, uint8* target);
652
653
  // Writes a 32-bit little-endian integer.
  void WriteLittleEndian32(uint32 value);
  // Like WriteLittleEndian32(), but writes directly to the target array.
  // Returns a pointer just past the bytes written.
  static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
  // Writes a 64-bit little-endian integer.
  void WriteLittleEndian64(uint64 value);
  // Like WriteLittleEndian64(), but writes directly to the target array.
  // Returns a pointer just past the bytes written.
  static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
662
  // Writes an unsigned integer with Varint encoding.  Writing a 32-bit value
  // is equivalent to casting it to uint64 and writing it as a 64-bit value,
  // but may be more efficient.
  void WriteVarint32(uint32 value);
  // Like WriteVarint32(), but writes directly to the target array.
  static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
  // Writes an unsigned integer with Varint encoding.
  void WriteVarint64(uint64 value);
  // Like WriteVarint64(), but writes directly to the target array.
  static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
673
  // Equivalent to WriteVarint32() except when the value is negative,
  // in which case it must be sign-extended to a full 10 bytes (it is then
  // written as a 64-bit varint).
  void WriteVarint32SignExtended(int32 value);
  // Like WriteVarint32SignExtended(), but writes directly to the target array.
  static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
679
  // This is identical to WriteVarint32(), but optimized for writing tags.
  // In particular, if the input is a compile-time constant, this method
  // compiles down to a couple instructions.
  // Always inline because otherwise the aforementioned optimization can't
  // work, but GCC by default doesn't want to inline this.
  // NOTE(review): only WriteTagToArray() below carries
  // GOOGLE_ATTRIBUTE_ALWAYS_INLINE; WriteTag() itself does not.
  void WriteTag(uint32 value);
  // Like WriteTag(), but writes directly to the target array.
  static uint8* WriteTagToArray(
      uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
689
  // Returns the number of bytes needed to encode the given value as a varint.
  static int VarintSize32(uint32 value);
  // Returns the number of bytes needed to encode the given value as a varint.
  static int VarintSize64(uint64 value);

  // If negative, 10 bytes.  Otherwise, same as VarintSize32().
  static int VarintSize32SignExtended(int32 value);
697
698  // Compile-time equivalent of VarintSize32().
699  template <uint32 Value>
700  struct StaticVarintSize32 {
701    static const int value =
702        (Value < (1 << 7))
703            ? 1
704            : (Value < (1 << 14))
705                ? 2
706                : (Value < (1 << 21))
707                    ? 3
708                    : (Value < (1 << 28))
709                        ? 4
710                        : 5;
711  };
712
  // Returns the total number of bytes written since this object was created.
  // Computed as the total size of all buffers obtained so far minus the part
  // of the current buffer that has not been written yet.
  inline int ByteCount() const;

  // Returns true if there was an underlying I/O error since this object was
  // created.
  bool HadError() const { return had_error_; }
719
720 private:
  // NOTE(review): presumably disables copy construction and assignment; the
  // macro is defined elsewhere -- confirm.
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);

  // NOTE(review): presumably the underlying stream that buffers are obtained
  // from; not referenced by any of the inline methods in this header.
  ZeroCopyOutputStream* output_;
  // Current write position inside the current buffer; moved forward by
  // Advance().
  uint8* buffer_;
  // Number of bytes still available at buffer_; reduced by Advance().
  int buffer_size_;
  int total_bytes_;  // Sum of sizes of all buffers seen so far.
  bool had_error_;   // Whether an error occurred during output.
728
  // Advance the buffer by a given number of bytes: moves buffer_ forward and
  // shrinks buffer_size_ by the same amount.  No bounds checking is done.
  void Advance(int amount);

  // Called when the buffer runs out to request more data.  Implies an
  // Advance(buffer_size_).
  bool Refresh();
735
  // Out-of-line slow path used by the inline WriteVarint32ToArray() and
  // WriteTagToArray() for values that do not fit their inlined fast paths.
  static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);

  // Always-inlined versions of WriteVarint* functions so that code can be
  // reused, while still controlling size. For instance, WriteVarint32ToArray()
  // should not directly call this: since it is inlined itself, doing so
  // would greatly increase the size of generated code. Instead, it should call
  // WriteVarint32FallbackToArray.  Meanwhile, WriteVarint32() is already
  // out-of-line, so it should just invoke this directly to avoid any extra
  // function call overhead.
  static uint8* WriteVarint32FallbackToArrayInline(
      uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
  static uint8* WriteVarint64ToArrayInline(
      uint64 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;

  // Out-of-line slow path for VarintSize32(), which handles values below
  // (1 << 7) inline and forwards everything else here.
  static int VarintSize32Fallback(uint32 value);
751};
752
753// inline methods ====================================================
754// The vast majority of varints are only one byte.  These inline
755// methods optimize for that case.
756
757inline bool CodedInputStream::ReadVarint32(uint32* value) {
758  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
759    *value = *buffer_;
760    Advance(1);
761    return true;
762  } else {
763    return ReadVarint32Fallback(value);
764  }
765}
766
767inline bool CodedInputStream::ReadVarint64(uint64* value) {
768  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
769    *value = *buffer_;
770    Advance(1);
771    return true;
772  } else {
773    return ReadVarint64Fallback(value);
774  }
775}
776
777// static
778inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
779    const uint8* buffer,
780    uint32* value) {
781#if defined(PROTOBUF_LITTLE_ENDIAN)
782  memcpy(value, buffer, sizeof(*value));
783  return buffer + sizeof(*value);
784#else
785  *value = (static_cast<uint32>(buffer[0])      ) |
786           (static_cast<uint32>(buffer[1]) <<  8) |
787           (static_cast<uint32>(buffer[2]) << 16) |
788           (static_cast<uint32>(buffer[3]) << 24);
789  return buffer + sizeof(*value);
790#endif
791}
792// static
793inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
794    const uint8* buffer,
795    uint64* value) {
796#if defined(PROTOBUF_LITTLE_ENDIAN)
797  memcpy(value, buffer, sizeof(*value));
798  return buffer + sizeof(*value);
799#else
800  uint32 part0 = (static_cast<uint32>(buffer[0])      ) |
801                 (static_cast<uint32>(buffer[1]) <<  8) |
802                 (static_cast<uint32>(buffer[2]) << 16) |
803                 (static_cast<uint32>(buffer[3]) << 24);
804  uint32 part1 = (static_cast<uint32>(buffer[4])      ) |
805                 (static_cast<uint32>(buffer[5]) <<  8) |
806                 (static_cast<uint32>(buffer[6]) << 16) |
807                 (static_cast<uint32>(buffer[7]) << 24);
808  *value = static_cast<uint64>(part0) |
809          (static_cast<uint64>(part1) << 32);
810  return buffer + sizeof(*value);
811#endif
812}
813
814inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
815#if defined(PROTOBUF_LITTLE_ENDIAN)
816  if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
817    memcpy(value, buffer_, sizeof(*value));
818    Advance(sizeof(*value));
819    return true;
820  } else {
821    return ReadLittleEndian32Fallback(value);
822  }
823#else
824  return ReadLittleEndian32Fallback(value);
825#endif
826}
827
828inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
829#if defined(PROTOBUF_LITTLE_ENDIAN)
830  if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
831    memcpy(value, buffer_, sizeof(*value));
832    Advance(sizeof(*value));
833    return true;
834  } else {
835    return ReadLittleEndian64Fallback(value);
836  }
837#else
838  return ReadLittleEndian64Fallback(value);
839#endif
840}
841
842inline uint32 CodedInputStream::ReadTag() {
843  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) {
844    last_tag_ = buffer_[0];
845    Advance(1);
846    return last_tag_;
847  } else {
848    last_tag_ = ReadTagFallback();
849    return last_tag_;
850  }
851}
852
853inline bool CodedInputStream::LastTagWas(uint32 expected) {
854  return last_tag_ == expected;
855}
856
857inline bool CodedInputStream::ConsumedEntireMessage() {
858  return legitimate_message_end_;
859}
860
861inline bool CodedInputStream::ExpectTag(uint32 expected) {
862  if (expected < (1 << 7)) {
863    if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
864      Advance(1);
865      return true;
866    } else {
867      return false;
868    }
869  } else if (expected < (1 << 14)) {
870    if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
871        buffer_[0] == static_cast<uint8>(expected | 0x80) &&
872        buffer_[1] == static_cast<uint8>(expected >> 7)) {
873      Advance(2);
874      return true;
875    } else {
876      return false;
877    }
878  } else {
879    // Don't bother optimizing for larger values.
880    return false;
881  }
882}
883
884inline const uint8* CodedInputStream::ExpectTagFromArray(
885    const uint8* buffer, uint32 expected) {
886  if (expected < (1 << 7)) {
887    if (buffer[0] == expected) {
888      return buffer + 1;
889    }
890  } else if (expected < (1 << 14)) {
891    if (buffer[0] == static_cast<uint8>(expected | 0x80) &&
892        buffer[1] == static_cast<uint8>(expected >> 7)) {
893      return buffer + 2;
894    }
895  }
896  return NULL;
897}
898
899inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
900                                                           int* size) {
901  *data = buffer_;
902  *size = buffer_end_ - buffer_;
903}
904
905inline bool CodedInputStream::ExpectAtEnd() {
906  // If we are at a limit we know no more bytes can be read.  Otherwise, it's
907  // hard to say without calling Refresh(), and we'd rather not do that.
908
909  if (buffer_ == buffer_end_ &&
910      ((buffer_size_after_limit_ != 0) ||
911       (total_bytes_read_ == current_limit_))) {
912    last_tag_ = 0;                   // Pretend we called ReadTag()...
913    legitimate_message_end_ = true;  // ... and it hit EOF.
914    return true;
915  } else {
916    return false;
917  }
918}
919
920inline int CodedInputStream::CurrentPosition() const {
921  return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
922}
923
924inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
925  if (buffer_size_ < size) {
926    return NULL;
927  } else {
928    uint8* result = buffer_;
929    Advance(size);
930    return result;
931  }
932}
933
934inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
935                                                        uint8* target) {
936  if (value < 0x80) {
937    *target = value;
938    return target + 1;
939  } else {
940    return WriteVarint32FallbackToArray(value, target);
941  }
942}
943
944inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
945  if (value < 0) {
946    WriteVarint64(static_cast<uint64>(value));
947  } else {
948    WriteVarint32(static_cast<uint32>(value));
949  }
950}
951
952inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
953    int32 value, uint8* target) {
954  if (value < 0) {
955    return WriteVarint64ToArray(static_cast<uint64>(value), target);
956  } else {
957    return WriteVarint32ToArray(static_cast<uint32>(value), target);
958  }
959}
960
961inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
962                                                            uint8* target) {
963#if defined(PROTOBUF_LITTLE_ENDIAN)
964  memcpy(target, &value, sizeof(value));
965#else
966  target[0] = static_cast<uint8>(value);
967  target[1] = static_cast<uint8>(value >>  8);
968  target[2] = static_cast<uint8>(value >> 16);
969  target[3] = static_cast<uint8>(value >> 24);
970#endif
971  return target + sizeof(value);
972}
973
974inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
975                                                            uint8* target) {
976#if defined(PROTOBUF_LITTLE_ENDIAN)
977  memcpy(target, &value, sizeof(value));
978#else
979  uint32 part0 = static_cast<uint32>(value);
980  uint32 part1 = static_cast<uint32>(value >> 32);
981
982  target[0] = static_cast<uint8>(part0);
983  target[1] = static_cast<uint8>(part0 >>  8);
984  target[2] = static_cast<uint8>(part0 >> 16);
985  target[3] = static_cast<uint8>(part0 >> 24);
986  target[4] = static_cast<uint8>(part1);
987  target[5] = static_cast<uint8>(part1 >>  8);
988  target[6] = static_cast<uint8>(part1 >> 16);
989  target[7] = static_cast<uint8>(part1 >> 24);
990#endif
991  return target + sizeof(value);
992}
993
994inline void CodedOutputStream::WriteTag(uint32 value) {
995  WriteVarint32(value);
996}
997
998inline uint8* CodedOutputStream::WriteTagToArray(
999    uint32 value, uint8* target) {
1000  if (value < (1 << 7)) {
1001    target[0] = value;
1002    return target + 1;
1003  } else if (value < (1 << 14)) {
1004    target[0] = static_cast<uint8>(value | 0x80);
1005    target[1] = static_cast<uint8>(value >> 7);
1006    return target + 2;
1007  } else {
1008    return WriteVarint32FallbackToArray(value, target);
1009  }
1010}
1011
1012inline int CodedOutputStream::VarintSize32(uint32 value) {
1013  if (value < (1 << 7)) {
1014    return 1;
1015  } else  {
1016    return VarintSize32Fallback(value);
1017  }
1018}
1019
1020inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
1021  if (value < 0) {
1022    return 10;     // TODO(kenton):  Make this a symbolic constant.
1023  } else {
1024    return VarintSize32(static_cast<uint32>(value));
1025  }
1026}
1027
1028inline void CodedOutputStream::WriteString(const string& str) {
1029  WriteRaw(str.data(), static_cast<int>(str.size()));
1030}
1031
1032inline uint8* CodedOutputStream::WriteStringToArray(
1033    const string& str, uint8* target) {
1034  return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
1035}
1036
1037inline int CodedOutputStream::ByteCount() const {
1038  return total_bytes_ - buffer_size_;
1039}
1040
1041inline void CodedInputStream::Advance(int amount) {
1042  buffer_ += amount;
1043}
1044
1045inline void CodedOutputStream::Advance(int amount) {
1046  buffer_ += amount;
1047  buffer_size_ -= amount;
1048}
1049
1050inline void CodedInputStream::SetRecursionLimit(int limit) {
1051  recursion_limit_ = limit;
1052}
1053
1054inline bool CodedInputStream::IncrementRecursionDepth() {
1055  ++recursion_depth_;
1056  return recursion_depth_ <= recursion_limit_;
1057}
1058
1059inline void CodedInputStream::DecrementRecursionDepth() {
1060  if (recursion_depth_ > 0) --recursion_depth_;
1061}
1062
1063inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
1064                                                   MessageFactory* factory) {
1065  extension_pool_ = pool;
1066  extension_factory_ = factory;
1067}
1068
1069inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1070  return extension_pool_;
1071}
1072
1073inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1074  return extension_factory_;
1075}
1076
1077inline int CodedInputStream::BufferSize() const {
1078  return buffer_end_ - buffer_;
1079}
1080
// Constructs a CodedInputStream that reads from the given
// ZeroCopyInputStream.  The buffer pointers start out NULL, limits and
// counters take their default values, and Refresh() is called right away.
inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
  : input_(input),
    buffer_(NULL),
    buffer_end_(NULL),
    total_bytes_read_(0),
    overflow_bytes_(0),
    last_tag_(0),
    legitimate_message_end_(false),
    aliasing_enabled_(false),
    current_limit_(kint32max),
    buffer_size_after_limit_(0),
    total_bytes_limit_(kDefaultTotalBytesLimit),
    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
    recursion_depth_(0),
    recursion_limit_(default_recursion_limit_),
    extension_pool_(NULL),
    extension_factory_(NULL) {
  // Eagerly Refresh() so buffer space is immediately available.
  // The return value of Refresh() is not checked here.
  Refresh();
}
1101
// Constructs a CodedInputStream that reads directly from the flat array
// [buffer, buffer + size).  There is no underlying stream (input_ is NULL,
// which is what IsFlat() reports), and the whole array counts as already
// read into the buffer: total_bytes_read_ and current_limit_ both start at
// "size".
inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
  : input_(NULL),
    buffer_(buffer),
    buffer_end_(buffer + size),
    total_bytes_read_(size),
    overflow_bytes_(0),
    last_tag_(0),
    legitimate_message_end_(false),
    aliasing_enabled_(false),
    current_limit_(size),
    buffer_size_after_limit_(0),
    total_bytes_limit_(kDefaultTotalBytesLimit),
    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
    recursion_depth_(0),
    recursion_limit_(default_recursion_limit_),
    extension_pool_(NULL),
    extension_factory_(NULL) {
  // Note that setting current_limit_ == size is important to prevent some
  // code paths from trying to access input_ and segfaulting.
}
1122
1123inline bool CodedInputStream::IsFlat() const {
1124  return input_ == NULL;
1125}
1126
1127}  // namespace io
1128}  // namespace protobuf
1129
1130
1131#if defined(_MSC_VER) && _MSC_VER >= 1300
1132  #pragma runtime_checks("c", restore)
1133#endif  // _MSC_VER
1134
1135}  // namespace google
1136#endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1137