1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34//
35// This file contains the CodedInputStream and CodedOutputStream classes,
36// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37// and allow you to read or write individual pieces of data in various
38// formats.  In particular, these implement the varint encoding for
39// integers, a simple variable-length encoding in which smaller numbers
40// take fewer bytes.
41//
42// Typically these classes will only be used internally by the protocol
43// buffer library in order to encode and decode protocol buffers.  Clients
44// of the library only need to know about this class if they wish to write
45// custom message parsing or serialization procedures.
46//
47// CodedOutputStream example:
48//   // Write some data to "myfile".  First we write a 4-byte "magic number"
49//   // to identify the file type, then write a length-delimited string.  The
50//   // string is composed of a varint giving the length followed by the raw
51//   // bytes.
52//   int fd = open("myfile", O_WRONLY);
53//   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55//
56//   int magic_number = 1234;
57//   char text[] = "Hello world!";
58//   coded_output->WriteLittleEndian32(magic_number);
59//   coded_output->WriteVarint32(strlen(text));
60//   coded_output->WriteRaw(text, strlen(text));
61//
62//   delete coded_output;
63//   delete raw_output;
64//   close(fd);
65//
// CodedInputStream example:
//   // Read a file created by the above code.
//   int fd = open("myfile", O_RDONLY);
//   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
//   CodedInputStream* coded_input = new CodedInputStream(raw_input);
//
//   uint32 magic_number;
//   coded_input->ReadLittleEndian32(&magic_number);
//   if (magic_number != 1234) {
//     cerr << "File not in expected format." << endl;
//     return;
//   }
//
//   uint32 size;
//   coded_input->ReadVarint32(&size);
//
//   char* text = new char[size + 1];
//   coded_input->ReadRaw(text, size);
//   text[size] = '\0';
//
//   delete coded_input;
//   delete raw_input;
//   close(fd);
//
//   cout << "Text is: " << text << endl;
//   delete [] text;
91//
92// For those who are interested, varint encoding is defined as follows:
93//
94// The encoding operates on unsigned integers of up to 64 bits in length.
95// Each byte of the encoded value has the format:
96// * bits 0-6: Seven bits of the number being encoded.
97// * bit 7: Zero if this is the last byte in the encoding (in which
98//   case all remaining bits of the number are zero) or 1 if
99//   more bytes follow.
100// The first byte contains the least-significant 7 bits of the number, the
101// second byte (if present) contains the next-least-significant 7 bits,
102// and so on.  So, the binary number 1011000101011 would be encoded in two
103// bytes as "10101011 00101100".
104//
105// In theory, varint could be used to encode integers of any length.
106// However, for practicality we set a limit at 64 bits.  The maximum encoded
107// length of a number is thus 10 bytes.
108
109#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111
112#include <string>
113#ifndef _MSC_VER
114#include <sys/param.h>
115#endif  // !_MSC_VER
116#include <google/protobuf/stubs/common.h>
117#include <google/protobuf/stubs/common.h>          // for GOOGLE_PREDICT_TRUE macro
118
119namespace google {
120
121namespace protobuf {
122
123class DescriptorPool;
124class MessageFactory;
125
126namespace io {
127
128// Defined in this file.
129class CodedInputStream;
130class CodedOutputStream;
131
132// Defined in other files.
133class ZeroCopyInputStream;           // zero_copy_stream.h
134class ZeroCopyOutputStream;          // zero_copy_stream.h
135
136// Class which reads and decodes binary data which is composed of varint-
137// encoded integers and fixed-width pieces.  Wraps a ZeroCopyInputStream.
138// Most users will not need to deal with CodedInputStream.
139//
140// Most methods of CodedInputStream that return a bool return false if an
141// underlying I/O error occurs or if the data is malformed.  Once such a
142// failure occurs, the CodedInputStream is broken and is no longer useful.
143class LIBPROTOBUF_EXPORT CodedInputStream {
144 public:
145  // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
146  explicit CodedInputStream(ZeroCopyInputStream* input);
147
148  // Create a CodedInputStream that reads from the given flat array.  This is
149  // faster than using an ArrayInputStream.  PushLimit(size) is implied by
150  // this constructor.
151  explicit CodedInputStream(const uint8* buffer, int size);
152
153  // Destroy the CodedInputStream and position the underlying
154  // ZeroCopyInputStream at the first unread byte.  If an error occurred while
155  // reading (causing a method to return false), then the exact position of
156  // the input stream may be anywhere between the last value that was read
157  // successfully and the stream's byte limit.
158  ~CodedInputStream();
159
160
161  // Skips a number of bytes.  Returns false if an underlying read error
162  // occurs.
163  bool Skip(int count);
164
165  // Sets *data to point directly at the unread part of the CodedInputStream's
166  // underlying buffer, and *size to the size of that buffer, but does not
167  // advance the stream's current position.  This will always either produce
168  // a non-empty buffer or return false.  If the caller consumes any of
169  // this data, it should then call Skip() to skip over the consumed bytes.
170  // This may be useful for implementing external fast parsing routines for
171  // types of data not covered by the CodedInputStream interface.
172  bool GetDirectBufferPointer(const void** data, int* size);
173
174  // Like GetDirectBufferPointer, but this method is inlined, and does not
175  // attempt to Refresh() if the buffer is currently empty.
176  inline void GetDirectBufferPointerInline(const void** data,
177                                           int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
178
179  // Read raw bytes, copying them into the given buffer.
180  bool ReadRaw(void* buffer, int size);
181
182  // Like ReadRaw, but reads into a string.
183  //
184  // Implementation Note:  ReadString() grows the string gradually as it
185  // reads in the data, rather than allocating the entire requested size
186  // upfront.  This prevents denial-of-service attacks in which a client
187  // could claim that a string is going to be MAX_INT bytes long in order to
188  // crash the server because it can't allocate this much space at once.
189  bool ReadString(string* buffer, int size);
190  // Like the above, with inlined optimizations. This should only be used
191  // by the protobuf implementation.
192  inline bool InternalReadStringInline(string* buffer,
193                                       int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
194
195
196  // Read a 32-bit little-endian integer.
197  bool ReadLittleEndian32(uint32* value);
198  // Read a 64-bit little-endian integer.
199  bool ReadLittleEndian64(uint64* value);
200
201  // These methods read from an externally provided buffer. The caller is
202  // responsible for ensuring that the buffer has sufficient space.
203  // Read a 32-bit little-endian integer.
204  static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
205                                                   uint32* value);
206  // Read a 64-bit little-endian integer.
207  static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
208                                                   uint64* value);
209
210  // Read an unsigned integer with Varint encoding, truncating to 32 bits.
211  // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
212  // it to uint32, but may be more efficient.
213  bool ReadVarint32(uint32* value);
214  // Read an unsigned integer with Varint encoding.
215  bool ReadVarint64(uint64* value);
216
217  // Read a tag.  This calls ReadVarint32() and returns the result, or returns
218  // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
219  // the last tag value, which can be checked with LastTagWas().
220  // Always inline because this is only called in once place per parse loop
221  // but it is called for every iteration of said loop, so it should be fast.
222  // GCC doesn't want to inline this by default.
223  uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
224
225  // Usually returns true if calling ReadVarint32() now would produce the given
226  // value.  Will always return false if ReadVarint32() would not return the
227  // given value.  If ExpectTag() returns true, it also advances past
228  // the varint.  For best performance, use a compile-time constant as the
229  // parameter.
230  // Always inline because this collapses to a small number of instructions
231  // when given a constant parameter, but GCC doesn't want to inline by default.
232  bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
233
234  // Like above, except this reads from the specified buffer. The caller is
235  // responsible for ensuring that the buffer is large enough to read a varint
236  // of the expected size. For best performance, use a compile-time constant as
237  // the expected tag parameter.
238  //
239  // Returns a pointer beyond the expected tag if it was found, or NULL if it
240  // was not.
241  static const uint8* ExpectTagFromArray(
242      const uint8* buffer,
243      uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
244
245  // Usually returns true if no more bytes can be read.  Always returns false
246  // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
247  // call to LastTagWas() will act as if ReadTag() had been called and returned
248  // zero, and ConsumedEntireMessage() will return true.
249  bool ExpectAtEnd();
250
251  // If the last call to ReadTag() returned the given value, returns true.
252  // Otherwise, returns false;
253  //
254  // This is needed because parsers for some types of embedded messages
255  // (with field type TYPE_GROUP) don't actually know that they've reached the
256  // end of a message until they see an ENDGROUP tag, which was actually part
257  // of the enclosing message.  The enclosing message would like to check that
258  // tag to make sure it had the right number, so it calls LastTagWas() on
259  // return from the embedded parser to check.
260  bool LastTagWas(uint32 expected);
261
262  // When parsing message (but NOT a group), this method must be called
263  // immediately after MergeFromCodedStream() returns (if it returns true)
264  // to further verify that the message ended in a legitimate way.  For
265  // example, this verifies that parsing did not end on an end-group tag.
266  // It also checks for some cases where, due to optimizations,
267  // MergeFromCodedStream() can incorrectly return true.
268  bool ConsumedEntireMessage();
269
270  // Limits ----------------------------------------------------------
271  // Limits are used when parsing length-delimited embedded messages.
272  // After the message's length is read, PushLimit() is used to prevent
273  // the CodedInputStream from reading beyond that length.  Once the
274  // embedded message has been parsed, PopLimit() is called to undo the
275  // limit.
276
277  // Opaque type used with PushLimit() and PopLimit().  Do not modify
278  // values of this type yourself.  The only reason that this isn't a
279  // struct with private internals is for efficiency.
280  typedef int Limit;
281
282  // Places a limit on the number of bytes that the stream may read,
283  // starting from the current position.  Once the stream hits this limit,
284  // it will act like the end of the input has been reached until PopLimit()
285  // is called.
286  //
287  // As the names imply, the stream conceptually has a stack of limits.  The
288  // shortest limit on the stack is always enforced, even if it is not the
289  // top limit.
290  //
291  // The value returned by PushLimit() is opaque to the caller, and must
292  // be passed unchanged to the corresponding call to PopLimit().
293  Limit PushLimit(int byte_limit);
294
295  // Pops the last limit pushed by PushLimit().  The input must be the value
296  // returned by that call to PushLimit().
297  void PopLimit(Limit limit);
298
299  // Returns the number of bytes left until the nearest limit on the
300  // stack is hit, or -1 if no limits are in place.
301  int BytesUntilLimit();
302
303  // Total Bytes Limit -----------------------------------------------
304  // To prevent malicious users from sending excessively large messages
305  // and causing integer overflows or memory exhaustion, CodedInputStream
306  // imposes a hard limit on the total number of bytes it will read.
307
308  // Sets the maximum number of bytes that this CodedInputStream will read
309  // before refusing to continue.  To prevent integer overflows in the
310  // protocol buffers implementation, as well as to prevent servers from
311  // allocating enormous amounts of memory to hold parsed messages, the
312  // maximum message length should be limited to the shortest length that
313  // will not harm usability.  The theoretical shortest message that could
314  // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
315  // should set shorter limits if possible.  If warning_threshold is not -1,
316  // a warning will be printed to stderr after warning_threshold bytes are
317  // read.  An error will always be printed to stderr if the limit is
318  // reached.
319  //
320  // This is unrelated to PushLimit()/PopLimit().
321  //
322  // Hint:  If you are reading this because your program is printing a
323  //   warning about dangerously large protocol messages, you may be
324  //   confused about what to do next.  The best option is to change your
325  //   design such that excessively large messages are not necessary.
326  //   For example, try to design file formats to consist of many small
327  //   messages rather than a single large one.  If this is infeasible,
328  //   you will need to increase the limit.  Chances are, though, that
329  //   your code never constructs a CodedInputStream on which the limit
330  //   can be set.  You probably parse messages by calling things like
331  //   Message::ParseFromString().  In this case, you will need to change
332  //   your code to instead construct some sort of ZeroCopyInputStream
333  //   (e.g. an ArrayInputStream), construct a CodedInputStream around
334  //   that, then call Message::ParseFromCodedStream() instead.  Then
335  //   you can adjust the limit.  Yes, it's more work, but you're doing
336  //   something unusual.
337  void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
338
339  // Recursion Limit -------------------------------------------------
340  // To prevent corrupt or malicious messages from causing stack overflows,
341  // we must keep track of the depth of recursion when parsing embedded
342  // messages and groups.  CodedInputStream keeps track of this because it
343  // is the only object that is passed down the stack during parsing.
344
345  // Sets the maximum recursion depth.  The default is 64.
346  void SetRecursionLimit(int limit);
347
348  // Increments the current recursion depth.  Returns true if the depth is
349  // under the limit, false if it has gone over.
350  bool IncrementRecursionDepth();
351
352  // Decrements the recursion depth.
353  void DecrementRecursionDepth();
354
355  // Extension Registry ----------------------------------------------
356  // ADVANCED USAGE:  99.9% of people can ignore this section.
357  //
358  // By default, when parsing extensions, the parser looks for extension
359  // definitions in the pool which owns the outer message's Descriptor.
360  // However, you may call SetExtensionRegistry() to provide an alternative
361  // pool instead.  This makes it possible, for example, to parse a message
362  // using a generated class, but represent some extensions using
363  // DynamicMessage.
364
365  // Set the pool used to look up extensions.  Most users do not need to call
366  // this as the correct pool will be chosen automatically.
367  //
368  // WARNING:  It is very easy to misuse this.  Carefully read the requirements
369  //   below.  Do not use this unless you are sure you need it.  Almost no one
370  //   does.
371  //
372  // Let's say you are parsing a message into message object m, and you want
373  // to take advantage of SetExtensionRegistry().  You must follow these
374  // requirements:
375  //
376  // The given DescriptorPool must contain m->GetDescriptor().  It is not
377  // sufficient for it to simply contain a descriptor that has the same name
378  // and content -- it must be the *exact object*.  In other words:
379  //   assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
380  //          m->GetDescriptor());
381  // There are two ways to satisfy this requirement:
382  // 1) Use m->GetDescriptor()->pool() as the pool.  This is generally useless
383  //    because this is the pool that would be used anyway if you didn't call
384  //    SetExtensionRegistry() at all.
385  // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
386  //    "underlay".  Read the documentation for DescriptorPool for more
387  //    information about underlays.
388  //
389  // You must also provide a MessageFactory.  This factory will be used to
390  // construct Message objects representing extensions.  The factory's
391  // GetPrototype() MUST return non-NULL for any Descriptor which can be found
392  // through the provided pool.
393  //
394  // If the provided factory might return instances of protocol-compiler-
395  // generated (i.e. compiled-in) types, or if the outer message object m is
396  // a generated type, then the given factory MUST have this property:  If
397  // GetPrototype() is given a Descriptor which resides in
398  // DescriptorPool::generated_pool(), the factory MUST return the same
399  // prototype which MessageFactory::generated_factory() would return.  That
400  // is, given a descriptor for a generated type, the factory must return an
401  // instance of the generated class (NOT DynamicMessage).  However, when
402  // given a descriptor for a type that is NOT in generated_pool, the factory
403  // is free to return any implementation.
404  //
405  // The reason for this requirement is that generated sub-objects may be
406  // accessed via the standard (non-reflection) extension accessor methods,
407  // and these methods will down-cast the object to the generated class type.
408  // If the object is not actually of that type, the results would be undefined.
409  // On the other hand, if an extension is not compiled in, then there is no
410  // way the code could end up accessing it via the standard accessors -- the
411  // only way to access the extension is via reflection.  When using reflection,
412  // DynamicMessage and generated messages are indistinguishable, so it's fine
413  // if these objects are represented using DynamicMessage.
414  //
415  // Using DynamicMessageFactory on which you have called
416  // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
417  // above requirement.
418  //
419  // If either pool or factory is NULL, both must be NULL.
420  //
421  // Note that this feature is ignored when parsing "lite" messages as they do
422  // not have descriptors.
423  void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);
424
425  // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
426  // has been provided.
427  const DescriptorPool* GetExtensionPool();
428
429  // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
430  // factory has been provided.
431  MessageFactory* GetExtensionFactory();
432
433 private:
434  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
435
436  ZeroCopyInputStream* input_;
437  const uint8* buffer_;
438  const uint8* buffer_end_;     // pointer to the end of the buffer.
439  int total_bytes_read_;  // total bytes read from input_, including
440                          // the current buffer
441
442  // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
443  // so that we can BackUp() on destruction.
444  int overflow_bytes_;
445
446  // LastTagWas() stuff.
447  uint32 last_tag_;         // result of last ReadTag().
448
449  // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
450  // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
451  // reach the end of a message and attempt to read another tag.
452  bool legitimate_message_end_;
453
454  // See EnableAliasing().
455  bool aliasing_enabled_;
456
457  // Limits
458  Limit current_limit_;   // if position = -1, no limit is applied
459
460  // For simplicity, if the current buffer crosses a limit (either a normal
461  // limit created by PushLimit() or the total bytes limit), buffer_size_
462  // only tracks the number of bytes before that limit.  This field
463  // contains the number of bytes after it.  Note that this implies that if
464  // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
465  // hit a limit.  However, if both are zero, it doesn't necessarily mean
466  // we aren't at a limit -- the buffer may have ended exactly at the limit.
467  int buffer_size_after_limit_;
468
469  // Maximum number of bytes to read, period.  This is unrelated to
470  // current_limit_.  Set using SetTotalBytesLimit().
471  int total_bytes_limit_;
472  int total_bytes_warning_threshold_;
473
474  // Current recursion depth, controlled by IncrementRecursionDepth() and
475  // DecrementRecursionDepth().
476  int recursion_depth_;
477  // Recursion depth limit, set by SetRecursionLimit().
478  int recursion_limit_;
479
480  // See SetExtensionRegistry().
481  const DescriptorPool* extension_pool_;
482  MessageFactory* extension_factory_;
483
484  // Private member functions.
485
486  // Advance the buffer by a given number of bytes.
487  void Advance(int amount);
488
489  // Back up input_ to the current buffer position.
490  void BackUpInputToCurrentPosition();
491
492  // Recomputes the value of buffer_size_after_limit_.  Must be called after
493  // current_limit_ or total_bytes_limit_ changes.
494  void RecomputeBufferLimits();
495
496  // Writes an error message saying that we hit total_bytes_limit_.
497  void PrintTotalBytesLimitError();
498
499  // Called when the buffer runs out to request more data.  Implies an
500  // Advance(BufferSize()).
501  bool Refresh();
502
503  // When parsing varints, we optimize for the common case of small values, and
504  // then optimize for the case when the varint fits within the current buffer
505  // piece. The Fallback method is used when we can't use the one-byte
506  // optimization. The Slow method is yet another fallback when the buffer is
507  // not large enough. Making the slow path out-of-line speeds up the common
508  // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
509  // message crosses multiple buffers.
510  bool ReadVarint32Fallback(uint32* value);
511  bool ReadVarint64Fallback(uint64* value);
512  bool ReadVarint32Slow(uint32* value);
513  bool ReadVarint64Slow(uint64* value);
514  bool ReadLittleEndian32Fallback(uint32* value);
515  bool ReadLittleEndian64Fallback(uint64* value);
516  // Fallback/slow methods for reading tags. These do not update last_tag_,
517  // but will set legitimate_message_end_ if we are at the end of the input
518  // stream.
519  uint32 ReadTagFallback();
520  uint32 ReadTagSlow();
521  bool ReadStringFallback(string* buffer, int size);
522
523  // Return the size of the buffer.
524  uint32 BufferSize() const;
525
526  static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
527
528  static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
529  static const int kDefaultRecursionLimit = 64;
530};
531
532// Class which encodes and writes binary data which is composed of varint-
533// encoded integers and fixed-width pieces.  Wraps a ZeroCopyOutputStream.
534// Most users will not need to deal with CodedOutputStream.
535//
536// Most methods of CodedOutputStream which return a bool return false if an
537// underlying I/O error occurs.  Once such a failure occurs, the
538// CodedOutputStream is broken and is no longer useful. The Write* methods do
539// not return the stream status, but will invalidate the stream if an error
540// occurs. The client can probe HadError() to determine the status.
541//
542// Note that every method of CodedOutputStream which writes some data has
543// a corresponding static "ToArray" version. These versions write directly
544// to the provided buffer, returning a pointer past the last written byte.
545// They require that the buffer has sufficient capacity for the encoded data.
546// This allows an optimization where we check if an output stream has enough
547// space for an entire message before we start writing and, if there is, we
548// call only the ToArray methods to avoid doing bound checks for each
549// individual value.
// For instance, continuing the example above:
//
//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
//   int magic_number = 1234;
//   char text[] = "Hello world!";
//
//   int coded_size = sizeof(magic_number) +
//                    CodedOutputStream::VarintSize32(strlen(text)) +
//                    strlen(text);
559//
560//   uint8* buffer =
561//       coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
562//   if (buffer != NULL) {
563//     // The output stream has enough space in the buffer: write directly to
564//     // the array.
565//     buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
566//                                                            buffer);
567//     buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
568//     buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
569//   } else {
570//     // Make bound-checked writes, which will ask the underlying stream for
571//     // more space as needed.
572//     coded_output->WriteLittleEndian32(magic_number);
573//     coded_output->WriteVarint32(strlen(text));
574//     coded_output->WriteRaw(text, strlen(text));
575//   }
576//
577//   delete coded_output;
578class LIBPROTOBUF_EXPORT CodedOutputStream {
579 public:
  // Create a CodedOutputStream that writes to the given ZeroCopyOutputStream.
581  explicit CodedOutputStream(ZeroCopyOutputStream* output);
582
583  // Destroy the CodedOutputStream and position the underlying
584  // ZeroCopyOutputStream immediately after the last byte written.
585  ~CodedOutputStream();
586
587  // Skips a number of bytes, leaving the bytes unmodified in the underlying
588  // buffer.  Returns false if an underlying write error occurs.  This is
589  // mainly useful with GetDirectBufferPointer().
590  bool Skip(int count);
591
592  // Sets *data to point directly at the unwritten part of the
593  // CodedOutputStream's underlying buffer, and *size to the size of that
594  // buffer, but does not advance the stream's current position.  This will
595  // always either produce a non-empty buffer or return false.  If the caller
596  // writes any data to this buffer, it should then call Skip() to skip over
597  // the consumed bytes.  This may be useful for implementing external fast
598  // serialization routines for types of data not covered by the
599  // CodedOutputStream interface.
600  bool GetDirectBufferPointer(void** data, int* size);
601
602  // If there are at least "size" bytes available in the current buffer,
603  // returns a pointer directly into the buffer and advances over these bytes.
604  // The caller may then write directly into this buffer (e.g. using the
605  // *ToArray static methods) rather than go through CodedOutputStream.  If
606  // there are not enough bytes available, returns NULL.  The return pointer is
607  // invalidated as soon as any other non-const method of CodedOutputStream
608  // is called.
609  inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
610
611  // Write raw bytes, copying them from the given buffer.
612  void WriteRaw(const void* buffer, int size);
613  // Like WriteRaw()  but writing directly to the target array.
614  // This is _not_ inlined, as the compiler often optimizes memcpy into inline
615  // copy loops. Since this gets called by every field with string or bytes
616  // type, inlining may lead to a significant amount of code bloat, with only a
617  // minor performance gain.
618  static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
619
620  // Equivalent to WriteRaw(str.data(), str.size()).
621  void WriteString(const string& str);
622  // Like WriteString()  but writing directly to the target array.
623  static uint8* WriteStringToArray(const string& str, uint8* target);
624
625
626  // Write a 32-bit little-endian integer.
627  void WriteLittleEndian32(uint32 value);
628  // Like WriteLittleEndian32()  but writing directly to the target array.
629  static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
630  // Write a 64-bit little-endian integer.
631  void WriteLittleEndian64(uint64 value);
632  // Like WriteLittleEndian64()  but writing directly to the target array.
633  static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
634
635  // Write an unsigned integer with Varint encoding.  Writing a 32-bit value
636  // is equivalent to casting it to uint64 and writing it as a 64-bit value,
637  // but may be more efficient.
638  void WriteVarint32(uint32 value);
639  // Like WriteVarint32()  but writing directly to the target array.
640  static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
641  // Write an unsigned integer with Varint encoding.
642  void WriteVarint64(uint64 value);
643  // Like WriteVarint64()  but writing directly to the target array.
644  static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
645
646  // Equivalent to WriteVarint32() except when the value is negative,
647  // in which case it must be sign-extended to a full 10 bytes.
648  void WriteVarint32SignExtended(int32 value);
649  // Like WriteVarint32SignExtended()  but writing directly to the target array.
650  static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
651
652  // This is identical to WriteVarint32(), but optimized for writing tags.
653  // In particular, if the input is a compile-time constant, this method
654  // compiles down to a couple instructions.
  // Always inline because otherwise the aforementioned optimization can't work,
656  // but GCC by default doesn't want to inline this.
657  void WriteTag(uint32 value);
658  // Like WriteTag()  but writing directly to the target array.
659  static uint8* WriteTagToArray(
660      uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
661
662  // Returns the number of bytes needed to encode the given value as a varint.
663  static int VarintSize32(uint32 value);
664  // Returns the number of bytes needed to encode the given value as a varint.
665  static int VarintSize64(uint64 value);
666
  // If negative, 10 bytes.  Otherwise, same as VarintSize32().
668  static int VarintSize32SignExtended(int32 value);
669
670  // Returns the total number of bytes written since this object was created.
671  inline int ByteCount() const;
672
673  // Returns true if there was an underlying I/O error since this object was
674  // created.
675  bool HadError() const { return had_error_; }
676
677 private:
678  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
679
680  ZeroCopyOutputStream* output_;
681  uint8* buffer_;
682  int buffer_size_;
683  int total_bytes_;  // Sum of sizes of all buffers seen so far.
684  bool had_error_;   // Whether an error occurred during output.
685
686  // Advance the buffer by a given number of bytes.
687  void Advance(int amount);
688
689  // Called when the buffer runs out to request more data.  Implies an
690  // Advance(buffer_size_).
691  bool Refresh();
692
693  static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);
694
695  // Always-inlined versions of WriteVarint* functions so that code can be
696  // reused, while still controlling size. For instance, WriteVarint32ToArray()
697  // should not directly call this: since it is inlined itself, doing so
698  // would greatly increase the size of generated code. Instead, it should call
699  // WriteVarint32FallbackToArray.  Meanwhile, WriteVarint32() is already
700  // out-of-line, so it should just invoke this directly to avoid any extra
701  // function call overhead.
702  static uint8* WriteVarint32FallbackToArrayInline(
703      uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
704  static uint8* WriteVarint64ToArrayInline(
705      uint64 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
706
707  static int VarintSize32Fallback(uint32 value);
708};
709
710// inline methods ====================================================
711// The vast majority of varints are only one byte.  These inline
712// methods optimize for that case.
713
714inline bool CodedInputStream::ReadVarint32(uint32* value) {
715  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
716    *value = *buffer_;
717    Advance(1);
718    return true;
719  } else {
720    return ReadVarint32Fallback(value);
721  }
722}
723
724inline bool CodedInputStream::ReadVarint64(uint64* value) {
725  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
726    *value = *buffer_;
727    Advance(1);
728    return true;
729  } else {
730    return ReadVarint64Fallback(value);
731  }
732}
733
734// static
735inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
736    const uint8* buffer,
737    uint32* value) {
738#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
739    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
740  memcpy(value, buffer, sizeof(*value));
741  return buffer + sizeof(*value);
742#else
743  *value = (static_cast<uint32>(buffer[0])      ) |
744           (static_cast<uint32>(buffer[1]) <<  8) |
745           (static_cast<uint32>(buffer[2]) << 16) |
746           (static_cast<uint32>(buffer[3]) << 24);
747  return buffer + sizeof(*value);
748#endif
749}
750// static
751inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
752    const uint8* buffer,
753    uint64* value) {
754#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
755    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
756  memcpy(value, buffer, sizeof(*value));
757  return buffer + sizeof(*value);
758#else
759  uint32 part0 = (static_cast<uint32>(buffer[0])      ) |
760                 (static_cast<uint32>(buffer[1]) <<  8) |
761                 (static_cast<uint32>(buffer[2]) << 16) |
762                 (static_cast<uint32>(buffer[3]) << 24);
763  uint32 part1 = (static_cast<uint32>(buffer[4])      ) |
764                 (static_cast<uint32>(buffer[5]) <<  8) |
765                 (static_cast<uint32>(buffer[6]) << 16) |
766                 (static_cast<uint32>(buffer[7]) << 24);
767  *value = static_cast<uint64>(part0) |
768          (static_cast<uint64>(part1) << 32);
769  return buffer + sizeof(*value);
770#endif
771}
772
773inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
774#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
775    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
776  if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
777    memcpy(value, buffer_, sizeof(*value));
778    Advance(sizeof(*value));
779    return true;
780  } else {
781    return ReadLittleEndian32Fallback(value);
782  }
783#else
784  return ReadLittleEndian32Fallback(value);
785#endif
786}
787
788inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
789#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
790    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
791  if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
792    memcpy(value, buffer_, sizeof(*value));
793    Advance(sizeof(*value));
794    return true;
795  } else {
796    return ReadLittleEndian64Fallback(value);
797  }
798#else
799  return ReadLittleEndian64Fallback(value);
800#endif
801}
802
803inline uint32 CodedInputStream::ReadTag() {
804  if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) {
805    last_tag_ = buffer_[0];
806    Advance(1);
807    return last_tag_;
808  } else {
809    last_tag_ = ReadTagFallback();
810    return last_tag_;
811  }
812}
813
814inline bool CodedInputStream::LastTagWas(uint32 expected) {
815  return last_tag_ == expected;
816}
817
818inline bool CodedInputStream::ConsumedEntireMessage() {
819  return legitimate_message_end_;
820}
821
822inline bool CodedInputStream::ExpectTag(uint32 expected) {
823  if (expected < (1 << 7)) {
824    if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
825      Advance(1);
826      return true;
827    } else {
828      return false;
829    }
830  } else if (expected < (1 << 14)) {
831    if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
832        buffer_[0] == static_cast<uint8>(expected | 0x80) &&
833        buffer_[1] == static_cast<uint8>(expected >> 7)) {
834      Advance(2);
835      return true;
836    } else {
837      return false;
838    }
839  } else {
840    // Don't bother optimizing for larger values.
841    return false;
842  }
843}
844
845inline const uint8* CodedInputStream::ExpectTagFromArray(
846    const uint8* buffer, uint32 expected) {
847  if (expected < (1 << 7)) {
848    if (buffer[0] == expected) {
849      return buffer + 1;
850    }
851  } else if (expected < (1 << 14)) {
852    if (buffer[0] == static_cast<uint8>(expected | 0x80) &&
853        buffer[1] == static_cast<uint8>(expected >> 7)) {
854      return buffer + 2;
855    }
856  }
857  return NULL;
858}
859
860inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
861                                                           int* size) {
862  *data = buffer_;
863  *size = buffer_end_ - buffer_;
864}
865
866inline bool CodedInputStream::ExpectAtEnd() {
867  // If we are at a limit we know no more bytes can be read.  Otherwise, it's
868  // hard to say without calling Refresh(), and we'd rather not do that.
869
870  if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) {
871    last_tag_ = 0;                   // Pretend we called ReadTag()...
872    legitimate_message_end_ = true;  // ... and it hit EOF.
873    return true;
874  } else {
875    return false;
876  }
877}
878
879inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
880  if (buffer_size_ < size) {
881    return NULL;
882  } else {
883    uint8* result = buffer_;
884    Advance(size);
885    return result;
886  }
887}
888
889inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
890                                                        uint8* target) {
891  if (value < 0x80) {
892    *target = value;
893    return target + 1;
894  } else {
895    return WriteVarint32FallbackToArray(value, target);
896  }
897}
898
899inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
900  if (value < 0) {
901    WriteVarint64(static_cast<uint64>(value));
902  } else {
903    WriteVarint32(static_cast<uint32>(value));
904  }
905}
906
907inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
908    int32 value, uint8* target) {
909  if (value < 0) {
910    return WriteVarint64ToArray(static_cast<uint64>(value), target);
911  } else {
912    return WriteVarint32ToArray(static_cast<uint32>(value), target);
913  }
914}
915
916inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
917                                                            uint8* target) {
918#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
919    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
920  memcpy(target, &value, sizeof(value));
921#else
922  target[0] = static_cast<uint8>(value);
923  target[1] = static_cast<uint8>(value >>  8);
924  target[2] = static_cast<uint8>(value >> 16);
925  target[3] = static_cast<uint8>(value >> 24);
926#endif
927  return target + sizeof(value);
928}
929
930inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
931                                                            uint8* target) {
932#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
933    defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
934  memcpy(target, &value, sizeof(value));
935#else
936  uint32 part0 = static_cast<uint32>(value);
937  uint32 part1 = static_cast<uint32>(value >> 32);
938
939  target[0] = static_cast<uint8>(part0);
940  target[1] = static_cast<uint8>(part0 >>  8);
941  target[2] = static_cast<uint8>(part0 >> 16);
942  target[3] = static_cast<uint8>(part0 >> 24);
943  target[4] = static_cast<uint8>(part1);
944  target[5] = static_cast<uint8>(part1 >>  8);
945  target[6] = static_cast<uint8>(part1 >> 16);
946  target[7] = static_cast<uint8>(part1 >> 24);
947#endif
948  return target + sizeof(value);
949}
950
951inline void CodedOutputStream::WriteTag(uint32 value) {
952  WriteVarint32(value);
953}
954
955inline uint8* CodedOutputStream::WriteTagToArray(
956    uint32 value, uint8* target) {
957  if (value < (1 << 7)) {
958    target[0] = value;
959    return target + 1;
960  } else if (value < (1 << 14)) {
961    target[0] = static_cast<uint8>(value | 0x80);
962    target[1] = static_cast<uint8>(value >> 7);
963    return target + 2;
964  } else {
965    return WriteVarint32FallbackToArray(value, target);
966  }
967}
968
969inline int CodedOutputStream::VarintSize32(uint32 value) {
970  if (value < (1 << 7)) {
971    return 1;
972  } else  {
973    return VarintSize32Fallback(value);
974  }
975}
976
977inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
978  if (value < 0) {
979    return 10;     // TODO(kenton):  Make this a symbolic constant.
980  } else {
981    return VarintSize32(static_cast<uint32>(value));
982  }
983}
984
985inline void CodedOutputStream::WriteString(const string& str) {
986  WriteRaw(str.data(), str.size());
987}
988
989inline uint8* CodedOutputStream::WriteStringToArray(
990    const string& str, uint8* target) {
991  return WriteRawToArray(str.data(), str.size(), target);
992}
993
994inline int CodedOutputStream::ByteCount() const {
995  return total_bytes_ - buffer_size_;
996}
997
998inline void CodedInputStream::Advance(int amount) {
999  buffer_ += amount;
1000}
1001
1002inline void CodedOutputStream::Advance(int amount) {
1003  buffer_ += amount;
1004  buffer_size_ -= amount;
1005}
1006
1007inline void CodedInputStream::SetRecursionLimit(int limit) {
1008  recursion_limit_ = limit;
1009}
1010
1011inline bool CodedInputStream::IncrementRecursionDepth() {
1012  ++recursion_depth_;
1013  return recursion_depth_ <= recursion_limit_;
1014}
1015
1016inline void CodedInputStream::DecrementRecursionDepth() {
1017  if (recursion_depth_ > 0) --recursion_depth_;
1018}
1019
1020inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool,
1021                                                   MessageFactory* factory) {
1022  extension_pool_ = pool;
1023  extension_factory_ = factory;
1024}
1025
1026inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1027  return extension_pool_;
1028}
1029
1030inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1031  return extension_factory_;
1032}
1033
1034inline uint32 CodedInputStream::BufferSize() const {
1035  return buffer_end_ - buffer_;
1036}
1037
1038inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
1039  : input_(input),
1040    buffer_(NULL),
1041    buffer_end_(NULL),
1042    total_bytes_read_(0),
1043    overflow_bytes_(0),
1044    last_tag_(0),
1045    legitimate_message_end_(false),
1046    aliasing_enabled_(false),
1047    current_limit_(INT_MAX),
1048    buffer_size_after_limit_(0),
1049    total_bytes_limit_(kDefaultTotalBytesLimit),
1050    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
1051    recursion_depth_(0),
1052    recursion_limit_(kDefaultRecursionLimit),
1053    extension_pool_(NULL),
1054    extension_factory_(NULL) {
1055  // Eagerly Refresh() so buffer space is immediately available.
1056  Refresh();
1057}
1058
1059inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
1060  : input_(NULL),
1061    buffer_(buffer),
1062    buffer_end_(buffer + size),
1063    total_bytes_read_(size),
1064    overflow_bytes_(0),
1065    last_tag_(0),
1066    legitimate_message_end_(false),
1067    aliasing_enabled_(false),
1068    current_limit_(size),
1069    buffer_size_after_limit_(0),
1070    total_bytes_limit_(kDefaultTotalBytesLimit),
1071    total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
1072    recursion_depth_(0),
1073    recursion_limit_(kDefaultRecursionLimit),
1074    extension_pool_(NULL),
1075    extension_factory_(NULL) {
1076  // Note that setting current_limit_ == size is important to prevent some
1077  // code paths from trying to access input_ and segfaulting.
1078}
1079
1080inline CodedInputStream::~CodedInputStream() {
1081  if (input_ != NULL) {
1082    BackUpInputToCurrentPosition();
1083  }
1084}
1085
1086}  // namespace io
1087}  // namespace protobuf
1088
1089}  // namespace google
1090#endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1091