1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc.  All rights reserved.
3# http://code.google.com/p/protobuf/
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Code for decoding protocol buffer primitives.
32
33This code is very similar to encoder.py -- read the docs for that module first.
34
35A "decoder" is a function with the signature:
36  Decode(buffer, pos, end, message, field_dict)
37The arguments are:
38  buffer:     The string containing the encoded message.
39  pos:        The current position in the string.
40  end:        The position in the string where the current message ends.  May be
41              less than len(buffer) if we're reading a sub-message.
42  message:    The message object into which we're parsing.
43  field_dict: message._fields (avoids a hashtable lookup).
44The decoder reads the field and stores it into field_dict, returning the new
45buffer position.  A decoder for a repeated field may proactively decode all of
46the elements of that field, if they appear consecutively.
47
48Note that decoders may throw any of the following:
49  IndexError:  Indicates a truncated message.
50  struct.error:  Unpacking of a fixed-width field failed.
51  message.DecodeError:  Other errors.
52
53Decoders are expected to raise an exception if they are called with pos > end.
This allows callers to be lax about bounds checking:  it's fine to read past
55"end" as long as you are sure that someone else will notice and throw an
56exception later on.
57
58Something up the call stack is expected to catch IndexError and struct.error
59and convert them to message.DecodeError.
60
61Decoders are constructed using decoder constructors with the signature:
62  MakeDecoder(field_number, is_repeated, is_packed, key, new_default)
63The arguments are:
64  field_number:  The field number of the field we want to decode.
65  is_repeated:   Is the field a repeated field? (bool)
66  is_packed:     Is the field a packed field? (bool)
67  key:           The key to use when looking up the field within field_dict.
68                 (This is actually the FieldDescriptor but nothing in this
69                 file should depend on that.)
70  new_default:   A function which takes a message object as a parameter and
71                 returns a new instance of the default value for this field.
72                 (This is called for repeated fields and sub-messages, when an
73                 instance does not already exist.)
74
75As with encoders, we define a decoder constructor for every type of field.
76Then, for every field of every message class we construct an actual decoder.
77That decoder goes into a dict indexed by tag, so when we decode a message
78we repeatedly read a tag, look up the corresponding decoder, and invoke it.
79"""
80
81__author__ = 'kenton@google.com (Kenton Varda)'
82
83import struct
84from google.protobuf.internal import encoder
85from google.protobuf.internal import wire_format
86from google.protobuf import message
87
88
# This will overflow and thus become IEEE-754 "infinity".  We would use
# "float('inf')" but it doesn't work on Windows pre-Python-2.6.
_POS_INF = 1e10000
# Negative infinity, obtained by negating the positive one.
_NEG_INF = -_POS_INF
# Multiplying infinity by zero yields IEEE-754 not-a-number.
_NAN = _POS_INF * 0
94
95
# This is not for optimization, but rather to avoid conflicts with local
# variables named "message" (the decoder functions in this module all take a
# parameter with that name, which hides the imported module inside them).
_DecodeError = message.DecodeError
99
100
101def _VarintDecoder(mask):
102  """Return an encoder for a basic varint value (does not include tag).
103
104  Decoded values will be bitwise-anded with the given mask before being
105  returned, e.g. to limit them to 32 bits.  The returned decoder does not
106  take the usual "end" parameter -- the caller is expected to do bounds checking
107  after the fact (often the caller can defer such checking until later).  The
108  decoder returns a (value, new_pos) pair.
109  """
110
111  local_ord = ord
112  def DecodeVarint(buffer, pos):
113    result = 0
114    shift = 0
115    while 1:
116      b = local_ord(buffer[pos])
117      result |= ((b & 0x7f) << shift)
118      pos += 1
119      if not (b & 0x80):
120        result &= mask
121        return (result, pos)
122      shift += 7
123      if shift >= 64:
124        raise _DecodeError('Too many bytes when decoding varint.')
125  return DecodeVarint
126
127
128def _SignedVarintDecoder(mask):
129  """Like _VarintDecoder() but decodes signed values."""
130
131  local_ord = ord
132  def DecodeVarint(buffer, pos):
133    result = 0
134    shift = 0
135    while 1:
136      b = local_ord(buffer[pos])
137      result |= ((b & 0x7f) << shift)
138      pos += 1
139      if not (b & 0x80):
140        if result > 0x7fffffffffffffff:
141          result -= (1 << 64)
142          result |= ~mask
143        else:
144          result &= mask
145        return (result, pos)
146      shift += 7
147      if shift >= 64:
148        raise _DecodeError('Too many bytes when decoding varint.')
149  return DecodeVarint
150
151
# Varint decoders whose results are masked to 64 bits, unsigned and signed.
_DecodeVarint = _VarintDecoder((1 << 64) - 1)
_DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1)

# Use these versions for values which must be limited to 32 bits.
_DecodeVarint32 = _VarintDecoder((1 << 32) - 1)
_DecodeSignedVarint32 = _SignedVarintDecoder((1 << 32) - 1)
158
159
def ReadTag(buffer, pos):
  """Read a tag from the buffer, and return a (tag_bytes, new_pos) tuple.

  The tag is returned as its raw bytes, not as a decoded integer, so that the
  caller can use those bytes directly as a key when looking up a decoder.
  Decoding a varint is pure-Python work, whereas a hash-table lookup on a byte
  string happens in C; in a low-level language decoding would be the cheaper
  choice, but in Python the lookup wins.
  """

  first = pos
  while True:
    continues = ord(buffer[pos]) & 0x80
    pos += 1
    if not continues:
      # That was the last byte of the tag's varint.
      break
  return (buffer[first:pos], pos)
176
177
178# --------------------------------------------------------------------
179
180
181def _SimpleDecoder(wire_type, decode_value):
182  """Return a constructor for a decoder for fields of a particular type.
183
184  Args:
185      wire_type:  The field's wire type.
186      decode_value:  A function which decodes an individual value, e.g.
187        _DecodeVarint()
188  """
189
190  def SpecificDecoder(field_number, is_repeated, is_packed, key, new_default):
191    if is_packed:
192      local_DecodeVarint = _DecodeVarint
193      def DecodePackedField(buffer, pos, end, message, field_dict):
194        value = field_dict.get(key)
195        if value is None:
196          value = field_dict.setdefault(key, new_default(message))
197        (endpoint, pos) = local_DecodeVarint(buffer, pos)
198        endpoint += pos
199        if endpoint > end:
200          raise _DecodeError('Truncated message.')
201        while pos < endpoint:
202          (element, pos) = decode_value(buffer, pos)
203          value.append(element)
204        if pos > endpoint:
205          del value[-1]   # Discard corrupt value.
206          raise _DecodeError('Packed element was truncated.')
207        return pos
208      return DecodePackedField
209    elif is_repeated:
210      tag_bytes = encoder.TagBytes(field_number, wire_type)
211      tag_len = len(tag_bytes)
212      def DecodeRepeatedField(buffer, pos, end, message, field_dict):
213        value = field_dict.get(key)
214        if value is None:
215          value = field_dict.setdefault(key, new_default(message))
216        while 1:
217          (element, new_pos) = decode_value(buffer, pos)
218          value.append(element)
219          # Predict that the next tag is another copy of the same repeated
220          # field.
221          pos = new_pos + tag_len
222          if buffer[new_pos:pos] != tag_bytes or new_pos >= end:
223            # Prediction failed.  Return.
224            if new_pos > end:
225              raise _DecodeError('Truncated message.')
226            return new_pos
227      return DecodeRepeatedField
228    else:
229      def DecodeField(buffer, pos, end, message, field_dict):
230        (field_dict[key], pos) = decode_value(buffer, pos)
231        if pos > end:
232          del field_dict[key]  # Discard corrupt value.
233          raise _DecodeError('Truncated message.')
234        return pos
235      return DecodeField
236
237  return SpecificDecoder
238
239
def _ModifiedDecoder(wire_type, decode_value, modify_value):
  """Like SimpleDecoder but additionally invokes modify_value on every value
  before storing it.  Usually modify_value is ZigZagDecode.

  Args:
      wire_type:  The field's wire type.
      decode_value:  A function decoding one raw value; called as
        decode_value(buffer, pos) and returning (value, new_pos).
      modify_value:  A one-argument function applied to each raw value.
  """

  # Delegating to _SimpleDecoder costs a little speed compared with
  # duplicating its code here, but the difference is not significant.

  def DecodeAndModify(buffer, pos):
    raw, next_pos = decode_value(buffer, pos)
    converted = modify_value(raw)
    return (converted, next_pos)

  return _SimpleDecoder(wire_type, DecodeAndModify)
252
253
def _StructPackDecoder(wire_type, format):
  """Return a constructor for a decoder for a fixed-width field.

  Args:
      wire_type:  The field's wire type.
      format:  The format string to pass to struct.unpack().
  """

  unpack = struct.unpack
  width = struct.calcsize(format)

  # Delegating to _SimpleDecoder costs a little speed compared with
  # duplicating its code here, but the difference is not significant.

  # struct.error is deliberately not caught here: a caller further up the
  # stack is expected to turn it into _DecodeError, which spares us an
  # exception-handling block around every single value.

  def DecodeFixedWidth(buffer, pos):
    stop = pos + width
    fields = unpack(format, buffer[pos:stop])
    return (fields[0], stop)

  return _SimpleDecoder(wire_type, DecodeFixedWidth)
277
278
def _FloatDecoder():
  """Returns a decoder for a float field.

  This code works around a bug in struct.unpack for non-finite 32-bit
  floating-point values.
  """

  unpack = struct.unpack

  def DecodeFloat(buffer, pos):
    # The value is 32 bits, little-endian: the sign is bit 1, the exponent
    # occupies bits 2-9 and the significand bits 10-32.
    stop = pos + 4
    raw = buffer[pos:stop]

    # All exponent bits set means the value is non-finite.  Python 2.4's
    # struct.unpack would turn that into a finite 64-bit value, so such
    # inputs are handled by hand below.
    exponent_all_ones = (raw[3] in '\x7F\xFF') and (raw[2] >= '\x80')
    if not exponent_all_ones:
      # struct.error is deliberately not caught here: a caller further up
      # the stack is expected to turn it into _DecodeError.
      return (unpack('<f', raw)[0], stop)

    if raw[0:3] != '\x00\x00\x80':
      # Some significand bit is set, so this is not-a-number.
      return (_NAN, stop)
    if raw[3] == '\xFF':
      # Sign bit set.
      return (_NEG_INF, stop)
    return (_POS_INF, stop)

  return _SimpleDecoder(wire_format.WIRETYPE_FIXED32, DecodeFloat)
313
314
def _DoubleDecoder():
  """Returns a decoder for a double field.

  This code works around a bug in struct.unpack for not-a-number.
  """

  unpack = struct.unpack

  def DecodeDouble(buffer, pos):
    # The value is 64 bits, little-endian: the sign is bit 1, the exponent
    # occupies bits 2-12 and the significand bits 13-64.
    stop = pos + 8
    raw = buffer[pos:stop]

    # Not-a-number has every exponent bit set plus at least one significand
    # bit.  Python 2.4's struct.unpack would report it as inf or -inf, so it
    # is detected by hand.
    is_nan = ((raw[7] in '\x7F\xFF')
              and (raw[6] >= '\xF0')
              and (raw[0:7] != '\x00\x00\x00\x00\x00\x00\xF0'))
    if is_nan:
      return (_NAN, stop)

    # struct.error is deliberately not caught here: a caller further up the
    # stack is expected to turn it into _DecodeError.
    return (unpack('<d', raw)[0], stop)

  return _SimpleDecoder(wire_format.WIRETYPE_FIXED64, DecodeDouble)
343
344
345# --------------------------------------------------------------------
346
347
# Decoder constructors for the varint-encoded integer types.  Enums share the
# signed 32-bit varint decoder with int32.
Int32Decoder = EnumDecoder = _SimpleDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeSignedVarint32)

Int64Decoder = _SimpleDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeSignedVarint)

UInt32Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint32)
UInt64Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint)

# The sint types are read as unsigned varints and then passed through
# wire_format.ZigZagDecode.
SInt32Decoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint32, wire_format.ZigZagDecode)
SInt64Decoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint, wire_format.ZigZagDecode)

# Note that Python conveniently guarantees that when using the '<' prefix on
# formats, they will also have the same size across all platforms (as opposed
# to without the prefix, where their sizes depend on the C compiler's basic
# type sizes).
Fixed32Decoder  = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<I')
Fixed64Decoder  = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<Q')
SFixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<i')
SFixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<q')
# float and double use dedicated decoders that special-case non-finite values.
FloatDecoder = _FloatDecoder()
DoubleDecoder = _DoubleDecoder()

# Bools are read as varints and converted with bool().
BoolDecoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint, bool)
375
376
def StringDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a string field.

  Each value is a varint length followed by that many bytes, which are
  decoded as UTF-8 into a unicode object.  Packed string fields are not
  supported (asserted).  The returned decoder raises
  _DecodeError('Truncated string.') when a value would extend past `end`.
  """

  read_length = _DecodeVarint
  to_unicode = unicode

  assert not is_packed
  if not is_repeated:
    def DecodeField(buffer, pos, end, message, field_dict):
      (length, start) = read_length(buffer, pos)
      stop = start + length
      if stop > end:
        raise _DecodeError('Truncated string.')
      field_dict[key] = to_unicode(buffer[start:stop], 'utf-8')
      return stop
    return DecodeField

  tag_bytes = encoder.TagBytes(field_number,
                               wire_format.WIRETYPE_LENGTH_DELIMITED)
  tag_len = len(tag_bytes)
  def DecodeRepeatedField(buffer, pos, end, message, field_dict):
    strings = field_dict.get(key)
    if strings is None:
      strings = field_dict.setdefault(key, new_default(message))
    while 1:
      (length, start) = read_length(buffer, pos)
      stop = start + length
      if stop > end:
        raise _DecodeError('Truncated string.')
      strings.append(to_unicode(buffer[start:stop], 'utf-8'))
      # Guess that the same field's tag comes next; if so, decode another
      # element right away.
      pos = stop + tag_len
      if buffer[stop:pos] == tag_bytes and stop != end:
        continue
      # The guess was wrong; hand control back to the caller.
      return stop
  return DecodeRepeatedField
413
414
def BytesDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a bytes field.

  Each value is a varint length followed by that many bytes, which are stored
  as the corresponding slice of the buffer.  Packed bytes fields are not
  supported (asserted).  The returned decoder raises
  _DecodeError('Truncated string.') when a value would extend past `end`.
  """

  read_length = _DecodeVarint

  assert not is_packed
  if not is_repeated:
    def DecodeField(buffer, pos, end, message, field_dict):
      (length, start) = read_length(buffer, pos)
      stop = start + length
      if stop > end:
        raise _DecodeError('Truncated string.')
      field_dict[key] = buffer[start:stop]
      return stop
    return DecodeField

  tag_bytes = encoder.TagBytes(field_number,
                               wire_format.WIRETYPE_LENGTH_DELIMITED)
  tag_len = len(tag_bytes)
  def DecodeRepeatedField(buffer, pos, end, message, field_dict):
    chunks = field_dict.get(key)
    if chunks is None:
      chunks = field_dict.setdefault(key, new_default(message))
    while 1:
      (length, start) = read_length(buffer, pos)
      stop = start + length
      if stop > end:
        raise _DecodeError('Truncated string.')
      chunks.append(buffer[start:stop])
      # Guess that the same field's tag comes next; if so, decode another
      # element right away.
      pos = stop + tag_len
      if buffer[stop:pos] == tag_bytes and stop != end:
        continue
      # The guess was wrong; hand control back to the caller.
      return stop
  return DecodeRepeatedField
450
451
def GroupDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a group field.

  A group carries no length prefix: the sub-message's _InternalParse() is run
  from the current position and the end-group tag for `field_number` must
  follow immediately where it stops.

  Args:
    field_number:  The field number of the group.
    is_repeated:   Whether the field is repeated.
    is_packed:     Must be false; groups cannot be packed (asserted).
    key:           The key under which the value lives in field_dict.
    new_default:   Called with the message to create the value (the container
                   for repeated fields, the sub-message otherwise) when
                   field_dict has no entry for `key` yet.

  Returns:
    A decoder with the signature (buffer, pos, end, message, field_dict)
    returning the new position.  It raises
    _DecodeError('Missing group end tag.') when the expected end tag is absent
    or would extend past `end`.
  """

  end_tag_bytes = encoder.TagBytes(field_number,
                                   wire_format.WIRETYPE_END_GROUP)
  end_tag_len = len(end_tag_bytes)

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_START_GROUP)
    tag_len = len(tag_bytes)
    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      # Look the container up once, before the loop, as the other repeated
      # decoders in this module do.
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        # Read sub-message.
        pos = value.add()._InternalParse(buffer, pos, end)
        # Read end tag.
        new_pos = pos + end_tag_len
        if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
          raise _DecodeError('Missing group end tag.')
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      # Read sub-message.
      pos = value._InternalParse(buffer, pos, end)
      # Read end tag.
      new_pos = pos + end_tag_len
      if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
        raise _DecodeError('Missing group end tag.')
      return new_pos
    return DecodeField
497
498
def MessageDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a message field.

  Each sub-message is a varint length followed by that many bytes, which are
  handed to the sub-message's _InternalParse() with the sub-message's own end
  position.

  Args:
    field_number:  The field number of the message field.
    is_repeated:   Whether the field is repeated.
    is_packed:     Must be false; messages cannot be packed (asserted).
    key:           The key under which the value lives in field_dict.
    new_default:   Called with the message to create the value (the container
                   for repeated fields, the sub-message otherwise) when
                   field_dict has no entry for `key` yet.

  Returns:
    A decoder with the signature (buffer, pos, end, message, field_dict)
    returning the new position.  It raises _DecodeError('Truncated message.')
    when the declared length extends past `end`, and
    _DecodeError('Unexpected end-group tag.') when the sub-message parse stops
    before the declared length is consumed.
  """

  local_DecodeVarint = _DecodeVarint

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_LENGTH_DELIMITED)
    tag_len = len(tag_bytes)
    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      # Look the container up once, before the loop, as the other repeated
      # decoders in this module do.
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        # Read length.
        (size, pos) = local_DecodeVarint(buffer, pos)
        new_pos = pos + size
        if new_pos > end:
          raise _DecodeError('Truncated message.')
        # Read sub-message.
        if value.add()._InternalParse(buffer, pos, new_pos) != new_pos:
          # The only reason _InternalParse would return early is if it
          # encountered an end-group tag.
          raise _DecodeError('Unexpected end-group tag.')
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      # Read length.
      (size, pos) = local_DecodeVarint(buffer, pos)
      new_pos = pos + size
      if new_pos > end:
        raise _DecodeError('Truncated message.')
      # Read sub-message.
      if value._InternalParse(buffer, pos, new_pos) != new_pos:
        # The only reason _InternalParse would return early is if it encountered
        # an end-group tag.
        raise _DecodeError('Unexpected end-group tag.')
      return new_pos
    return DecodeField
550
551
552# --------------------------------------------------------------------
553
# Encoded tag for field number 1 with the start-group wire type.  The
# MessageSet item decoder below stores unrecognized items in
# message._unknown_fields under this tag.
MESSAGE_SET_ITEM_TAG = encoder.TagBytes(1, wire_format.WIRETYPE_START_GROUP)
555
def MessageSetItemDecoder(extensions_by_number):
  """Returns a decoder for a MessageSet item.

  The parameter is the _extensions_by_number map for the message class.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }

  Returns:
    A decoder with the signature (buffer, pos, end, message, field_dict).  It
    is entered just after the item's start-group tag and returns the position
    just after the item's end-group tag.  When type_id maps to a known
    extension the payload is parsed into that extension's message; otherwise
    the raw item bytes are appended to message._unknown_fields.  It raises
    _DecodeError for a truncated item, a missing type_id or message, or a
    stray end-group tag.
  """

  type_id_tag_bytes = encoder.TagBytes(2, wire_format.WIRETYPE_VARINT)
  message_tag_bytes = encoder.TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)
  item_end_tag_bytes = encoder.TagBytes(1, wire_format.WIRETYPE_END_GROUP)

  local_ReadTag = ReadTag
  local_DecodeVarint = _DecodeVarint
  local_SkipField = SkipField

  def DecodeItem(buffer, pos, end, message, field_dict):
    message_set_item_start = pos
    # -1 marks "not seen yet" for each of the item's required parts.
    type_id = -1
    message_start = -1
    message_end = -1

    # Technically, type_id and message can appear in any order, so we need
    # a little loop here.
    while 1:
      (tag_bytes, pos) = local_ReadTag(buffer, pos)
      if tag_bytes == type_id_tag_bytes:
        (type_id, pos) = local_DecodeVarint(buffer, pos)
      elif tag_bytes == message_tag_bytes:
        # Only record where the payload lives; it is parsed after the loop,
        # once type_id is known.
        (size, message_start) = local_DecodeVarint(buffer, pos)
        pos = message_end = message_start + size
      elif tag_bytes == item_end_tag_bytes:
        break
      else:
        pos = local_SkipField(buffer, pos, end, tag_bytes)
        if pos == -1:
          raise _DecodeError('Missing group end tag.')

    # Bounds are checked once here instead of after every read above.
    if pos > end:
      raise _DecodeError('Truncated message.')

    if type_id == -1:
      raise _DecodeError('MessageSet item missing type_id.')
    if message_start == -1:
      raise _DecodeError('MessageSet item missing message.')

    extension = extensions_by_number.get(type_id)
    if extension is not None:
      value = field_dict.get(extension)
      if value is None:
        value = field_dict.setdefault(
            extension, extension.message_type._concrete_class())
      if value._InternalParse(buffer, message_start, message_end) != message_end:
        # The only reason _InternalParse would return early is if it encountered
        # an end-group tag.
        raise _DecodeError('Unexpected end-group tag.')
    else:
      # Unknown extension: keep the raw bytes of the whole item.
      if not message._unknown_fields:
        message._unknown_fields = []
      message._unknown_fields.append((MESSAGE_SET_ITEM_TAG,
                                      buffer[message_set_item_start:pos]))

    return pos

  return DecodeItem
627
628# --------------------------------------------------------------------
629# Optimization is not as heavy here because calls to SkipField() are rare,
630# except for handling end-group tags.
631
632def _SkipVarint(buffer, pos, end):
633  """Skip a varint value.  Returns the new position."""
634
635  while ord(buffer[pos]) & 0x80:
636    pos += 1
637  pos += 1
638  if pos > end:
639    raise _DecodeError('Truncated message.')
640  return pos
641
642def _SkipFixed64(buffer, pos, end):
643  """Skip a fixed64 value.  Returns the new position."""
644
645  pos += 8
646  if pos > end:
647    raise _DecodeError('Truncated message.')
648  return pos
649
def _SkipLengthDelimited(buffer, pos, end):
  """Skip a length-delimited value.  Returns the new position.

  Raises _DecodeError if the declared length extends past `end`.
  """

  (length, data_start) = _DecodeVarint(buffer, pos)
  data_end = data_start + length
  if data_end > end:
    raise _DecodeError('Truncated message.')
  return data_end
658
def _SkipGroup(buffer, pos, end):
  """Skip sub-group.  Returns the new position.

  Fields are skipped one after another until SkipField() reports an end-group
  tag (by returning -1); the position just after that tag is returned.
  """

  while 1:
    (tag_bytes, after_tag) = ReadTag(buffer, pos)
    pos = SkipField(buffer, after_tag, end, tag_bytes)
    if pos == -1:
      return after_tag
668
669def _EndGroup(buffer, pos, end):
670  """Skipping an END_GROUP tag returns -1 to tell the parent loop to break."""
671
672  return -1
673
674def _SkipFixed32(buffer, pos, end):
675  """Skip a fixed32 value.  Returns the new position."""
676
677  pos += 4
678  if pos > end:
679    raise _DecodeError('Truncated message.')
680  return pos
681
def _RaiseInvalidWireType(buffer, pos, end):
  """Skip function for unknown wire types.

  Never returns; always raises _DecodeError.
  """

  reason = 'Tag had invalid wire type.'
  raise _DecodeError(reason)
686
def _FieldSkipper():
  """Constructs the SkipField function."""

  # Indexed by wire type (the low bits of a tag's first byte).
  skippers_by_wire_type = [
      _SkipVarint,
      _SkipFixed64,
      _SkipLengthDelimited,
      _SkipGroup,
      _EndGroup,
      _SkipFixed32,
      _RaiseInvalidWireType,
      _RaiseInvalidWireType,
      ]

  type_mask = wire_format.TAG_TYPE_MASK
  byte_value = ord

  def SkipField(buffer, pos, end, tag_bytes):
    """Skips a field with the specified tag.

    |pos| should point to the byte immediately after the tag.

    Returns:
        The new position (after the tag value), or -1 if the tag is an end-group
        tag (in which case the calling loop should break).
    """

    # Varints are little-endian, so the wire type always sits in the tag's
    # first byte.
    skipper = skippers_by_wire_type[byte_value(tag_bytes[0]) & type_mask]
    return skipper(buffer, pos, end)

  return SkipField

SkipField = _FieldSkipper()
721