decoder.py revision fbaaef999ba563838ebd00874ed8a1c01fbf286d
1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc.  All rights reserved.
3# http://code.google.com/p/protobuf/
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Class for decoding protocol buffer primitives.
32
33Contains the logic for decoding every logical protocol field type
34from one of the 5 physical wire types.
35"""
36
37__author__ = 'robinson@google.com (Will Robinson)'
38
39import struct
40from google.protobuf import message
41from google.protobuf.internal import input_stream
42from google.protobuf.internal import wire_format
43
44
45
46# Note that much of this code is ported from //net/proto/ProtocolBuffer, and
47# that the interface is strongly inspired by WireFormat from the C++ proto2
48# implementation.
49
50
class Decoder(object):

  """Decodes logical protocol buffer fields from the wire.

  Every read is delegated to an input_stream.InputStream wrapped around the
  byte sequence handed to the constructor; this class only adds the
  per-field-type interpretation (sign conversion, zigzag decoding, float
  unpacking, length-delimited payloads, groups).
  """

  def __init__(self, s):
    """Initializes the decoder to read from s.

    Args:
      s: An immutable sequence of bytes, which must be accessible
        via the Python buffer() primitive (i.e., buffer(s)).
    """
    self._stream = input_stream.InputStream(s)

  def EndOfStream(self):
    """Returns true iff we've reached the end of the bytes we're reading."""
    return self._stream.EndOfStream()

  def Position(self):
    """Returns the 0-indexed position in |s|."""
    return self._stream.Position()

  def ReadFieldNumberAndWireType(self):
    """Reads a tag from the wire. Returns a (field_number, wire_type) pair.

    The tag is read as an unsigned 32-bit varint and split with
    wire_format.UnpackTag().
    """
    tag_and_type = self.ReadUInt32()
    return wire_format.UnpackTag(tag_and_type)

  def SkipBytes(self, bytes):
    """Skips the specified number of bytes on the wire.

    Args:
      bytes: Number of bytes to skip.  (The name shadows the builtin, but it
        is kept because it is part of the public signature.)
    """
    self._stream.SkipBytes(bytes)

  # Note that the Read*() methods below are not exactly symmetrical with the
  # corresponding Encoder.Append*() methods.  Those Encoder methods first
  # encode a tag, but the Read*() methods below assume that the tag has already
  # been read, and that the client wishes to read a field of the specified type
  # starting at the current position.

  def ReadInt32(self):
    """Reads and returns a signed, varint-encoded, 32-bit integer."""
    return self._stream.ReadVarint32()

  def ReadInt64(self):
    """Reads and returns a signed, varint-encoded, 64-bit integer."""
    return self._stream.ReadVarint64()

  def ReadUInt32(self):
    """Reads and returns an unsigned, varint-encoded, 32-bit integer."""
    return self._stream.ReadVarUInt32()

  def ReadUInt64(self):
    """Reads and returns an unsigned, varint-encoded, 64-bit integer."""
    return self._stream.ReadVarUInt64()

  def ReadSInt32(self):
    """Reads and returns a signed, zigzag-encoded, varint-encoded,
    32-bit integer."""
    # The wire carries the zigzag form as an unsigned varint; decode it back
    # to the signed value.
    return wire_format.ZigZagDecode(self._stream.ReadVarUInt32())

  def ReadSInt64(self):
    """Reads and returns a signed, zigzag-encoded, varint-encoded,
    64-bit integer."""
    return wire_format.ZigZagDecode(self._stream.ReadVarUInt64())

  def ReadFixed32(self):
    """Reads and returns an unsigned, fixed-width, 32-bit integer."""
    return self._stream.ReadLittleEndian32()

  def ReadFixed64(self):
    """Reads and returns an unsigned, fixed-width, 64-bit integer."""
    return self._stream.ReadLittleEndian64()

  def ReadSFixed32(self):
    """Reads and returns a signed, fixed-width, 32-bit integer."""
    value = self._stream.ReadLittleEndian32()
    # The stream hands back the raw unsigned value; anything with the top
    # bit set is reinterpreted as a negative two's-complement number.
    if value >= (1 << 31):
      value -= (1 << 32)
    return value

  def ReadSFixed64(self):
    """Reads and returns a signed, fixed-width, 64-bit integer."""
    value = self._stream.ReadLittleEndian64()
    # Same two's-complement reinterpretation as ReadSFixed32, for 64 bits.
    if value >= (1 << 63):
      value -= (1 << 64)
    return value

  def ReadFloat(self):
    """Reads and returns a 4-byte floating-point number.

    The 4 bytes are unpacked with wire_format.FORMAT_FLOAT_LITTLE_ENDIAN.
    """
    serialized = self._stream.ReadBytes(4)
    return struct.unpack(wire_format.FORMAT_FLOAT_LITTLE_ENDIAN, serialized)[0]

  def ReadDouble(self):
    """Reads and returns an 8-byte floating-point number.

    The 8 bytes are unpacked with wire_format.FORMAT_DOUBLE_LITTLE_ENDIAN.
    """
    serialized = self._stream.ReadBytes(8)
    return struct.unpack(wire_format.FORMAT_DOUBLE_LITTLE_ENDIAN, serialized)[0]

  def ReadBool(self):
    """Reads and returns a bool.

    The value is read as an unsigned 32-bit varint; any nonzero value
    yields True.
    """
    i = self._stream.ReadVarUInt32()
    return bool(i)

  def ReadEnum(self):
    """Reads and returns an enum value.

    The value is read as an unsigned 32-bit varint and returned as-is; no
    check is made here that it names a declared enum constant.
    """
    return self._stream.ReadVarUInt32()

  def ReadString(self):
    """Reads and returns a length-delimited string.

    Returns:
      The payload read by ReadBytes(), decoded from UTF-8 into a unicode
      object.
    """
    # Local deliberately not called "bytes" so the builtin is not shadowed.
    raw = self.ReadBytes()
    return unicode(raw, 'utf-8')

  def ReadBytes(self):
    """Reads and returns a length-delimited byte sequence.

    The length prefix is read as an unsigned 32-bit varint, then that many
    bytes are requested from the underlying stream.
    """
    length = self._stream.ReadVarUInt32()
    return self._stream.ReadBytes(length)

  def ReadMessageInto(self, msg):
    """Calls msg.MergeFromString() to merge
    length-delimited serialized message data into |msg|.

    REQUIRES: The decoder must be positioned at the serialized "length"
      prefix to a length-delimited serialized message.

    POSTCONDITION: The decoder is positioned just after the
      serialized message, and we have merged those serialized
      contents into |msg|.

    Args:
      msg: Object whose MergeFromString() accepts the sub-buffer and returns
        the number of bytes it consumed.

    Raises:
      message.DecodeError: If MergeFromString() reports a byte count
        different from the length prefix.
    """
    length = self._stream.ReadVarUInt32()
    sub_buffer = self._stream.GetSubBuffer(length)
    num_bytes_used = msg.MergeFromString(sub_buffer)
    if num_bytes_used != length:
      raise message.DecodeError(
          'Submessage told to deserialize from %d-byte encoding, '
          'but used only %d bytes' % (length, num_bytes_used))
    # The stream position is advanced explicitly past the merged payload.
    self._stream.SkipBytes(num_bytes_used)

  def ReadGroupInto(self, expected_field_number, group):
    """Calls group.MergeFromString() to merge
    END_GROUP-delimited serialized message data into |group|.
    We'll raise an exception if we don't find an END_GROUP
    tag immediately after the serialized message contents.

    REQUIRES: The decoder is positioned just after the START_GROUP
      tag for this group.

    POSTCONDITION: The decoder is positioned just after the
      END_GROUP tag for this group, and we have merged
      the contents of the group into |group|.

    Args:
      expected_field_number: Field number the closing END_GROUP tag must
        carry.
      group: Object whose MergeFromString() accepts the sub-buffer and
        returns the number of bytes it consumed.

    Raises:
      message.DecodeError: If MergeFromString() reports a negative byte
        count, if the tag following the group contents is not an END_GROUP
        tag, or if that tag's field number differs from
        expected_field_number.
    """
    sub_buffer = self._stream.GetSubBuffer()  # No a priori length limit.
    num_bytes_used = group.MergeFromString(sub_buffer)
    if num_bytes_used < 0:
      raise message.DecodeError('Group message reported negative bytes read.')
    self._stream.SkipBytes(num_bytes_used)
    field_number, field_type = self.ReadFieldNumberAndWireType()
    if field_type != wire_format.WIRETYPE_END_GROUP:
      raise message.DecodeError('Group message did not end with an END_GROUP.')
    if field_number != expected_field_number:
      raise message.DecodeError('END_GROUP tag had field '
                                'number %d, was expecting field number %d' % (
          field_number, expected_field_number))
    # We're now positioned just after the END_GROUP tag.  Perfect.
210