encoder.py revision fbaaef999ba563838ebd00874ed8a1c01fbf286d
1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc.  All rights reserved.
3# http://code.google.com/p/protobuf/
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Class for encoding protocol message primitives.
32
33Contains the logic for encoding every logical protocol field type
34into one of the 5 physical wire types.
35"""
36
37__author__ = 'robinson@google.com (Will Robinson)'
38
39import struct
40from google.protobuf import message
41from google.protobuf.internal import wire_format
42from google.protobuf.internal import output_stream
43
44
45# Note that much of this code is ported from //net/proto/ProtocolBuffer, and
46# that the interface is strongly inspired by WireFormat from the C++ proto2
47# implementation.
48
49
class Encoder(object):

  """Encodes logical protocol buffer fields to the wire format.

  Every value is appended to an internal output_stream.OutputStream; call
  ToString() to get everything encoded so far.

  Two families of methods are provided:

    * Append*NoTag(value) writes only the value.  These are needed for
      serializing packed repeated fields.
    * Append*(field_number, value) first writes a tag that packs the field
      number together with the wire type, and then writes the value.
  """

  def __init__(self):
    """Creates an encoder backed by a fresh, empty output stream."""
    self._stream = output_stream.OutputStream()

  def ToString(self):
    """Returns all values encoded in this object as a string."""
    return self._stream.ToString()

  # Append*NoTag methods.  These are necessary for serializing packed
  # repeated fields.  The Append*() methods call these methods to do
  # the actual serialization.
  def AppendInt32NoTag(self, value):
    """Appends a 32-bit integer to our buffer, varint-encoded.

    Args:
      value: The integer to append.
    """
    self._stream.AppendVarint32(value)

  def AppendInt64NoTag(self, value):
    """Appends a 64-bit integer to our buffer, varint-encoded.

    Args:
      value: The integer to append.
    """
    self._stream.AppendVarint64(value)

  def AppendUInt32NoTag(self, unsigned_value):
    """Appends an unsigned 32-bit integer to our buffer, varint-encoded.

    Args:
      unsigned_value: The unsigned integer to append.
    """
    self._stream.AppendVarUInt32(unsigned_value)

  def AppendUInt64NoTag(self, unsigned_value):
    """Appends an unsigned 64-bit integer to our buffer, varint-encoded.

    Args:
      unsigned_value: The unsigned integer to append.
    """
    self._stream.AppendVarUInt64(unsigned_value)

  def AppendSInt32NoTag(self, value):
    """Appends a 32-bit integer to our buffer, zigzag-encoded and then
    varint-encoded.

    Args:
      value: The signed integer to append.
    """
    # The zigzag-encoded value is written through the unsigned varint writer.
    zigzag_value = wire_format.ZigZagEncode(value)
    self._stream.AppendVarUInt32(zigzag_value)

  def AppendSInt64NoTag(self, value):
    """Appends a 64-bit integer to our buffer, zigzag-encoded and then
    varint-encoded.

    Args:
      value: The signed integer to append.
    """
    # The zigzag-encoded value is written through the unsigned varint writer.
    zigzag_value = wire_format.ZigZagEncode(value)
    self._stream.AppendVarUInt64(zigzag_value)

  def AppendFixed32NoTag(self, unsigned_value):
    """Appends an unsigned 32-bit integer to our buffer, in little-endian
    byte-order.

    Args:
      unsigned_value: The unsigned integer to append.
    """
    self._stream.AppendLittleEndian32(unsigned_value)

  def AppendFixed64NoTag(self, unsigned_value):
    """Appends an unsigned 64-bit integer to our buffer, in little-endian
    byte-order.

    Args:
      unsigned_value: The unsigned integer to append.
    """
    self._stream.AppendLittleEndian64(unsigned_value)

  def AppendSFixed32NoTag(self, value):
    """Appends a signed 32-bit integer to our buffer, in little-endian
    byte-order.

    Args:
      value: The signed integer to append; must lie in [-2**31, 2**31 - 1].

    Raises:
      message.EncodeError: If value does not fit in a signed 32-bit integer.
    """
    # Explicit range check: accepts exactly the values whose bits above bit 31
    # are a pure sign extension of bit 31.
    if not -0x80000000 <= value <= 0x7fffffff:
      raise message.EncodeError('SFixed32 out of range: %d' % value)
    # Masking yields the two's-complement bit pattern as an unsigned value,
    # which is what the little-endian writer is handed.
    self._stream.AppendLittleEndian32(value & 0xffffffff)

  def AppendSFixed64NoTag(self, value):
    """Appends a signed 64-bit integer to our buffer, in little-endian
    byte-order.

    Args:
      value: The signed integer to append; must lie in [-2**63, 2**63 - 1].

    Raises:
      message.EncodeError: If value does not fit in a signed 64-bit integer.
    """
    # Explicit range check: accepts exactly the values whose bits above bit 63
    # are a pure sign extension of bit 63.
    if not -0x8000000000000000 <= value <= 0x7fffffffffffffff:
      raise message.EncodeError('SFixed64 out of range: %d' % value)
    # Masking yields the two's-complement bit pattern as an unsigned value,
    # which is what the little-endian writer is handed.
    self._stream.AppendLittleEndian64(value & 0xffffffffffffffff)

  def AppendFloatNoTag(self, value):
    """Appends a floating-point number to our buffer.

    Args:
      value: The number to append; it is packed with
        wire_format.FORMAT_FLOAT_LITTLE_ENDIAN.
    """
    packed = struct.pack(wire_format.FORMAT_FLOAT_LITTLE_ENDIAN, value)
    self._stream.AppendRawBytes(packed)

  def AppendDoubleNoTag(self, value):
    """Appends a double-precision floating-point number to our buffer.

    Args:
      value: The number to append; it is packed with
        wire_format.FORMAT_DOUBLE_LITTLE_ENDIAN.
    """
    packed = struct.pack(wire_format.FORMAT_DOUBLE_LITTLE_ENDIAN, value)
    self._stream.AppendRawBytes(packed)

  def AppendBoolNoTag(self, value):
    """Appends a boolean to our buffer.

    The value is written exactly as AppendInt32NoTag() would write it.

    Args:
      value: The boolean to append.
    """
    self.AppendInt32NoTag(value)

  def AppendEnumNoTag(self, value):
    """Appends an enum value to our buffer.

    The value is written exactly as AppendInt32NoTag() would write it.

    Args:
      value: The enum number to append.
    """
    self.AppendInt32NoTag(value)


  # All the Append*() methods below first append a tag+type pair to the buffer
  # before appending the specified value.

  def AppendInt32(self, field_number, value):
    """Appends a tagged 32-bit integer to our buffer, varint-encoded.

    Args:
      field_number: Field number to pack into the tag.
      value: The integer to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_VARINT)
    self.AppendInt32NoTag(value)

  def AppendInt64(self, field_number, value):
    """Appends a tagged 64-bit integer to our buffer, varint-encoded.

    Args:
      field_number: Field number to pack into the tag.
      value: The integer to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_VARINT)
    self.AppendInt64NoTag(value)

  def AppendUInt32(self, field_number, unsigned_value):
    """Appends a tagged unsigned 32-bit integer to our buffer, varint-encoded.

    Args:
      field_number: Field number to pack into the tag.
      unsigned_value: The unsigned integer to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_VARINT)
    self.AppendUInt32NoTag(unsigned_value)

  def AppendUInt64(self, field_number, unsigned_value):
    """Appends a tagged unsigned 64-bit integer to our buffer, varint-encoded.

    Args:
      field_number: Field number to pack into the tag.
      unsigned_value: The unsigned integer to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_VARINT)
    self.AppendUInt64NoTag(unsigned_value)

  def AppendSInt32(self, field_number, value):
    """Appends a tagged 32-bit integer to our buffer, zigzag-encoded and then
    varint-encoded.

    Args:
      field_number: Field number to pack into the tag.
      value: The signed integer to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_VARINT)
    self.AppendSInt32NoTag(value)

  def AppendSInt64(self, field_number, value):
    """Appends a tagged 64-bit integer to our buffer, zigzag-encoded and then
    varint-encoded.

    Args:
      field_number: Field number to pack into the tag.
      value: The signed integer to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_VARINT)
    self.AppendSInt64NoTag(value)

  def AppendFixed32(self, field_number, unsigned_value):
    """Appends a tagged unsigned 32-bit integer to our buffer, in little-endian
    byte-order.

    Args:
      field_number: Field number to pack into the tag.
      unsigned_value: The unsigned integer to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_FIXED32)
    self.AppendFixed32NoTag(unsigned_value)

  def AppendFixed64(self, field_number, unsigned_value):
    """Appends a tagged unsigned 64-bit integer to our buffer, in little-endian
    byte-order.

    Args:
      field_number: Field number to pack into the tag.
      unsigned_value: The unsigned integer to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_FIXED64)
    self.AppendFixed64NoTag(unsigned_value)

  def AppendSFixed32(self, field_number, value):
    """Appends a tagged signed 32-bit integer to our buffer, in little-endian
    byte-order.

    Note that the tag is written before the value is range-checked.

    Args:
      field_number: Field number to pack into the tag.
      value: The signed integer to append; must lie in [-2**31, 2**31 - 1].

    Raises:
      message.EncodeError: If value does not fit in a signed 32-bit integer.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_FIXED32)
    self.AppendSFixed32NoTag(value)

  def AppendSFixed64(self, field_number, value):
    """Appends a tagged signed 64-bit integer to our buffer, in little-endian
    byte-order.

    Note that the tag is written before the value is range-checked.

    Args:
      field_number: Field number to pack into the tag.
      value: The signed integer to append; must lie in [-2**63, 2**63 - 1].

    Raises:
      message.EncodeError: If value does not fit in a signed 64-bit integer.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_FIXED64)
    self.AppendSFixed64NoTag(value)

  def AppendFloat(self, field_number, value):
    """Appends a tagged floating-point number to our buffer.

    Args:
      field_number: Field number to pack into the tag.
      value: The number to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_FIXED32)
    self.AppendFloatNoTag(value)

  def AppendDouble(self, field_number, value):
    """Appends a tagged double-precision floating-point number to our buffer.

    Args:
      field_number: Field number to pack into the tag.
      value: The number to append.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_FIXED64)
    self.AppendDoubleNoTag(value)

  def AppendBool(self, field_number, value):
    """Appends a tagged boolean to our buffer.

    The field is written exactly as AppendInt32() would write it.

    Args:
      field_number: Field number to pack into the tag.
      value: The boolean to append.
    """
    self.AppendInt32(field_number, value)

  def AppendEnum(self, field_number, value):
    """Appends a tagged enum value to our buffer.

    The field is written exactly as AppendInt32() would write it.

    Args:
      field_number: Field number to pack into the tag.
      value: The enum number to append.
    """
    self.AppendInt32(field_number, value)

  def AppendString(self, field_number, value):
    """Appends a length-prefixed unicode string, encoded as UTF-8 to our buffer,
    with the length varint-encoded.

    Args:
      field_number: Field number to pack into the tag.
      value: The string to append; it is encoded with value.encode('utf-8')
        and the result is written via AppendBytes().
    """
    self.AppendBytes(field_number, value.encode('utf-8'))

  def AppendBytes(self, field_number, value):
    """Appends a length-prefixed sequence of bytes to our buffer, with the
    length varint-encoded.

    Args:
      field_number: Field number to pack into the tag.
      value: The raw bytes to append; len(value) is written as the prefix.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
    self._stream.AppendVarUInt32(len(value))
    self._stream.AppendRawBytes(value)

  # TODO(robinson): For AppendGroup() and AppendMessage(), we'd really like to
  # avoid the extra string copy here.  We can do so if we widen the Message
  # interface to be able to serialize to a stream in addition to a string.  The
  # challenge when thinking ahead to the Python/C API implementation of Message
  # is finding a stream-like Python thing to which we can write raw bytes
  # from C.  I'm not sure such a thing exists(?).  (array.array is pretty much
  # what we want, but it's not directly exposed in the Python/C API).

  def AppendGroup(self, field_number, group):
    """Appends a group to our buffer.

    The serialized group is bracketed by a start-group tag and an end-group
    tag that both carry field_number; no length prefix is written.

    Args:
      field_number: Field number to pack into both tags.
      group: Object whose SerializeToString() result is appended.
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_START_GROUP)
    self._stream.AppendRawBytes(group.SerializeToString())
    self.AppendTag(field_number, wire_format.WIRETYPE_END_GROUP)

  def AppendMessage(self, field_number, msg):
    """Appends a nested message to our buffer.

    The message is written as a length-delimited field: the tag, then
    msg.ByteSize() varint-encoded, then msg.SerializeToString().

    Args:
      field_number: Field number to pack into the tag.
      msg: Object providing ByteSize() and SerializeToString().
    """
    self.AppendTag(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
    self._stream.AppendVarUInt32(msg.ByteSize())
    self._stream.AppendRawBytes(msg.SerializeToString())

  def AppendMessageSetItem(self, field_number, msg):
    """Appends an item using the message set wire format.

    The message set message looks like this:
      message MessageSet {
        repeated group Item = 1 {
          required int32 type_id = 2;
          required string message = 3;
        }
      }

    Args:
      field_number: Written as the item's type_id (field 2).
      msg: Written as the item's message (field 3) via AppendMessage().
    """
    self.AppendTag(1, wire_format.WIRETYPE_START_GROUP)
    self.AppendInt32(2, field_number)
    self.AppendMessage(3, msg)
    self.AppendTag(1, wire_format.WIRETYPE_END_GROUP)

  def AppendTag(self, field_number, wire_type):
    """Appends a tag containing field number and wire type information.

    Args:
      field_number: Field number to pack into the tag.
      wire_type: One of the wire_format.WIRETYPE_* constants.
    """
    packed_tag = wire_format.PackTag(field_number, wire_type)
    self._stream.AppendVarUInt32(packed_tag)
281