1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Implements the DescriptorPool, which collects all descriptors.
32
33#include <Python.h>
34
35#include <google/protobuf/descriptor.pb.h>
36#include <google/protobuf/dynamic_message.h>
37#include <google/protobuf/pyext/descriptor.h>
38#include <google/protobuf/pyext/descriptor_database.h>
39#include <google/protobuf/pyext/descriptor_pool.h>
40#include <google/protobuf/pyext/message.h>
41#include <google/protobuf/pyext/scoped_pyobject_ptr.h>
42
// Python 3 compatibility shims for the Python 2 "PyString" API used below.
#if PY_MAJOR_VERSION >= 3
  #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
  #if PY_VERSION_HEX < 0x03030000
    #error "Python 3.0 - 3.2 are not supported."
  #endif
  // Accepts either a unicode object (exposed as UTF-8) or a bytes object.
  // Evaluates to -1 on failure and 0 on success for unicode input; for any
  // other input it forwards to PyBytes_AsStringAndSize().
  //
  // PyUnicode_AsUTF8AndSize() returns `const char*` starting with Python 3.7,
  // while callers pass a `char**`.  The const_cast keeps the assignment
  // well-formed on every supported version; callers must treat the buffer as
  // read-only.
  #define PyString_AsStringAndSize(ob, charpp, sizep) \
    (PyUnicode_Check(ob)? \
       ((*(charpp) = const_cast<char*>( \
             PyUnicode_AsUTF8AndSize(ob, (sizep)))) == NULL? -1: 0): \
       PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
#endif
53
54namespace google {
55namespace protobuf {
56namespace python {
57
58// A map to cache Python Pools per C++ pointer.
59// Pointers are not owned here, and belong to the PyDescriptorPool.
60static hash_map<const DescriptorPool*, PyDescriptorPool*> descriptor_pool_map;
61
62namespace cdescriptor_pool {
63
64// Create a Python DescriptorPool object, but does not fill the "pool"
65// attribute.
66static PyDescriptorPool* _CreateDescriptorPool() {
67  PyDescriptorPool* cpool = PyObject_New(
68      PyDescriptorPool, &PyDescriptorPool_Type);
69  if (cpool == NULL) {
70    return NULL;
71  }
72
73  cpool->underlay = NULL;
74  cpool->database = NULL;
75
76  DynamicMessageFactory* message_factory = new DynamicMessageFactory();
77  // This option might be the default some day.
78  message_factory->SetDelegateToGeneratedFactory(true);
79  cpool->message_factory = message_factory;
80
81  // TODO(amauryfa): Rewrite the SymbolDatabase in C so that it uses the same
82  // storage.
83  cpool->classes_by_descriptor =
84      new PyDescriptorPool::ClassesByMessageMap();
85  cpool->descriptor_options =
86      new hash_map<const void*, PyObject *>();
87
88  return cpool;
89}
90
91// Create a Python DescriptorPool, using the given pool as an underlay:
92// new messages will be added to a custom pool, not to the underlay.
93//
94// Ownership of the underlay is not transferred, its pointer should
95// stay alive.
96static PyDescriptorPool* PyDescriptorPool_NewWithUnderlay(
97    const DescriptorPool* underlay) {
98  PyDescriptorPool* cpool = _CreateDescriptorPool();
99  if (cpool == NULL) {
100    return NULL;
101  }
102  cpool->pool = new DescriptorPool(underlay);
103  cpool->underlay = underlay;
104
105  if (!descriptor_pool_map.insert(
106      std::make_pair(cpool->pool, cpool)).second) {
107    // Should never happen -- would indicate an internal error / bug.
108    PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered");
109    return NULL;
110  }
111
112  return cpool;
113}
114
115static PyDescriptorPool* PyDescriptorPool_NewWithDatabase(
116    DescriptorDatabase* database) {
117  PyDescriptorPool* cpool = _CreateDescriptorPool();
118  if (cpool == NULL) {
119    return NULL;
120  }
121  if (database != NULL) {
122    cpool->pool = new DescriptorPool(database);
123    cpool->database = database;
124  } else {
125    cpool->pool = new DescriptorPool();
126  }
127
128  if (!descriptor_pool_map.insert(std::make_pair(cpool->pool, cpool)).second) {
129    // Should never happen -- would indicate an internal error / bug.
130    PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered");
131    return NULL;
132  }
133
134  return cpool;
135}
136
137// The public DescriptorPool constructor.
138static PyObject* New(PyTypeObject* type,
139                     PyObject* args, PyObject* kwargs) {
140  static char* kwlist[] = {"descriptor_db", 0};
141  PyObject* py_database = NULL;
142  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist, &py_database)) {
143    return NULL;
144  }
145  DescriptorDatabase* database = NULL;
146  if (py_database && py_database != Py_None) {
147    database = new PyDescriptorDatabase(py_database);
148  }
149  return reinterpret_cast<PyObject*>(
150      PyDescriptorPool_NewWithDatabase(database));
151}
152
153static void Dealloc(PyDescriptorPool* self) {
154  typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
155  descriptor_pool_map.erase(self->pool);
156  for (iterator it = self->classes_by_descriptor->begin();
157       it != self->classes_by_descriptor->end(); ++it) {
158    Py_DECREF(it->second);
159  }
160  delete self->classes_by_descriptor;
161  for (hash_map<const void*, PyObject*>::iterator it =
162           self->descriptor_options->begin();
163       it != self->descriptor_options->end(); ++it) {
164    Py_DECREF(it->second);
165  }
166  delete self->descriptor_options;
167  delete self->message_factory;
168  delete self->database;
169  delete self->pool;
170  Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
171}
172
173PyObject* FindMessageByName(PyDescriptorPool* self, PyObject* arg) {
174  Py_ssize_t name_size;
175  char* name;
176  if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
177    return NULL;
178  }
179
180  const Descriptor* message_descriptor =
181      self->pool->FindMessageTypeByName(string(name, name_size));
182
183  if (message_descriptor == NULL) {
184    PyErr_Format(PyExc_KeyError, "Couldn't find message %.200s", name);
185    return NULL;
186  }
187
188  return PyMessageDescriptor_FromDescriptor(message_descriptor);
189}
190
191// Add a message class to our database.
192int RegisterMessageClass(PyDescriptorPool* self,
193                         const Descriptor* message_descriptor,
194                         CMessageClass* message_class) {
195  Py_INCREF(message_class);
196  typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
197  std::pair<iterator, bool> ret = self->classes_by_descriptor->insert(
198      std::make_pair(message_descriptor, message_class));
199  if (!ret.second) {
200    // Update case: DECREF the previous value.
201    Py_DECREF(ret.first->second);
202    ret.first->second = message_class;
203  }
204  return 0;
205}
206
207// Retrieve the message class added to our database.
208CMessageClass* GetMessageClass(PyDescriptorPool* self,
209                               const Descriptor* message_descriptor) {
210  typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
211  iterator ret = self->classes_by_descriptor->find(message_descriptor);
212  if (ret == self->classes_by_descriptor->end()) {
213    PyErr_Format(PyExc_TypeError, "No message class registered for '%s'",
214                 message_descriptor->full_name().c_str());
215    return NULL;
216  } else {
217    return ret->second;
218  }
219}
220
221PyObject* FindFileByName(PyDescriptorPool* self, PyObject* arg) {
222  Py_ssize_t name_size;
223  char* name;
224  if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
225    return NULL;
226  }
227
228  const FileDescriptor* file_descriptor =
229      self->pool->FindFileByName(string(name, name_size));
230  if (file_descriptor == NULL) {
231    PyErr_Format(PyExc_KeyError, "Couldn't find file %.200s",
232                 name);
233    return NULL;
234  }
235
236  return PyFileDescriptor_FromDescriptor(file_descriptor);
237}
238
239PyObject* FindFieldByName(PyDescriptorPool* self, PyObject* arg) {
240  Py_ssize_t name_size;
241  char* name;
242  if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
243    return NULL;
244  }
245
246  const FieldDescriptor* field_descriptor =
247      self->pool->FindFieldByName(string(name, name_size));
248  if (field_descriptor == NULL) {
249    PyErr_Format(PyExc_KeyError, "Couldn't find field %.200s",
250                 name);
251    return NULL;
252  }
253
254  return PyFieldDescriptor_FromDescriptor(field_descriptor);
255}
256
257PyObject* FindExtensionByName(PyDescriptorPool* self, PyObject* arg) {
258  Py_ssize_t name_size;
259  char* name;
260  if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
261    return NULL;
262  }
263
264  const FieldDescriptor* field_descriptor =
265      self->pool->FindExtensionByName(string(name, name_size));
266  if (field_descriptor == NULL) {
267    PyErr_Format(PyExc_KeyError, "Couldn't find extension field %.200s", name);
268    return NULL;
269  }
270
271  return PyFieldDescriptor_FromDescriptor(field_descriptor);
272}
273
274PyObject* FindEnumTypeByName(PyDescriptorPool* self, PyObject* arg) {
275  Py_ssize_t name_size;
276  char* name;
277  if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
278    return NULL;
279  }
280
281  const EnumDescriptor* enum_descriptor =
282      self->pool->FindEnumTypeByName(string(name, name_size));
283  if (enum_descriptor == NULL) {
284    PyErr_Format(PyExc_KeyError, "Couldn't find enum %.200s", name);
285    return NULL;
286  }
287
288  return PyEnumDescriptor_FromDescriptor(enum_descriptor);
289}
290
291PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg) {
292  Py_ssize_t name_size;
293  char* name;
294  if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
295    return NULL;
296  }
297
298  const OneofDescriptor* oneof_descriptor =
299      self->pool->FindOneofByName(string(name, name_size));
300  if (oneof_descriptor == NULL) {
301    PyErr_Format(PyExc_KeyError, "Couldn't find oneof %.200s", name);
302    return NULL;
303  }
304
305  return PyOneofDescriptor_FromDescriptor(oneof_descriptor);
306}
307
308PyObject* FindFileContainingSymbol(PyDescriptorPool* self, PyObject* arg) {
309  Py_ssize_t name_size;
310  char* name;
311  if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
312    return NULL;
313  }
314
315  const FileDescriptor* file_descriptor =
316      self->pool->FindFileContainingSymbol(string(name, name_size));
317  if (file_descriptor == NULL) {
318    PyErr_Format(PyExc_KeyError, "Couldn't find symbol %.200s", name);
319    return NULL;
320  }
321
322  return PyFileDescriptor_FromDescriptor(file_descriptor);
323}
324
325// These functions should not exist -- the only valid way to create
326// descriptors is to call Add() or AddSerializedFile().
327// But these AddDescriptor() functions were created in Python and some people
328// call them, so we support them for now for compatibility.
// However, we do check that the given descriptor already exists in the pool,
330// which appears to always be true for existing calls -- but then why do people
331// call a function that will just be a no-op?
332// TODO(amauryfa): Need to investigate further.
333
334PyObject* AddFileDescriptor(PyDescriptorPool* self, PyObject* descriptor) {
335  const FileDescriptor* file_descriptor =
336      PyFileDescriptor_AsDescriptor(descriptor);
337  if (!file_descriptor) {
338    return NULL;
339  }
340  if (file_descriptor !=
341      self->pool->FindFileByName(file_descriptor->name())) {
342    PyErr_Format(PyExc_ValueError,
343                 "The file descriptor %s does not belong to this pool",
344                 file_descriptor->name().c_str());
345    return NULL;
346  }
347  Py_RETURN_NONE;
348}
349
350PyObject* AddDescriptor(PyDescriptorPool* self, PyObject* descriptor) {
351  const Descriptor* message_descriptor =
352      PyMessageDescriptor_AsDescriptor(descriptor);
353  if (!message_descriptor) {
354    return NULL;
355  }
356  if (message_descriptor !=
357      self->pool->FindMessageTypeByName(message_descriptor->full_name())) {
358    PyErr_Format(PyExc_ValueError,
359                 "The message descriptor %s does not belong to this pool",
360                 message_descriptor->full_name().c_str());
361    return NULL;
362  }
363  Py_RETURN_NONE;
364}
365
366PyObject* AddEnumDescriptor(PyDescriptorPool* self, PyObject* descriptor) {
367  const EnumDescriptor* enum_descriptor =
368      PyEnumDescriptor_AsDescriptor(descriptor);
369  if (!enum_descriptor) {
370    return NULL;
371  }
372  if (enum_descriptor !=
373      self->pool->FindEnumTypeByName(enum_descriptor->full_name())) {
374    PyErr_Format(PyExc_ValueError,
375                 "The enum descriptor %s does not belong to this pool",
376                 enum_descriptor->full_name().c_str());
377    return NULL;
378  }
379  Py_RETURN_NONE;
380}
381
382// The code below loads new Descriptors from a serialized FileDescriptorProto.
383
384
385// Collects errors that occur during proto file building to allow them to be
386// propagated in the python exception instead of only living in ERROR logs.
387class BuildFileErrorCollector : public DescriptorPool::ErrorCollector {
388 public:
389  BuildFileErrorCollector() : error_message(""), had_errors(false) {}
390
391  void AddError(const string& filename, const string& element_name,
392                const Message* descriptor, ErrorLocation location,
393                const string& message) {
394    // Replicates the logging behavior that happens in the C++ implementation
395    // when an error collector is not passed in.
396    if (!had_errors) {
397      error_message +=
398          ("Invalid proto descriptor for file \"" + filename + "\":\n");
399      had_errors = true;
400    }
401    // As this only happens on failure and will result in the program not
402    // running at all, no effort is made to optimize this string manipulation.
403    error_message += ("  " + element_name + ": " + message + "\n");
404  }
405
406  string error_message;
407  bool had_errors;
408};
409
410PyObject* AddSerializedFile(PyDescriptorPool* self, PyObject* serialized_pb) {
411  char* message_type;
412  Py_ssize_t message_len;
413
414  if (self->database != NULL) {
415    PyErr_SetString(
416        PyExc_ValueError,
417        "Cannot call Add on a DescriptorPool that uses a DescriptorDatabase. "
418        "Add your file to the underlying database.");
419    return NULL;
420  }
421
422  if (PyBytes_AsStringAndSize(serialized_pb, &message_type, &message_len) < 0) {
423    return NULL;
424  }
425
426  FileDescriptorProto file_proto;
427  if (!file_proto.ParseFromArray(message_type, message_len)) {
428    PyErr_SetString(PyExc_TypeError, "Couldn't parse file content!");
429    return NULL;
430  }
431
432  // If the file was already part of a C++ library, all its descriptors are in
433  // the underlying pool.  No need to do anything else.
434  const FileDescriptor* generated_file = NULL;
435  if (self->underlay) {
436    generated_file = self->underlay->FindFileByName(file_proto.name());
437  }
438  if (generated_file != NULL) {
439    return PyFileDescriptor_FromDescriptorWithSerializedPb(
440        generated_file, serialized_pb);
441  }
442
443  BuildFileErrorCollector error_collector;
444  const FileDescriptor* descriptor =
445      self->pool->BuildFileCollectingErrors(file_proto,
446                                            &error_collector);
447  if (descriptor == NULL) {
448    PyErr_Format(PyExc_TypeError,
449                 "Couldn't build proto file into descriptor pool!\n%s",
450                 error_collector.error_message.c_str());
451    return NULL;
452  }
453
454  return PyFileDescriptor_FromDescriptorWithSerializedPb(
455      descriptor, serialized_pb);
456}
457
458PyObject* Add(PyDescriptorPool* self, PyObject* file_descriptor_proto) {
459  ScopedPyObjectPtr serialized_pb(
460      PyObject_CallMethod(file_descriptor_proto, "SerializeToString", NULL));
461  if (serialized_pb == NULL) {
462    return NULL;
463  }
464  return AddSerializedFile(self, serialized_pb.get());
465}
466
467static PyMethodDef Methods[] = {
468  { "Add", (PyCFunction)Add, METH_O,
469    "Adds the FileDescriptorProto and its types to this pool." },
470  { "AddSerializedFile", (PyCFunction)AddSerializedFile, METH_O,
471    "Adds a serialized FileDescriptorProto to this pool." },
472
473  // TODO(amauryfa): Understand why the Python implementation differs from
474  // this one, ask users to use another API and deprecate these functions.
475  { "AddFileDescriptor", (PyCFunction)AddFileDescriptor, METH_O,
476    "No-op. Add() must have been called before." },
477  { "AddDescriptor", (PyCFunction)AddDescriptor, METH_O,
478    "No-op. Add() must have been called before." },
479  { "AddEnumDescriptor", (PyCFunction)AddEnumDescriptor, METH_O,
480    "No-op. Add() must have been called before." },
481
482  { "FindFileByName", (PyCFunction)FindFileByName, METH_O,
483    "Searches for a file descriptor by its .proto name." },
484  { "FindMessageTypeByName", (PyCFunction)FindMessageByName, METH_O,
485    "Searches for a message descriptor by full name." },
486  { "FindFieldByName", (PyCFunction)FindFieldByName, METH_O,
487    "Searches for a field descriptor by full name." },
488  { "FindExtensionByName", (PyCFunction)FindExtensionByName, METH_O,
489    "Searches for extension descriptor by full name." },
490  { "FindEnumTypeByName", (PyCFunction)FindEnumTypeByName, METH_O,
491    "Searches for enum type descriptor by full name." },
492  { "FindOneofByName", (PyCFunction)FindOneofByName, METH_O,
493    "Searches for oneof descriptor by full name." },
494
495  { "FindFileContainingSymbol", (PyCFunction)FindFileContainingSymbol, METH_O,
496    "Gets the FileDescriptor containing the specified symbol." },
497  {NULL}
498};
499
500}  // namespace cdescriptor_pool
501
502PyTypeObject PyDescriptorPool_Type = {
503  PyVarObject_HEAD_INIT(&PyType_Type, 0)
504  FULL_MODULE_NAME ".DescriptorPool",  // tp_name
505  sizeof(PyDescriptorPool),            // tp_basicsize
506  0,                                   // tp_itemsize
507  (destructor)cdescriptor_pool::Dealloc,  // tp_dealloc
508  0,                                   // tp_print
509  0,                                   // tp_getattr
510  0,                                   // tp_setattr
511  0,                                   // tp_compare
512  0,                                   // tp_repr
513  0,                                   // tp_as_number
514  0,                                   // tp_as_sequence
515  0,                                   // tp_as_mapping
516  0,                                   // tp_hash
517  0,                                   // tp_call
518  0,                                   // tp_str
519  0,                                   // tp_getattro
520  0,                                   // tp_setattro
521  0,                                   // tp_as_buffer
522  Py_TPFLAGS_DEFAULT,                  // tp_flags
523  "A Descriptor Pool",                 // tp_doc
524  0,                                   // tp_traverse
525  0,                                   // tp_clear
526  0,                                   // tp_richcompare
527  0,                                   // tp_weaklistoffset
528  0,                                   // tp_iter
529  0,                                   // tp_iternext
530  cdescriptor_pool::Methods,           // tp_methods
531  0,                                   // tp_members
532  0,                                   // tp_getset
533  0,                                   // tp_base
534  0,                                   // tp_dict
535  0,                                   // tp_descr_get
536  0,                                   // tp_descr_set
537  0,                                   // tp_dictoffset
538  0,                                   // tp_init
539  0,                                   // tp_alloc
540  cdescriptor_pool::New,               // tp_new
541  PyObject_Del,                        // tp_free
542};
543
544// This is the DescriptorPool which contains all the definitions from the
545// generated _pb2.py modules.
546static PyDescriptorPool* python_generated_pool = NULL;
547
548bool InitDescriptorPool() {
549  if (PyType_Ready(&PyDescriptorPool_Type) < 0)
550    return false;
551
552  // The Pool of messages declared in Python libraries.
553  // generated_pool() contains all messages already linked in C++ libraries, and
554  // is used as underlay.
555  python_generated_pool = cdescriptor_pool::PyDescriptorPool_NewWithUnderlay(
556      DescriptorPool::generated_pool());
557  if (python_generated_pool == NULL) {
558    return false;
559  }
560  // Register this pool to be found for C++-generated descriptors.
561  descriptor_pool_map.insert(
562      std::make_pair(DescriptorPool::generated_pool(),
563                     python_generated_pool));
564
565  return true;
566}
567
568// The default DescriptorPool used everywhere in this module.
569// Today it's the python_generated_pool.
570// TODO(amauryfa): Remove all usages of this function: the pool should be
571// derived from the context.
572PyDescriptorPool* GetDefaultDescriptorPool() {
573  return python_generated_pool;
574}
575
576PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool) {
577  // Fast path for standard descriptors.
578  if (pool == python_generated_pool->pool ||
579      pool == DescriptorPool::generated_pool()) {
580    return python_generated_pool;
581  }
582  hash_map<const DescriptorPool*, PyDescriptorPool*>::iterator it =
583      descriptor_pool_map.find(pool);
584  if (it == descriptor_pool_map.end()) {
585    PyErr_SetString(PyExc_KeyError, "Unknown descriptor pool");
586    return NULL;
587  }
588  return it->second;
589}
590
591}  // namespace python
592}  // namespace protobuf
593}  // namespace google
594