1// Protocol Buffers - Google's data interchange format 2// Copyright 2008 Google Inc. All rights reserved. 3// https://developers.google.com/protocol-buffers/ 4// 5// Redistribution and use in source and binary forms, with or without 6// modification, are permitted provided that the following conditions are 7// met: 8// 9// * Redistributions of source code must retain the above copyright 10// notice, this list of conditions and the following disclaimer. 11// * Redistributions in binary form must reproduce the above 12// copyright notice, this list of conditions and the following disclaimer 13// in the documentation and/or other materials provided with the 14// distribution. 15// * Neither the name of Google Inc. nor the names of its 16// contributors may be used to endorse or promote products derived from 17// this software without specific prior written permission. 18// 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31// Implements the DescriptorPool, which collects all descriptors. 32 33#include <Python.h> 34 35#include <google/protobuf/descriptor.pb.h> 36#include <google/protobuf/dynamic_message.h> 37#include <google/protobuf/pyext/descriptor.h> 38#include <google/protobuf/pyext/descriptor_database.h> 39#include <google/protobuf/pyext/descriptor_pool.h> 40#include <google/protobuf/pyext/message.h> 41#include <google/protobuf/pyext/scoped_pyobject_ptr.h> 42 43#if PY_MAJOR_VERSION >= 3 44 #define PyString_FromStringAndSize PyUnicode_FromStringAndSize 45 #if PY_VERSION_HEX < 0x03030000 46 #error "Python 3.0 - 3.2 are not supported." 47 #endif 48 #define PyString_AsStringAndSize(ob, charpp, sizep) \ 49 (PyUnicode_Check(ob)? \ 50 ((*(charpp) = PyUnicode_AsUTF8AndSize(ob, (sizep))) == NULL? -1: 0): \ 51 PyBytes_AsStringAndSize(ob, (charpp), (sizep))) 52#endif 53 54namespace google { 55namespace protobuf { 56namespace python { 57 58// A map to cache Python Pools per C++ pointer. 59// Pointers are not owned here, and belong to the PyDescriptorPool. 60static hash_map<const DescriptorPool*, PyDescriptorPool*> descriptor_pool_map; 61 62namespace cdescriptor_pool { 63 64// Create a Python DescriptorPool object, but does not fill the "pool" 65// attribute. 66static PyDescriptorPool* _CreateDescriptorPool() { 67 PyDescriptorPool* cpool = PyObject_New( 68 PyDescriptorPool, &PyDescriptorPool_Type); 69 if (cpool == NULL) { 70 return NULL; 71 } 72 73 cpool->underlay = NULL; 74 cpool->database = NULL; 75 76 DynamicMessageFactory* message_factory = new DynamicMessageFactory(); 77 // This option might be the default some day. 78 message_factory->SetDelegateToGeneratedFactory(true); 79 cpool->message_factory = message_factory; 80 81 // TODO(amauryfa): Rewrite the SymbolDatabase in C so that it uses the same 82 // storage. 83 cpool->classes_by_descriptor = 84 new PyDescriptorPool::ClassesByMessageMap(); 85 cpool->descriptor_options = 86 new hash_map<const void*, PyObject *>(); 87 88 return cpool; 89} 90 91// Create a Python DescriptorPool, using the given pool as an underlay: 92// new messages will be added to a custom pool, not to the underlay. 93// 94// Ownership of the underlay is not transferred, its pointer should 95// stay alive. 96static PyDescriptorPool* PyDescriptorPool_NewWithUnderlay( 97 const DescriptorPool* underlay) { 98 PyDescriptorPool* cpool = _CreateDescriptorPool(); 99 if (cpool == NULL) { 100 return NULL; 101 } 102 cpool->pool = new DescriptorPool(underlay); 103 cpool->underlay = underlay; 104 105 if (!descriptor_pool_map.insert( 106 std::make_pair(cpool->pool, cpool)).second) { 107 // Should never happen -- would indicate an internal error / bug. 108 PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered"); 109 return NULL; 110 } 111 112 return cpool; 113} 114 115static PyDescriptorPool* PyDescriptorPool_NewWithDatabase( 116 DescriptorDatabase* database) { 117 PyDescriptorPool* cpool = _CreateDescriptorPool(); 118 if (cpool == NULL) { 119 return NULL; 120 } 121 if (database != NULL) { 122 cpool->pool = new DescriptorPool(database); 123 cpool->database = database; 124 } else { 125 cpool->pool = new DescriptorPool(); 126 } 127 128 if (!descriptor_pool_map.insert(std::make_pair(cpool->pool, cpool)).second) { 129 // Should never happen -- would indicate an internal error / bug. 130 PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered"); 131 return NULL; 132 } 133 134 return cpool; 135} 136 137// The public DescriptorPool constructor. 138static PyObject* New(PyTypeObject* type, 139 PyObject* args, PyObject* kwargs) { 140 static char* kwlist[] = {"descriptor_db", 0}; 141 PyObject* py_database = NULL; 142 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist, &py_database)) { 143 return NULL; 144 } 145 DescriptorDatabase* database = NULL; 146 if (py_database && py_database != Py_None) { 147 database = new PyDescriptorDatabase(py_database); 148 } 149 return reinterpret_cast<PyObject*>( 150 PyDescriptorPool_NewWithDatabase(database)); 151} 152 153static void Dealloc(PyDescriptorPool* self) { 154 typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator; 155 descriptor_pool_map.erase(self->pool); 156 for (iterator it = self->classes_by_descriptor->begin(); 157 it != self->classes_by_descriptor->end(); ++it) { 158 Py_DECREF(it->second); 159 } 160 delete self->classes_by_descriptor; 161 for (hash_map<const void*, PyObject*>::iterator it = 162 self->descriptor_options->begin(); 163 it != self->descriptor_options->end(); ++it) { 164 Py_DECREF(it->second); 165 } 166 delete self->descriptor_options; 167 delete self->message_factory; 168 delete self->database; 169 delete self->pool; 170 Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self)); 171} 172 173PyObject* FindMessageByName(PyDescriptorPool* self, PyObject* arg) { 174 Py_ssize_t name_size; 175 char* name; 176 if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) { 177 return NULL; 178 } 179 180 const Descriptor* message_descriptor = 181 self->pool->FindMessageTypeByName(string(name, name_size)); 182 183 if (message_descriptor == NULL) { 184 PyErr_Format(PyExc_KeyError, "Couldn't find message %.200s", name); 185 return NULL; 186 } 187 188 return PyMessageDescriptor_FromDescriptor(message_descriptor); 189} 190 191// Add a message class to our database. 192int RegisterMessageClass(PyDescriptorPool* self, 193 const Descriptor* message_descriptor, 194 CMessageClass* message_class) { 195 Py_INCREF(message_class); 196 typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator; 197 std::pair<iterator, bool> ret = self->classes_by_descriptor->insert( 198 std::make_pair(message_descriptor, message_class)); 199 if (!ret.second) { 200 // Update case: DECREF the previous value. 201 Py_DECREF(ret.first->second); 202 ret.first->second = message_class; 203 } 204 return 0; 205} 206 207// Retrieve the message class added to our database. 208CMessageClass* GetMessageClass(PyDescriptorPool* self, 209 const Descriptor* message_descriptor) { 210 typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator; 211 iterator ret = self->classes_by_descriptor->find(message_descriptor); 212 if (ret == self->classes_by_descriptor->end()) { 213 PyErr_Format(PyExc_TypeError, "No message class registered for '%s'", 214 message_descriptor->full_name().c_str()); 215 return NULL; 216 } else { 217 return ret->second; 218 } 219} 220 221PyObject* FindFileByName(PyDescriptorPool* self, PyObject* arg) { 222 Py_ssize_t name_size; 223 char* name; 224 if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) { 225 return NULL; 226 } 227 228 const FileDescriptor* file_descriptor = 229 self->pool->FindFileByName(string(name, name_size)); 230 if (file_descriptor == NULL) { 231 PyErr_Format(PyExc_KeyError, "Couldn't find file %.200s", 232 name); 233 return NULL; 234 } 235 236 return PyFileDescriptor_FromDescriptor(file_descriptor); 237} 238 239PyObject* FindFieldByName(PyDescriptorPool* self, PyObject* arg) { 240 Py_ssize_t name_size; 241 char* name; 242 if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) { 243 return NULL; 244 } 245 246 const FieldDescriptor* field_descriptor = 247 self->pool->FindFieldByName(string(name, name_size)); 248 if (field_descriptor == NULL) { 249 PyErr_Format(PyExc_KeyError, "Couldn't find field %.200s", 250 name); 251 return NULL; 252 } 253 254 return PyFieldDescriptor_FromDescriptor(field_descriptor); 255} 256 257PyObject* FindExtensionByName(PyDescriptorPool* self, PyObject* arg) { 258 Py_ssize_t name_size; 259 char* name; 260 if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) { 261 return NULL; 262 } 263 264 const FieldDescriptor* field_descriptor = 265 self->pool->FindExtensionByName(string(name, name_size)); 266 if (field_descriptor == NULL) { 267 PyErr_Format(PyExc_KeyError, "Couldn't find extension field %.200s", name); 268 return NULL; 269 } 270 271 return PyFieldDescriptor_FromDescriptor(field_descriptor); 272} 273 274PyObject* FindEnumTypeByName(PyDescriptorPool* self, PyObject* arg) { 275 Py_ssize_t name_size; 276 char* name; 277 if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) { 278 return NULL; 279 } 280 281 const EnumDescriptor* enum_descriptor = 282 self->pool->FindEnumTypeByName(string(name, name_size)); 283 if (enum_descriptor == NULL) { 284 PyErr_Format(PyExc_KeyError, "Couldn't find enum %.200s", name); 285 return NULL; 286 } 287 288 return PyEnumDescriptor_FromDescriptor(enum_descriptor); 289} 290 291PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg) { 292 Py_ssize_t name_size; 293 char* name; 294 if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) { 295 return NULL; 296 } 297 298 const OneofDescriptor* oneof_descriptor = 299 self->pool->FindOneofByName(string(name, name_size)); 300 if (oneof_descriptor == NULL) { 301 PyErr_Format(PyExc_KeyError, "Couldn't find oneof %.200s", name); 302 return NULL; 303 } 304 305 return PyOneofDescriptor_FromDescriptor(oneof_descriptor); 306} 307 308PyObject* FindFileContainingSymbol(PyDescriptorPool* self, PyObject* arg) { 309 Py_ssize_t name_size; 310 char* name; 311 if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) { 312 return NULL; 313 } 314 315 const FileDescriptor* file_descriptor = 316 self->pool->FindFileContainingSymbol(string(name, name_size)); 317 if (file_descriptor == NULL) { 318 PyErr_Format(PyExc_KeyError, "Couldn't find symbol %.200s", name); 319 return NULL; 320 } 321 322 return PyFileDescriptor_FromDescriptor(file_descriptor); 323} 324 325// These functions should not exist -- the only valid way to create 326// descriptors is to call Add() or AddSerializedFile(). 327// But these AddDescriptor() functions were created in Python and some people 328// call them, so we support them for now for compatibility. 329// However we do check that the existing descriptor already exists in the pool, 330// which appears to always be true for existing calls -- but then why do people 331// call a function that will just be a no-op? 332// TODO(amauryfa): Need to investigate further. 333 334PyObject* AddFileDescriptor(PyDescriptorPool* self, PyObject* descriptor) { 335 const FileDescriptor* file_descriptor = 336 PyFileDescriptor_AsDescriptor(descriptor); 337 if (!file_descriptor) { 338 return NULL; 339 } 340 if (file_descriptor != 341 self->pool->FindFileByName(file_descriptor->name())) { 342 PyErr_Format(PyExc_ValueError, 343 "The file descriptor %s does not belong to this pool", 344 file_descriptor->name().c_str()); 345 return NULL; 346 } 347 Py_RETURN_NONE; 348} 349 350PyObject* AddDescriptor(PyDescriptorPool* self, PyObject* descriptor) { 351 const Descriptor* message_descriptor = 352 PyMessageDescriptor_AsDescriptor(descriptor); 353 if (!message_descriptor) { 354 return NULL; 355 } 356 if (message_descriptor != 357 self->pool->FindMessageTypeByName(message_descriptor->full_name())) { 358 PyErr_Format(PyExc_ValueError, 359 "The message descriptor %s does not belong to this pool", 360 message_descriptor->full_name().c_str()); 361 return NULL; 362 } 363 Py_RETURN_NONE; 364} 365 366PyObject* AddEnumDescriptor(PyDescriptorPool* self, PyObject* descriptor) { 367 const EnumDescriptor* enum_descriptor = 368 PyEnumDescriptor_AsDescriptor(descriptor); 369 if (!enum_descriptor) { 370 return NULL; 371 } 372 if (enum_descriptor != 373 self->pool->FindEnumTypeByName(enum_descriptor->full_name())) { 374 PyErr_Format(PyExc_ValueError, 375 "The enum descriptor %s does not belong to this pool", 376 enum_descriptor->full_name().c_str()); 377 return NULL; 378 } 379 Py_RETURN_NONE; 380} 381 382// The code below loads new Descriptors from a serialized FileDescriptorProto. 383 384 385// Collects errors that occur during proto file building to allow them to be 386// propagated in the python exception instead of only living in ERROR logs. 387class BuildFileErrorCollector : public DescriptorPool::ErrorCollector { 388 public: 389 BuildFileErrorCollector() : error_message(""), had_errors(false) {} 390 391 void AddError(const string& filename, const string& element_name, 392 const Message* descriptor, ErrorLocation location, 393 const string& message) { 394 // Replicates the logging behavior that happens in the C++ implementation 395 // when an error collector is not passed in. 396 if (!had_errors) { 397 error_message += 398 ("Invalid proto descriptor for file \"" + filename + "\":\n"); 399 had_errors = true; 400 } 401 // As this only happens on failure and will result in the program not 402 // running at all, no effort is made to optimize this string manipulation. 403 error_message += (" " + element_name + ": " + message + "\n"); 404 } 405 406 string error_message; 407 bool had_errors; 408}; 409 410PyObject* AddSerializedFile(PyDescriptorPool* self, PyObject* serialized_pb) { 411 char* message_type; 412 Py_ssize_t message_len; 413 414 if (self->database != NULL) { 415 PyErr_SetString( 416 PyExc_ValueError, 417 "Cannot call Add on a DescriptorPool that uses a DescriptorDatabase. " 418 "Add your file to the underlying database."); 419 return NULL; 420 } 421 422 if (PyBytes_AsStringAndSize(serialized_pb, &message_type, &message_len) < 0) { 423 return NULL; 424 } 425 426 FileDescriptorProto file_proto; 427 if (!file_proto.ParseFromArray(message_type, message_len)) { 428 PyErr_SetString(PyExc_TypeError, "Couldn't parse file content!"); 429 return NULL; 430 } 431 432 // If the file was already part of a C++ library, all its descriptors are in 433 // the underlying pool. No need to do anything else. 434 const FileDescriptor* generated_file = NULL; 435 if (self->underlay) { 436 generated_file = self->underlay->FindFileByName(file_proto.name()); 437 } 438 if (generated_file != NULL) { 439 return PyFileDescriptor_FromDescriptorWithSerializedPb( 440 generated_file, serialized_pb); 441 } 442 443 BuildFileErrorCollector error_collector; 444 const FileDescriptor* descriptor = 445 self->pool->BuildFileCollectingErrors(file_proto, 446 &error_collector); 447 if (descriptor == NULL) { 448 PyErr_Format(PyExc_TypeError, 449 "Couldn't build proto file into descriptor pool!\n%s", 450 error_collector.error_message.c_str()); 451 return NULL; 452 } 453 454 return PyFileDescriptor_FromDescriptorWithSerializedPb( 455 descriptor, serialized_pb); 456} 457 458PyObject* Add(PyDescriptorPool* self, PyObject* file_descriptor_proto) { 459 ScopedPyObjectPtr serialized_pb( 460 PyObject_CallMethod(file_descriptor_proto, "SerializeToString", NULL)); 461 if (serialized_pb == NULL) { 462 return NULL; 463 } 464 return AddSerializedFile(self, serialized_pb.get()); 465} 466 467static PyMethodDef Methods[] = { 468 { "Add", (PyCFunction)Add, METH_O, 469 "Adds the FileDescriptorProto and its types to this pool." }, 470 { "AddSerializedFile", (PyCFunction)AddSerializedFile, METH_O, 471 "Adds a serialized FileDescriptorProto to this pool." }, 472 473 // TODO(amauryfa): Understand why the Python implementation differs from 474 // this one, ask users to use another API and deprecate these functions. 475 { "AddFileDescriptor", (PyCFunction)AddFileDescriptor, METH_O, 476 "No-op. Add() must have been called before." }, 477 { "AddDescriptor", (PyCFunction)AddDescriptor, METH_O, 478 "No-op. Add() must have been called before." }, 479 { "AddEnumDescriptor", (PyCFunction)AddEnumDescriptor, METH_O, 480 "No-op. Add() must have been called before." }, 481 482 { "FindFileByName", (PyCFunction)FindFileByName, METH_O, 483 "Searches for a file descriptor by its .proto name." }, 484 { "FindMessageTypeByName", (PyCFunction)FindMessageByName, METH_O, 485 "Searches for a message descriptor by full name." }, 486 { "FindFieldByName", (PyCFunction)FindFieldByName, METH_O, 487 "Searches for a field descriptor by full name." }, 488 { "FindExtensionByName", (PyCFunction)FindExtensionByName, METH_O, 489 "Searches for extension descriptor by full name." }, 490 { "FindEnumTypeByName", (PyCFunction)FindEnumTypeByName, METH_O, 491 "Searches for enum type descriptor by full name." }, 492 { "FindOneofByName", (PyCFunction)FindOneofByName, METH_O, 493 "Searches for oneof descriptor by full name." }, 494 495 { "FindFileContainingSymbol", (PyCFunction)FindFileContainingSymbol, METH_O, 496 "Gets the FileDescriptor containing the specified symbol." }, 497 {NULL} 498}; 499 500} // namespace cdescriptor_pool 501 502PyTypeObject PyDescriptorPool_Type = { 503 PyVarObject_HEAD_INIT(&PyType_Type, 0) 504 FULL_MODULE_NAME ".DescriptorPool", // tp_name 505 sizeof(PyDescriptorPool), // tp_basicsize 506 0, // tp_itemsize 507 (destructor)cdescriptor_pool::Dealloc, // tp_dealloc 508 0, // tp_print 509 0, // tp_getattr 510 0, // tp_setattr 511 0, // tp_compare 512 0, // tp_repr 513 0, // tp_as_number 514 0, // tp_as_sequence 515 0, // tp_as_mapping 516 0, // tp_hash 517 0, // tp_call 518 0, // tp_str 519 0, // tp_getattro 520 0, // tp_setattro 521 0, // tp_as_buffer 522 Py_TPFLAGS_DEFAULT, // tp_flags 523 "A Descriptor Pool", // tp_doc 524 0, // tp_traverse 525 0, // tp_clear 526 0, // tp_richcompare 527 0, // tp_weaklistoffset 528 0, // tp_iter 529 0, // tp_iternext 530 cdescriptor_pool::Methods, // tp_methods 531 0, // tp_members 532 0, // tp_getset 533 0, // tp_base 534 0, // tp_dict 535 0, // tp_descr_get 536 0, // tp_descr_set 537 0, // tp_dictoffset 538 0, // tp_init 539 0, // tp_alloc 540 cdescriptor_pool::New, // tp_new 541 PyObject_Del, // tp_free 542}; 543 544// This is the DescriptorPool which contains all the definitions from the 545// generated _pb2.py modules. 546static PyDescriptorPool* python_generated_pool = NULL; 547 548bool InitDescriptorPool() { 549 if (PyType_Ready(&PyDescriptorPool_Type) < 0) 550 return false; 551 552 // The Pool of messages declared in Python libraries. 553 // generated_pool() contains all messages already linked in C++ libraries, and 554 // is used as underlay. 555 python_generated_pool = cdescriptor_pool::PyDescriptorPool_NewWithUnderlay( 556 DescriptorPool::generated_pool()); 557 if (python_generated_pool == NULL) { 558 return false; 559 } 560 // Register this pool to be found for C++-generated descriptors. 561 descriptor_pool_map.insert( 562 std::make_pair(DescriptorPool::generated_pool(), 563 python_generated_pool)); 564 565 return true; 566} 567 568// The default DescriptorPool used everywhere in this module. 569// Today it's the python_generated_pool. 570// TODO(amauryfa): Remove all usages of this function: the pool should be 571// derived from the context. 572PyDescriptorPool* GetDefaultDescriptorPool() { 573 return python_generated_pool; 574} 575 576PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool) { 577 // Fast path for standard descriptors. 578 if (pool == python_generated_pool->pool || 579 pool == DescriptorPool::generated_pool()) { 580 return python_generated_pool; 581 } 582 hash_map<const DescriptorPool*, PyDescriptorPool*>::iterator it = 583 descriptor_pool_map.find(pool); 584 if (it == descriptor_pool_map.end()) { 585 PyErr_SetString(PyExc_KeyError, "Unknown descriptor pool"); 586 return NULL; 587 } 588 return it->second; 589} 590 591} // namespace python 592} // namespace protobuf 593} // namespace google 594