python_generator.cc revision fbaaef999ba563838ebd00874ed8a1c01fbf286d
1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: robinson@google.com (Will Robinson)
32//
33// This module outputs pure-Python protocol message classes that will
34// largely be constructed at runtime via the metaclass in reflection.py.
35// In other words, our job is basically to output a Python equivalent
36// of the C++ *Descriptor objects, and fix up all circular references
37// within these objects.
38//
39// Note that the runtime performance of protocol message classes created in
40// this way is expected to be lousy.  The plan is to create an alternate
41// generator that outputs a Python/C extension module that lets
42// performance-minded Python code leverage the fast C++ implementation
43// directly.
44
#include <limits>
#include <map>
#include <string>
#include <utility>
#include <vector>
49
50#include <google/protobuf/compiler/python/python_generator.h>
51#include <google/protobuf/descriptor.pb.h>
52
53#include <google/protobuf/stubs/common.h>
54#include <google/protobuf/io/printer.h>
55#include <google/protobuf/descriptor.h>
56#include <google/protobuf/io/zero_copy_stream.h>
57#include <google/protobuf/stubs/strutil.h>
58#include <google/protobuf/stubs/substitute.h>
59
60namespace google {
61namespace protobuf {
62namespace compiler {
63namespace python {
64
65namespace {
66
67// Returns a copy of |filename| with any trailing ".protodevel" or ".proto
68// suffix stripped.
69// TODO(robinson): Unify with copy in compiler/cpp/internal/helpers.cc.
70string StripProto(const string& filename) {
71  const char* suffix = HasSuffixString(filename, ".protodevel")
72      ? ".protodevel" : ".proto";
73  return StripSuffixString(filename, suffix);
74}
75
76
77// Returns the Python module name expected for a given .proto filename.
78string ModuleName(const string& filename) {
79  string basename = StripProto(filename);
80  StripString(&basename, "-", '_');
81  StripString(&basename, "/", '.');
82  return basename + "_pb2";
83}
84
85
86// Returns the name of all containing types for descriptor,
87// in order from outermost to innermost, followed by descriptor's
88// own name.  Each name is separated by |separator|.
89template <typename DescriptorT>
90string NamePrefixedWithNestedTypes(const DescriptorT& descriptor,
91                                   const string& separator) {
92  string name = descriptor.name();
93  for (const Descriptor* current = descriptor.containing_type();
94       current != NULL; current = current->containing_type()) {
95    name = current->name() + separator + name;
96  }
97  return name;
98}
99
100
// Name of the class attribute where we store the Python
// descriptor.Descriptor instance for the generated class.  This generator
// emits it as the left-hand side of the "<key> = <descriptor name>"
// assignment inside every generated message, service and service-stub class.
// Must stay consistent with the _DESCRIPTOR_KEY constant
// in proto2/public/reflection.py.
const char kDescriptorKey[] = "DESCRIPTOR";
106
107
108// Prints the common boilerplate needed at the top of every .py
109// file output by this generator.
110void PrintTopBoilerplate(
111    io::Printer* printer, const FileDescriptor* file, bool descriptor_proto) {
112  // TODO(robinson): Allow parameterization of Python version?
113  printer->Print(
114      "# Generated by the protocol buffer compiler.  DO NOT EDIT!\n"
115      "\n"
116      "from google.protobuf import descriptor\n"
117      "from google.protobuf import message\n"
118      "from google.protobuf import reflection\n"
119      "from google.protobuf import service\n"
120      "from google.protobuf import service_reflection\n");
121  // Avoid circular imports if this module is descriptor_pb2.
122  if (!descriptor_proto) {
123    printer->Print(
124        "from google.protobuf import descriptor_pb2\n");
125  }
126}
127
128
129// Returns a Python literal giving the default value for a field.
130// If the field specifies no explicit default value, we'll return
131// the default default value for the field type (zero for numbers,
132// empty string for strings, empty list for repeated fields, and
133// None for non-repeated, composite fields).
134//
135// TODO(robinson): Unify with code from
136// //compiler/cpp/internal/primitive_field.cc
137// //compiler/cpp/internal/enum_field.cc
138// //compiler/cpp/internal/string_field.cc
139string StringifyDefaultValue(const FieldDescriptor& field) {
140  if (field.is_repeated()) {
141    return "[]";
142  }
143
144  switch (field.cpp_type()) {
145    case FieldDescriptor::CPPTYPE_INT32:
146      return SimpleItoa(field.default_value_int32());
147    case FieldDescriptor::CPPTYPE_UINT32:
148      return SimpleItoa(field.default_value_uint32());
149    case FieldDescriptor::CPPTYPE_INT64:
150      return SimpleItoa(field.default_value_int64());
151    case FieldDescriptor::CPPTYPE_UINT64:
152      return SimpleItoa(field.default_value_uint64());
153    case FieldDescriptor::CPPTYPE_DOUBLE:
154      return SimpleDtoa(field.default_value_double());
155    case FieldDescriptor::CPPTYPE_FLOAT:
156      return SimpleFtoa(field.default_value_float());
157    case FieldDescriptor::CPPTYPE_BOOL:
158      return field.default_value_bool() ? "True" : "False";
159    case FieldDescriptor::CPPTYPE_ENUM:
160      return SimpleItoa(field.default_value_enum()->number());
161    case FieldDescriptor::CPPTYPE_STRING:
162      if (field.type() == FieldDescriptor::TYPE_STRING) {
163        return "unicode(\"" + CEscape(field.default_value_string()) +
164            "\", \"utf-8\")";
165      } else {
166        return "\"" + CEscape(field.default_value_string()) + "\"";
167      }
168    case FieldDescriptor::CPPTYPE_MESSAGE:
169      return "None";
170  }
171  // (We could add a default case above but then we wouldn't get the nice
172  // compiler warning when a new type is added.)
173  GOOGLE_LOG(FATAL) << "Not reached.";
174  return "";
175}
176
177
178
179}  // namespace
180
181
182Generator::Generator() : file_(NULL) {
183}
184
185Generator::~Generator() {
186}
187
188bool Generator::Generate(const FileDescriptor* file,
189                         const string& parameter,
190                         OutputDirectory* output_directory,
191                         string* error) const {
192
193  // Completely serialize all Generate() calls on this instance.  The
194  // thread-safety constraints of the CodeGenerator interface aren't clear so
195  // just be as conservative as possible.  It's easier to relax this later if
196  // we need to, but I doubt it will be an issue.
197  // TODO(kenton):  The proper thing to do would be to allocate any state on
198  //   the stack and use that, so that the Generator class itself does not need
199  //   to have any mutable members.  Then it is implicitly thread-safe.
200  MutexLock lock(&mutex_);
201  file_ = file;
202  string module_name = ModuleName(file->name());
203  string filename = module_name;
204  StripString(&filename, ".", '/');
205  filename += ".py";
206
207
208  scoped_ptr<io::ZeroCopyOutputStream> output(output_directory->Open(filename));
209  GOOGLE_CHECK(output.get());
210  io::Printer printer(output.get(), '$');
211  printer_ = &printer;
212
213  PrintTopBoilerplate(printer_, file_, GeneratingDescriptorProto());
214  PrintTopLevelEnums();
215  PrintTopLevelExtensions();
216  PrintAllNestedEnumsInFile();
217  PrintMessageDescriptors();
218  // We have to print the imports after the descriptors, so that mutually
219  // recursive protos in separate files can successfully reference each other.
220  PrintImports();
221  FixForeignFieldsInDescriptors();
222  PrintMessages();
223  // We have to fix up the extensions after the message classes themselves,
224  // since they need to call static RegisterExtension() methods on these
225  // classes.
226  FixForeignFieldsInExtensions();
227  PrintServices();
228  return !printer.failed();
229}
230
231// Prints Python imports for all modules imported by |file|.
232void Generator::PrintImports() const {
233  for (int i = 0; i < file_->dependency_count(); ++i) {
234    string module_name = ModuleName(file_->dependency(i)->name());
235    printer_->Print("import $module$\n", "module",
236                    module_name);
237  }
238  printer_->Print("\n");
239}
240
241// Prints descriptors and module-level constants for all top-level
242// enums defined in |file|.
243void Generator::PrintTopLevelEnums() const {
244  vector<pair<string, int> > top_level_enum_values;
245  for (int i = 0; i < file_->enum_type_count(); ++i) {
246    const EnumDescriptor& enum_descriptor = *file_->enum_type(i);
247    PrintEnum(enum_descriptor);
248    printer_->Print("\n");
249
250    for (int j = 0; j < enum_descriptor.value_count(); ++j) {
251      const EnumValueDescriptor& value_descriptor = *enum_descriptor.value(j);
252      top_level_enum_values.push_back(
253          make_pair(value_descriptor.name(), value_descriptor.number()));
254    }
255  }
256
257  for (int i = 0; i < top_level_enum_values.size(); ++i) {
258    printer_->Print("$name$ = $value$\n",
259                    "name", top_level_enum_values[i].first,
260                    "value", SimpleItoa(top_level_enum_values[i].second));
261  }
262  printer_->Print("\n");
263}
264
265// Prints all enums contained in all message types in |file|.
266void Generator::PrintAllNestedEnumsInFile() const {
267  for (int i = 0; i < file_->message_type_count(); ++i) {
268    PrintNestedEnums(*file_->message_type(i));
269  }
270}
271
272// Prints a Python statement assigning the appropriate module-level
273// enum name to a Python EnumDescriptor object equivalent to
274// enum_descriptor.
275void Generator::PrintEnum(const EnumDescriptor& enum_descriptor) const {
276  map<string, string> m;
277  m["descriptor_name"] = ModuleLevelDescriptorName(enum_descriptor);
278  m["name"] = enum_descriptor.name();
279  m["full_name"] = enum_descriptor.full_name();
280  m["filename"] = enum_descriptor.name();
281  const char enum_descriptor_template[] =
282      "$descriptor_name$ = descriptor.EnumDescriptor(\n"
283      "  name='$name$',\n"
284      "  full_name='$full_name$',\n"
285      "  filename='$filename$',\n"
286      "  values=[\n";
287  string options_string;
288  enum_descriptor.options().SerializeToString(&options_string);
289  printer_->Print(m, enum_descriptor_template);
290  printer_->Indent();
291  printer_->Indent();
292  for (int i = 0; i < enum_descriptor.value_count(); ++i) {
293    PrintEnumValueDescriptor(*enum_descriptor.value(i));
294    printer_->Print(",\n");
295  }
296  printer_->Outdent();
297  printer_->Print("],\n");
298  printer_->Print("options=$options_value$,\n",
299                  "options_value",
300                  OptionsValue("EnumOptions", CEscape(options_string)));
301  printer_->Outdent();
302  printer_->Print(")\n");
303  printer_->Print("\n");
304}
305
306// Recursively prints enums in nested types within descriptor, then
307// prints enums contained at the top level in descriptor.
308void Generator::PrintNestedEnums(const Descriptor& descriptor) const {
309  for (int i = 0; i < descriptor.nested_type_count(); ++i) {
310    PrintNestedEnums(*descriptor.nested_type(i));
311  }
312
313  for (int i = 0; i < descriptor.enum_type_count(); ++i) {
314    PrintEnum(*descriptor.enum_type(i));
315  }
316}
317
318void Generator::PrintTopLevelExtensions() const {
319  const bool is_extension = true;
320  for (int i = 0; i < file_->extension_count(); ++i) {
321    const FieldDescriptor& extension_field = *file_->extension(i);
322    string constant_name = extension_field.name() + "_FIELD_NUMBER";
323    UpperString(&constant_name);
324    printer_->Print("$constant_name$ = $number$\n",
325      "constant_name", constant_name,
326      "number", SimpleItoa(extension_field.number()));
327    printer_->Print("$name$ = ", "name", extension_field.name());
328    PrintFieldDescriptor(extension_field, is_extension);
329    printer_->Print("\n");
330  }
331  printer_->Print("\n");
332}
333
334// Prints Python equivalents of all Descriptors in |file|.
335void Generator::PrintMessageDescriptors() const {
336  for (int i = 0; i < file_->message_type_count(); ++i) {
337    PrintDescriptor(*file_->message_type(i));
338    printer_->Print("\n");
339  }
340}
341
342void Generator::PrintServices() const {
343  for (int i = 0; i < file_->service_count(); ++i) {
344    PrintServiceDescriptor(*file_->service(i));
345    PrintServiceClass(*file_->service(i));
346    PrintServiceStub(*file_->service(i));
347    printer_->Print("\n");
348  }
349}
350
351void Generator::PrintServiceDescriptor(
352    const ServiceDescriptor& descriptor) const {
353  printer_->Print("\n");
354  string service_name = ModuleLevelServiceDescriptorName(descriptor);
355  string options_string;
356  descriptor.options().SerializeToString(&options_string);
357
358  printer_->Print(
359      "$service_name$ = descriptor.ServiceDescriptor(\n",
360      "service_name", service_name);
361  printer_->Indent();
362  map<string, string> m;
363  m["name"] = descriptor.name();
364  m["full_name"] = descriptor.full_name();
365  m["index"] = SimpleItoa(descriptor.index());
366  m["options_value"] = OptionsValue("ServiceOptions", options_string);
367  const char required_function_arguments[] =
368      "name='$name$',\n"
369      "full_name='$full_name$',\n"
370      "index=$index$,\n"
371      "options=$options_value$,\n"
372      "methods=[\n";
373  printer_->Print(m, required_function_arguments);
374  for (int i = 0; i < descriptor.method_count(); ++i) {
375    const MethodDescriptor* method = descriptor.method(i);
376    string options_string;
377    method->options().SerializeToString(&options_string);
378
379    m.clear();
380    m["name"] = method->name();
381    m["full_name"] = method->full_name();
382    m["index"] = SimpleItoa(method->index());
383    m["serialized_options"] = CEscape(options_string);
384    m["input_type"] = ModuleLevelDescriptorName(*(method->input_type()));
385    m["output_type"] = ModuleLevelDescriptorName(*(method->output_type()));
386    m["options_value"] = OptionsValue("MethodOptions", options_string);
387    printer_->Print("descriptor.MethodDescriptor(\n");
388    printer_->Indent();
389    printer_->Print(
390        m,
391        "name='$name$',\n"
392        "full_name='$full_name$',\n"
393        "index=$index$,\n"
394        "containing_service=None,\n"
395        "input_type=$input_type$,\n"
396        "output_type=$output_type$,\n"
397        "options=$options_value$,\n");
398    printer_->Outdent();
399    printer_->Print("),\n");
400  }
401
402  printer_->Outdent();
403  printer_->Print("])\n\n");
404}
405
406void Generator::PrintServiceClass(const ServiceDescriptor& descriptor) const {
407  // Print the service.
408  printer_->Print("class $class_name$(service.Service):\n",
409                  "class_name", descriptor.name());
410  printer_->Indent();
411  printer_->Print(
412      "__metaclass__ = service_reflection.GeneratedServiceType\n"
413      "$descriptor_key$ = $descriptor_name$\n",
414      "descriptor_key", kDescriptorKey,
415      "descriptor_name", ModuleLevelServiceDescriptorName(descriptor));
416  printer_->Outdent();
417}
418
419void Generator::PrintServiceStub(const ServiceDescriptor& descriptor) const {
420  // Print the service stub.
421  printer_->Print("class $class_name$_Stub($class_name$):\n",
422                  "class_name", descriptor.name());
423  printer_->Indent();
424  printer_->Print(
425      "__metaclass__ = service_reflection.GeneratedServiceStubType\n"
426      "$descriptor_key$ = $descriptor_name$\n",
427      "descriptor_key", kDescriptorKey,
428      "descriptor_name", ModuleLevelServiceDescriptorName(descriptor));
429  printer_->Outdent();
430}
431
432// Prints statement assigning ModuleLevelDescriptorName(message_descriptor)
433// to a Python Descriptor object for message_descriptor.
434//
435// Mutually recursive with PrintNestedDescriptors().
436void Generator::PrintDescriptor(const Descriptor& message_descriptor) const {
437  PrintNestedDescriptors(message_descriptor);
438
439  printer_->Print("\n");
440  printer_->Print("$descriptor_name$ = descriptor.Descriptor(\n",
441                  "descriptor_name",
442                  ModuleLevelDescriptorName(message_descriptor));
443  printer_->Indent();
444  map<string, string> m;
445  m["name"] = message_descriptor.name();
446  m["full_name"] = message_descriptor.full_name();
447  m["filename"] = message_descriptor.file()->name();
448  const char required_function_arguments[] =
449      "name='$name$',\n"
450      "full_name='$full_name$',\n"
451      "filename='$filename$',\n"
452      "containing_type=None,\n";  // TODO(robinson): Implement containing_type.
453  printer_->Print(m, required_function_arguments);
454  PrintFieldsInDescriptor(message_descriptor);
455  PrintExtensionsInDescriptor(message_descriptor);
456  // TODO(robinson): implement printing of nested_types.
457  printer_->Print("nested_types=[],  # TODO(robinson): Implement.\n");
458  printer_->Print("enum_types=[\n");
459  printer_->Indent();
460  for (int i = 0; i < message_descriptor.enum_type_count(); ++i) {
461    const string descriptor_name = ModuleLevelDescriptorName(
462        *message_descriptor.enum_type(i));
463    printer_->Print(descriptor_name.c_str());
464    printer_->Print(",\n");
465  }
466  printer_->Outdent();
467  printer_->Print("],\n");
468  string options_string;
469  message_descriptor.options().SerializeToString(&options_string);
470  printer_->Print(
471      "options=$options_value$",
472      "options_value", OptionsValue("MessageOptions", options_string));
473  printer_->Outdent();
474  printer_->Print(")\n");
475}
476
477// Prints Python Descriptor objects for all nested types contained in
478// message_descriptor.
479//
480// Mutually recursive with PrintDescriptor().
481void Generator::PrintNestedDescriptors(
482    const Descriptor& containing_descriptor) const {
483  for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
484    PrintDescriptor(*containing_descriptor.nested_type(i));
485  }
486}
487
488// Prints all messages in |file|.
489void Generator::PrintMessages() const {
490  for (int i = 0; i < file_->message_type_count(); ++i) {
491    PrintMessage(*file_->message_type(i));
492    printer_->Print("\n");
493  }
494}
495
496// Prints a Python class for the given message descriptor.  We defer to the
497// metaclass to do almost all of the work of actually creating a useful class.
498// The purpose of this function and its many helper functions above is merely
499// to output a Python version of the descriptors, which the metaclass in
500// reflection.py will use to construct the meat of the class itself.
501//
502// Mutually recursive with PrintNestedMessages().
503void Generator::PrintMessage(
504    const Descriptor& message_descriptor) const {
505  printer_->Print("class $name$(message.Message):\n", "name",
506                  message_descriptor.name());
507  printer_->Indent();
508  printer_->Print("__metaclass__ = reflection.GeneratedProtocolMessageType\n");
509  PrintNestedMessages(message_descriptor);
510  map<string, string> m;
511  m["descriptor_key"] = kDescriptorKey;
512  m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
513  printer_->Print(m, "$descriptor_key$ = $descriptor_name$\n");
514  printer_->Outdent();
515}
516
517// Prints all nested messages within |containing_descriptor|.
518// Mutually recursive with PrintMessage().
519void Generator::PrintNestedMessages(
520    const Descriptor& containing_descriptor) const {
521  for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
522    printer_->Print("\n");
523    PrintMessage(*containing_descriptor.nested_type(i));
524  }
525}
526
527// Recursively fixes foreign fields in all nested types in |descriptor|, then
528// sets the message_type and enum_type of all message and enum fields to point
529// to their respective descriptors.
530void Generator::FixForeignFieldsInDescriptor(
531    const Descriptor& descriptor) const {
532  for (int i = 0; i < descriptor.nested_type_count(); ++i) {
533    FixForeignFieldsInDescriptor(*descriptor.nested_type(i));
534  }
535
536  for (int i = 0; i < descriptor.field_count(); ++i) {
537    const FieldDescriptor& field_descriptor = *descriptor.field(i);
538    FixForeignFieldsInField(&descriptor, field_descriptor, "fields_by_name");
539  }
540}
541
542// Sets any necessary message_type and enum_type attributes
543// for the Python version of |field|.
544//
545// containing_type may be NULL, in which case this is a module-level field.
546//
547// python_dict_name is the name of the Python dict where we should
548// look the field up in the containing type.  (e.g., fields_by_name
549// or extensions_by_name).  We ignore python_dict_name if containing_type
550// is NULL.
551void Generator::FixForeignFieldsInField(const Descriptor* containing_type,
552                                        const FieldDescriptor& field,
553                                        const string& python_dict_name) const {
554  const string field_referencing_expression = FieldReferencingExpression(
555      containing_type, field, python_dict_name);
556  map<string, string> m;
557  m["field_ref"] = field_referencing_expression;
558  const Descriptor* foreign_message_type = field.message_type();
559  if (foreign_message_type) {
560    m["foreign_type"] = ModuleLevelDescriptorName(*foreign_message_type);
561    printer_->Print(m, "$field_ref$.message_type = $foreign_type$\n");
562  }
563  const EnumDescriptor* enum_type = field.enum_type();
564  if (enum_type) {
565    m["enum_type"] = ModuleLevelDescriptorName(*enum_type);
566    printer_->Print(m, "$field_ref$.enum_type = $enum_type$\n");
567  }
568}
569
570// Returns the module-level expression for the given FieldDescriptor.
571// Only works for fields in the .proto file this Generator is generating for.
572//
573// containing_type may be NULL, in which case this is a module-level field.
574//
575// python_dict_name is the name of the Python dict where we should
576// look the field up in the containing type.  (e.g., fields_by_name
577// or extensions_by_name).  We ignore python_dict_name if containing_type
578// is NULL.
579string Generator::FieldReferencingExpression(
580    const Descriptor* containing_type,
581    const FieldDescriptor& field,
582    const string& python_dict_name) const {
583  // We should only ever be looking up fields in the current file.
584  // The only things we refer to from other files are message descriptors.
585  GOOGLE_CHECK_EQ(field.file(), file_) << field.file()->name() << " vs. "
586                                << file_->name();
587  if (!containing_type) {
588    return field.name();
589  }
590  return strings::Substitute(
591      "$0.$1['$2']",
592      ModuleLevelDescriptorName(*containing_type),
593      python_dict_name, field.name());
594}
595
596// Prints statements setting the message_type and enum_type fields in the
597// Python descriptor objects we've already output in ths file.  We must
598// do this in a separate step due to circular references (otherwise, we'd
599// just set everything in the initial assignment statements).
600void Generator::FixForeignFieldsInDescriptors() const {
601  for (int i = 0; i < file_->message_type_count(); ++i) {
602    FixForeignFieldsInDescriptor(*file_->message_type(i));
603  }
604  printer_->Print("\n");
605}
606
607// We need to not only set any necessary message_type fields, but
608// also need to call RegisterExtension() on each message we're
609// extending.
610void Generator::FixForeignFieldsInExtensions() const {
611  // Top-level extensions.
612  for (int i = 0; i < file_->extension_count(); ++i) {
613    FixForeignFieldsInExtension(*file_->extension(i));
614  }
615  // Nested extensions.
616  for (int i = 0; i < file_->message_type_count(); ++i) {
617    FixForeignFieldsInNestedExtensions(*file_->message_type(i));
618  }
619}
620
621void Generator::FixForeignFieldsInExtension(
622    const FieldDescriptor& extension_field) const {
623  GOOGLE_CHECK(extension_field.is_extension());
624  // extension_scope() will be NULL for top-level extensions, which is
625  // exactly what FixForeignFieldsInField() wants.
626  FixForeignFieldsInField(extension_field.extension_scope(), extension_field,
627                          "extensions_by_name");
628
629  map<string, string> m;
630  // Confusingly, for FieldDescriptors that happen to be extensions,
631  // containing_type() means "extended type."
632  // On the other hand, extension_scope() will give us what we normally
633  // mean by containing_type().
634  m["extended_message_class"] = ModuleLevelMessageName(
635      *extension_field.containing_type());
636  m["field"] = FieldReferencingExpression(extension_field.extension_scope(),
637                                          extension_field,
638                                          "extensions_by_name");
639  printer_->Print(m, "$extended_message_class$.RegisterExtension($field$)\n");
640}
641
642void Generator::FixForeignFieldsInNestedExtensions(
643    const Descriptor& descriptor) const {
644  // Recursively fix up extensions in all nested types.
645  for (int i = 0; i < descriptor.nested_type_count(); ++i) {
646    FixForeignFieldsInNestedExtensions(*descriptor.nested_type(i));
647  }
648  // Fix up extensions directly contained within this type.
649  for (int i = 0; i < descriptor.extension_count(); ++i) {
650    FixForeignFieldsInExtension(*descriptor.extension(i));
651  }
652}
653
654// Returns a Python expression that instantiates a Python EnumValueDescriptor
655// object for the given C++ descriptor.
656void Generator::PrintEnumValueDescriptor(
657    const EnumValueDescriptor& descriptor) const {
658  // TODO(robinson): Fix up EnumValueDescriptor "type" fields.
659  // More circular references.  ::sigh::
660  string options_string;
661  descriptor.options().SerializeToString(&options_string);
662  map<string, string> m;
663  m["name"] = descriptor.name();
664  m["index"] = SimpleItoa(descriptor.index());
665  m["number"] = SimpleItoa(descriptor.number());
666  m["options"] = OptionsValue("EnumValueOptions", options_string);
667  printer_->Print(
668      m,
669      "descriptor.EnumValueDescriptor(\n"
670      "  name='$name$', index=$index$, number=$number$,\n"
671      "  options=$options$,\n"
672      "  type=None)");
673}
674
675string Generator::OptionsValue(
676    const string& class_name, const string& serialized_options) const {
677  if (serialized_options.length() == 0 || GeneratingDescriptorProto()) {
678    return "None";
679  } else {
680    string full_class_name = "descriptor_pb2." + class_name;
681    return "descriptor._ParseOptions(" + full_class_name + "(), '"
682        + CEscape(serialized_options)+ "')";
683  }
684}
685
686// Prints an expression for a Python FieldDescriptor for |field|.
687void Generator::PrintFieldDescriptor(
688    const FieldDescriptor& field, bool is_extension) const {
689  string options_string;
690  field.options().SerializeToString(&options_string);
691  map<string, string> m;
692  m["name"] = field.name();
693  m["full_name"] = field.full_name();
694  m["index"] = SimpleItoa(field.index());
695  m["number"] = SimpleItoa(field.number());
696  m["type"] = SimpleItoa(field.type());
697  m["cpp_type"] = SimpleItoa(field.cpp_type());
698  m["label"] = SimpleItoa(field.label());
699  m["default_value"] = StringifyDefaultValue(field);
700  m["is_extension"] = is_extension ? "True" : "False";
701  m["options"] = OptionsValue("FieldOptions", options_string);
702  // We always set message_type and enum_type to None at this point, and then
703  // these fields in correctly after all referenced descriptors have been
704  // defined and/or imported (see FixForeignFieldsInDescriptors()).
705  const char field_descriptor_decl[] =
706      "descriptor.FieldDescriptor(\n"
707      "  name='$name$', full_name='$full_name$', index=$index$,\n"
708      "  number=$number$, type=$type$, cpp_type=$cpp_type$, label=$label$,\n"
709      "  default_value=$default_value$,\n"
710      "  message_type=None, enum_type=None, containing_type=None,\n"
711      "  is_extension=$is_extension$, extension_scope=None,\n"
712      "  options=$options$)";
713  printer_->Print(m, field_descriptor_decl);
714}
715
716// Helper for Print{Fields,Extensions}InDescriptor().
717void Generator::PrintFieldDescriptorsInDescriptor(
718    const Descriptor& message_descriptor,
719    bool is_extension,
720    const string& list_variable_name,
721    int (Descriptor::*CountFn)() const,
722    const FieldDescriptor* (Descriptor::*GetterFn)(int) const) const {
723  printer_->Print("$list$=[\n", "list", list_variable_name);
724  printer_->Indent();
725  for (int i = 0; i < (message_descriptor.*CountFn)(); ++i) {
726    PrintFieldDescriptor(*(message_descriptor.*GetterFn)(i),
727                         is_extension);
728    printer_->Print(",\n");
729  }
730  printer_->Outdent();
731  printer_->Print("],\n");
732}
733
734// Prints a statement assigning "fields" to a list of Python FieldDescriptors,
735// one for each field present in message_descriptor.
736void Generator::PrintFieldsInDescriptor(
737    const Descriptor& message_descriptor) const {
738  const bool is_extension = false;
739  PrintFieldDescriptorsInDescriptor(
740      message_descriptor, is_extension, "fields",
741      &Descriptor::field_count, &Descriptor::field);
742}
743
744// Prints a statement assigning "extensions" to a list of Python
745// FieldDescriptors, one for each extension present in message_descriptor.
746void Generator::PrintExtensionsInDescriptor(
747    const Descriptor& message_descriptor) const {
748  const bool is_extension = true;
749  PrintFieldDescriptorsInDescriptor(
750      message_descriptor, is_extension, "extensions",
751      &Descriptor::extension_count, &Descriptor::extension);
752}
753
754bool Generator::GeneratingDescriptorProto() const {
755  return file_->name() == "google/protobuf/descriptor.proto";
756}
757
758// Returns the unique Python module-level identifier given to a descriptor.
759// This name is module-qualified iff the given descriptor describes an
760// entity that doesn't come from the current file.
761template <typename DescriptorT>
762string Generator::ModuleLevelDescriptorName(
763    const DescriptorT& descriptor) const {
764  // FIXME(robinson):
765  // We currently don't worry about collisions with underscores in the type
766  // names, so these would collide in nasty ways if found in the same file:
767  //   OuterProto.ProtoA.ProtoB
768  //   OuterProto_ProtoA.ProtoB  # Underscore instead of period.
769  // As would these:
770  //   OuterProto.ProtoA_.ProtoB
771  //   OuterProto.ProtoA._ProtoB  # Leading vs. trailing underscore.
772  // (Contrived, but certainly possible).
773  //
774  // The C++ implementation doesn't guard against this either.  Leaving
775  // it for now...
776  string name = NamePrefixedWithNestedTypes(descriptor, "_");
777  UpperString(&name);
778  // Module-private for now.  Easy to make public later; almost impossible
779  // to make private later.
780  name = "_" + name;
781  // We now have the name relative to its own module.  Also qualify with
782  // the module name iff this descriptor is from a different .proto file.
783  if (descriptor.file() != file_) {
784    name = ModuleName(descriptor.file()->name()) + "." + name;
785  }
786  return name;
787}
788
789// Returns the name of the message class itself, not the descriptor.
790// Like ModuleLevelDescriptorName(), module-qualifies the name iff
791// the given descriptor describes an entity that doesn't come from
792// the current file.
793string Generator::ModuleLevelMessageName(const Descriptor& descriptor) const {
794  string name = NamePrefixedWithNestedTypes(descriptor, ".");
795  if (descriptor.file() != file_) {
796    name = ModuleName(descriptor.file()->name()) + "." + name;
797  }
798  return name;
799}
800
801// Returns the unique Python module-level identifier given to a service
802// descriptor.
803string Generator::ModuleLevelServiceDescriptorName(
804    const ServiceDescriptor& descriptor) const {
805  string name = descriptor.name();
806  UpperString(&name);
807  name = "_" + name;
808  if (descriptor.file() != file_) {
809    name = ModuleName(descriptor.file()->name()) + "." + name;
810  }
811  return name;
812}
813
814}  // namespace python
815}  // namespace compiler
816}  // namespace protobuf
817}  // namespace google
818