1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: robinson@google.com (Will Robinson)
32//
33// This module outputs pure-Python protocol message classes that will
34// largely be constructed at runtime via the metaclass in reflection.py.
35// In other words, our job is basically to output a Python equivalent
36// of the C++ *Descriptor objects, and fix up all circular references
37// within these objects.
38//
39// Note that the runtime performance of protocol message classes created in
40// this way is expected to be lousy.  The plan is to create an alternate
41// generator that outputs a Python/C extension module that lets
42// performance-minded Python code leverage the fast C++ implementation
43// directly.
44
45#include <limits>
46#include <map>
47#include <utility>
48#include <string>
49#include <vector>
50
51#include <google/protobuf/compiler/python/python_generator.h>
52#include <google/protobuf/descriptor.pb.h>
53
54#include <google/protobuf/stubs/common.h>
55#include <google/protobuf/stubs/stringprintf.h>
56#include <google/protobuf/io/printer.h>
57#include <google/protobuf/descriptor.h>
58#include <google/protobuf/io/zero_copy_stream.h>
59#include <google/protobuf/stubs/strutil.h>
60#include <google/protobuf/stubs/substitute.h>
61
62namespace google {
63namespace protobuf {
64namespace compiler {
65namespace python {
66
67namespace {
68
69// Returns a copy of |filename| with any trailing ".protodevel" or ".proto
70// suffix stripped.
71// TODO(robinson): Unify with copy in compiler/cpp/internal/helpers.cc.
72string StripProto(const string& filename) {
73  const char* suffix = HasSuffixString(filename, ".protodevel")
74      ? ".protodevel" : ".proto";
75  return StripSuffixString(filename, suffix);
76}
77
78
79// Returns the Python module name expected for a given .proto filename.
80string ModuleName(const string& filename) {
81  string basename = StripProto(filename);
82  StripString(&basename, "-", '_');
83  StripString(&basename, "/", '.');
84  return basename + "_pb2";
85}
86
87
88// Returns the name of all containing types for descriptor,
89// in order from outermost to innermost, followed by descriptor's
90// own name.  Each name is separated by |separator|.
91template <typename DescriptorT>
92string NamePrefixedWithNestedTypes(const DescriptorT& descriptor,
93                                   const string& separator) {
94  string name = descriptor.name();
95  for (const Descriptor* current = descriptor.containing_type();
96       current != NULL; current = current->containing_type()) {
97    name = current->name() + separator + name;
98  }
99  return name;
100}
101
102
// Class attribute under which each generated Python class stores its
// descriptor.Descriptor instance.  This generator also uses it as the name
// of the module-level FileDescriptor variable.
// Must stay consistent with the _DESCRIPTOR_KEY constant
// in proto2/public/reflection.py.
const char kDescriptorKey[] = "DESCRIPTOR";
108
109
110// Does the file have top-level enums?
111inline bool HasTopLevelEnums(const FileDescriptor *file) {
112  return file->enum_type_count() > 0;
113}
114
115
116// Should we generate generic services for this file?
117inline bool HasGenericServices(const FileDescriptor *file) {
118  return file->service_count() > 0 &&
119         file->options().py_generic_services();
120}
121
122
123// Prints the common boilerplate needed at the top of every .py
124// file output by this generator.
125void PrintTopBoilerplate(
126    io::Printer* printer, const FileDescriptor* file, bool descriptor_proto) {
127  // TODO(robinson): Allow parameterization of Python version?
128  printer->Print(
129      "# Generated by the protocol buffer compiler.  DO NOT EDIT!\n"
130      "# source: $filename$\n"
131      "\n",
132      "filename", file->name());
133  if (HasTopLevelEnums(file)) {
134    printer->Print(
135        "from google.protobuf.internal import enum_type_wrapper\n");
136  }
137  printer->Print(
138      "from google.protobuf import descriptor as _descriptor\n"
139      "from google.protobuf import message as _message\n"
140      "from google.protobuf import reflection as _reflection\n"
141      );
142  if (HasGenericServices(file)) {
143    printer->Print(
144        "from google.protobuf import service as _service\n"
145        "from google.protobuf import service_reflection\n");
146  }
147
148  // Avoid circular imports if this module is descriptor_pb2.
149  if (!descriptor_proto) {
150    printer->Print(
151        "from google.protobuf import descriptor_pb2\n");
152  }
153  printer->Print(
154    "# @@protoc_insertion_point(imports)\n");
155  printer->Print("\n\n");
156}
157
158
159// Returns a Python literal giving the default value for a field.
160// If the field specifies no explicit default value, we'll return
161// the default default value for the field type (zero for numbers,
162// empty string for strings, empty list for repeated fields, and
163// None for non-repeated, composite fields).
164//
165// TODO(robinson): Unify with code from
166// //compiler/cpp/internal/primitive_field.cc
167// //compiler/cpp/internal/enum_field.cc
168// //compiler/cpp/internal/string_field.cc
169string StringifyDefaultValue(const FieldDescriptor& field) {
170  if (field.is_repeated()) {
171    return "[]";
172  }
173
174  switch (field.cpp_type()) {
175    case FieldDescriptor::CPPTYPE_INT32:
176      return SimpleItoa(field.default_value_int32());
177    case FieldDescriptor::CPPTYPE_UINT32:
178      return SimpleItoa(field.default_value_uint32());
179    case FieldDescriptor::CPPTYPE_INT64:
180      return SimpleItoa(field.default_value_int64());
181    case FieldDescriptor::CPPTYPE_UINT64:
182      return SimpleItoa(field.default_value_uint64());
183    case FieldDescriptor::CPPTYPE_DOUBLE: {
184      double value = field.default_value_double();
185      if (value == numeric_limits<double>::infinity()) {
186        // Python pre-2.6 on Windows does not parse "inf" correctly.  However,
187        // a numeric literal that is too big for a double will become infinity.
188        return "1e10000";
189      } else if (value == -numeric_limits<double>::infinity()) {
190        // See above.
191        return "-1e10000";
192      } else if (value != value) {
193        // infinity * 0 = nan
194        return "(1e10000 * 0)";
195      } else {
196        return SimpleDtoa(value);
197      }
198    }
199    case FieldDescriptor::CPPTYPE_FLOAT: {
200      float value = field.default_value_float();
201      if (value == numeric_limits<float>::infinity()) {
202        // Python pre-2.6 on Windows does not parse "inf" correctly.  However,
203        // a numeric literal that is too big for a double will become infinity.
204        return "1e10000";
205      } else if (value == -numeric_limits<float>::infinity()) {
206        // See above.
207        return "-1e10000";
208      } else if (value != value) {
209        // infinity - infinity = nan
210        return "(1e10000 * 0)";
211      } else {
212        return SimpleFtoa(value);
213      }
214    }
215    case FieldDescriptor::CPPTYPE_BOOL:
216      return field.default_value_bool() ? "True" : "False";
217    case FieldDescriptor::CPPTYPE_ENUM:
218      return SimpleItoa(field.default_value_enum()->number());
219    case FieldDescriptor::CPPTYPE_STRING:
220      if (field.type() == FieldDescriptor::TYPE_STRING) {
221        return "unicode(\"" + CEscape(field.default_value_string()) +
222                        "\", \"utf-8\")";
223      } else {
224        return "\"" + CEscape(field.default_value_string()) + "\"";
225      }
226      case FieldDescriptor::CPPTYPE_MESSAGE:
227          return "None";
228  }
229  // (We could add a default case above but then we wouldn't get the nice
230  // compiler warning when a new type is added.)
231  GOOGLE_LOG(FATAL) << "Not reached.";
232  return "";
233}
234
235
236
237}  // namespace
238
239
240Generator::Generator() : file_(NULL) {
241}
242
243Generator::~Generator() {
244}
245
246bool Generator::Generate(const FileDescriptor* file,
247                         const string& parameter,
248                         GeneratorContext* context,
249                         string* error) const {
250
251  // Completely serialize all Generate() calls on this instance.  The
252  // thread-safety constraints of the CodeGenerator interface aren't clear so
253  // just be as conservative as possible.  It's easier to relax this later if
254  // we need to, but I doubt it will be an issue.
255  // TODO(kenton):  The proper thing to do would be to allocate any state on
256  //   the stack and use that, so that the Generator class itself does not need
257  //   to have any mutable members.  Then it is implicitly thread-safe.
258  MutexLock lock(&mutex_);
259  file_ = file;
260  string module_name = ModuleName(file->name());
261  string filename = module_name;
262  StripString(&filename, ".", '/');
263  filename += ".py";
264
265  FileDescriptorProto fdp;
266  file_->CopyTo(&fdp);
267  fdp.SerializeToString(&file_descriptor_serialized_);
268
269
270  scoped_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
271  GOOGLE_CHECK(output.get());
272  io::Printer printer(output.get(), '$');
273  printer_ = &printer;
274
275  PrintTopBoilerplate(printer_, file_, GeneratingDescriptorProto());
276  PrintImports();
277  PrintFileDescriptor();
278  PrintTopLevelEnums();
279  PrintTopLevelExtensions();
280  PrintAllNestedEnumsInFile();
281  PrintMessageDescriptors();
282  FixForeignFieldsInDescriptors();
283  PrintMessages();
284  // We have to fix up the extensions after the message classes themselves,
285  // since they need to call static RegisterExtension() methods on these
286  // classes.
287  FixForeignFieldsInExtensions();
288  // Descriptor options may have custom extensions. These custom options
289  // can only be successfully parsed after we register corresponding
290  // extensions. Therefore we parse all options again here to recognize
291  // custom options that may be unknown when we define the descriptors.
292  FixAllDescriptorOptions();
293  if (HasGenericServices(file)) {
294    PrintServices();
295  }
296
297  printer.Print(
298    "# @@protoc_insertion_point(module_scope)\n");
299
300  return !printer.failed();
301}
302
303// Prints Python imports for all modules imported by |file|.
304void Generator::PrintImports() const {
305  for (int i = 0; i < file_->dependency_count(); ++i) {
306    string module_name = ModuleName(file_->dependency(i)->name());
307    printer_->Print("import $module$\n", "module",
308                    module_name);
309  }
310  printer_->Print("\n");
311
312  // Print public imports.
313  for (int i = 0; i < file_->public_dependency_count(); ++i) {
314    string module_name = ModuleName(file_->public_dependency(i)->name());
315    printer_->Print("from $module$ import *\n", "module", module_name);
316  }
317  printer_->Print("\n");
318}
319
320// Prints the single file descriptor for this file.
321void Generator::PrintFileDescriptor() const {
322  map<string, string> m;
323  m["descriptor_name"] = kDescriptorKey;
324  m["name"] = file_->name();
325  m["package"] = file_->package();
326  const char file_descriptor_template[] =
327      "$descriptor_name$ = _descriptor.FileDescriptor(\n"
328      "  name='$name$',\n"
329      "  package='$package$',\n";
330  printer_->Print(m, file_descriptor_template);
331  printer_->Indent();
332  printer_->Print(
333      "serialized_pb='$value$'",
334      "value", strings::CHexEscape(file_descriptor_serialized_));
335
336  // TODO(falk): Also print options and fix the message_type, enum_type,
337  //             service and extension later in the generation.
338
339  printer_->Outdent();
340  printer_->Print(")\n");
341  printer_->Print("\n");
342}
343
344// Prints descriptors and module-level constants for all top-level
345// enums defined in |file|.
346void Generator::PrintTopLevelEnums() const {
347  vector<pair<string, int> > top_level_enum_values;
348  for (int i = 0; i < file_->enum_type_count(); ++i) {
349    const EnumDescriptor& enum_descriptor = *file_->enum_type(i);
350    PrintEnum(enum_descriptor);
351    printer_->Print("$name$ = "
352                    "enum_type_wrapper.EnumTypeWrapper($descriptor_name$)",
353                    "name", enum_descriptor.name(),
354                    "descriptor_name",
355                    ModuleLevelDescriptorName(enum_descriptor));
356    printer_->Print("\n");
357
358    for (int j = 0; j < enum_descriptor.value_count(); ++j) {
359      const EnumValueDescriptor& value_descriptor = *enum_descriptor.value(j);
360      top_level_enum_values.push_back(
361          make_pair(value_descriptor.name(), value_descriptor.number()));
362    }
363  }
364
365  for (int i = 0; i < top_level_enum_values.size(); ++i) {
366    printer_->Print("$name$ = $value$\n",
367                    "name", top_level_enum_values[i].first,
368                    "value", SimpleItoa(top_level_enum_values[i].second));
369  }
370  printer_->Print("\n");
371}
372
373// Prints all enums contained in all message types in |file|.
374void Generator::PrintAllNestedEnumsInFile() const {
375  for (int i = 0; i < file_->message_type_count(); ++i) {
376    PrintNestedEnums(*file_->message_type(i));
377  }
378}
379
380// Prints a Python statement assigning the appropriate module-level
381// enum name to a Python EnumDescriptor object equivalent to
382// enum_descriptor.
383void Generator::PrintEnum(const EnumDescriptor& enum_descriptor) const {
384  map<string, string> m;
385  m["descriptor_name"] = ModuleLevelDescriptorName(enum_descriptor);
386  m["name"] = enum_descriptor.name();
387  m["full_name"] = enum_descriptor.full_name();
388  m["file"] = kDescriptorKey;
389  const char enum_descriptor_template[] =
390      "$descriptor_name$ = _descriptor.EnumDescriptor(\n"
391      "  name='$name$',\n"
392      "  full_name='$full_name$',\n"
393      "  filename=None,\n"
394      "  file=$file$,\n"
395      "  values=[\n";
396  string options_string;
397  enum_descriptor.options().SerializeToString(&options_string);
398  printer_->Print(m, enum_descriptor_template);
399  printer_->Indent();
400  printer_->Indent();
401  for (int i = 0; i < enum_descriptor.value_count(); ++i) {
402    PrintEnumValueDescriptor(*enum_descriptor.value(i));
403    printer_->Print(",\n");
404  }
405  printer_->Outdent();
406  printer_->Print("],\n");
407  printer_->Print("containing_type=None,\n");
408  printer_->Print("options=$options_value$,\n",
409                  "options_value",
410                  OptionsValue("EnumOptions", options_string));
411  EnumDescriptorProto edp;
412  PrintSerializedPbInterval(enum_descriptor, edp);
413  printer_->Outdent();
414  printer_->Print(")\n");
415  printer_->Print("\n");
416}
417
418// Recursively prints enums in nested types within descriptor, then
419// prints enums contained at the top level in descriptor.
420void Generator::PrintNestedEnums(const Descriptor& descriptor) const {
421  for (int i = 0; i < descriptor.nested_type_count(); ++i) {
422    PrintNestedEnums(*descriptor.nested_type(i));
423  }
424
425  for (int i = 0; i < descriptor.enum_type_count(); ++i) {
426    PrintEnum(*descriptor.enum_type(i));
427  }
428}
429
430void Generator::PrintTopLevelExtensions() const {
431  const bool is_extension = true;
432  for (int i = 0; i < file_->extension_count(); ++i) {
433    const FieldDescriptor& extension_field = *file_->extension(i);
434    string constant_name = extension_field.name() + "_FIELD_NUMBER";
435    UpperString(&constant_name);
436    printer_->Print("$constant_name$ = $number$\n",
437      "constant_name", constant_name,
438      "number", SimpleItoa(extension_field.number()));
439    printer_->Print("$name$ = ", "name", extension_field.name());
440    PrintFieldDescriptor(extension_field, is_extension);
441    printer_->Print("\n");
442  }
443  printer_->Print("\n");
444}
445
446// Prints Python equivalents of all Descriptors in |file|.
447void Generator::PrintMessageDescriptors() const {
448  for (int i = 0; i < file_->message_type_count(); ++i) {
449    PrintDescriptor(*file_->message_type(i));
450    printer_->Print("\n");
451  }
452}
453
454void Generator::PrintServices() const {
455  for (int i = 0; i < file_->service_count(); ++i) {
456    PrintServiceDescriptor(*file_->service(i));
457    PrintServiceClass(*file_->service(i));
458    PrintServiceStub(*file_->service(i));
459    printer_->Print("\n");
460  }
461}
462
463void Generator::PrintServiceDescriptor(
464    const ServiceDescriptor& descriptor) const {
465  printer_->Print("\n");
466  string service_name = ModuleLevelServiceDescriptorName(descriptor);
467  string options_string;
468  descriptor.options().SerializeToString(&options_string);
469
470  printer_->Print(
471      "$service_name$ = _descriptor.ServiceDescriptor(\n",
472      "service_name", service_name);
473  printer_->Indent();
474  map<string, string> m;
475  m["name"] = descriptor.name();
476  m["full_name"] = descriptor.full_name();
477  m["file"] = kDescriptorKey;
478  m["index"] = SimpleItoa(descriptor.index());
479  m["options_value"] = OptionsValue("ServiceOptions", options_string);
480  const char required_function_arguments[] =
481      "name='$name$',\n"
482      "full_name='$full_name$',\n"
483      "file=$file$,\n"
484      "index=$index$,\n"
485      "options=$options_value$,\n";
486  printer_->Print(m, required_function_arguments);
487
488  ServiceDescriptorProto sdp;
489  PrintSerializedPbInterval(descriptor, sdp);
490
491  printer_->Print("methods=[\n");
492  for (int i = 0; i < descriptor.method_count(); ++i) {
493    const MethodDescriptor* method = descriptor.method(i);
494    method->options().SerializeToString(&options_string);
495
496    m.clear();
497    m["name"] = method->name();
498    m["full_name"] = method->full_name();
499    m["index"] = SimpleItoa(method->index());
500    m["serialized_options"] = CEscape(options_string);
501    m["input_type"] = ModuleLevelDescriptorName(*(method->input_type()));
502    m["output_type"] = ModuleLevelDescriptorName(*(method->output_type()));
503    m["options_value"] = OptionsValue("MethodOptions", options_string);
504    printer_->Print("_descriptor.MethodDescriptor(\n");
505    printer_->Indent();
506    printer_->Print(
507        m,
508        "name='$name$',\n"
509        "full_name='$full_name$',\n"
510        "index=$index$,\n"
511        "containing_service=None,\n"
512        "input_type=$input_type$,\n"
513        "output_type=$output_type$,\n"
514        "options=$options_value$,\n");
515    printer_->Outdent();
516    printer_->Print("),\n");
517  }
518
519  printer_->Outdent();
520  printer_->Print("])\n\n");
521}
522
523void Generator::PrintServiceClass(const ServiceDescriptor& descriptor) const {
524  // Print the service.
525  printer_->Print("class $class_name$(_service.Service):\n",
526                  "class_name", descriptor.name());
527  printer_->Indent();
528  printer_->Print(
529      "__metaclass__ = service_reflection.GeneratedServiceType\n"
530      "$descriptor_key$ = $descriptor_name$\n",
531      "descriptor_key", kDescriptorKey,
532      "descriptor_name", ModuleLevelServiceDescriptorName(descriptor));
533  printer_->Outdent();
534}
535
536void Generator::PrintServiceStub(const ServiceDescriptor& descriptor) const {
537  // Print the service stub.
538  printer_->Print("class $class_name$_Stub($class_name$):\n",
539                  "class_name", descriptor.name());
540  printer_->Indent();
541  printer_->Print(
542      "__metaclass__ = service_reflection.GeneratedServiceStubType\n"
543      "$descriptor_key$ = $descriptor_name$\n",
544      "descriptor_key", kDescriptorKey,
545      "descriptor_name", ModuleLevelServiceDescriptorName(descriptor));
546  printer_->Outdent();
547}
548
549// Prints statement assigning ModuleLevelDescriptorName(message_descriptor)
550// to a Python Descriptor object for message_descriptor.
551//
552// Mutually recursive with PrintNestedDescriptors().
553void Generator::PrintDescriptor(const Descriptor& message_descriptor) const {
554  PrintNestedDescriptors(message_descriptor);
555
556  printer_->Print("\n");
557  printer_->Print("$descriptor_name$ = _descriptor.Descriptor(\n",
558                  "descriptor_name",
559                  ModuleLevelDescriptorName(message_descriptor));
560  printer_->Indent();
561  map<string, string> m;
562  m["name"] = message_descriptor.name();
563  m["full_name"] = message_descriptor.full_name();
564  m["file"] = kDescriptorKey;
565  const char required_function_arguments[] =
566      "name='$name$',\n"
567      "full_name='$full_name$',\n"
568      "filename=None,\n"
569      "file=$file$,\n"
570      "containing_type=None,\n";
571  printer_->Print(m, required_function_arguments);
572  PrintFieldsInDescriptor(message_descriptor);
573  PrintExtensionsInDescriptor(message_descriptor);
574
575  // Nested types
576  printer_->Print("nested_types=[");
577  for (int i = 0; i < message_descriptor.nested_type_count(); ++i) {
578    const string nested_name = ModuleLevelDescriptorName(
579        *message_descriptor.nested_type(i));
580    printer_->Print("$name$, ", "name", nested_name);
581  }
582  printer_->Print("],\n");
583
584  // Enum types
585  printer_->Print("enum_types=[\n");
586  printer_->Indent();
587  for (int i = 0; i < message_descriptor.enum_type_count(); ++i) {
588    const string descriptor_name = ModuleLevelDescriptorName(
589        *message_descriptor.enum_type(i));
590    printer_->Print(descriptor_name.c_str());
591    printer_->Print(",\n");
592  }
593  printer_->Outdent();
594  printer_->Print("],\n");
595  string options_string;
596  message_descriptor.options().SerializeToString(&options_string);
597  printer_->Print(
598      "options=$options_value$,\n"
599      "is_extendable=$extendable$",
600      "options_value", OptionsValue("MessageOptions", options_string),
601      "extendable", message_descriptor.extension_range_count() > 0 ?
602                      "True" : "False");
603  printer_->Print(",\n");
604
605  // Extension ranges
606  printer_->Print("extension_ranges=[");
607  for (int i = 0; i < message_descriptor.extension_range_count(); ++i) {
608    const Descriptor::ExtensionRange* range =
609        message_descriptor.extension_range(i);
610    printer_->Print("($start$, $end$), ",
611                    "start", SimpleItoa(range->start),
612                    "end", SimpleItoa(range->end));
613  }
614  printer_->Print("],\n");
615
616  // Serialization of proto
617  DescriptorProto edp;
618  PrintSerializedPbInterval(message_descriptor, edp);
619
620  printer_->Outdent();
621  printer_->Print(")\n");
622}
623
624// Prints Python Descriptor objects for all nested types contained in
625// message_descriptor.
626//
627// Mutually recursive with PrintDescriptor().
628void Generator::PrintNestedDescriptors(
629    const Descriptor& containing_descriptor) const {
630  for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
631    PrintDescriptor(*containing_descriptor.nested_type(i));
632  }
633}
634
635// Prints all messages in |file|.
636void Generator::PrintMessages() const {
637  for (int i = 0; i < file_->message_type_count(); ++i) {
638    PrintMessage(*file_->message_type(i));
639    printer_->Print("\n");
640  }
641}
642
643// Prints a Python class for the given message descriptor.  We defer to the
644// metaclass to do almost all of the work of actually creating a useful class.
645// The purpose of this function and its many helper functions above is merely
646// to output a Python version of the descriptors, which the metaclass in
647// reflection.py will use to construct the meat of the class itself.
648//
649// Mutually recursive with PrintNestedMessages().
650void Generator::PrintMessage(
651    const Descriptor& message_descriptor) const {
652  printer_->Print("class $name$(_message.Message):\n", "name",
653                  message_descriptor.name());
654  printer_->Indent();
655  printer_->Print("__metaclass__ = _reflection.GeneratedProtocolMessageType\n");
656  PrintNestedMessages(message_descriptor);
657  map<string, string> m;
658  m["descriptor_key"] = kDescriptorKey;
659  m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
660  printer_->Print(m, "$descriptor_key$ = $descriptor_name$\n");
661
662  printer_->Print(
663    "\n"
664    "# @@protoc_insertion_point(class_scope:$full_name$)\n",
665    "full_name", message_descriptor.full_name());
666
667  printer_->Outdent();
668}
669
670// Prints all nested messages within |containing_descriptor|.
671// Mutually recursive with PrintMessage().
672void Generator::PrintNestedMessages(
673    const Descriptor& containing_descriptor) const {
674  for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
675    printer_->Print("\n");
676    PrintMessage(*containing_descriptor.nested_type(i));
677  }
678}
679
680// Recursively fixes foreign fields in all nested types in |descriptor|, then
681// sets the message_type and enum_type of all message and enum fields to point
682// to their respective descriptors.
683// Args:
684//   descriptor: descriptor to print fields for.
685//   containing_descriptor: if descriptor is a nested type, this is its
686//       containing type, or NULL if this is a root/top-level type.
687void Generator::FixForeignFieldsInDescriptor(
688    const Descriptor& descriptor,
689    const Descriptor* containing_descriptor) const {
690  for (int i = 0; i < descriptor.nested_type_count(); ++i) {
691    FixForeignFieldsInDescriptor(*descriptor.nested_type(i), &descriptor);
692  }
693
694  for (int i = 0; i < descriptor.field_count(); ++i) {
695    const FieldDescriptor& field_descriptor = *descriptor.field(i);
696    FixForeignFieldsInField(&descriptor, field_descriptor, "fields_by_name");
697  }
698
699  FixContainingTypeInDescriptor(descriptor, containing_descriptor);
700  for (int i = 0; i < descriptor.enum_type_count(); ++i) {
701    const EnumDescriptor& enum_descriptor = *descriptor.enum_type(i);
702    FixContainingTypeInDescriptor(enum_descriptor, &descriptor);
703  }
704}
705
706void Generator::AddMessageToFileDescriptor(const Descriptor& descriptor) const {
707  map<string, string> m;
708  m["descriptor_name"] = kDescriptorKey;
709  m["message_name"] = descriptor.name();
710  m["message_descriptor_name"] = ModuleLevelDescriptorName(descriptor);
711  const char file_descriptor_template[] =
712      "$descriptor_name$.message_types_by_name['$message_name$'] = "
713      "$message_descriptor_name$\n";
714  printer_->Print(m, file_descriptor_template);
715}
716
717// Sets any necessary message_type and enum_type attributes
718// for the Python version of |field|.
719//
720// containing_type may be NULL, in which case this is a module-level field.
721//
722// python_dict_name is the name of the Python dict where we should
723// look the field up in the containing type.  (e.g., fields_by_name
724// or extensions_by_name).  We ignore python_dict_name if containing_type
725// is NULL.
726void Generator::FixForeignFieldsInField(const Descriptor* containing_type,
727                                        const FieldDescriptor& field,
728                                        const string& python_dict_name) const {
729  const string field_referencing_expression = FieldReferencingExpression(
730      containing_type, field, python_dict_name);
731  map<string, string> m;
732  m["field_ref"] = field_referencing_expression;
733  const Descriptor* foreign_message_type = field.message_type();
734  if (foreign_message_type) {
735    m["foreign_type"] = ModuleLevelDescriptorName(*foreign_message_type);
736    printer_->Print(m, "$field_ref$.message_type = $foreign_type$\n");
737  }
738  const EnumDescriptor* enum_type = field.enum_type();
739  if (enum_type) {
740    m["enum_type"] = ModuleLevelDescriptorName(*enum_type);
741    printer_->Print(m, "$field_ref$.enum_type = $enum_type$\n");
742  }
743}
744
745// Returns the module-level expression for the given FieldDescriptor.
746// Only works for fields in the .proto file this Generator is generating for.
747//
748// containing_type may be NULL, in which case this is a module-level field.
749//
750// python_dict_name is the name of the Python dict where we should
751// look the field up in the containing type.  (e.g., fields_by_name
752// or extensions_by_name).  We ignore python_dict_name if containing_type
753// is NULL.
754string Generator::FieldReferencingExpression(
755    const Descriptor* containing_type,
756    const FieldDescriptor& field,
757    const string& python_dict_name) const {
758  // We should only ever be looking up fields in the current file.
759  // The only things we refer to from other files are message descriptors.
760  GOOGLE_CHECK_EQ(field.file(), file_) << field.file()->name() << " vs. "
761                                << file_->name();
762  if (!containing_type) {
763    return field.name();
764  }
765  return strings::Substitute(
766      "$0.$1['$2']",
767      ModuleLevelDescriptorName(*containing_type),
768      python_dict_name, field.name());
769}
770
771// Prints containing_type for nested descriptors or enum descriptors.
772template <typename DescriptorT>
773void Generator::FixContainingTypeInDescriptor(
774    const DescriptorT& descriptor,
775    const Descriptor* containing_descriptor) const {
776  if (containing_descriptor != NULL) {
777    const string nested_name = ModuleLevelDescriptorName(descriptor);
778    const string parent_name = ModuleLevelDescriptorName(
779        *containing_descriptor);
780    printer_->Print(
781        "$nested_name$.containing_type = $parent_name$;\n",
782        "nested_name", nested_name,
783        "parent_name", parent_name);
784  }
785}
786
787// Prints statements setting the message_type and enum_type fields in the
788// Python descriptor objects we've already output in ths file.  We must
789// do this in a separate step due to circular references (otherwise, we'd
790// just set everything in the initial assignment statements).
791void Generator::FixForeignFieldsInDescriptors() const {
792  for (int i = 0; i < file_->message_type_count(); ++i) {
793    FixForeignFieldsInDescriptor(*file_->message_type(i), NULL);
794  }
795  for (int i = 0; i < file_->message_type_count(); ++i) {
796    AddMessageToFileDescriptor(*file_->message_type(i));
797  }
798  printer_->Print("\n");
799}
800
801// We need to not only set any necessary message_type fields, but
802// also need to call RegisterExtension() on each message we're
803// extending.
804void Generator::FixForeignFieldsInExtensions() const {
805  // Top-level extensions.
806  for (int i = 0; i < file_->extension_count(); ++i) {
807    FixForeignFieldsInExtension(*file_->extension(i));
808  }
809  // Nested extensions.
810  for (int i = 0; i < file_->message_type_count(); ++i) {
811    FixForeignFieldsInNestedExtensions(*file_->message_type(i));
812  }
813  printer_->Print("\n");
814}
815
816void Generator::FixForeignFieldsInExtension(
817    const FieldDescriptor& extension_field) const {
818  GOOGLE_CHECK(extension_field.is_extension());
819  // extension_scope() will be NULL for top-level extensions, which is
820  // exactly what FixForeignFieldsInField() wants.
821  FixForeignFieldsInField(extension_field.extension_scope(), extension_field,
822                          "extensions_by_name");
823
824  map<string, string> m;
825  // Confusingly, for FieldDescriptors that happen to be extensions,
826  // containing_type() means "extended type."
827  // On the other hand, extension_scope() will give us what we normally
828  // mean by containing_type().
829  m["extended_message_class"] = ModuleLevelMessageName(
830      *extension_field.containing_type());
831  m["field"] = FieldReferencingExpression(extension_field.extension_scope(),
832                                          extension_field,
833                                          "extensions_by_name");
834  printer_->Print(m, "$extended_message_class$.RegisterExtension($field$)\n");
835}
836
837void Generator::FixForeignFieldsInNestedExtensions(
838    const Descriptor& descriptor) const {
839  // Recursively fix up extensions in all nested types.
840  for (int i = 0; i < descriptor.nested_type_count(); ++i) {
841    FixForeignFieldsInNestedExtensions(*descriptor.nested_type(i));
842  }
843  // Fix up extensions directly contained within this type.
844  for (int i = 0; i < descriptor.extension_count(); ++i) {
845    FixForeignFieldsInExtension(*descriptor.extension(i));
846  }
847}
848
849// Returns a Python expression that instantiates a Python EnumValueDescriptor
850// object for the given C++ descriptor.
851void Generator::PrintEnumValueDescriptor(
852    const EnumValueDescriptor& descriptor) const {
853  // TODO(robinson): Fix up EnumValueDescriptor "type" fields.
854  // More circular references.  ::sigh::
855  string options_string;
856  descriptor.options().SerializeToString(&options_string);
857  map<string, string> m;
858  m["name"] = descriptor.name();
859  m["index"] = SimpleItoa(descriptor.index());
860  m["number"] = SimpleItoa(descriptor.number());
861  m["options"] = OptionsValue("EnumValueOptions", options_string);
862  printer_->Print(
863      m,
864      "_descriptor.EnumValueDescriptor(\n"
865      "  name='$name$', index=$index$, number=$number$,\n"
866      "  options=$options$,\n"
867      "  type=None)");
868}
869
870// Returns a Python expression that calls descriptor._ParseOptions using
871// the given descriptor class name and serialized options protobuf string.
872string Generator::OptionsValue(
873    const string& class_name, const string& serialized_options) const {
874  if (serialized_options.length() == 0 || GeneratingDescriptorProto()) {
875    return "None";
876  } else {
877    string full_class_name = "descriptor_pb2." + class_name;
878    return "_descriptor._ParseOptions(" + full_class_name + "(), '"
879        + CEscape(serialized_options)+ "')";
880  }
881}
882
883// Prints an expression for a Python FieldDescriptor for |field|.
884void Generator::PrintFieldDescriptor(
885    const FieldDescriptor& field, bool is_extension) const {
886  string options_string;
887  field.options().SerializeToString(&options_string);
888  map<string, string> m;
889  m["name"] = field.name();
890  m["full_name"] = field.full_name();
891  m["index"] = SimpleItoa(field.index());
892  m["number"] = SimpleItoa(field.number());
893  m["type"] = SimpleItoa(field.type());
894  m["cpp_type"] = SimpleItoa(field.cpp_type());
895  m["label"] = SimpleItoa(field.label());
896  m["has_default_value"] = field.has_default_value() ? "True" : "False";
897  m["default_value"] = StringifyDefaultValue(field);
898  m["is_extension"] = is_extension ? "True" : "False";
899  m["options"] = OptionsValue("FieldOptions", options_string);
900  // We always set message_type and enum_type to None at this point, and then
901  // these fields in correctly after all referenced descriptors have been
902  // defined and/or imported (see FixForeignFieldsInDescriptors()).
903  const char field_descriptor_decl[] =
904    "_descriptor.FieldDescriptor(\n"
905    "  name='$name$', full_name='$full_name$', index=$index$,\n"
906    "  number=$number$, type=$type$, cpp_type=$cpp_type$, label=$label$,\n"
907    "  has_default_value=$has_default_value$, default_value=$default_value$,\n"
908    "  message_type=None, enum_type=None, containing_type=None,\n"
909    "  is_extension=$is_extension$, extension_scope=None,\n"
910    "  options=$options$)";
911  printer_->Print(m, field_descriptor_decl);
912}
913
914// Helper for Print{Fields,Extensions}InDescriptor().
915void Generator::PrintFieldDescriptorsInDescriptor(
916    const Descriptor& message_descriptor,
917    bool is_extension,
918    const string& list_variable_name,
919    int (Descriptor::*CountFn)() const,
920    const FieldDescriptor* (Descriptor::*GetterFn)(int) const) const {
921  printer_->Print("$list$=[\n", "list", list_variable_name);
922  printer_->Indent();
923  for (int i = 0; i < (message_descriptor.*CountFn)(); ++i) {
924    PrintFieldDescriptor(*(message_descriptor.*GetterFn)(i),
925                         is_extension);
926    printer_->Print(",\n");
927  }
928  printer_->Outdent();
929  printer_->Print("],\n");
930}
931
932// Prints a statement assigning "fields" to a list of Python FieldDescriptors,
933// one for each field present in message_descriptor.
934void Generator::PrintFieldsInDescriptor(
935    const Descriptor& message_descriptor) const {
936  const bool is_extension = false;
937  PrintFieldDescriptorsInDescriptor(
938      message_descriptor, is_extension, "fields",
939      &Descriptor::field_count, &Descriptor::field);
940}
941
942// Prints a statement assigning "extensions" to a list of Python
943// FieldDescriptors, one for each extension present in message_descriptor.
944void Generator::PrintExtensionsInDescriptor(
945    const Descriptor& message_descriptor) const {
946  const bool is_extension = true;
947  PrintFieldDescriptorsInDescriptor(
948      message_descriptor, is_extension, "extensions",
949      &Descriptor::extension_count, &Descriptor::extension);
950}
951
952bool Generator::GeneratingDescriptorProto() const {
953  return file_->name() == "google/protobuf/descriptor.proto";
954}
955
956// Returns the unique Python module-level identifier given to a descriptor.
957// This name is module-qualified iff the given descriptor describes an
958// entity that doesn't come from the current file.
959template <typename DescriptorT>
960string Generator::ModuleLevelDescriptorName(
961    const DescriptorT& descriptor) const {
962  // FIXME(robinson):
963  // We currently don't worry about collisions with underscores in the type
964  // names, so these would collide in nasty ways if found in the same file:
965  //   OuterProto.ProtoA.ProtoB
966  //   OuterProto_ProtoA.ProtoB  # Underscore instead of period.
967  // As would these:
968  //   OuterProto.ProtoA_.ProtoB
969  //   OuterProto.ProtoA._ProtoB  # Leading vs. trailing underscore.
970  // (Contrived, but certainly possible).
971  //
972  // The C++ implementation doesn't guard against this either.  Leaving
973  // it for now...
974  string name = NamePrefixedWithNestedTypes(descriptor, "_");
975  UpperString(&name);
976  // Module-private for now.  Easy to make public later; almost impossible
977  // to make private later.
978  name = "_" + name;
979  // We now have the name relative to its own module.  Also qualify with
980  // the module name iff this descriptor is from a different .proto file.
981  if (descriptor.file() != file_) {
982    name = ModuleName(descriptor.file()->name()) + "." + name;
983  }
984  return name;
985}
986
987// Returns the name of the message class itself, not the descriptor.
988// Like ModuleLevelDescriptorName(), module-qualifies the name iff
989// the given descriptor describes an entity that doesn't come from
990// the current file.
991string Generator::ModuleLevelMessageName(const Descriptor& descriptor) const {
992  string name = NamePrefixedWithNestedTypes(descriptor, ".");
993  if (descriptor.file() != file_) {
994    name = ModuleName(descriptor.file()->name()) + "." + name;
995  }
996  return name;
997}
998
999// Returns the unique Python module-level identifier given to a service
1000// descriptor.
1001string Generator::ModuleLevelServiceDescriptorName(
1002    const ServiceDescriptor& descriptor) const {
1003  string name = descriptor.name();
1004  UpperString(&name);
1005  name = "_" + name;
1006  if (descriptor.file() != file_) {
1007    name = ModuleName(descriptor.file()->name()) + "." + name;
1008  }
1009  return name;
1010}
1011
1012// Prints standard constructor arguments serialized_start and serialized_end.
1013// Args:
1014//   descriptor: The cpp descriptor to have a serialized reference.
1015//   proto: A proto
1016// Example printer output:
1017// serialized_start=41,
1018// serialized_end=43,
1019//
1020template <typename DescriptorT, typename DescriptorProtoT>
1021void Generator::PrintSerializedPbInterval(
1022    const DescriptorT& descriptor, DescriptorProtoT& proto) const {
1023  descriptor.CopyTo(&proto);
1024  string sp;
1025  proto.SerializeToString(&sp);
1026  int offset = file_descriptor_serialized_.find(sp);
1027  GOOGLE_CHECK_GE(offset, 0);
1028
1029  printer_->Print("serialized_start=$serialized_start$,\n"
1030                  "serialized_end=$serialized_end$,\n",
1031                  "serialized_start", SimpleItoa(offset),
1032                  "serialized_end", SimpleItoa(offset + sp.size()));
1033}
1034
1035namespace {
1036void PrintDescriptorOptionsFixingCode(const string& descriptor,
1037                                      const string& options,
1038                                      io::Printer* printer) {
1039  // TODO(xiaofeng): I have added a method _SetOptions() to DescriptorBase
1040  // in proto2 python runtime but it couldn't be used here because appengine
1041  // uses a snapshot version of the library in which the new method is not
1042  // yet present. After appengine has synced their runtime library, the code
1043  // below should be cleaned up to use _SetOptions().
1044  printer->Print(
1045      "$descriptor$.has_options = True\n"
1046      "$descriptor$._options = $options$\n",
1047      "descriptor", descriptor, "options", options);
1048}
1049}  // namespace
1050
1051// Prints expressions that set the options field of all descriptors.
1052void Generator::FixAllDescriptorOptions() const {
1053  // Prints an expression that sets the file descriptor's options.
1054  string file_options = OptionsValue(
1055      "FileOptions", file_->options().SerializeAsString());
1056  if (file_options != "None") {
1057    PrintDescriptorOptionsFixingCode(kDescriptorKey, file_options, printer_);
1058  }
1059  // Prints expressions that set the options for all top level enums.
1060  for (int i = 0; i < file_->enum_type_count(); ++i) {
1061    const EnumDescriptor& enum_descriptor = *file_->enum_type(i);
1062    FixOptionsForEnum(enum_descriptor);
1063  }
1064  // Prints expressions that set the options for all top level extensions.
1065  for (int i = 0; i < file_->extension_count(); ++i) {
1066    const FieldDescriptor& field = *file_->extension(i);
1067    FixOptionsForField(field);
1068  }
1069  // Prints expressions that set the options for all messages, nested enums,
1070  // nested extensions and message fields.
1071  for (int i = 0; i < file_->message_type_count(); ++i) {
1072    FixOptionsForMessage(*file_->message_type(i));
1073  }
1074}
1075
1076// Prints expressions that set the options for an enum descriptor and its
1077// value descriptors.
1078void Generator::FixOptionsForEnum(const EnumDescriptor& enum_descriptor) const {
1079  string descriptor_name = ModuleLevelDescriptorName(enum_descriptor);
1080  string enum_options = OptionsValue(
1081      "EnumOptions", enum_descriptor.options().SerializeAsString());
1082  if (enum_options != "None") {
1083    PrintDescriptorOptionsFixingCode(descriptor_name, enum_options, printer_);
1084  }
1085  for (int i = 0; i < enum_descriptor.value_count(); ++i) {
1086    const EnumValueDescriptor& value_descriptor = *enum_descriptor.value(i);
1087    string value_options = OptionsValue(
1088        "EnumValueOptions", value_descriptor.options().SerializeAsString());
1089    if (value_options != "None") {
1090      PrintDescriptorOptionsFixingCode(
1091          StringPrintf("%s.values_by_name[\"%s\"]", descriptor_name.c_str(),
1092                       value_descriptor.name().c_str()),
1093          value_options, printer_);
1094    }
1095  }
1096}
1097
1098// Prints expressions that set the options for field descriptors (including
1099// extensions).
1100void Generator::FixOptionsForField(
1101    const FieldDescriptor& field) const {
1102  string field_options = OptionsValue(
1103      "FieldOptions", field.options().SerializeAsString());
1104  if (field_options != "None") {
1105    string field_name;
1106    if (field.is_extension()) {
1107      if (field.extension_scope() == NULL) {
1108        // Top level extensions.
1109        field_name = field.name();
1110      } else {
1111        field_name = FieldReferencingExpression(
1112            field.extension_scope(), field, "extensions_by_name");
1113      }
1114    } else {
1115      field_name = FieldReferencingExpression(
1116          field.containing_type(), field, "fields_by_name");
1117    }
1118    PrintDescriptorOptionsFixingCode(field_name, field_options, printer_);
1119  }
1120}
1121
1122// Prints expressions that set the options for a message and all its inner
1123// types (nested messages, nested enums, extensions, fields).
1124void Generator::FixOptionsForMessage(const Descriptor& descriptor) const {
1125  // Nested messages.
1126  for (int i = 0; i < descriptor.nested_type_count(); ++i) {
1127    FixOptionsForMessage(*descriptor.nested_type(i));
1128  }
1129  // Enums.
1130  for (int i = 0; i < descriptor.enum_type_count(); ++i) {
1131    FixOptionsForEnum(*descriptor.enum_type(i));
1132  }
1133  // Fields.
1134  for (int i = 0; i < descriptor.field_count(); ++i) {
1135    const FieldDescriptor& field = *descriptor.field(i);
1136    FixOptionsForField(field);
1137  }
1138  // Extensions.
1139  for (int i = 0; i < descriptor.extension_count(); ++i) {
1140    const FieldDescriptor& field = *descriptor.extension(i);
1141    FixOptionsForField(field);
1142  }
1143  // Message option for this message.
1144  string message_options = OptionsValue(
1145      "MessageOptions", descriptor.options().SerializeAsString());
1146  if (message_options != "None") {
1147    string descriptor_name = ModuleLevelDescriptorName(descriptor);
1148    PrintDescriptorOptionsFixingCode(descriptor_name,
1149                                     message_options,
1150                                     printer_);
1151  }
1152}
1153
1154}  // namespace python
1155}  // namespace compiler
1156}  // namespace protobuf
1157}  // namespace google
1158