1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <google/protobuf/compiler/command_line_interface.h>
36
37#include <stdio.h>
38#include <sys/types.h>
39#include <sys/stat.h>
40#include <fcntl.h>
41#ifdef _MSC_VER
42#include <io.h>
43#include <direct.h>
44#else
45#include <unistd.h>
46#endif
47#include <errno.h>
48#include <iostream>
49#include <ctype.h>
50
51#include <google/protobuf/compiler/importer.h>
52#include <google/protobuf/compiler/code_generator.h>
53#include <google/protobuf/compiler/plugin.pb.h>
54#include <google/protobuf/compiler/subprocess.h>
55#include <google/protobuf/compiler/zip_writer.h>
56#include <google/protobuf/descriptor.h>
57#include <google/protobuf/text_format.h>
58#include <google/protobuf/dynamic_message.h>
59#include <google/protobuf/io/zero_copy_stream_impl.h>
60#include <google/protobuf/io/printer.h>
61#include <google/protobuf/stubs/common.h>
62#include <google/protobuf/stubs/strutil.h>
63#include <google/protobuf/stubs/substitute.h>
64#include <google/protobuf/stubs/map-util.h>
65#include <google/protobuf/stubs/stl_util-inl.h>
66#include <google/protobuf/stubs/hash.h>
67
68
69namespace google {
70namespace protobuf {
71namespace compiler {
72
// Compatibility shims for Windows builds:  names this file uses that the
// platform headers may not provide.
#ifdef _WIN32
// Lets the two-argument mkdir(name, mode) calls in this file compile by
// discarding the mode argument.
#define mkdir(name, mode) mkdir(name)
// access() mode flags; not defined by MSVC for whatever reason.
#if !defined(W_OK)
#define W_OK 02
#endif
#if !defined(F_OK)
#define F_OK 00
#endif
// Descriptor numbers of the standard input and output streams.
#if !defined(STDIN_FILENO)
#define STDIN_FILENO 0
#endif
#if !defined(STDOUT_FILENO)
#define STDOUT_FILENO 1
#endif
#endif  // _WIN32
88
// Make sure O_BINARY can always be OR'ed into open() flags:  reuse _O_BINARY
// where it exists, otherwise define it as a flag with no effect.
#if !defined(O_BINARY)
#if defined(_O_BINARY)
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0  // If this isn't defined, the platform doesn't need it.
#endif
#endif  // !defined(O_BINARY)
96
97namespace {
// Delimiter used to split a --proto_path value into its entries:  ";" on
// Windows builds other than Cygwin, ":" everywhere else.
static const char* kPathSeparator =
#if !defined(_WIN32) || defined(__CYGWIN__)
    ":";
#else
    ";";
#endif
103
// Returns true if |text| looks like a Windows-style absolute path:  a drive
// letter, then ':', then '/' or '\', with no further ':' anywhere in the
// string.  Example:  "C:\foo".  Always returns false unless built for Windows
// or Cygwin.
// TODO(kenton):  Share this with copy in importer.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() is only defined for values representable as unsigned char (or
  // EOF), so a possibly-negative char has to be converted before the call.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
117
// Puts the file descriptor |fd| into text mode on Windows, logging a warning
// if _setmode() reports failure.  On every other platform this is a no-op,
// because text and binary modes are the same there.
void SetFdToTextMode(int fd) {
#if defined(_WIN32)
  const int setmode_result = _setmode(fd, _O_TEXT);
  if (setmode_result == -1) {
    // This should never happen, I think.
    GOOGLE_LOG(WARNING) << "_setmode(" << fd << ", _O_TEXT): " << strerror(errno);
  }
#endif  // _WIN32
}
127
// Puts the file descriptor |fd| into binary mode on Windows, logging a
// warning if _setmode() reports failure.  On every other platform this is a
// no-op, because text and binary modes are the same there.
void SetFdToBinaryMode(int fd) {
#if defined(_WIN32)
  const int setmode_result = _setmode(fd, _O_BINARY);
  if (setmode_result == -1) {
    // This should never happen, I think.
    GOOGLE_LOG(WARNING) << "_setmode(" << fd << ", _O_BINARY): " << strerror(errno);
  }
#endif  // _WIN32
}
137
// Appends a '/' to |*path| unless the string is empty or already ends in one.
void AddTrailingSlash(string* path) {
  if (path->empty()) return;
  if (*path->rbegin() == '/') return;
  path->append(1, '/');
}
143
// Returns true if |path| is empty or access(path, W_OK) succeeds.  Otherwise
// prints "<path>: <system error text>" to stderr and returns false.
bool VerifyDirectoryExists(const string& path) {
  const bool usable = path.empty() || access(path.c_str(), W_OK) != -1;
  if (!usable) {
    cerr << path << ": " << strerror(errno) << endl;
  }
  return usable;
}
154
155// Try to create the parent directory of the given file, creating the parent's
156// parent if necessary, and so on.  The full file name is actually
157// (prefix + filename), but we assume |prefix| already exists and only create
158// directories listed in |filename|.
159bool TryCreateParentDirectory(const string& prefix, const string& filename) {
160  // Recursively create parent directories to the output file.
161  vector<string> parts;
162  SplitStringUsing(filename, "/", &parts);
163  string path_so_far = prefix;
164  for (int i = 0; i < parts.size() - 1; i++) {
165    path_so_far += parts[i];
166    if (mkdir(path_so_far.c_str(), 0777) != 0) {
167      if (errno != EEXIST) {
168        cerr << filename << ": while trying to create directory "
169             << path_so_far << ": " << strerror(errno) << endl;
170        return false;
171      }
172    }
173    path_so_far += '/';
174  }
175
176  return true;
177}
178
179}  // namespace
180
181// A MultiFileErrorCollector that prints errors to stderr.
182class CommandLineInterface::ErrorPrinter : public MultiFileErrorCollector,
183                                           public io::ErrorCollector {
184 public:
185  ErrorPrinter(ErrorFormat format) : format_(format) {}
186  ~ErrorPrinter() {}
187
188  // implements MultiFileErrorCollector ------------------------------
189  void AddError(const string& filename, int line, int column,
190                const string& message) {
191
192    cerr << filename;
193
194    // Users typically expect 1-based line/column numbers, so we add 1
195    // to each here.
196    if (line != -1) {
197      // Allow for both GCC- and Visual-Studio-compatible output.
198      switch (format_) {
199        case CommandLineInterface::ERROR_FORMAT_GCC:
200          cerr << ":" << (line + 1) << ":" << (column + 1);
201          break;
202        case CommandLineInterface::ERROR_FORMAT_MSVS:
203          cerr << "(" << (line + 1) << ") : error in column=" << (column + 1);
204          break;
205      }
206    }
207
208    cerr << ": " << message << endl;
209  }
210
211  // implements io::ErrorCollector -----------------------------------
212  void AddError(int line, int column, const string& message) {
213    AddError("input", line, column, message);
214  }
215
216 private:
217  const ErrorFormat format_;
218};
219
220// -------------------------------------------------------------------
221
222// An OutputDirectory implementation that buffers files in memory, then dumps
223// them all to disk on demand.
224class CommandLineInterface::MemoryOutputDirectory : public OutputDirectory {
225 public:
226  MemoryOutputDirectory();
227  ~MemoryOutputDirectory();
228
229  // Write all files in the directory to disk at the given output location,
230  // which must end in a '/'.
231  bool WriteAllToDisk(const string& prefix);
232
233  // Write the contents of this directory to a ZIP-format archive with the
234  // given name.
235  bool WriteAllToZip(const string& filename);
236
237  // Add a boilerplate META-INF/MANIFEST.MF file as required by the Java JAR
238  // format, unless one has already been written.
239  void AddJarManifest();
240
241  // implements OutputDirectory --------------------------------------
242  io::ZeroCopyOutputStream* Open(const string& filename);
243  io::ZeroCopyOutputStream* OpenForInsert(
244      const string& filename, const string& insertion_point);
245
246 private:
247  friend class MemoryOutputStream;
248
249  // map instead of hash_map so that files are written in order (good when
250  // writing zips).
251  map<string, string*> files_;
252  bool had_error_;
253};
254
255class CommandLineInterface::MemoryOutputStream
256    : public io::ZeroCopyOutputStream {
257 public:
258  MemoryOutputStream(MemoryOutputDirectory* directory, const string& filename);
259  MemoryOutputStream(MemoryOutputDirectory* directory, const string& filename,
260                     const string& insertion_point);
261  virtual ~MemoryOutputStream();
262
263  // implements ZeroCopyOutputStream ---------------------------------
264  virtual bool Next(void** data, int* size) { return inner_->Next(data, size); }
265  virtual void BackUp(int count)            {        inner_->BackUp(count);    }
266  virtual int64 ByteCount() const           { return inner_->ByteCount();      }
267
268 private:
269  // Where to insert the string when it's done.
270  MemoryOutputDirectory* directory_;
271  string filename_;
272  string insertion_point_;
273
274  // The string we're building.
275  string data_;
276
277  // StringOutputStream writing to data_.
278  scoped_ptr<io::StringOutputStream> inner_;
279};
280
281// -------------------------------------------------------------------
282
283CommandLineInterface::MemoryOutputDirectory::MemoryOutputDirectory()
284    : had_error_(false) {}
285
286CommandLineInterface::MemoryOutputDirectory::~MemoryOutputDirectory() {
287  STLDeleteValues(&files_);
288}
289
290bool CommandLineInterface::MemoryOutputDirectory::WriteAllToDisk(
291    const string& prefix) {
292  if (had_error_) {
293    return false;
294  }
295
296  if (!VerifyDirectoryExists(prefix)) {
297    return false;
298  }
299
300  for (map<string, string*>::const_iterator iter = files_.begin();
301       iter != files_.end(); ++iter) {
302    const string& relative_filename = iter->first;
303    const char* data = iter->second->data();
304    int size = iter->second->size();
305
306    if (!TryCreateParentDirectory(prefix, relative_filename)) {
307      return false;
308    }
309    string filename = prefix + relative_filename;
310
311    // Create the output file.
312    int file_descriptor;
313    do {
314      file_descriptor =
315        open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
316    } while (file_descriptor < 0 && errno == EINTR);
317
318    if (file_descriptor < 0) {
319      int error = errno;
320      cerr << filename << ": " << strerror(error);
321      return false;
322    }
323
324    // Write the file.
325    while (size > 0) {
326      int write_result;
327      do {
328        write_result = write(file_descriptor, data, size);
329      } while (write_result < 0 && errno == EINTR);
330
331      if (write_result <= 0) {
332        // Write error.
333
334        // FIXME(kenton):  According to the man page, if write() returns zero,
335        //   there was no error; write() simply did not write anything.  It's
336        //   unclear under what circumstances this might happen, but presumably
337        //   errno won't be set in this case.  I am confused as to how such an
338        //   event should be handled.  For now I'm treating it as an error,
339        //   since retrying seems like it could lead to an infinite loop.  I
340        //   suspect this never actually happens anyway.
341
342        if (write_result < 0) {
343          int error = errno;
344          cerr << filename << ": write: " << strerror(error);
345        } else {
346          cerr << filename << ": write() returned zero?" << endl;
347        }
348        return false;
349      }
350
351      data += write_result;
352      size -= write_result;
353    }
354
355    if (close(file_descriptor) != 0) {
356      int error = errno;
357      cerr << filename << ": close: " << strerror(error);
358      return false;
359    }
360  }
361
362  return true;
363}
364
365bool CommandLineInterface::MemoryOutputDirectory::WriteAllToZip(
366    const string& filename) {
367  if (had_error_) {
368    return false;
369  }
370
371  // Create the output file.
372  int file_descriptor;
373  do {
374    file_descriptor =
375      open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
376  } while (file_descriptor < 0 && errno == EINTR);
377
378  if (file_descriptor < 0) {
379    int error = errno;
380    cerr << filename << ": " << strerror(error);
381    return false;
382  }
383
384  // Create the ZipWriter
385  io::FileOutputStream stream(file_descriptor);
386  ZipWriter zip_writer(&stream);
387
388  for (map<string, string*>::const_iterator iter = files_.begin();
389       iter != files_.end(); ++iter) {
390    zip_writer.Write(iter->first, *iter->second);
391  }
392
393  zip_writer.WriteDirectory();
394
395  if (stream.GetErrno() != 0) {
396    cerr << filename << ": " << strerror(stream.GetErrno()) << endl;
397  }
398
399  if (!stream.Close()) {
400    cerr << filename << ": " << strerror(stream.GetErrno()) << endl;
401  }
402
403  return true;
404}
405
406void CommandLineInterface::MemoryOutputDirectory::AddJarManifest() {
407  string** map_slot = &files_["META-INF/MANIFEST.MF"];
408  if (*map_slot == NULL) {
409    *map_slot = new string(
410        "Manifest-Version: 1.0\n"
411        "Created-By: 1.6.0 (protoc)\n"
412        "\n");
413  }
414}
415
416io::ZeroCopyOutputStream* CommandLineInterface::MemoryOutputDirectory::Open(
417    const string& filename) {
418  return new MemoryOutputStream(this, filename);
419}
420
421io::ZeroCopyOutputStream*
422CommandLineInterface::MemoryOutputDirectory::OpenForInsert(
423    const string& filename, const string& insertion_point) {
424  return new MemoryOutputStream(this, filename, insertion_point);
425}
426
427// -------------------------------------------------------------------
428
429CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
430    MemoryOutputDirectory* directory, const string& filename)
431    : directory_(directory),
432      filename_(filename),
433      inner_(new io::StringOutputStream(&data_)) {
434}
435
436CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
437    MemoryOutputDirectory* directory, const string& filename,
438    const string& insertion_point)
439    : directory_(directory),
440      filename_(filename),
441      insertion_point_(insertion_point),
442      inner_(new io::StringOutputStream(&data_)) {
443}
444
445CommandLineInterface::MemoryOutputStream::~MemoryOutputStream() {
446  // Make sure all data has been written.
447  inner_.reset();
448
449  // Insert into the directory.
450  string** map_slot = &directory_->files_[filename_];
451
452  if (insertion_point_.empty()) {
453    // This was just a regular Open().
454    if (*map_slot != NULL) {
455      cerr << filename_ << ": Tried to write the same file twice." << endl;
456      directory_->had_error_ = true;
457      return;
458    }
459
460    *map_slot = new string;
461    (*map_slot)->swap(data_);
462  } else {
463    // This was an OpenForInsert().
464
465    // If the data doens't end with a clean line break, add one.
466    if (!data_.empty() && data_[data_.size() - 1] != '\n') {
467      data_.push_back('\n');
468    }
469
470    // Find the file we are going to insert into.
471    if (*map_slot == NULL) {
472      cerr << filename_ << ": Tried to insert into file that doesn't exist."
473           << endl;
474      directory_->had_error_ = true;
475      return;
476    }
477    string* target = *map_slot;
478
479    // Find the insertion point.
480    string magic_string = strings::Substitute(
481        "@@protoc_insertion_point($0)", insertion_point_);
482    string::size_type pos = target->find(magic_string);
483
484    if (pos == string::npos) {
485      cerr << filename_ << ": insertion point \"" << insertion_point_
486           << "\" not found." << endl;
487      directory_->had_error_ = true;
488      return;
489    }
490
491    // Seek backwards to the beginning of the line, which is where we will
492    // insert the data.  Note that this has the effect of pushing the insertion
493    // point down, so the data is inserted before it.  This is intentional
494    // because it means that multiple insertions at the same point will end
495    // up in the expected order in the final output.
496    pos = target->find_last_of('\n', pos);
497    if (pos == string::npos) {
498      // Insertion point is on the first line.
499      pos = 0;
500    } else {
501      // Advance to character after '\n'.
502      ++pos;
503    }
504
505    // Extract indent.
506    string indent_(*target, pos, target->find_first_not_of(" \t", pos) - pos);
507
508    if (indent_.empty()) {
509      // No indent.  This makes things easier.
510      target->insert(pos, data_);
511    } else {
512      // Calculate how much space we need.
513      int indent_size = 0;
514      for (int i = 0; i < data_.size(); i++) {
515        if (data_[i] == '\n') indent_size += indent_.size();
516      }
517
518      // Make a hole for it.
519      target->insert(pos, data_.size() + indent_size, '\0');
520
521      // Now copy in the data.
522      string::size_type data_pos = 0;
523      char* target_ptr = string_as_array(target) + pos;
524      while (data_pos < data_.size()) {
525        // Copy indent.
526        memcpy(target_ptr, indent_.data(), indent_.size());
527        target_ptr += indent_.size();
528
529        // Copy line from data_.
530        // We already guaranteed that data_ ends with a newline (above), so this
531        // search can't fail.
532        string::size_type line_length =
533            data_.find_first_of('\n', data_pos) + 1 - data_pos;
534        memcpy(target_ptr, data_.data() + data_pos, line_length);
535        target_ptr += line_length;
536        data_pos += line_length;
537      }
538
539      GOOGLE_CHECK_EQ(target_ptr,
540          string_as_array(target) + pos + data_.size() + indent_size);
541    }
542  }
543}
544
545// ===================================================================
546
547CommandLineInterface::CommandLineInterface()
548  : mode_(MODE_COMPILE),
549    error_format_(ERROR_FORMAT_GCC),
550    imports_in_descriptor_set_(false),
551    disallow_services_(false),
552    inputs_are_proto_path_relative_(false) {}
553CommandLineInterface::~CommandLineInterface() {}
554
555void CommandLineInterface::RegisterGenerator(const string& flag_name,
556                                             CodeGenerator* generator,
557                                             const string& help_text) {
558  GeneratorInfo info;
559  info.generator = generator;
560  info.help_text = help_text;
561  generators_[flag_name] = info;
562}
563
564void CommandLineInterface::AllowPlugins(const string& exe_name_prefix) {
565  plugin_prefix_ = exe_name_prefix;
566}
567
568int CommandLineInterface::Run(int argc, const char* const argv[]) {
569  Clear();
570  if (!ParseArguments(argc, argv)) return 1;
571
572  // Set up the source tree.
573  DiskSourceTree source_tree;
574  for (int i = 0; i < proto_path_.size(); i++) {
575    source_tree.MapPath(proto_path_[i].first, proto_path_[i].second);
576  }
577
578  // Map input files to virtual paths if necessary.
579  if (!inputs_are_proto_path_relative_) {
580    if (!MakeInputsBeProtoPathRelative(&source_tree)) {
581      return 1;
582    }
583  }
584
585  // Allocate the Importer.
586  ErrorPrinter error_collector(error_format_);
587  Importer importer(&source_tree, &error_collector);
588
589  vector<const FileDescriptor*> parsed_files;
590
591  // Parse each file.
592  for (int i = 0; i < input_files_.size(); i++) {
593    // Import the file.
594    const FileDescriptor* parsed_file = importer.Import(input_files_[i]);
595    if (parsed_file == NULL) return 1;
596    parsed_files.push_back(parsed_file);
597
598    // Enforce --disallow_services.
599    if (disallow_services_ && parsed_file->service_count() > 0) {
600      cerr << parsed_file->name() << ": This file contains services, but "
601              "--disallow_services was used." << endl;
602      return 1;
603    }
604  }
605
606  // We construct a separate OutputDirectory for each output location.  Note
607  // that two code generators may output to the same location, in which case
608  // they should share a single OutputDirectory (so that OpenForInsert() works).
609  typedef hash_map<string, MemoryOutputDirectory*> OutputDirectoryMap;
610  OutputDirectoryMap output_directories;
611
612  // Generate output.
613  if (mode_ == MODE_COMPILE) {
614    for (int i = 0; i < output_directives_.size(); i++) {
615      string output_location = output_directives_[i].output_location;
616      if (!HasSuffixString(output_location, ".zip") &&
617          !HasSuffixString(output_location, ".jar")) {
618        AddTrailingSlash(&output_location);
619      }
620      MemoryOutputDirectory** map_slot = &output_directories[output_location];
621
622      if (*map_slot == NULL) {
623        // First time we've seen this output location.
624        *map_slot = new MemoryOutputDirectory;
625      }
626
627      if (!GenerateOutput(parsed_files, output_directives_[i], *map_slot)) {
628        STLDeleteValues(&output_directories);
629        return 1;
630      }
631    }
632  }
633
634  // Write all output to disk.
635  for (OutputDirectoryMap::iterator iter = output_directories.begin();
636       iter != output_directories.end(); ++iter) {
637    const string& location = iter->first;
638    MemoryOutputDirectory* directory = iter->second;
639    if (HasSuffixString(location, "/")) {
640      if (!directory->WriteAllToDisk(location)) {
641        STLDeleteValues(&output_directories);
642        return 1;
643      }
644    } else {
645      if (HasSuffixString(location, ".jar")) {
646        directory->AddJarManifest();
647      }
648
649      if (!directory->WriteAllToZip(location)) {
650        STLDeleteValues(&output_directories);
651        return 1;
652      }
653    }
654  }
655
656  STLDeleteValues(&output_directories);
657
658  if (!descriptor_set_name_.empty()) {
659    if (!WriteDescriptorSet(parsed_files)) {
660      return 1;
661    }
662  }
663
664  if (mode_ == MODE_ENCODE || mode_ == MODE_DECODE) {
665    if (codec_type_.empty()) {
666      // HACK:  Define an EmptyMessage type to use for decoding.
667      DescriptorPool pool;
668      FileDescriptorProto file;
669      file.set_name("empty_message.proto");
670      file.add_message_type()->set_name("EmptyMessage");
671      GOOGLE_CHECK(pool.BuildFile(file) != NULL);
672      codec_type_ = "EmptyMessage";
673      if (!EncodeOrDecode(&pool)) {
674        return 1;
675      }
676    } else {
677      if (!EncodeOrDecode(importer.pool())) {
678        return 1;
679      }
680    }
681  }
682
683  return 0;
684}
685
686void CommandLineInterface::Clear() {
687  // Clear all members that are set by Run().  Note that we must not clear
688  // members which are set by other methods before Run() is called.
689  executable_name_.clear();
690  proto_path_.clear();
691  input_files_.clear();
692  output_directives_.clear();
693  codec_type_.clear();
694  descriptor_set_name_.clear();
695
696  mode_ = MODE_COMPILE;
697  imports_in_descriptor_set_ = false;
698  disallow_services_ = false;
699}
700
701bool CommandLineInterface::MakeInputsBeProtoPathRelative(
702    DiskSourceTree* source_tree) {
703  for (int i = 0; i < input_files_.size(); i++) {
704    string virtual_file, shadowing_disk_file;
705    switch (source_tree->DiskFileToVirtualFile(
706        input_files_[i], &virtual_file, &shadowing_disk_file)) {
707      case DiskSourceTree::SUCCESS:
708        input_files_[i] = virtual_file;
709        break;
710      case DiskSourceTree::SHADOWED:
711        cerr << input_files_[i] << ": Input is shadowed in the --proto_path "
712                "by \"" << shadowing_disk_file << "\".  Either use the latter "
713                "file as your input or reorder the --proto_path so that the "
714                "former file's location comes first." << endl;
715        return false;
716      case DiskSourceTree::CANNOT_OPEN:
717        cerr << input_files_[i] << ": " << strerror(errno) << endl;
718        return false;
719      case DiskSourceTree::NO_MAPPING:
720        // First check if the file exists at all.
721        if (access(input_files_[i].c_str(), F_OK) < 0) {
722          // File does not even exist.
723          cerr << input_files_[i] << ": " << strerror(ENOENT) << endl;
724        } else {
725          cerr << input_files_[i] << ": File does not reside within any path "
726                  "specified using --proto_path (or -I).  You must specify a "
727                  "--proto_path which encompasses this file.  Note that the "
728                  "proto_path must be an exact prefix of the .proto file "
729                  "names -- protoc is too dumb to figure out when two paths "
730                  "(e.g. absolute and relative) are equivalent (it's harder "
731                  "than you think)." << endl;
732        }
733        return false;
734    }
735  }
736
737  return true;
738}
739
740bool CommandLineInterface::ParseArguments(int argc, const char* const argv[]) {
741  executable_name_ = argv[0];
742
743  // Iterate through all arguments and parse them.
744  for (int i = 1; i < argc; i++) {
745    string name, value;
746
747    if (ParseArgument(argv[i], &name, &value)) {
748      // Returned true => Use the next argument as the flag value.
749      if (i + 1 == argc || argv[i+1][0] == '-') {
750        cerr << "Missing value for flag: " << name << endl;
751        if (name == "--decode") {
752          cerr << "To decode an unknown message, use --decode_raw." << endl;
753        }
754        return false;
755      } else {
756        ++i;
757        value = argv[i];
758      }
759    }
760
761    if (!InterpretArgument(name, value)) return false;
762  }
763
764  // If no --proto_path was given, use the current working directory.
765  if (proto_path_.empty()) {
766    proto_path_.push_back(make_pair("", "."));
767  }
768
769  // Check some errror cases.
770  bool decoding_raw = (mode_ == MODE_DECODE) && codec_type_.empty();
771  if (decoding_raw && !input_files_.empty()) {
772    cerr << "When using --decode_raw, no input files should be given." << endl;
773    return false;
774  } else if (!decoding_raw && input_files_.empty()) {
775    cerr << "Missing input file." << endl;
776    return false;
777  }
778  if (mode_ == MODE_COMPILE && output_directives_.empty() &&
779      descriptor_set_name_.empty()) {
780    cerr << "Missing output directives." << endl;
781    return false;
782  }
783  if (imports_in_descriptor_set_ && descriptor_set_name_.empty()) {
784    cerr << "--include_imports only makes sense when combined with "
785            "--descriptor_set_out." << endl;
786  }
787
788  return true;
789}
790
791bool CommandLineInterface::ParseArgument(const char* arg,
792                                         string* name, string* value) {
793  bool parsed_value = false;
794
795  if (arg[0] != '-') {
796    // Not a flag.
797    name->clear();
798    parsed_value = true;
799    *value = arg;
800  } else if (arg[1] == '-') {
801    // Two dashes:  Multi-character name, with '=' separating name and
802    //   value.
803    const char* equals_pos = strchr(arg, '=');
804    if (equals_pos != NULL) {
805      *name = string(arg, equals_pos - arg);
806      *value = equals_pos + 1;
807      parsed_value = true;
808    } else {
809      *name = arg;
810    }
811  } else {
812    // One dash:  One-character name, all subsequent characters are the
813    //   value.
814    if (arg[1] == '\0') {
815      // arg is just "-".  We treat this as an input file, except that at
816      // present this will just lead to a "file not found" error.
817      name->clear();
818      *value = arg;
819      parsed_value = true;
820    } else {
821      *name = string(arg, 2);
822      *value = arg + 2;
823      parsed_value = !value->empty();
824    }
825  }
826
827  // Need to return true iff the next arg should be used as the value for this
828  // one, false otherwise.
829
830  if (parsed_value) {
831    // We already parsed a value for this flag.
832    return false;
833  }
834
835  if (*name == "-h" || *name == "--help" ||
836      *name == "--disallow_services" ||
837      *name == "--include_imports" ||
838      *name == "--version" ||
839      *name == "--decode_raw") {
840    // HACK:  These are the only flags that don't take a value.
841    //   They probably should not be hard-coded like this but for now it's
842    //   not worth doing better.
843    return false;
844  }
845
846  // Next argument is the flag value.
847  return true;
848}
849
850bool CommandLineInterface::InterpretArgument(const string& name,
851                                             const string& value) {
852  if (name.empty()) {
853    // Not a flag.  Just a filename.
854    if (value.empty()) {
855      cerr << "You seem to have passed an empty string as one of the "
856              "arguments to " << executable_name_ << ".  This is actually "
857              "sort of hard to do.  Congrats.  Unfortunately it is not valid "
858              "input so the program is going to die now." << endl;
859      return false;
860    }
861
862    input_files_.push_back(value);
863
864  } else if (name == "-I" || name == "--proto_path") {
865    // Java's -classpath (and some other languages) delimits path components
866    // with colons.  Let's accept that syntax too just to make things more
867    // intuitive.
868    vector<string> parts;
869    SplitStringUsing(value, kPathSeparator, &parts);
870
871    for (int i = 0; i < parts.size(); i++) {
872      string virtual_path;
873      string disk_path;
874
875      int equals_pos = parts[i].find_first_of('=');
876      if (equals_pos == string::npos) {
877        virtual_path = "";
878        disk_path = parts[i];
879      } else {
880        virtual_path = parts[i].substr(0, equals_pos);
881        disk_path = parts[i].substr(equals_pos + 1);
882      }
883
884      if (disk_path.empty()) {
885        cerr << "--proto_path passed empty directory name.  (Use \".\" for "
886                "current directory.)" << endl;
887        return false;
888      }
889
890      // Make sure disk path exists, warn otherwise.
891      if (access(disk_path.c_str(), F_OK) < 0) {
892        cerr << disk_path << ": warning: directory does not exist." << endl;
893      }
894
895      proto_path_.push_back(make_pair(virtual_path, disk_path));
896    }
897
898  } else if (name == "-o" || name == "--descriptor_set_out") {
899    if (!descriptor_set_name_.empty()) {
900      cerr << name << " may only be passed once." << endl;
901      return false;
902    }
903    if (value.empty()) {
904      cerr << name << " requires a non-empty value." << endl;
905      return false;
906    }
907    if (mode_ != MODE_COMPILE) {
908      cerr << "Cannot use --encode or --decode and generate descriptors at the "
909              "same time." << endl;
910      return false;
911    }
912    descriptor_set_name_ = value;
913
914  } else if (name == "--include_imports") {
915    if (imports_in_descriptor_set_) {
916      cerr << name << " may only be passed once." << endl;
917      return false;
918    }
919    imports_in_descriptor_set_ = true;
920
921  } else if (name == "-h" || name == "--help") {
922    PrintHelpText();
923    return false;  // Exit without running compiler.
924
925  } else if (name == "--version") {
926    if (!version_info_.empty()) {
927      cout << version_info_ << endl;
928    }
929    cout << "libprotoc "
930         << protobuf::internal::VersionString(GOOGLE_PROTOBUF_VERSION)
931         << endl;
932    return false;  // Exit without running compiler.
933
934  } else if (name == "--disallow_services") {
935    disallow_services_ = true;
936
937  } else if (name == "--encode" || name == "--decode" ||
938             name == "--decode_raw") {
939    if (mode_ != MODE_COMPILE) {
940      cerr << "Only one of --encode and --decode can be specified." << endl;
941      return false;
942    }
943    if (!output_directives_.empty() || !descriptor_set_name_.empty()) {
944      cerr << "Cannot use " << name
945           << " and generate code or descriptors at the same time." << endl;
946      return false;
947    }
948
949    mode_ = (name == "--encode") ? MODE_ENCODE : MODE_DECODE;
950
951    if (value.empty() && name != "--decode_raw") {
952      cerr << "Type name for " << name << " cannot be blank." << endl;
953      if (name == "--decode") {
954        cerr << "To decode an unknown message, use --decode_raw." << endl;
955      }
956      return false;
957    } else if (!value.empty() && name == "--decode_raw") {
958      cerr << "--decode_raw does not take a parameter." << endl;
959      return false;
960    }
961
962    codec_type_ = value;
963
964  } else if (name == "--error_format") {
965    if (value == "gcc") {
966      error_format_ = ERROR_FORMAT_GCC;
967    } else if (value == "msvs") {
968      error_format_ = ERROR_FORMAT_MSVS;
969    } else {
970      cerr << "Unknown error format: " << value << endl;
971      return false;
972    }
973
974  } else if (name == "--plugin") {
975    if (plugin_prefix_.empty()) {
976      cerr << "This compiler does not support plugins." << endl;
977      return false;
978    }
979
980    string name;
981    string path;
982
983    string::size_type equals_pos = value.find_first_of('=');
984    if (equals_pos == string::npos) {
985      // Use the basename of the file.
986      string::size_type slash_pos = value.find_last_of('/');
987      if (slash_pos == string::npos) {
988        name = value;
989      } else {
990        name = value.substr(slash_pos + 1);
991      }
992      path = value;
993    } else {
994      name = value.substr(0, equals_pos);
995      path = value.substr(equals_pos + 1);
996    }
997
998    plugins_[name] = path;
999
1000  } else {
1001    // Some other flag.  Look it up in the generators list.
1002    const GeneratorInfo* generator_info = FindOrNull(generators_, name);
1003    if (generator_info == NULL &&
1004        (plugin_prefix_.empty() || !HasSuffixString(name, "_out"))) {
1005      cerr << "Unknown flag: " << name << endl;
1006      return false;
1007    }
1008
1009    // It's an output flag.  Add it to the output directives.
1010    if (mode_ != MODE_COMPILE) {
1011      cerr << "Cannot use --encode or --decode and generate code at the "
1012              "same time." << endl;
1013      return false;
1014    }
1015
1016    OutputDirective directive;
1017    directive.name = name;
1018    if (generator_info == NULL) {
1019      directive.generator = NULL;
1020    } else {
1021      directive.generator = generator_info->generator;
1022    }
1023
1024    // Split value at ':' to separate the generator parameter from the
1025    // filename.  However, avoid doing this if the colon is part of a valid
1026    // Windows-style absolute path.
1027    string::size_type colon_pos = value.find_first_of(':');
1028    if (colon_pos == string::npos || IsWindowsAbsolutePath(value)) {
1029      directive.output_location = value;
1030    } else {
1031      directive.parameter = value.substr(0, colon_pos);
1032      directive.output_location = value.substr(colon_pos + 1);
1033    }
1034
1035    output_directives_.push_back(directive);
1036  }
1037
1038  return true;
1039}
1040
1041void CommandLineInterface::PrintHelpText() {
1042  // Sorry for indentation here; line wrapping would be uglier.
1043  cerr <<
1044"Usage: " << executable_name_ << " [OPTION] PROTO_FILES\n"
1045"Parse PROTO_FILES and generate output based on the options given:\n"
1046"  -IPATH, --proto_path=PATH   Specify the directory in which to search for\n"
1047"                              imports.  May be specified multiple times;\n"
1048"                              directories will be searched in order.  If not\n"
1049"                              given, the current working directory is used.\n"
1050"  --version                   Show version info and exit.\n"
1051"  -h, --help                  Show this text and exit.\n"
1052"  --encode=MESSAGE_TYPE       Read a text-format message of the given type\n"
1053"                              from standard input and write it in binary\n"
1054"                              to standard output.  The message type must\n"
1055"                              be defined in PROTO_FILES or their imports.\n"
1056"  --decode=MESSAGE_TYPE       Read a binary message of the given type from\n"
1057"                              standard input and write it in text format\n"
1058"                              to standard output.  The message type must\n"
1059"                              be defined in PROTO_FILES or their imports.\n"
1060"  --decode_raw                Read an arbitrary protocol message from\n"
1061"                              standard input and write the raw tag/value\n"
1062"                              pairs in text format to standard output.  No\n"
1063"                              PROTO_FILES should be given when using this\n"
1064"                              flag.\n"
1065"  -oFILE,                     Writes a FileDescriptorSet (a protocol buffer,\n"
1066"    --descriptor_set_out=FILE defined in descriptor.proto) containing all of\n"
1067"                              the input files to FILE.\n"
1068"  --include_imports           When using --descriptor_set_out, also include\n"
1069"                              all dependencies of the input files in the\n"
1070"                              set, so that the set is self-contained.\n"
1071"  --error_format=FORMAT       Set the format in which to print errors.\n"
1072"                              FORMAT may be 'gcc' (the default) or 'msvs'\n"
1073"                              (Microsoft Visual Studio format)." << endl;
1074  if (!plugin_prefix_.empty()) {
1075    cerr <<
1076"  --plugin=EXECUTABLE         Specifies a plugin executable to use.\n"
1077"                              Normally, protoc searches the PATH for\n"
1078"                              plugins, but you may specify additional\n"
1079"                              executables not in the path using this flag.\n"
1080"                              Additionally, EXECUTABLE may be of the form\n"
1081"                              NAME=PATH, in which case the given plugin name\n"
1082"                              is mapped to the given executable even if\n"
1083"                              the executable's own name differs." << endl;
1084  }
1085
1086  for (GeneratorMap::iterator iter = generators_.begin();
1087       iter != generators_.end(); ++iter) {
1088    // FIXME(kenton):  If the text is long enough it will wrap, which is ugly,
1089    //   but fixing this nicely (e.g. splitting on spaces) is probably more
1090    //   trouble than it's worth.
1091    cerr << "  " << iter->first << "=OUT_DIR "
1092         << string(19 - iter->first.size(), ' ')  // Spaces for alignment.
1093         << iter->second.help_text << endl;
1094  }
1095}
1096
1097bool CommandLineInterface::GenerateOutput(
1098    const vector<const FileDescriptor*>& parsed_files,
1099    const OutputDirective& output_directive,
1100    OutputDirectory* output_directory) {
1101  // Call the generator.
1102  string error;
1103  if (output_directive.generator == NULL) {
1104    // This is a plugin.
1105    GOOGLE_CHECK(HasPrefixString(output_directive.name, "--") &&
1106          HasSuffixString(output_directive.name, "_out"))
1107        << "Bad name for plugin generator: " << output_directive.name;
1108
1109    // Strip the "--" and "_out" and add the plugin prefix.
1110    string plugin_name = plugin_prefix_ + "gen-" +
1111        output_directive.name.substr(2, output_directive.name.size() - 6);
1112
1113    if (!GeneratePluginOutput(parsed_files, plugin_name,
1114                              output_directive.parameter,
1115                              output_directory, &error)) {
1116      cerr << output_directive.name << ": " << error << endl;
1117      return false;
1118    }
1119  } else {
1120    // Regular generator.
1121    for (int i = 0; i < parsed_files.size(); i++) {
1122      if (!output_directive.generator->Generate(
1123          parsed_files[i], output_directive.parameter,
1124          output_directory, &error)) {
1125        // Generator returned an error.
1126        cerr << output_directive.name << ": " << parsed_files[i]->name() << ": "
1127             << error << endl;
1128        return false;
1129      }
1130    }
1131  }
1132
1133  return true;
1134}
1135
1136bool CommandLineInterface::GeneratePluginOutput(
1137    const vector<const FileDescriptor*>& parsed_files,
1138    const string& plugin_name,
1139    const string& parameter,
1140    OutputDirectory* output_directory,
1141    string* error) {
1142  CodeGeneratorRequest request;
1143  CodeGeneratorResponse response;
1144
1145  // Build the request.
1146  if (!parameter.empty()) {
1147    request.set_parameter(parameter);
1148  }
1149
1150  set<const FileDescriptor*> already_seen;
1151  for (int i = 0; i < parsed_files.size(); i++) {
1152    request.add_file_to_generate(parsed_files[i]->name());
1153    GetTransitiveDependencies(parsed_files[i], &already_seen,
1154                              request.mutable_proto_file());
1155  }
1156
1157  // Invoke the plugin.
1158  Subprocess subprocess;
1159
1160  if (plugins_.count(plugin_name) > 0) {
1161    subprocess.Start(plugins_[plugin_name], Subprocess::EXACT_NAME);
1162  } else {
1163    subprocess.Start(plugin_name, Subprocess::SEARCH_PATH);
1164  }
1165
1166  string communicate_error;
1167  if (!subprocess.Communicate(request, &response, &communicate_error)) {
1168    *error = strings::Substitute("$0: $1", plugin_name, communicate_error);
1169    return false;
1170  }
1171
1172  // Write the files.  We do this even if there was a generator error in order
1173  // to match the behavior of a compiled-in generator.
1174  scoped_ptr<io::ZeroCopyOutputStream> current_output;
1175  for (int i = 0; i < response.file_size(); i++) {
1176    const CodeGeneratorResponse::File& output_file = response.file(i);
1177
1178    if (!output_file.insertion_point().empty()) {
1179      // Open a file for insert.
1180      // We reset current_output to NULL first so that the old file is closed
1181      // before the new one is opened.
1182      current_output.reset();
1183      current_output.reset(output_directory->OpenForInsert(
1184          output_file.name(), output_file.insertion_point()));
1185    } else if (!output_file.name().empty()) {
1186      // Starting a new file.  Open it.
1187      // We reset current_output to NULL first so that the old file is closed
1188      // before the new one is opened.
1189      current_output.reset();
1190      current_output.reset(output_directory->Open(output_file.name()));
1191    } else if (current_output == NULL) {
1192      *error = strings::Substitute(
1193        "$0: First file chunk returned by plugin did not specify a file name.",
1194        plugin_name);
1195      return false;
1196    }
1197
1198    // Use CodedOutputStream for convenience; otherwise we'd need to provide
1199    // our own buffer-copying loop.
1200    io::CodedOutputStream writer(current_output.get());
1201    writer.WriteString(output_file.content());
1202  }
1203
1204  // Check for errors.
1205  if (!response.error().empty()) {
1206    // Generator returned an error.
1207    *error = response.error();
1208    return false;
1209  }
1210
1211  return true;
1212}
1213
1214bool CommandLineInterface::EncodeOrDecode(const DescriptorPool* pool) {
1215  // Look up the type.
1216  const Descriptor* type = pool->FindMessageTypeByName(codec_type_);
1217  if (type == NULL) {
1218    cerr << "Type not defined: " << codec_type_ << endl;
1219    return false;
1220  }
1221
1222  DynamicMessageFactory dynamic_factory(pool);
1223  scoped_ptr<Message> message(dynamic_factory.GetPrototype(type)->New());
1224
1225  if (mode_ == MODE_ENCODE) {
1226    SetFdToTextMode(STDIN_FILENO);
1227    SetFdToBinaryMode(STDOUT_FILENO);
1228  } else {
1229    SetFdToBinaryMode(STDIN_FILENO);
1230    SetFdToTextMode(STDOUT_FILENO);
1231  }
1232
1233  io::FileInputStream in(STDIN_FILENO);
1234  io::FileOutputStream out(STDOUT_FILENO);
1235
1236  if (mode_ == MODE_ENCODE) {
1237    // Input is text.
1238    ErrorPrinter error_collector(error_format_);
1239    TextFormat::Parser parser;
1240    parser.RecordErrorsTo(&error_collector);
1241    parser.AllowPartialMessage(true);
1242
1243    if (!parser.Parse(&in, message.get())) {
1244      cerr << "Failed to parse input." << endl;
1245      return false;
1246    }
1247  } else {
1248    // Input is binary.
1249    if (!message->ParsePartialFromZeroCopyStream(&in)) {
1250      cerr << "Failed to parse input." << endl;
1251      return false;
1252    }
1253  }
1254
1255  if (!message->IsInitialized()) {
1256    cerr << "warning:  Input message is missing required fields:  "
1257         << message->InitializationErrorString() << endl;
1258  }
1259
1260  if (mode_ == MODE_ENCODE) {
1261    // Output is binary.
1262    if (!message->SerializePartialToZeroCopyStream(&out)) {
1263      cerr << "output: I/O error." << endl;
1264      return false;
1265    }
1266  } else {
1267    // Output is text.
1268    if (!TextFormat::Print(*message, &out)) {
1269      cerr << "output: I/O error." << endl;
1270      return false;
1271    }
1272  }
1273
1274  return true;
1275}
1276
1277bool CommandLineInterface::WriteDescriptorSet(
1278    const vector<const FileDescriptor*> parsed_files) {
1279  FileDescriptorSet file_set;
1280
1281  if (imports_in_descriptor_set_) {
1282    set<const FileDescriptor*> already_seen;
1283    for (int i = 0; i < parsed_files.size(); i++) {
1284      GetTransitiveDependencies(
1285          parsed_files[i], &already_seen, file_set.mutable_file());
1286    }
1287  } else {
1288    for (int i = 0; i < parsed_files.size(); i++) {
1289      parsed_files[i]->CopyTo(file_set.add_file());
1290    }
1291  }
1292
1293  int fd;
1294  do {
1295    fd = open(descriptor_set_name_.c_str(),
1296              O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
1297  } while (fd < 0 && errno == EINTR);
1298
1299  if (fd < 0) {
1300    perror(descriptor_set_name_.c_str());
1301    return false;
1302  }
1303
1304  io::FileOutputStream out(fd);
1305  if (!file_set.SerializeToZeroCopyStream(&out)) {
1306    cerr << descriptor_set_name_ << ": " << strerror(out.GetErrno()) << endl;
1307    out.Close();
1308    return false;
1309  }
1310  if (!out.Close()) {
1311    cerr << descriptor_set_name_ << ": " << strerror(out.GetErrno()) << endl;
1312    return false;
1313  }
1314
1315  return true;
1316}
1317
1318void CommandLineInterface::GetTransitiveDependencies(
1319    const FileDescriptor* file,
1320    set<const FileDescriptor*>* already_seen,
1321    RepeatedPtrField<FileDescriptorProto>* output) {
1322  if (!already_seen->insert(file).second) {
1323    // Already saw this file.  Skip.
1324    return;
1325  }
1326
1327  // Add all dependencies.
1328  for (int i = 0; i < file->dependency_count(); i++) {
1329    GetTransitiveDependencies(file->dependency(i), already_seen, output);
1330  }
1331
1332  // Add this file.
1333  file->CopyTo(output->Add());
1334}
1335
1336
1337}  // namespace compiler
1338}  // namespace protobuf
1339}  // namespace google
1340