1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#ifdef _MSC_VER
36#include <io.h>
37#else
38#include <unistd.h>
39#endif
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <fcntl.h>
43#include <errno.h>
44
45#include <algorithm>
46#include <memory>
47#ifndef _SHARED_PTR_H
48#include <google/protobuf/stubs/shared_ptr.h>
49#endif
50
51#include <google/protobuf/compiler/importer.h>
52
53#include <google/protobuf/compiler/parser.h>
54#include <google/protobuf/io/tokenizer.h>
55#include <google/protobuf/io/zero_copy_stream_impl.h>
56#include <google/protobuf/stubs/strutil.h>
57
58namespace google {
59namespace protobuf {
60namespace compiler {
61
62#ifdef _WIN32
63#ifndef F_OK
64#define F_OK 00  // not defined by MSVC for whatever reason
65#endif
66#include <ctype.h>
67#endif
68
// Returns true if |text| looks like a Windows-style absolute path that starts
// with a drive letter, for example "C:\foo" or "C:/foo".  When neither _WIN32
// nor __CYGWIN__ is defined this always returns false.
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // The <ctype.h> classification functions have undefined behavior for
  // negative arguments other than EOF.  Where plain char is signed, a path
  // starting with a non-ASCII byte would be passed as a negative value, so
  // convert through unsigned char first.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
82
83MultiFileErrorCollector::~MultiFileErrorCollector() {}
84
85// This class serves two purposes:
86// - It implements the ErrorCollector interface (used by Tokenizer and Parser)
87//   in terms of MultiFileErrorCollector, using a particular filename.
88// - It lets us check if any errors have occurred.
89class SourceTreeDescriptorDatabase::SingleFileErrorCollector
90    : public io::ErrorCollector {
91 public:
92  SingleFileErrorCollector(const string& filename,
93                           MultiFileErrorCollector* multi_file_error_collector)
94    : filename_(filename),
95      multi_file_error_collector_(multi_file_error_collector),
96      had_errors_(false) {}
97  ~SingleFileErrorCollector() {}
98
99  bool had_errors() { return had_errors_; }
100
101  // implements ErrorCollector ---------------------------------------
102  void AddError(int line, int column, const string& message) {
103    if (multi_file_error_collector_ != NULL) {
104      multi_file_error_collector_->AddError(filename_, line, column, message);
105    }
106    had_errors_ = true;
107  }
108
109 private:
110  string filename_;
111  MultiFileErrorCollector* multi_file_error_collector_;
112  bool had_errors_;
113};
114
115// ===================================================================
116
117SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
118    SourceTree* source_tree)
119  : source_tree_(source_tree),
120    error_collector_(NULL),
121    using_validation_error_collector_(false),
122    validation_error_collector_(this) {}
123
124SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
125
126bool SourceTreeDescriptorDatabase::FindFileByName(
127    const string& filename, FileDescriptorProto* output) {
128  google::protobuf::scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
129  if (input == NULL) {
130    if (error_collector_ != NULL) {
131      error_collector_->AddError(filename, -1, 0,
132                                 source_tree_->GetLastErrorMessage());
133    }
134    return false;
135  }
136
137  // Set up the tokenizer and parser.
138  SingleFileErrorCollector file_error_collector(filename, error_collector_);
139  io::Tokenizer tokenizer(input.get(), &file_error_collector);
140
141  Parser parser;
142  if (error_collector_ != NULL) {
143    parser.RecordErrorsTo(&file_error_collector);
144  }
145  if (using_validation_error_collector_) {
146    parser.RecordSourceLocationsTo(&source_locations_);
147  }
148
149  // Parse it.
150  output->set_name(filename);
151  return parser.Parse(&tokenizer, output) &&
152         !file_error_collector.had_errors();
153}
154
155bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
156    const string& symbol_name, FileDescriptorProto* output) {
157  return false;
158}
159
160bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
161    const string& containing_type, int field_number,
162    FileDescriptorProto* output) {
163  return false;
164}
165
166// -------------------------------------------------------------------
167
168SourceTreeDescriptorDatabase::ValidationErrorCollector::
169ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
170  : owner_(owner) {}
171
172SourceTreeDescriptorDatabase::ValidationErrorCollector::
173~ValidationErrorCollector() {}
174
175void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
176    const string& filename,
177    const string& element_name,
178    const Message* descriptor,
179    ErrorLocation location,
180    const string& message) {
181  if (owner_->error_collector_ == NULL) return;
182
183  int line, column;
184  owner_->source_locations_.Find(descriptor, location, &line, &column);
185  owner_->error_collector_->AddError(filename, line, column, message);
186}
187
188void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddWarning(
189    const string& filename,
190    const string& element_name,
191    const Message* descriptor,
192    ErrorLocation location,
193    const string& message) {
194  if (owner_->error_collector_ == NULL) return;
195
196  int line, column;
197  owner_->source_locations_.Find(descriptor, location, &line, &column);
198  owner_->error_collector_->AddWarning(filename, line, column, message);
199}
200
201// ===================================================================
202
203Importer::Importer(SourceTree* source_tree,
204                   MultiFileErrorCollector* error_collector)
205  : database_(source_tree),
206    pool_(&database_, database_.GetValidationErrorCollector()) {
207  pool_.EnforceWeakDependencies(true);
208  database_.RecordErrorsTo(error_collector);
209}
210
211Importer::~Importer() {}
212
213const FileDescriptor* Importer::Import(const string& filename) {
214  return pool_.FindFileByName(filename);
215}
216
217void Importer::AddUnusedImportTrackFile(const string& file_name) {
218  pool_.AddUnusedImportTrackFile(file_name);
219}
220
221void Importer::ClearUnusedImportTrackFiles() {
222  pool_.ClearUnusedImportTrackFiles();
223}
224
225// ===================================================================
226
227SourceTree::~SourceTree() {}
228
229string SourceTree::GetLastErrorMessage() {
230  return "File not found.";
231}
232
233DiskSourceTree::DiskSourceTree() {}
234
235DiskSourceTree::~DiskSourceTree() {}
236
// Returns the final character of |str|, or '\0' when |str| is empty.
static inline char LastChar(const string& str) {
  // Guard the empty case: str.size() - 1 would wrap around to a huge index
  // and read far outside the string.
  return str.empty() ? '\0' : str[str.size() - 1];
}
240
241// Given a path, returns an equivalent path with these changes:
242// - On Windows, any backslashes are replaced with forward slashes.
243// - Any instances of the directory "." are removed.
244// - Any consecutive '/'s are collapsed into a single slash.
245// Note that the resulting string may be empty.
246//
247// TODO(kenton):  It would be nice to handle "..", e.g. so that we can figure
248//   out that "foo/bar.proto" is inside "baz/../foo".  However, if baz is a
249//   symlink or doesn't exist, then things get complicated, and we can't
250//   actually determine this without investigating the filesystem, probably
251//   in non-portable ways.  So, we punt.
252//
253// TODO(kenton):  It would be nice to use realpath() here except that it
254//   resolves symbolic links.  This could cause problems if people place
255//   symbolic links in their source tree.  For example, if you executed:
256//     protoc --proto_path=foo foo/bar/baz.proto
257//   then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
258//   to a path which does not appear to be under foo, and thus the compiler
259//   will complain that baz.proto is not inside the --proto_path.
260static string CanonicalizePath(string path) {
261#ifdef _WIN32
262  // The Win32 API accepts forward slashes as a path delimiter even though
263  // backslashes are standard.  Let's avoid confusion and use only forward
264  // slashes.
265  if (HasPrefixString(path, "\\\\")) {
266    // Avoid converting two leading backslashes.
267    path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
268  } else {
269    path = StringReplace(path, "\\", "/", true);
270  }
271#endif
272
273  vector<string> canonical_parts;
274  vector<string> parts = Split(
275      path, "/", true);  // Note:  Removes empty parts.
276  for (int i = 0; i < parts.size(); i++) {
277    if (parts[i] == ".") {
278      // Ignore.
279    } else {
280      canonical_parts.push_back(parts[i]);
281    }
282  }
283  string result = Join(canonical_parts, "/");
284  if (!path.empty() && path[0] == '/') {
285    // Restore leading slash.
286    result = '/' + result;
287  }
288  if (!path.empty() && LastChar(path) == '/' &&
289      !result.empty() && LastChar(result) != '/') {
290    // Restore trailing slash.
291    result += '/';
292  }
293  return result;
294}
295
296static inline bool ContainsParentReference(const string& path) {
297  return path == ".." ||
298         HasPrefixString(path, "../") ||
299         HasSuffixString(path, "/..") ||
300         path.find("/../") != string::npos;
301}
302
303// Maps a file from an old location to a new one.  Typically, old_prefix is
304// a virtual path and new_prefix is its corresponding disk path.  Returns
305// false if the filename did not start with old_prefix, otherwise replaces
306// old_prefix with new_prefix and stores the result in *result.  Examples:
307//   string result;
308//   assert(ApplyMapping("foo/bar", "", "baz", &result));
309//   assert(result == "baz/foo/bar");
310//
311//   assert(ApplyMapping("foo/bar", "foo", "baz", &result));
312//   assert(result == "baz/bar");
313//
314//   assert(ApplyMapping("foo", "foo", "bar", &result));
315//   assert(result == "bar");
316//
317//   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
318//   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
319//   assert(!ApplyMapping("foobar", "foo", "baz", &result));
320static bool ApplyMapping(const string& filename,
321                         const string& old_prefix,
322                         const string& new_prefix,
323                         string* result) {
324  if (old_prefix.empty()) {
325    // old_prefix matches any relative path.
326    if (ContainsParentReference(filename)) {
327      // We do not allow the file name to use "..".
328      return false;
329    }
330    if (HasPrefixString(filename, "/") ||
331        IsWindowsAbsolutePath(filename)) {
332      // This is an absolute path, so it isn't matched by the empty string.
333      return false;
334    }
335    result->assign(new_prefix);
336    if (!result->empty()) result->push_back('/');
337    result->append(filename);
338    return true;
339  } else if (HasPrefixString(filename, old_prefix)) {
340    // old_prefix is a prefix of the filename.  Is it the whole filename?
341    if (filename.size() == old_prefix.size()) {
342      // Yep, it's an exact match.
343      *result = new_prefix;
344      return true;
345    } else {
346      // Not an exact match.  Is the next character a '/'?  Otherwise,
347      // this isn't actually a match at all.  E.g. the prefix "foo/bar"
348      // does not match the filename "foo/barbaz".
349      int after_prefix_start = -1;
350      if (filename[old_prefix.size()] == '/') {
351        after_prefix_start = old_prefix.size() + 1;
352      } else if (filename[old_prefix.size() - 1] == '/') {
353        // old_prefix is never empty, and canonicalized paths never have
354        // consecutive '/' characters.
355        after_prefix_start = old_prefix.size();
356      }
357      if (after_prefix_start != -1) {
358        // Yep.  So the prefixes are directories and the filename is a file
359        // inside them.
360        string after_prefix = filename.substr(after_prefix_start);
361        if (ContainsParentReference(after_prefix)) {
362          // We do not allow the file name to use "..".
363          return false;
364        }
365        result->assign(new_prefix);
366        if (!result->empty()) result->push_back('/');
367        result->append(after_prefix);
368        return true;
369      }
370    }
371  }
372
373  return false;
374}
375
376void DiskSourceTree::MapPath(const string& virtual_path,
377                             const string& disk_path) {
378  mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
379}
380
381DiskSourceTree::DiskFileToVirtualFileResult
382DiskSourceTree::DiskFileToVirtualFile(
383    const string& disk_file,
384    string* virtual_file,
385    string* shadowing_disk_file) {
386  int mapping_index = -1;
387  string canonical_disk_file = CanonicalizePath(disk_file);
388
389  for (int i = 0; i < mappings_.size(); i++) {
390    // Apply the mapping in reverse.
391    if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
392                     mappings_[i].virtual_path, virtual_file)) {
393      // Success.
394      mapping_index = i;
395      break;
396    }
397  }
398
399  if (mapping_index == -1) {
400    return NO_MAPPING;
401  }
402
403  // Iterate through all mappings with higher precedence and verify that none
404  // of them map this file to some other existing file.
405  for (int i = 0; i < mapping_index; i++) {
406    if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
407                     mappings_[i].disk_path, shadowing_disk_file)) {
408      if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
409        // File exists.
410        return SHADOWED;
411      }
412    }
413  }
414  shadowing_disk_file->clear();
415
416  // Verify that we can open the file.  Note that this also has the side-effect
417  // of verifying that we are not canonicalizing away any non-existent
418  // directories.
419  google::protobuf::scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
420  if (stream == NULL) {
421    return CANNOT_OPEN;
422  }
423
424  return SUCCESS;
425}
426
427bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file,
428                                           string* disk_file) {
429  google::protobuf::scoped_ptr<io::ZeroCopyInputStream> stream(
430      OpenVirtualFile(virtual_file, disk_file));
431  return stream != NULL;
432}
433
434io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) {
435  return OpenVirtualFile(filename, NULL);
436}
437
438string DiskSourceTree::GetLastErrorMessage() {
439  return last_error_message_;
440}
441
442io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
443    const string& virtual_file,
444    string* disk_file) {
445  if (virtual_file != CanonicalizePath(virtual_file) ||
446      ContainsParentReference(virtual_file)) {
447    // We do not allow importing of paths containing things like ".." or
448    // consecutive slashes since the compiler expects files to be uniquely
449    // identified by file name.
450    last_error_message_ = "Backslashes, consecutive slashes, \".\", or \"..\" "
451                          "are not allowed in the virtual path";
452    return NULL;
453  }
454
455  for (int i = 0; i < mappings_.size(); i++) {
456    string temp_disk_file;
457    if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
458                     mappings_[i].disk_path, &temp_disk_file)) {
459      io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
460      if (stream != NULL) {
461        if (disk_file != NULL) {
462          *disk_file = temp_disk_file;
463        }
464        return stream;
465      }
466
467      if (errno == EACCES) {
468        // The file exists but is not readable.
469        last_error_message_ = "Read access is denied for file: " +
470                              temp_disk_file;
471        return NULL;
472      }
473    }
474  }
475  last_error_message_ = "File not found.";
476  return NULL;
477}
478
479io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
480    const string& filename) {
481  int file_descriptor;
482  do {
483    file_descriptor = open(filename.c_str(), O_RDONLY);
484  } while (file_descriptor < 0 && errno == EINTR);
485  if (file_descriptor >= 0) {
486    io::FileInputStream* result = new io::FileInputStream(file_descriptor);
487    result->SetCloseOnDelete(true);
488    return result;
489  } else {
490    return NULL;
491  }
492}
493
494}  // namespace compiler
495}  // namespace protobuf
496}  // namespace google
497