1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#ifdef _MSC_VER
36#include <io.h>
37#else
38#include <unistd.h>
39#endif
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <fcntl.h>
43#include <errno.h>
44
45#include <algorithm>
46#include <memory>
47
48#include <google/protobuf/compiler/importer.h>
49
50#include <google/protobuf/compiler/parser.h>
51#include <google/protobuf/io/tokenizer.h>
52#include <google/protobuf/io/zero_copy_stream_impl.h>
53#include <google/protobuf/stubs/strutil.h>
54
55namespace google {
56namespace protobuf {
57namespace compiler {
58
// Windows-only compatibility shims.
#ifdef _WIN32
#ifndef F_OK
// Mode argument passed to access() in DiskSourceTree::DiskFileToVirtualFile().
#define F_OK 00  // not defined by MSVC for whatever reason
#endif
// Declares isalpha(), which IsWindowsAbsolutePath() uses.
#include <ctype.h>
#endif
65
// Returns true if the text looks like a Windows-style absolute path, i.e. a
// drive letter followed by ':' and then a slash or backslash.
// Example:  "C:\foo".  The colon after the drive letter must be the only ':'
// in the text.  Always returns false unless compiled for Windows or Cygwin.
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() has undefined behavior for negative arguments other than EOF.
  // Plain char may be signed, so go through unsigned char in case the text
  // starts with a non-ASCII byte.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
79
// Out-of-line destructor definition; it performs no work.
MultiFileErrorCollector::~MultiFileErrorCollector() {}
81
82// This class serves two purposes:
83// - It implements the ErrorCollector interface (used by Tokenizer and Parser)
84//   in terms of MultiFileErrorCollector, using a particular filename.
85// - It lets us check if any errors have occurred.
86class SourceTreeDescriptorDatabase::SingleFileErrorCollector
87    : public io::ErrorCollector {
88 public:
89  SingleFileErrorCollector(const string& filename,
90                           MultiFileErrorCollector* multi_file_error_collector)
91    : filename_(filename),
92      multi_file_error_collector_(multi_file_error_collector),
93      had_errors_(false) {}
94  ~SingleFileErrorCollector() {}
95
96  bool had_errors() { return had_errors_; }
97
98  // implements ErrorCollector ---------------------------------------
99  void AddError(int line, int column, const string& message) {
100    if (multi_file_error_collector_ != NULL) {
101      multi_file_error_collector_->AddError(filename_, line, column, message);
102    }
103    had_errors_ = true;
104  }
105
106 private:
107  string filename_;
108  MultiFileErrorCollector* multi_file_error_collector_;
109  bool had_errors_;
110};
111
112// ===================================================================
113
// Creates a database that opens files through |source_tree| (see
// FindFileByName()).  Initially no error collector is installed and the
// validation error collector is flagged as not in use.  The embedded
// validation_error_collector_ is given a pointer back to this object.
SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
    SourceTree* source_tree)
  : source_tree_(source_tree),
    error_collector_(NULL),
    using_validation_error_collector_(false),
    validation_error_collector_(this) {}

// Empty destructor body; no explicit cleanup is done here.
SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
122
123bool SourceTreeDescriptorDatabase::FindFileByName(
124    const string& filename, FileDescriptorProto* output) {
125  scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
126  if (input == NULL) {
127    if (error_collector_ != NULL) {
128      error_collector_->AddError(filename, -1, 0,
129                                 source_tree_->GetLastErrorMessage());
130    }
131    return false;
132  }
133
134  // Set up the tokenizer and parser.
135  SingleFileErrorCollector file_error_collector(filename, error_collector_);
136  io::Tokenizer tokenizer(input.get(), &file_error_collector);
137
138  Parser parser;
139  if (error_collector_ != NULL) {
140    parser.RecordErrorsTo(&file_error_collector);
141  }
142  if (using_validation_error_collector_) {
143    parser.RecordSourceLocationsTo(&source_locations_);
144  }
145
146  // Parse it.
147  output->set_name(filename);
148  return parser.Parse(&tokenizer, output) &&
149         !file_error_collector.had_errors();
150}
151
// Lookup by symbol name is not provided by this database: this always
// returns false and does not touch |output|.
bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
    const string& symbol_name, FileDescriptorProto* output) {
  return false;
}
156
// Lookup by extension is not provided by this database: this always returns
// false and does not touch |output|.
bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
    const string& containing_type, int field_number,
    FileDescriptorProto* output) {
  return false;
}
162
163// -------------------------------------------------------------------
164
// Remembers |owner|, the database whose error collector and source location
// table AddError() consults.
SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
  : owner_(owner) {}

// Empty destructor body; |owner_| is not deleted here.
SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector() {}
171
172void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
173    const string& filename,
174    const string& element_name,
175    const Message* descriptor,
176    ErrorLocation location,
177    const string& message) {
178  if (owner_->error_collector_ == NULL) return;
179
180  int line, column;
181  owner_->source_locations_.Find(descriptor, location, &line, &column);
182  owner_->error_collector_->AddError(filename, line, column, message);
183}
184
185// ===================================================================
186
// Builds an importer that reads from |source_tree|.  The descriptor pool is
// constructed on top of database_ and is given the database's validation
// error collector; the database itself is told to record errors to
// |error_collector|.
Importer::Importer(SourceTree* source_tree,
                   MultiFileErrorCollector* error_collector)
  : database_(source_tree),
    pool_(&database_, database_.GetValidationErrorCollector()) {
  // Enable weak-dependency enforcement on the pool.
  pool_.EnforceWeakDependencies(true);
  database_.RecordErrorsTo(error_collector);
}

// Empty destructor body; no explicit cleanup is done here.
Importer::~Importer() {}
196
// Looks |filename| up in the descriptor pool and returns whatever the pool's
// FindFileByName() returns.
const FileDescriptor* Importer::Import(const string& filename) {
  return pool_.FindFileByName(filename);
}
200
// Forwards |file_name| to the pool's AddUnusedImportTrackFile().
void Importer::AddUnusedImportTrackFile(const string& file_name) {
  pool_.AddUnusedImportTrackFile(file_name);
}
204
// Forwards to the pool's ClearUnusedImportTrackFiles().
void Importer::ClearUnusedImportTrackFiles() {
  pool_.ClearUnusedImportTrackFiles();
}
208
209// ===================================================================
210
// Out-of-line destructor definition; it performs no work.
SourceTree::~SourceTree() {}
212
// Returns a fixed, generic message; this implementation does not track the
// reason for any particular failure.
string SourceTree::GetLastErrorMessage() {
  return "File not found.";
}
216
// Empty constructor body; mappings are added afterwards through MapPath().
DiskSourceTree::DiskSourceTree() {}

// Empty destructor body; no explicit cleanup is done here.
DiskSourceTree::~DiskSourceTree() {}
220
// Returns the final character of |str|, or '\0' if |str| is empty.  An empty
// string has no last character, and indexing it at size() - 1 would be out
// of range.
static inline char LastChar(const string& str) {
  return str.empty() ? '\0' : str[str.size() - 1];
}
224
225// Given a path, returns an equivalent path with these changes:
226// - On Windows, any backslashes are replaced with forward slashes.
227// - Any instances of the directory "." are removed.
228// - Any consecutive '/'s are collapsed into a single slash.
229// Note that the resulting string may be empty.
230//
231// TODO(kenton):  It would be nice to handle "..", e.g. so that we can figure
232//   out that "foo/bar.proto" is inside "baz/../foo".  However, if baz is a
233//   symlink or doesn't exist, then things get complicated, and we can't
234//   actually determine this without investigating the filesystem, probably
235//   in non-portable ways.  So, we punt.
236//
237// TODO(kenton):  It would be nice to use realpath() here except that it
238//   resolves symbolic links.  This could cause problems if people place
239//   symbolic links in their source tree.  For example, if you executed:
240//     protoc --proto_path=foo foo/bar/baz.proto
241//   then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
242//   to a path which does not appear to be under foo, and thus the compiler
243//   will complain that baz.proto is not inside the --proto_path.
244static string CanonicalizePath(string path) {
245#ifdef _WIN32
246  // The Win32 API accepts forward slashes as a path delimiter even though
247  // backslashes are standard.  Let's avoid confusion and use only forward
248  // slashes.
249  if (HasPrefixString(path, "\\\\")) {
250    // Avoid converting two leading backslashes.
251    path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
252  } else {
253    path = StringReplace(path, "\\", "/", true);
254  }
255#endif
256
257  vector<string> canonical_parts;
258  vector<string> parts = Split(
259      path, "/", true);  // Note:  Removes empty parts.
260  for (int i = 0; i < parts.size(); i++) {
261    if (parts[i] == ".") {
262      // Ignore.
263    } else {
264      canonical_parts.push_back(parts[i]);
265    }
266  }
267  string result = Join(canonical_parts, "/");
268  if (!path.empty() && path[0] == '/') {
269    // Restore leading slash.
270    result = '/' + result;
271  }
272  if (!path.empty() && LastChar(path) == '/' &&
273      !result.empty() && LastChar(result) != '/') {
274    // Restore trailing slash.
275    result += '/';
276  }
277  return result;
278}
279
280static inline bool ContainsParentReference(const string& path) {
281  return path == ".." ||
282         HasPrefixString(path, "../") ||
283         HasSuffixString(path, "/..") ||
284         path.find("/../") != string::npos;
285}
286
287// Maps a file from an old location to a new one.  Typically, old_prefix is
288// a virtual path and new_prefix is its corresponding disk path.  Returns
289// false if the filename did not start with old_prefix, otherwise replaces
290// old_prefix with new_prefix and stores the result in *result.  Examples:
291//   string result;
292//   assert(ApplyMapping("foo/bar", "", "baz", &result));
293//   assert(result == "baz/foo/bar");
294//
295//   assert(ApplyMapping("foo/bar", "foo", "baz", &result));
296//   assert(result == "baz/bar");
297//
298//   assert(ApplyMapping("foo", "foo", "bar", &result));
299//   assert(result == "bar");
300//
301//   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
302//   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
303//   assert(!ApplyMapping("foobar", "foo", "baz", &result));
304static bool ApplyMapping(const string& filename,
305                         const string& old_prefix,
306                         const string& new_prefix,
307                         string* result) {
308  if (old_prefix.empty()) {
309    // old_prefix matches any relative path.
310    if (ContainsParentReference(filename)) {
311      // We do not allow the file name to use "..".
312      return false;
313    }
314    if (HasPrefixString(filename, "/") ||
315        IsWindowsAbsolutePath(filename)) {
316      // This is an absolute path, so it isn't matched by the empty string.
317      return false;
318    }
319    result->assign(new_prefix);
320    if (!result->empty()) result->push_back('/');
321    result->append(filename);
322    return true;
323  } else if (HasPrefixString(filename, old_prefix)) {
324    // old_prefix is a prefix of the filename.  Is it the whole filename?
325    if (filename.size() == old_prefix.size()) {
326      // Yep, it's an exact match.
327      *result = new_prefix;
328      return true;
329    } else {
330      // Not an exact match.  Is the next character a '/'?  Otherwise,
331      // this isn't actually a match at all.  E.g. the prefix "foo/bar"
332      // does not match the filename "foo/barbaz".
333      int after_prefix_start = -1;
334      if (filename[old_prefix.size()] == '/') {
335        after_prefix_start = old_prefix.size() + 1;
336      } else if (filename[old_prefix.size() - 1] == '/') {
337        // old_prefix is never empty, and canonicalized paths never have
338        // consecutive '/' characters.
339        after_prefix_start = old_prefix.size();
340      }
341      if (after_prefix_start != -1) {
342        // Yep.  So the prefixes are directories and the filename is a file
343        // inside them.
344        string after_prefix = filename.substr(after_prefix_start);
345        if (ContainsParentReference(after_prefix)) {
346          // We do not allow the file name to use "..".
347          return false;
348        }
349        result->assign(new_prefix);
350        if (!result->empty()) result->push_back('/');
351        result->append(after_prefix);
352        return true;
353      }
354    }
355  }
356
357  return false;
358}
359
360void DiskSourceTree::MapPath(const string& virtual_path,
361                             const string& disk_path) {
362  mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
363}
364
365DiskSourceTree::DiskFileToVirtualFileResult
366DiskSourceTree::DiskFileToVirtualFile(
367    const string& disk_file,
368    string* virtual_file,
369    string* shadowing_disk_file) {
370  int mapping_index = -1;
371  string canonical_disk_file = CanonicalizePath(disk_file);
372
373  for (int i = 0; i < mappings_.size(); i++) {
374    // Apply the mapping in reverse.
375    if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
376                     mappings_[i].virtual_path, virtual_file)) {
377      // Success.
378      mapping_index = i;
379      break;
380    }
381  }
382
383  if (mapping_index == -1) {
384    return NO_MAPPING;
385  }
386
387  // Iterate through all mappings with higher precedence and verify that none
388  // of them map this file to some other existing file.
389  for (int i = 0; i < mapping_index; i++) {
390    if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
391                     mappings_[i].disk_path, shadowing_disk_file)) {
392      if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
393        // File exists.
394        return SHADOWED;
395      }
396    }
397  }
398  shadowing_disk_file->clear();
399
400  // Verify that we can open the file.  Note that this also has the side-effect
401  // of verifying that we are not canonicalizing away any non-existent
402  // directories.
403  scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
404  if (stream == NULL) {
405    return CANNOT_OPEN;
406  }
407
408  return SUCCESS;
409}
410
411bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file,
412                                           string* disk_file) {
413  scoped_ptr<io::ZeroCopyInputStream> stream(
414      OpenVirtualFile(virtual_file, disk_file));
415  return stream != NULL;
416}
417
// Opens the virtual file |filename| via OpenVirtualFile(), without asking
// for the resolved disk path.  Returns whatever OpenVirtualFile() returns
// (NULL on failure).
io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) {
  return OpenVirtualFile(filename, NULL);
}
421
// Returns the message stored by the most recent failed OpenVirtualFile()
// call.
string DiskSourceTree::GetLastErrorMessage() {
  return last_error_message_;
}
425
426io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
427    const string& virtual_file,
428    string* disk_file) {
429  if (virtual_file != CanonicalizePath(virtual_file) ||
430      ContainsParentReference(virtual_file)) {
431    // We do not allow importing of paths containing things like ".." or
432    // consecutive slashes since the compiler expects files to be uniquely
433    // identified by file name.
434    last_error_message_ = "Backslashes, consecutive slashes, \".\", or \"..\" "
435                          "are not allowed in the virtual path";
436    return NULL;
437  }
438
439  for (int i = 0; i < mappings_.size(); i++) {
440    string temp_disk_file;
441    if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
442                     mappings_[i].disk_path, &temp_disk_file)) {
443      io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
444      if (stream != NULL) {
445        if (disk_file != NULL) {
446          *disk_file = temp_disk_file;
447        }
448        return stream;
449      }
450
451      if (errno == EACCES) {
452        // The file exists but is not readable.
453        last_error_message_ = "Read access is denied for file: " +
454                              temp_disk_file;
455        return NULL;
456      }
457    }
458  }
459  last_error_message_ = "File not found.";
460  return NULL;
461}
462
463io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
464    const string& filename) {
465  int file_descriptor;
466  do {
467    file_descriptor = open(filename.c_str(), O_RDONLY);
468  } while (file_descriptor < 0 && errno == EINTR);
469  if (file_descriptor >= 0) {
470    io::FileInputStream* result = new io::FileInputStream(file_descriptor);
471    result->SetCloseOnDelete(true);
472    return result;
473  } else {
474    return NULL;
475  }
476}
477
478}  // namespace compiler
479}  // namespace protobuf
480}  // namespace google
481