1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#ifdef _MSC_VER
36#include <io.h>
37#else
38#include <unistd.h>
39#endif
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <fcntl.h>
43#include <errno.h>
44
45#include <algorithm>
46
47#include <google/protobuf/compiler/importer.h>
48
49#include <google/protobuf/compiler/parser.h>
50#include <google/protobuf/io/tokenizer.h>
51#include <google/protobuf/io/zero_copy_stream_impl.h>
52#include <google/protobuf/stubs/strutil.h>
53
54namespace google {
55namespace protobuf {
56namespace compiler {
57
58#ifdef _WIN32
59#ifndef F_OK
60#define F_OK 00  // not defined by MSVC for whatever reason
61#endif
62#include <ctype.h>
63#endif
64
// Returns true if |text| looks like a Windows-style absolute path: a drive
// letter, then ':', then a path separator ('/' or '\').  Example:  "C:\foo".
// The ':' after the drive letter must be the only ':' in the text.  When
// compiled for anything other than Windows or Cygwin this always returns
// false.
//
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() is only defined for EOF and values representable as unsigned
  // char.  Plain char may be signed, so convert before classifying; otherwise
  // a byte >= 0x80 in the first position would be undefined behavior.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
78
// Out-of-line destructor definition; performs no work of its own.
MultiFileErrorCollector::~MultiFileErrorCollector() {}
80
81// This class serves two purposes:
82// - It implements the ErrorCollector interface (used by Tokenizer and Parser)
83//   in terms of MultiFileErrorCollector, using a particular filename.
84// - It lets us check if any errors have occurred.
85class SourceTreeDescriptorDatabase::SingleFileErrorCollector
86    : public io::ErrorCollector {
87 public:
88  SingleFileErrorCollector(const string& filename,
89                           MultiFileErrorCollector* multi_file_error_collector)
90    : filename_(filename),
91      multi_file_error_collector_(multi_file_error_collector),
92      had_errors_(false) {}
93  ~SingleFileErrorCollector() {}
94
95  bool had_errors() { return had_errors_; }
96
97  // implements ErrorCollector ---------------------------------------
98  void AddError(int line, int column, const string& message) {
99    if (multi_file_error_collector_ != NULL) {
100      multi_file_error_collector_->AddError(filename_, line, column, message);
101    }
102    had_errors_ = true;
103  }
104
105 private:
106  string filename_;
107  MultiFileErrorCollector* multi_file_error_collector_;
108  bool had_errors_;
109};
110
111// ===================================================================
112
// Stores |source_tree| for later file lookups.  The database starts with no
// error collector and with the validation error collector flagged as not in
// use; the embedded validation error collector is handed a back-pointer to
// this database.
SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
    SourceTree* source_tree)
  : source_tree_(source_tree),
    error_collector_(NULL),
    using_validation_error_collector_(false),
    validation_error_collector_(this) {}
119
// Nothing to release explicitly; the body is empty.
SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
121
122bool SourceTreeDescriptorDatabase::FindFileByName(
123    const string& filename, FileDescriptorProto* output) {
124  scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
125  if (input == NULL) {
126    if (error_collector_ != NULL) {
127      error_collector_->AddError(filename, -1, 0, "File not found.");
128    }
129    return false;
130  }
131
132  // Set up the tokenizer and parser.
133  SingleFileErrorCollector file_error_collector(filename, error_collector_);
134  io::Tokenizer tokenizer(input.get(), &file_error_collector);
135
136  Parser parser;
137  if (error_collector_ != NULL) {
138    parser.RecordErrorsTo(&file_error_collector);
139  }
140  if (using_validation_error_collector_) {
141    parser.RecordSourceLocationsTo(&source_locations_);
142  }
143
144  // Parse it.
145  output->set_name(filename);
146  return parser.Parse(&tokenizer, output) &&
147         !file_error_collector.had_errors();
148}
149
// Lookup by symbol name is not implemented by this database: it always
// returns false and never touches |output|.
bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
    const string& symbol_name, FileDescriptorProto* output) {
  return false;
}
154
// Lookup by extension is not implemented by this database: it always returns
// false and never touches |output|.
bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
    const string& containing_type, int field_number,
    FileDescriptorProto* output) {
  return false;
}
160
161// -------------------------------------------------------------------
162
// Remembers |owner|, the database whose error collector and source locations
// AddError() consults.
SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
  : owner_(owner) {}
166
// Empty body; owner_ is not deleted here.
SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector() {}
169
170void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
171    const string& filename,
172    const string& element_name,
173    const Message* descriptor,
174    ErrorLocation location,
175    const string& message) {
176  if (owner_->error_collector_ == NULL) return;
177
178  int line, column;
179  owner_->source_locations_.Find(descriptor, location, &line, &column);
180  owner_->error_collector_->AddError(filename, line, column, message);
181}
182
183// ===================================================================
184
// Builds the database on top of |source_tree|, then builds pool_ on top of
// that database, passing the database's validation error collector to it.
// Finally directs the database's errors to |error_collector|.
Importer::Importer(SourceTree* source_tree,
                   MultiFileErrorCollector* error_collector)
  : database_(source_tree),
    pool_(&database_, database_.GetValidationErrorCollector()) {
  database_.RecordErrorsTo(error_collector);
}
191
// Empty body; no explicit cleanup is performed here.
Importer::~Importer() {}
193
// Imports |filename| by looking it up in pool_, and returns whatever
// pool_.FindFileByName() returns.
// NOTE(review): presumably NULL when the file cannot be found or fails to
// parse -- confirm against DescriptorPool.
const FileDescriptor* Importer::Import(const string& filename) {
  return pool_.FindFileByName(filename);
}
197
198// ===================================================================
199
// Out-of-line destructor definition; performs no work of its own.
SourceTree::~SourceTree() {}
201
// Performs no explicit initialization; mappings are added later through
// MapPath().
DiskSourceTree::DiskSourceTree() {}
203
// Empty body; no explicit cleanup is performed here.
DiskSourceTree::~DiskSourceTree() {}
205
// Returns the final character of |str|, or '\0' if |str| is empty.  The
// explicit empty check matters: for an empty string, str.size() - 1 wraps
// around to a huge unsigned value and indexing with it is out of bounds.
static inline char LastChar(const string& str) {
  return str.empty() ? '\0' : str[str.size() - 1];
}
209
210// Given a path, returns an equivalent path with these changes:
211// - On Windows, any backslashes are replaced with forward slashes.
212// - Any instances of the directory "." are removed.
213// - Any consecutive '/'s are collapsed into a single slash.
214// Note that the resulting string may be empty.
215//
216// TODO(kenton):  It would be nice to handle "..", e.g. so that we can figure
217//   out that "foo/bar.proto" is inside "baz/../foo".  However, if baz is a
218//   symlink or doesn't exist, then things get complicated, and we can't
219//   actually determine this without investigating the filesystem, probably
220//   in non-portable ways.  So, we punt.
221//
222// TODO(kenton):  It would be nice to use realpath() here except that it
223//   resolves symbolic links.  This could cause problems if people place
224//   symbolic links in their source tree.  For example, if you executed:
225//     protoc --proto_path=foo foo/bar/baz.proto
226//   then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
227//   to a path which does not appear to be under foo, and thus the compiler
228//   will complain that baz.proto is not inside the --proto_path.
229static string CanonicalizePath(string path) {
230#ifdef _WIN32
231  // The Win32 API accepts forward slashes as a path delimiter even though
232  // backslashes are standard.  Let's avoid confusion and use only forward
233  // slashes.
234  if (HasPrefixString(path, "\\\\")) {
235    // Avoid converting two leading backslashes.
236    path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
237  } else {
238    path = StringReplace(path, "\\", "/", true);
239  }
240#endif
241
242  vector<string> parts;
243  vector<string> canonical_parts;
244  SplitStringUsing(path, "/", &parts);  // Note:  Removes empty parts.
245  for (int i = 0; i < parts.size(); i++) {
246    if (parts[i] == ".") {
247      // Ignore.
248    } else {
249      canonical_parts.push_back(parts[i]);
250    }
251  }
252  string result = JoinStrings(canonical_parts, "/");
253  if (!path.empty() && path[0] == '/') {
254    // Restore leading slash.
255    result = '/' + result;
256  }
257  if (!path.empty() && LastChar(path) == '/' &&
258      !result.empty() && LastChar(result) != '/') {
259    // Restore trailing slash.
260    result += '/';
261  }
262  return result;
263}
264
265static inline bool ContainsParentReference(const string& path) {
266  return path == ".." ||
267         HasPrefixString(path, "../") ||
268         HasSuffixString(path, "/..") ||
269         path.find("/../") != string::npos;
270}
271
272// Maps a file from an old location to a new one.  Typically, old_prefix is
273// a virtual path and new_prefix is its corresponding disk path.  Returns
274// false if the filename did not start with old_prefix, otherwise replaces
275// old_prefix with new_prefix and stores the result in *result.  Examples:
276//   string result;
277//   assert(ApplyMapping("foo/bar", "", "baz", &result));
278//   assert(result == "baz/foo/bar");
279//
280//   assert(ApplyMapping("foo/bar", "foo", "baz", &result));
281//   assert(result == "baz/bar");
282//
283//   assert(ApplyMapping("foo", "foo", "bar", &result));
284//   assert(result == "bar");
285//
286//   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
287//   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
288//   assert(!ApplyMapping("foobar", "foo", "baz", &result));
289static bool ApplyMapping(const string& filename,
290                         const string& old_prefix,
291                         const string& new_prefix,
292                         string* result) {
293  if (old_prefix.empty()) {
294    // old_prefix matches any relative path.
295    if (ContainsParentReference(filename)) {
296      // We do not allow the file name to use "..".
297      return false;
298    }
299    if (HasPrefixString(filename, "/") ||
300        IsWindowsAbsolutePath(filename)) {
301      // This is an absolute path, so it isn't matched by the empty string.
302      return false;
303    }
304    result->assign(new_prefix);
305    if (!result->empty()) result->push_back('/');
306    result->append(filename);
307    return true;
308  } else if (HasPrefixString(filename, old_prefix)) {
309    // old_prefix is a prefix of the filename.  Is it the whole filename?
310    if (filename.size() == old_prefix.size()) {
311      // Yep, it's an exact match.
312      *result = new_prefix;
313      return true;
314    } else {
315      // Not an exact match.  Is the next character a '/'?  Otherwise,
316      // this isn't actually a match at all.  E.g. the prefix "foo/bar"
317      // does not match the filename "foo/barbaz".
318      int after_prefix_start = -1;
319      if (filename[old_prefix.size()] == '/') {
320        after_prefix_start = old_prefix.size() + 1;
321      } else if (filename[old_prefix.size() - 1] == '/') {
322        // old_prefix is never empty, and canonicalized paths never have
323        // consecutive '/' characters.
324        after_prefix_start = old_prefix.size();
325      }
326      if (after_prefix_start != -1) {
327        // Yep.  So the prefixes are directories and the filename is a file
328        // inside them.
329        string after_prefix = filename.substr(after_prefix_start);
330        if (ContainsParentReference(after_prefix)) {
331          // We do not allow the file name to use "..".
332          return false;
333        }
334        result->assign(new_prefix);
335        if (!result->empty()) result->push_back('/');
336        result->append(after_prefix);
337        return true;
338      }
339    }
340  }
341
342  return false;
343}
344
345void DiskSourceTree::MapPath(const string& virtual_path,
346                             const string& disk_path) {
347  mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
348}
349
350DiskSourceTree::DiskFileToVirtualFileResult
351DiskSourceTree::DiskFileToVirtualFile(
352    const string& disk_file,
353    string* virtual_file,
354    string* shadowing_disk_file) {
355  int mapping_index = -1;
356  string canonical_disk_file = CanonicalizePath(disk_file);
357
358  for (int i = 0; i < mappings_.size(); i++) {
359    // Apply the mapping in reverse.
360    if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
361                     mappings_[i].virtual_path, virtual_file)) {
362      // Success.
363      mapping_index = i;
364      break;
365    }
366  }
367
368  if (mapping_index == -1) {
369    return NO_MAPPING;
370  }
371
372  // Iterate through all mappings with higher precedence and verify that none
373  // of them map this file to some other existing file.
374  for (int i = 0; i < mapping_index; i++) {
375    if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
376                     mappings_[i].disk_path, shadowing_disk_file)) {
377      if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
378        // File exists.
379        return SHADOWED;
380      }
381    }
382  }
383  shadowing_disk_file->clear();
384
385  // Verify that we can open the file.  Note that this also has the side-effect
386  // of verifying that we are not canonicalizing away any non-existent
387  // directories.
388  scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
389  if (stream == NULL) {
390    return CANNOT_OPEN;
391  }
392
393  return SUCCESS;
394}
395
396bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file,
397                                           string* disk_file) {
398  scoped_ptr<io::ZeroCopyInputStream> stream(OpenVirtualFile(virtual_file,
399                                                             disk_file));
400  return stream != NULL;
401}
402
// Opens the virtual file |filename|.  Forwards to OpenVirtualFile() with a
// NULL disk_file, i.e. the caller does not learn which disk file was used.
io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) {
  return OpenVirtualFile(filename, NULL);
}
406
407io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
408    const string& virtual_file,
409    string* disk_file) {
410  if (virtual_file != CanonicalizePath(virtual_file) ||
411      ContainsParentReference(virtual_file)) {
412    // We do not allow importing of paths containing things like ".." or
413    // consecutive slashes since the compiler expects files to be uniquely
414    // identified by file name.
415    return NULL;
416  }
417
418  for (int i = 0; i < mappings_.size(); i++) {
419    string temp_disk_file;
420    if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
421                     mappings_[i].disk_path, &temp_disk_file)) {
422      io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
423      if (stream != NULL) {
424        if (disk_file != NULL) {
425          *disk_file = temp_disk_file;
426        }
427        return stream;
428      }
429
430      if (errno == EACCES) {
431        // The file exists but is not readable.
432        // TODO(kenton):  Find a way to report this more nicely.
433        GOOGLE_LOG(WARNING) << "Read access is denied for file: " << temp_disk_file;
434        return NULL;
435      }
436    }
437  }
438
439  return NULL;
440}
441
442io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
443    const string& filename) {
444  int file_descriptor;
445  do {
446    file_descriptor = open(filename.c_str(), O_RDONLY);
447  } while (file_descriptor < 0 && errno == EINTR);
448  if (file_descriptor >= 0) {
449    io::FileInputStream* result = new io::FileInputStream(file_descriptor);
450    result->SetCloseOnDelete(true);
451    return result;
452  } else {
453    return NULL;
454  }
455}
456
457}  // namespace compiler
458}  // namespace protobuf
459}  // namespace google
460