// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
34 35#ifdef _MSC_VER 36#include <io.h> 37#else 38#include <unistd.h> 39#endif 40#include <sys/types.h> 41#include <sys/stat.h> 42#include <fcntl.h> 43#include <errno.h> 44 45#include <algorithm> 46 47#include <google/protobuf/compiler/importer.h> 48 49#include <google/protobuf/compiler/parser.h> 50#include <google/protobuf/io/tokenizer.h> 51#include <google/protobuf/io/zero_copy_stream_impl.h> 52#include <google/protobuf/stubs/strutil.h> 53 54namespace google { 55namespace protobuf { 56namespace compiler { 57 58#ifdef _WIN32 59#ifndef F_OK 60#define F_OK 00 // not defined by MSVC for whatever reason 61#endif 62#include <ctype.h> 63#endif 64 65// Returns true if the text looks like a Windows-style absolute path, starting 66// with a drive letter. Example: "C:\foo". TODO(kenton): Share this with 67// copy in command_line_interface.cc? 68static bool IsWindowsAbsolutePath(const string& text) { 69#if defined(_WIN32) || defined(__CYGWIN__) 70 return text.size() >= 3 && text[1] == ':' && 71 isalpha(text[0]) && 72 (text[2] == '/' || text[2] == '\\') && 73 text.find_last_of(':') == 1; 74#else 75 return false; 76#endif 77} 78 79MultiFileErrorCollector::~MultiFileErrorCollector() {} 80 81// This class serves two purposes: 82// - It implements the ErrorCollector interface (used by Tokenizer and Parser) 83// in terms of MultiFileErrorCollector, using a particular filename. 84// - It lets us check if any errors have occurred. 
85class SourceTreeDescriptorDatabase::SingleFileErrorCollector 86 : public io::ErrorCollector { 87 public: 88 SingleFileErrorCollector(const string& filename, 89 MultiFileErrorCollector* multi_file_error_collector) 90 : filename_(filename), 91 multi_file_error_collector_(multi_file_error_collector), 92 had_errors_(false) {} 93 ~SingleFileErrorCollector() {} 94 95 bool had_errors() { return had_errors_; } 96 97 // implements ErrorCollector --------------------------------------- 98 void AddError(int line, int column, const string& message) { 99 if (multi_file_error_collector_ != NULL) { 100 multi_file_error_collector_->AddError(filename_, line, column, message); 101 } 102 had_errors_ = true; 103 } 104 105 private: 106 string filename_; 107 MultiFileErrorCollector* multi_file_error_collector_; 108 bool had_errors_; 109}; 110 111// =================================================================== 112 113SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase( 114 SourceTree* source_tree) 115 : source_tree_(source_tree), 116 error_collector_(NULL), 117 using_validation_error_collector_(false), 118 validation_error_collector_(this) {} 119 120SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {} 121 122bool SourceTreeDescriptorDatabase::FindFileByName( 123 const string& filename, FileDescriptorProto* output) { 124 scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename)); 125 if (input == NULL) { 126 if (error_collector_ != NULL) { 127 error_collector_->AddError(filename, -1, 0, "File not found."); 128 } 129 return false; 130 } 131 132 // Set up the tokenizer and parser. 
133 SingleFileErrorCollector file_error_collector(filename, error_collector_); 134 io::Tokenizer tokenizer(input.get(), &file_error_collector); 135 136 Parser parser; 137 if (error_collector_ != NULL) { 138 parser.RecordErrorsTo(&file_error_collector); 139 } 140 if (using_validation_error_collector_) { 141 parser.RecordSourceLocationsTo(&source_locations_); 142 } 143 144 // Parse it. 145 output->set_name(filename); 146 return parser.Parse(&tokenizer, output) && 147 !file_error_collector.had_errors(); 148} 149 150bool SourceTreeDescriptorDatabase::FindFileContainingSymbol( 151 const string& symbol_name, FileDescriptorProto* output) { 152 return false; 153} 154 155bool SourceTreeDescriptorDatabase::FindFileContainingExtension( 156 const string& containing_type, int field_number, 157 FileDescriptorProto* output) { 158 return false; 159} 160 161// ------------------------------------------------------------------- 162 163SourceTreeDescriptorDatabase::ValidationErrorCollector:: 164ValidationErrorCollector(SourceTreeDescriptorDatabase* owner) 165 : owner_(owner) {} 166 167SourceTreeDescriptorDatabase::ValidationErrorCollector:: 168~ValidationErrorCollector() {} 169 170void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError( 171 const string& filename, 172 const string& element_name, 173 const Message* descriptor, 174 ErrorLocation location, 175 const string& message) { 176 if (owner_->error_collector_ == NULL) return; 177 178 int line, column; 179 owner_->source_locations_.Find(descriptor, location, &line, &column); 180 owner_->error_collector_->AddError(filename, line, column, message); 181} 182 183// =================================================================== 184 185Importer::Importer(SourceTree* source_tree, 186 MultiFileErrorCollector* error_collector) 187 : database_(source_tree), 188 pool_(&database_, database_.GetValidationErrorCollector()) { 189 database_.RecordErrorsTo(error_collector); 190} 191 192Importer::~Importer() {} 193 194const 
FileDescriptor* Importer::Import(const string& filename) { 195 return pool_.FindFileByName(filename); 196} 197 198// =================================================================== 199 200SourceTree::~SourceTree() {} 201 202DiskSourceTree::DiskSourceTree() {} 203 204DiskSourceTree::~DiskSourceTree() {} 205 206static inline char LastChar(const string& str) { 207 return str[str.size() - 1]; 208} 209 210// Given a path, returns an equivalent path with these changes: 211// - On Windows, any backslashes are replaced with forward slashes. 212// - Any instances of the directory "." are removed. 213// - Any consecutive '/'s are collapsed into a single slash. 214// Note that the resulting string may be empty. 215// 216// TODO(kenton): It would be nice to handle "..", e.g. so that we can figure 217// out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a 218// symlink or doesn't exist, then things get complicated, and we can't 219// actually determine this without investigating the filesystem, probably 220// in non-portable ways. So, we punt. 221// 222// TODO(kenton): It would be nice to use realpath() here except that it 223// resolves symbolic links. This could cause problems if people place 224// symbolic links in their source tree. For example, if you executed: 225// protoc --proto_path=foo foo/bar/baz.proto 226// then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize 227// to a path which does not appear to be under foo, and thus the compiler 228// will complain that baz.proto is not inside the --proto_path. 229static string CanonicalizePath(string path) { 230#ifdef _WIN32 231 // The Win32 API accepts forward slashes as a path delimiter even though 232 // backslashes are standard. Let's avoid confusion and use only forward 233 // slashes. 234 path = StringReplace(path, "\\", "/", true); 235#endif 236 237 vector<string> parts; 238 vector<string> canonical_parts; 239 SplitStringUsing(path, "/", &parts); // Note: Removes empty parts. 
240 for (int i = 0; i < parts.size(); i++) { 241 if (parts[i] == ".") { 242 // Ignore. 243 } else { 244 canonical_parts.push_back(parts[i]); 245 } 246 } 247 string result = JoinStrings(canonical_parts, "/"); 248 if (!path.empty() && path[0] == '/') { 249 // Restore leading slash. 250 result = '/' + result; 251 } 252 if (!path.empty() && LastChar(path) == '/' && 253 !result.empty() && LastChar(result) != '/') { 254 // Restore trailing slash. 255 result += '/'; 256 } 257 return result; 258} 259 260static inline bool ContainsParentReference(const string& path) { 261 return path == ".." || 262 HasPrefixString(path, "../") || 263 HasSuffixString(path, "/..") || 264 path.find("/../") != string::npos; 265} 266 267// Maps a file from an old location to a new one. Typically, old_prefix is 268// a virtual path and new_prefix is its corresponding disk path. Returns 269// false if the filename did not start with old_prefix, otherwise replaces 270// old_prefix with new_prefix and stores the result in *result. Examples: 271// string result; 272// assert(ApplyMapping("foo/bar", "", "baz", &result)); 273// assert(result == "baz/foo/bar"); 274// 275// assert(ApplyMapping("foo/bar", "foo", "baz", &result)); 276// assert(result == "baz/bar"); 277// 278// assert(ApplyMapping("foo", "foo", "bar", &result)); 279// assert(result == "bar"); 280// 281// assert(!ApplyMapping("foo/bar", "baz", "qux", &result)); 282// assert(!ApplyMapping("foo/bar", "baz", "qux", &result)); 283// assert(!ApplyMapping("foobar", "foo", "baz", &result)); 284static bool ApplyMapping(const string& filename, 285 const string& old_prefix, 286 const string& new_prefix, 287 string* result) { 288 if (old_prefix.empty()) { 289 // old_prefix matches any relative path. 290 if (ContainsParentReference(filename)) { 291 // We do not allow the file name to use "..". 
292 return false; 293 } 294 if (HasPrefixString(filename, "/") || 295 IsWindowsAbsolutePath(filename)) { 296 // This is an absolute path, so it isn't matched by the empty string. 297 return false; 298 } 299 result->assign(new_prefix); 300 if (!result->empty()) result->push_back('/'); 301 result->append(filename); 302 return true; 303 } else if (HasPrefixString(filename, old_prefix)) { 304 // old_prefix is a prefix of the filename. Is it the whole filename? 305 if (filename.size() == old_prefix.size()) { 306 // Yep, it's an exact match. 307 *result = new_prefix; 308 return true; 309 } else { 310 // Not an exact match. Is the next character a '/'? Otherwise, 311 // this isn't actually a match at all. E.g. the prefix "foo/bar" 312 // does not match the filename "foo/barbaz". 313 int after_prefix_start = -1; 314 if (filename[old_prefix.size()] == '/') { 315 after_prefix_start = old_prefix.size() + 1; 316 } else if (filename[old_prefix.size() - 1] == '/') { 317 // old_prefix is never empty, and canonicalized paths never have 318 // consecutive '/' characters. 319 after_prefix_start = old_prefix.size(); 320 } 321 if (after_prefix_start != -1) { 322 // Yep. So the prefixes are directories and the filename is a file 323 // inside them. 324 string after_prefix = filename.substr(after_prefix_start); 325 if (ContainsParentReference(after_prefix)) { 326 // We do not allow the file name to use "..". 
327 return false; 328 } 329 result->assign(new_prefix); 330 if (!result->empty()) result->push_back('/'); 331 result->append(after_prefix); 332 return true; 333 } 334 } 335 } 336 337 return false; 338} 339 340void DiskSourceTree::MapPath(const string& virtual_path, 341 const string& disk_path) { 342 mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path))); 343} 344 345DiskSourceTree::DiskFileToVirtualFileResult 346DiskSourceTree::DiskFileToVirtualFile( 347 const string& disk_file, 348 string* virtual_file, 349 string* shadowing_disk_file) { 350 int mapping_index = -1; 351 string canonical_disk_file = CanonicalizePath(disk_file); 352 353 for (int i = 0; i < mappings_.size(); i++) { 354 // Apply the mapping in reverse. 355 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path, 356 mappings_[i].virtual_path, virtual_file)) { 357 // Success. 358 mapping_index = i; 359 break; 360 } 361 } 362 363 if (mapping_index == -1) { 364 return NO_MAPPING; 365 } 366 367 // Iterate through all mappings with higher precedence and verify that none 368 // of them map this file to some other existing file. 369 for (int i = 0; i < mapping_index; i++) { 370 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path, 371 mappings_[i].disk_path, shadowing_disk_file)) { 372 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) { 373 // File exists. 374 return SHADOWED; 375 } 376 } 377 } 378 shadowing_disk_file->clear(); 379 380 // Verify that we can open the file. Note that this also has the side-effect 381 // of verifying that we are not canonicalizing away any non-existent 382 // directories. 
383 scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file)); 384 if (stream == NULL) { 385 return CANNOT_OPEN; 386 } 387 388 return SUCCESS; 389} 390 391bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file, 392 string* disk_file) { 393 scoped_ptr<io::ZeroCopyInputStream> stream(OpenVirtualFile(virtual_file, 394 disk_file)); 395 return stream != NULL; 396} 397 398io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) { 399 return OpenVirtualFile(filename, NULL); 400} 401 402io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile( 403 const string& virtual_file, 404 string* disk_file) { 405 if (virtual_file != CanonicalizePath(virtual_file) || 406 ContainsParentReference(virtual_file)) { 407 // We do not allow importing of paths containing things like ".." or 408 // consecutive slashes since the compiler expects files to be uniquely 409 // identified by file name. 410 return NULL; 411 } 412 413 for (int i = 0; i < mappings_.size(); i++) { 414 string temp_disk_file; 415 if (ApplyMapping(virtual_file, mappings_[i].virtual_path, 416 mappings_[i].disk_path, &temp_disk_file)) { 417 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file); 418 if (stream != NULL) { 419 if (disk_file != NULL) { 420 *disk_file = temp_disk_file; 421 } 422 return stream; 423 } 424 425 if (errno == EACCES) { 426 // The file exists but is not readable. 427 // TODO(kenton): Find a way to report this more nicely. 
428 GOOGLE_LOG(WARNING) << "Read access is denied for file: " << temp_disk_file; 429 return NULL; 430 } 431 } 432 } 433 434 return NULL; 435} 436 437io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile( 438 const string& filename) { 439 int file_descriptor; 440 do { 441 file_descriptor = open(filename.c_str(), O_RDONLY); 442 } while (file_descriptor < 0 && errno == EINTR); 443 if (file_descriptor >= 0) { 444 io::FileInputStream* result = new io::FileInputStream(file_descriptor); 445 result->SetCloseOnDelete(true); 446 return result; 447 } else { 448 return NULL; 449 } 450} 451 452} // namespace compiler 453} // namespace protobuf 454} // namespace google 455