// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
34 35#ifdef _MSC_VER 36#include <io.h> 37#else 38#include <unistd.h> 39#endif 40#include <sys/types.h> 41#include <sys/stat.h> 42#include <fcntl.h> 43#include <errno.h> 44 45#include <algorithm> 46 47#include <google/protobuf/compiler/importer.h> 48 49#include <google/protobuf/compiler/parser.h> 50#include <google/protobuf/io/tokenizer.h> 51#include <google/protobuf/io/zero_copy_stream_impl.h> 52#include <google/protobuf/stubs/strutil.h> 53 54namespace google { 55namespace protobuf { 56namespace compiler { 57 58#ifdef _WIN32 59#ifndef F_OK 60#define F_OK 00 // not defined by MSVC for whatever reason 61#endif 62#include <ctype.h> 63#endif 64 65// Returns true if the text looks like a Windows-style absolute path, starting 66// with a drive letter. Example: "C:\foo". TODO(kenton): Share this with 67// copy in command_line_interface.cc? 68static bool IsWindowsAbsolutePath(const string& text) { 69#if defined(_WIN32) || defined(__CYGWIN__) 70 return text.size() >= 3 && text[1] == ':' && 71 isalpha(text[0]) && 72 (text[2] == '/' || text[2] == '\\') && 73 text.find_last_of(':') == 1; 74#else 75 return false; 76#endif 77} 78 79MultiFileErrorCollector::~MultiFileErrorCollector() {} 80 81// This class serves two purposes: 82// - It implements the ErrorCollector interface (used by Tokenizer and Parser) 83// in terms of MultiFileErrorCollector, using a particular filename. 84// - It lets us check if any errors have occurred. 
85class SourceTreeDescriptorDatabase::SingleFileErrorCollector 86 : public io::ErrorCollector { 87 public: 88 SingleFileErrorCollector(const string& filename, 89 MultiFileErrorCollector* multi_file_error_collector) 90 : filename_(filename), 91 multi_file_error_collector_(multi_file_error_collector), 92 had_errors_(false) {} 93 ~SingleFileErrorCollector() {} 94 95 bool had_errors() { return had_errors_; } 96 97 // implements ErrorCollector --------------------------------------- 98 void AddError(int line, int column, const string& message) { 99 if (multi_file_error_collector_ != NULL) { 100 multi_file_error_collector_->AddError(filename_, line, column, message); 101 } 102 had_errors_ = true; 103 } 104 105 private: 106 string filename_; 107 MultiFileErrorCollector* multi_file_error_collector_; 108 bool had_errors_; 109}; 110 111// =================================================================== 112 113SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase( 114 SourceTree* source_tree) 115 : source_tree_(source_tree), 116 error_collector_(NULL), 117 using_validation_error_collector_(false), 118 validation_error_collector_(this) {} 119 120SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {} 121 122bool SourceTreeDescriptorDatabase::FindFileByName( 123 const string& filename, FileDescriptorProto* output) { 124 scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename)); 125 if (input == NULL) { 126 if (error_collector_ != NULL) { 127 error_collector_->AddError(filename, -1, 0, "File not found."); 128 } 129 return false; 130 } 131 132 // Set up the tokenizer and parser. 
133 SingleFileErrorCollector file_error_collector(filename, error_collector_); 134 io::Tokenizer tokenizer(input.get(), &file_error_collector); 135 136 Parser parser; 137 if (error_collector_ != NULL) { 138 parser.RecordErrorsTo(&file_error_collector); 139 } 140 if (using_validation_error_collector_) { 141 parser.RecordSourceLocationsTo(&source_locations_); 142 } 143 144 // Parse it. 145 output->set_name(filename); 146 return parser.Parse(&tokenizer, output) && 147 !file_error_collector.had_errors(); 148} 149 150bool SourceTreeDescriptorDatabase::FindFileContainingSymbol( 151 const string& symbol_name, FileDescriptorProto* output) { 152 return false; 153} 154 155bool SourceTreeDescriptorDatabase::FindFileContainingExtension( 156 const string& containing_type, int field_number, 157 FileDescriptorProto* output) { 158 return false; 159} 160 161// ------------------------------------------------------------------- 162 163SourceTreeDescriptorDatabase::ValidationErrorCollector:: 164ValidationErrorCollector(SourceTreeDescriptorDatabase* owner) 165 : owner_(owner) {} 166 167SourceTreeDescriptorDatabase::ValidationErrorCollector:: 168~ValidationErrorCollector() {} 169 170void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError( 171 const string& filename, 172 const string& element_name, 173 const Message* descriptor, 174 ErrorLocation location, 175 const string& message) { 176 if (owner_->error_collector_ == NULL) return; 177 178 int line, column; 179 owner_->source_locations_.Find(descriptor, location, &line, &column); 180 owner_->error_collector_->AddError(filename, line, column, message); 181} 182 183// =================================================================== 184 185Importer::Importer(SourceTree* source_tree, 186 MultiFileErrorCollector* error_collector) 187 : database_(source_tree), 188 pool_(&database_, database_.GetValidationErrorCollector()) { 189 database_.RecordErrorsTo(error_collector); 190} 191 192Importer::~Importer() {} 193 194const 
FileDescriptor* Importer::Import(const string& filename) { 195 return pool_.FindFileByName(filename); 196} 197 198// =================================================================== 199 200SourceTree::~SourceTree() {} 201 202DiskSourceTree::DiskSourceTree() {} 203 204DiskSourceTree::~DiskSourceTree() {} 205 206static inline char LastChar(const string& str) { 207 return str[str.size() - 1]; 208} 209 210// Given a path, returns an equivalent path with these changes: 211// - On Windows, any backslashes are replaced with forward slashes. 212// - Any instances of the directory "." are removed. 213// - Any consecutive '/'s are collapsed into a single slash. 214// Note that the resulting string may be empty. 215// 216// TODO(kenton): It would be nice to handle "..", e.g. so that we can figure 217// out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a 218// symlink or doesn't exist, then things get complicated, and we can't 219// actually determine this without investigating the filesystem, probably 220// in non-portable ways. So, we punt. 221// 222// TODO(kenton): It would be nice to use realpath() here except that it 223// resolves symbolic links. This could cause problems if people place 224// symbolic links in their source tree. For example, if you executed: 225// protoc --proto_path=foo foo/bar/baz.proto 226// then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize 227// to a path which does not appear to be under foo, and thus the compiler 228// will complain that baz.proto is not inside the --proto_path. 229static string CanonicalizePath(string path) { 230#ifdef _WIN32 231 // The Win32 API accepts forward slashes as a path delimiter even though 232 // backslashes are standard. Let's avoid confusion and use only forward 233 // slashes. 234 if (HasPrefixString(path, "\\\\")) { 235 // Avoid converting two leading backslashes. 
236 path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true); 237 } else { 238 path = StringReplace(path, "\\", "/", true); 239 } 240#endif 241 242 vector<string> parts; 243 vector<string> canonical_parts; 244 SplitStringUsing(path, "/", &parts); // Note: Removes empty parts. 245 for (int i = 0; i < parts.size(); i++) { 246 if (parts[i] == ".") { 247 // Ignore. 248 } else { 249 canonical_parts.push_back(parts[i]); 250 } 251 } 252 string result = JoinStrings(canonical_parts, "/"); 253 if (!path.empty() && path[0] == '/') { 254 // Restore leading slash. 255 result = '/' + result; 256 } 257 if (!path.empty() && LastChar(path) == '/' && 258 !result.empty() && LastChar(result) != '/') { 259 // Restore trailing slash. 260 result += '/'; 261 } 262 return result; 263} 264 265static inline bool ContainsParentReference(const string& path) { 266 return path == ".." || 267 HasPrefixString(path, "../") || 268 HasSuffixString(path, "/..") || 269 path.find("/../") != string::npos; 270} 271 272// Maps a file from an old location to a new one. Typically, old_prefix is 273// a virtual path and new_prefix is its corresponding disk path. Returns 274// false if the filename did not start with old_prefix, otherwise replaces 275// old_prefix with new_prefix and stores the result in *result. 
Examples: 276// string result; 277// assert(ApplyMapping("foo/bar", "", "baz", &result)); 278// assert(result == "baz/foo/bar"); 279// 280// assert(ApplyMapping("foo/bar", "foo", "baz", &result)); 281// assert(result == "baz/bar"); 282// 283// assert(ApplyMapping("foo", "foo", "bar", &result)); 284// assert(result == "bar"); 285// 286// assert(!ApplyMapping("foo/bar", "baz", "qux", &result)); 287// assert(!ApplyMapping("foo/bar", "baz", "qux", &result)); 288// assert(!ApplyMapping("foobar", "foo", "baz", &result)); 289static bool ApplyMapping(const string& filename, 290 const string& old_prefix, 291 const string& new_prefix, 292 string* result) { 293 if (old_prefix.empty()) { 294 // old_prefix matches any relative path. 295 if (ContainsParentReference(filename)) { 296 // We do not allow the file name to use "..". 297 return false; 298 } 299 if (HasPrefixString(filename, "/") || 300 IsWindowsAbsolutePath(filename)) { 301 // This is an absolute path, so it isn't matched by the empty string. 302 return false; 303 } 304 result->assign(new_prefix); 305 if (!result->empty()) result->push_back('/'); 306 result->append(filename); 307 return true; 308 } else if (HasPrefixString(filename, old_prefix)) { 309 // old_prefix is a prefix of the filename. Is it the whole filename? 310 if (filename.size() == old_prefix.size()) { 311 // Yep, it's an exact match. 312 *result = new_prefix; 313 return true; 314 } else { 315 // Not an exact match. Is the next character a '/'? Otherwise, 316 // this isn't actually a match at all. E.g. the prefix "foo/bar" 317 // does not match the filename "foo/barbaz". 318 int after_prefix_start = -1; 319 if (filename[old_prefix.size()] == '/') { 320 after_prefix_start = old_prefix.size() + 1; 321 } else if (filename[old_prefix.size() - 1] == '/') { 322 // old_prefix is never empty, and canonicalized paths never have 323 // consecutive '/' characters. 324 after_prefix_start = old_prefix.size(); 325 } 326 if (after_prefix_start != -1) { 327 // Yep. 
So the prefixes are directories and the filename is a file 328 // inside them. 329 string after_prefix = filename.substr(after_prefix_start); 330 if (ContainsParentReference(after_prefix)) { 331 // We do not allow the file name to use "..". 332 return false; 333 } 334 result->assign(new_prefix); 335 if (!result->empty()) result->push_back('/'); 336 result->append(after_prefix); 337 return true; 338 } 339 } 340 } 341 342 return false; 343} 344 345void DiskSourceTree::MapPath(const string& virtual_path, 346 const string& disk_path) { 347 mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path))); 348} 349 350DiskSourceTree::DiskFileToVirtualFileResult 351DiskSourceTree::DiskFileToVirtualFile( 352 const string& disk_file, 353 string* virtual_file, 354 string* shadowing_disk_file) { 355 int mapping_index = -1; 356 string canonical_disk_file = CanonicalizePath(disk_file); 357 358 for (int i = 0; i < mappings_.size(); i++) { 359 // Apply the mapping in reverse. 360 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path, 361 mappings_[i].virtual_path, virtual_file)) { 362 // Success. 363 mapping_index = i; 364 break; 365 } 366 } 367 368 if (mapping_index == -1) { 369 return NO_MAPPING; 370 } 371 372 // Iterate through all mappings with higher precedence and verify that none 373 // of them map this file to some other existing file. 374 for (int i = 0; i < mapping_index; i++) { 375 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path, 376 mappings_[i].disk_path, shadowing_disk_file)) { 377 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) { 378 // File exists. 379 return SHADOWED; 380 } 381 } 382 } 383 shadowing_disk_file->clear(); 384 385 // Verify that we can open the file. Note that this also has the side-effect 386 // of verifying that we are not canonicalizing away any non-existent 387 // directories. 
388 scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file)); 389 if (stream == NULL) { 390 return CANNOT_OPEN; 391 } 392 393 return SUCCESS; 394} 395 396bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file, 397 string* disk_file) { 398 scoped_ptr<io::ZeroCopyInputStream> stream(OpenVirtualFile(virtual_file, 399 disk_file)); 400 return stream != NULL; 401} 402 403io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) { 404 return OpenVirtualFile(filename, NULL); 405} 406 407io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile( 408 const string& virtual_file, 409 string* disk_file) { 410 if (virtual_file != CanonicalizePath(virtual_file) || 411 ContainsParentReference(virtual_file)) { 412 // We do not allow importing of paths containing things like ".." or 413 // consecutive slashes since the compiler expects files to be uniquely 414 // identified by file name. 415 return NULL; 416 } 417 418 for (int i = 0; i < mappings_.size(); i++) { 419 string temp_disk_file; 420 if (ApplyMapping(virtual_file, mappings_[i].virtual_path, 421 mappings_[i].disk_path, &temp_disk_file)) { 422 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file); 423 if (stream != NULL) { 424 if (disk_file != NULL) { 425 *disk_file = temp_disk_file; 426 } 427 return stream; 428 } 429 430 if (errno == EACCES) { 431 // The file exists but is not readable. 432 // TODO(kenton): Find a way to report this more nicely. 
433 GOOGLE_LOG(WARNING) << "Read access is denied for file: " << temp_disk_file; 434 return NULL; 435 } 436 } 437 } 438 439 return NULL; 440} 441 442io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile( 443 const string& filename) { 444 int file_descriptor; 445 do { 446 file_descriptor = open(filename.c_str(), O_RDONLY); 447 } while (file_descriptor < 0 && errno == EINTR); 448 if (file_descriptor >= 0) { 449 io::FileInputStream* result = new io::FileInputStream(file_descriptor); 450 result->SetCloseOnDelete(true); 451 return result; 452 } else { 453 return NULL; 454 } 455} 456 457} // namespace compiler 458} // namespace protobuf 459} // namespace google 460