Preprocessor.cpp revision 9ae2b9a5213219e929e5cab6cea3f1f1e476d2fd
1/* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "Preprocessor.h" 18 19#include <err.h> 20#include <fcntl.h> 21#include <fts.h> 22#include <libgen.h> 23#include <string.h> 24#include <sys/stat.h> 25#include <sys/types.h> 26#include <unistd.h> 27 28#include <deque> 29#include <fstream> 30#include <string> 31#include <unordered_map> 32 33#include <llvm/ADT/StringRef.h> 34#include <llvm/ADT/Twine.h> 35#include <llvm/Support/FileSystem.h> 36#include <llvm/Support/Path.h> 37 38#include "Arch.h" 39#include "DeclarationDatabase.h" 40#include "versioner.h" 41 42using namespace std::string_literals; 43 44static DeclarationAvailability calculateRequiredGuard(const Declaration& declaration) { 45 // To avoid redundant macro guards, the availability calculated by this function is the set 46 // difference of 'targets marked-available' from 'targets the declaration is visible in'. 47 // For example, a declaration that is visible always and introduced in 9 would return introduced 48 // in 9, but the same declaration, except only visible in 9+ would return an empty 49 // DeclarationAvailability. 50 51 // This currently only handles __INTRODUCED_IN. 52 // TODO: Do the same for __REMOVED_IN. 53 int global_min_api_visible = 0; 54 ArchMap<int> arch_visibility; 55 56 for (const auto& it : declaration.availability) { 57 const CompilationType& type = it.first; 58 59 if (global_min_api_visible == 0 || global_min_api_visible > type.api_level) { 60 global_min_api_visible = type.api_level; 61 } 62 63 if (arch_visibility[type.arch] == 0 || arch_visibility[type.arch] > type.api_level) { 64 arch_visibility[type.arch] = type.api_level; 65 } 66 } 67 68 DeclarationAvailability decl_av; 69 if (!declaration.calculateAvailability(&decl_av)) { 70 fprintf(stderr, "versioner: failed to calculate availability while preprocessing:\n"); 71 declaration.dump("", stderr, 2); 72 exit(1); 73 } 74 75 D("Calculating required guard for %s:\n", declaration.name.c_str()); 76 D(" Declaration availability: %s\n", to_string(decl_av).c_str()); 77 78 if (verbose) { 79 std::string arch_visibility_str; 80 for (Arch arch : supported_archs) { 81 if (arch_visibility[arch] != 0) { 82 arch_visibility_str += to_string(arch); 83 arch_visibility_str += ": "; 84 arch_visibility_str += std::to_string(arch_visibility[arch]); 85 arch_visibility_str += ", "; 86 } 87 } 88 if (!arch_visibility_str.empty()) { 89 arch_visibility_str.resize(arch_visibility_str.size() - 2); 90 } 91 D(" Declaration visibility: global = %d, arch = %s\n", global_min_api_visible, 92 arch_visibility_str.c_str()); 93 } 94 95 DeclarationAvailability result = decl_av; 96 if (result.global_availability.introduced <= global_min_api_visible) { 97 result.global_availability.introduced = 0; 98 } 99 100 for (Arch arch : supported_archs) { 101 if (result.arch_availability[arch].introduced <= arch_visibility[arch]) { 102 result.arch_availability[arch].introduced = 0; 103 } 104 } 105 106 D(" Calculated result: %s\n", to_string(result).c_str()); 107 D("\n"); 108 109 return result; 110} 111 112static std::deque<std::string> readFileLines(const std::string& path) { 113 std::ifstream is(path.c_str()); 114 std::deque<std::string> result; 115 std::string line; 116 117 while (std::getline(is, line)) { 118 result.push_back(std::move(line)); 119 } 120 121 return result; 122} 123 124static std::string dirname(const std::string& path) { 125 std::unique_ptr<char, decltype(&free)> path_copy(strdup(path.c_str()), free); 126 return dirname(path_copy.get()); 127} 128 129static bool mkdirs(const std::string& path) { 130 struct stat st; 131 if (stat(path.c_str(), &st) == 0 && S_ISDIR(st.st_mode)) { 132 return true; 133 } 134 135 std::string parent = dirname(path); 136 if (parent == path) { 137 return false; 138 } 139 140 if (!mkdirs(parent)) { 141 return false; 142 } 143 144 if (mkdir(path.c_str(), 0700) != 0) { 145 return false; 146 } 147 148 return true; 149} 150 151static void writeFileLines(const std::string& path, const std::deque<std::string>& lines) { 152 if (!mkdirs(dirname(path))) { 153 err(1, "failed to create directory '%s'", dirname(path).c_str()); 154 } 155 156 std::ofstream os(path.c_str(), std::ios_base::out | std::ios_base::trunc); 157 158 for (const std::string& line : lines) { 159 os << line << "\n"; 160 } 161} 162 163using GuardMap = std::map<Location, DeclarationAvailability>; 164 165static std::string generateGuardCondition(const DeclarationAvailability& avail) { 166 // Logically orred expressions that constitute the macro guard. 167 std::vector<std::string> expressions; 168 static const std::vector<std::pair<std::string, std::set<Arch>>> arch_sets = { 169 { "", supported_archs }, 170 { "!defined(__LP64__)", { Arch::arm, Arch::mips, Arch::x86 } }, 171 { "defined(__LP64__)", { Arch::arm64, Arch::mips64, Arch::x86_64 } }, 172 { "defined(__mips__)", { Arch::mips, Arch::mips64 } }, 173 }; 174 std::map<Arch, std::string> individual_archs = { 175 { Arch::arm, "defined(__arm__)" }, 176 { Arch::arm64, "defined(__aarch64__)" }, 177 { Arch::mips, "defined(__mips__) && !defined(__LP64__)" }, 178 { Arch::mips64, "defined(__mips__) && defined(__LP64__)" }, 179 { Arch::x86, "defined(__i386__)" }, 180 { Arch::x86_64, "defined(__x86_64__)" }, 181 }; 182 183 auto generate_guard = [](const std::string& arch_expr, int min_version) { 184 if (min_version == 0) { 185 return arch_expr; 186 } 187 return arch_expr + " && __ANDROID_API__ >= " + std::to_string(min_version); 188 }; 189 190 D("Generating guard for availability: %s\n", to_string(avail).c_str()); 191 if (!avail.global_availability.empty()) { 192 for (Arch arch : supported_archs) { 193 if (!avail.arch_availability[arch].empty()) { 194 errx(1, "attempted to generate guard with global and per-arch values: %s", 195 to_string(avail).c_str()); 196 } 197 } 198 199 if (avail.global_availability.introduced == 0) { 200 fprintf(stderr, "warning: attempted to generate guard with empty availability: %s\n", 201 to_string(avail).c_str()); 202 return ""; 203 } 204 205 if (avail.global_availability.introduced <= 9) { 206 return ""; 207 } 208 209 return "__ANDROID_API__ >= "s + std::to_string(avail.global_availability.introduced); 210 } 211 212 for (const auto& it : arch_sets) { 213 const std::string& arch_expr = it.first; 214 const std::set<Arch>& archs = it.second; 215 216 D(" Checking arch set '%s'\n", arch_expr.c_str()); 217 218 int version = avail.arch_availability[*it.second.begin()].introduced; 219 220 // Assume that the entire declaration is declared __INTRODUCED_IN_FUTURE if one arch is. 221 bool future = avail.arch_availability[*it.second.begin()].future; 222 223 if (future) { 224 return "__ANDROID_API__ >= __ANDROID_API_FUTURE__"; 225 } 226 227 // The maximum min_version of the set. 228 int max_min_version = 0; 229 for (Arch arch : archs) { 230 if (arch_min_api[arch] > max_min_version) { 231 max_min_version = arch_min_api[arch]; 232 } 233 234 if (avail.arch_availability[arch].introduced != version) { 235 D(" Skipping arch set, availability for %s doesn't match %s\n", 236 to_string(*it.second.begin()).c_str(), to_string(arch).c_str()); 237 goto skip; 238 } 239 } 240 241 // If all of the archs in the set have a min_api that satifies version, elide the check. 242 if (max_min_version >= version) { 243 version = 0; 244 } 245 246 expressions.emplace_back(generate_guard(arch_expr, version)); 247 248 D(" Generated expression '%s'\n", expressions.rbegin()->c_str()); 249 250 for (Arch arch : archs) { 251 individual_archs.erase(arch); 252 } 253 254 skip: 255 continue; 256 } 257 258 for (const auto& it : individual_archs) { 259 const std::string& arch_expr = it.second; 260 int introduced = avail.arch_availability[it.first].introduced; 261 if (introduced == 0) { 262 expressions.emplace_back(arch_expr); 263 } else { 264 expressions.emplace_back(generate_guard(arch_expr, introduced)); 265 } 266 } 267 268 if (expressions.size() == 0) { 269 errx(1, "generated empty guard for availability %s", to_string(avail).c_str()); 270 } else if (expressions.size() == 1) { 271 return expressions[0]; 272 } 273 274 return "("s + Join(expressions, ") || (") + ")"; 275} 276 277// Assumes that nothing crazy is happening (e.g. having the semicolon be in a macro) 278static FileLocation findNextSemicolon(const std::deque<std::string>& lines, FileLocation start) { 279 unsigned current_line = start.line; 280 unsigned current_column = start.column; 281 while (current_line <= lines.size()) { 282 size_t result = lines[current_line - 1].find_first_of(';', current_column - 1); 283 284 if (result != std::string::npos) { 285 FileLocation loc = { 286 .line = current_line, 287 .column = unsigned(result) + 1, 288 }; 289 290 return loc; 291 } 292 293 ++current_line; 294 current_column = 0; 295 } 296 297 errx(1, "failed to find semicolon starting from %u:%u", start.line, start.column); 298} 299 300// Merge adjacent blocks with identical guards. 301static void mergeGuards(std::deque<std::string>& file_lines, GuardMap& guard_map) { 302 if (guard_map.size() < 2) { 303 return; 304 } 305 306 auto current = guard_map.begin(); 307 auto next = current; 308 ++next; 309 310 while (next != guard_map.end()) { 311 if (current->second != next->second) { 312 ++current; 313 ++next; 314 continue; 315 } 316 317 // Scan from the end of current to the beginning of next. 318 bool in_block_comment = false; 319 bool valid = true; 320 321 FileLocation current_location = current->first.end; 322 FileLocation end_location = next->first.start; 323 324 auto nextLine = [¤t_location]() { 325 ++current_location.line; 326 current_location.column = 1; 327 }; 328 329 auto nextCol = [&file_lines, ¤t_location, &nextLine]() { 330 if (current_location.column == file_lines[current_location.column - 1].length()) { 331 nextLine(); 332 } else { 333 ++current_location.column; 334 } 335 }; 336 337 // The end location will point to the semicolon, which we don't want to read, so skip it. 338 nextCol(); 339 340 while (current_location < end_location) { 341 const std::string& line = file_lines[current_location.line - 1]; 342 size_t line_index = current_location.column - 1; 343 344 if (in_block_comment) { 345 size_t pos = line.find("*/", line_index); 346 if (pos == std::string::npos) { 347 D("Didn't find block comment terminator, skipping line\n"); 348 nextLine(); 349 continue; 350 } else { 351 D("Found block comment terminator\n"); 352 in_block_comment = false; 353 current_location.column = pos + 2; 354 nextCol(); 355 continue; 356 } 357 } else { 358 size_t pos = line.find_first_not_of(" \t", line_index); 359 if (pos == std::string::npos) { 360 nextLine(); 361 continue; 362 } 363 364 current_location.column = pos + 1; 365 if (line[pos] != '/') { 366 D("Trailing character '%c' is not a slash: %s\n", line[pos], line.substr(pos).c_str()); 367 valid = false; 368 break; 369 } 370 371 nextCol(); 372 if (line.length() <= pos + 1) { 373 // Trailing slash at the end of a line? 374 D("Trailing slash at end of line\n"); 375 valid = false; 376 break; 377 } 378 379 if (line[pos + 1] == '/') { 380 // C++ style comment 381 nextLine(); 382 } else if (line[pos + 1] == '*') { 383 // Block comment 384 nextCol(); 385 in_block_comment = true; 386 D("In a block comment\n"); 387 } else { 388 // Garbage? 389 D("Unexpected output after /: %s\n", line.substr(pos).c_str()); 390 valid = false; 391 break; 392 } 393 } 394 } 395 396 if (!valid) { 397 D("Not merging blocks %s and %s\n", to_string(current->first).c_str(), 398 to_string(next->first).c_str()); 399 ++current; 400 ++next; 401 continue; 402 } 403 404 D("Merging blocks %s and %s\n", to_string(current->first).c_str(), 405 to_string(next->first).c_str()); 406 407 Location merged = current->first; 408 merged.end = next->first.end; 409 410 DeclarationAvailability avail = current->second; 411 412 guard_map.erase(current); 413 guard_map.erase(next); 414 bool dummy; 415 std::tie(current, dummy) = guard_map.insert(std::make_pair(merged, avail)); 416 next = current; 417 ++next; 418 } 419} 420 421static void rewriteFile(const std::string& output_path, std::deque<std::string>& file_lines, 422 const GuardMap& guard_map) { 423 for (auto it = guard_map.rbegin(); it != guard_map.rend(); ++it) { 424 const Location& loc = it->first; 425 const DeclarationAvailability& avail = it->second; 426 427 std::string condition = generateGuardCondition(avail); 428 if (condition.empty()) { 429 continue; 430 } 431 432 std::string prologue = "\n#if "s + condition + "\n"; 433 std::string epilogue = "\n#endif /* " + condition + " */\n"; 434 435 file_lines[loc.end.line - 1].insert(loc.end.column, epilogue); 436 file_lines[loc.start.line - 1].insert(loc.start.column - 1, prologue); 437 } 438 439 printf("Preprocessing %s...\n", output_path.c_str()); 440 writeFileLines(output_path, file_lines); 441} 442 443bool preprocessHeaders(const std::string& dst_dir, const std::string& src_dir, 444 HeaderDatabase* database) { 445 std::unordered_map<std::string, GuardMap> guards; 446 std::unordered_map<std::string, std::deque<std::string>> file_lines; 447 448 for (const auto& symbol_it : database->symbols) { 449 const Symbol& symbol = symbol_it.second; 450 451 for (const auto& decl_it : symbol.declarations) { 452 const Location& location = decl_it.first; 453 const Declaration& decl = decl_it.second; 454 455 if (decl.no_guard) { 456 // No guard required. 457 continue; 458 } 459 460 DeclarationAvailability macro_guard = calculateRequiredGuard(decl); 461 if (!macro_guard.empty()) { 462 guards[location.filename][location] = macro_guard; 463 } 464 } 465 } 466 467 // Copy over the original headers before preprocessing. 468 char* fts_paths[2] = { const_cast<char*>(src_dir.c_str()), nullptr }; 469 FTS* fts = fts_open(fts_paths, FTS_LOGICAL, nullptr); 470 while (FTSENT* ent = fts_read(fts)) { 471 llvm::StringRef path = ent->fts_path; 472 if (!path.startswith(src_dir)) { 473 err(1, "path '%s' doesn't start with source dir '%s'", ent->fts_path, src_dir.c_str()); 474 } 475 476 if (ent->fts_info != FTS_F) { 477 continue; 478 } 479 480 std::string rel_path = path.substr(src_dir.length() + 1); 481 std::string dst_path = dst_dir + "/" + rel_path; 482 llvm::StringRef parent_path = llvm::sys::path::parent_path(dst_path); 483 if (llvm::sys::fs::create_directories(parent_path)) { 484 errx(1, "failed to ensure existence of directory '%s'", parent_path.str().c_str()); 485 } 486 if (llvm::sys::fs::copy_file(path, dst_path)) { 487 errx(1, "failed to copy '%s/%s' to '%s'", src_dir.c_str(), path.str().c_str(), 488 dst_path.c_str()); 489 } 490 } 491 fts_close(fts); 492 493 for (const auto& file_it : guards) { 494 file_lines[file_it.first] = readFileLines(file_it.first); 495 } 496 497 for (auto& file_it : guards) { 498 llvm::StringRef file_path = file_it.first; 499 GuardMap& orig_guard_map = file_it.second; 500 501 // The end positions given to us are the end of the declaration, which is some point before the 502 // semicolon. Fix up the end positions by scanning for the next semicolon. 503 GuardMap guard_map; 504 for (const auto& it : orig_guard_map) { 505 Location loc = it.first; 506 loc.end = findNextSemicolon(file_lines[file_path], loc.end); 507 guard_map[loc] = it.second; 508 } 509 510 // TODO: Make sure that the Locations don't overlap. 511 // TODO: Merge adjacent non-identical guards. 512 mergeGuards(file_lines[file_path], guard_map); 513 514 if (!file_path.startswith(src_dir)) { 515 errx(1, "input file %s is not in %s\n", file_path.str().c_str(), src_dir.c_str()); 516 } 517 518 // rel_path has a leading slash. 519 llvm::StringRef rel_path = file_path.substr(src_dir.size(), file_path.size() - src_dir.size()); 520 std::string output_path = (llvm::Twine(dst_dir) + rel_path).str(); 521 522 rewriteFile(output_path, file_lines[file_path], guard_map); 523 } 524 525 return true; 526} 527