Preprocessor.cpp revision 9ae2b9a5213219e929e5cab6cea3f1f1e476d2fd
1/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "Preprocessor.h"
18
19#include <err.h>
20#include <fcntl.h>
21#include <fts.h>
22#include <libgen.h>
23#include <string.h>
24#include <sys/stat.h>
25#include <sys/types.h>
26#include <unistd.h>
27
28#include <deque>
29#include <fstream>
30#include <string>
31#include <unordered_map>
32
33#include <llvm/ADT/StringRef.h>
34#include <llvm/ADT/Twine.h>
35#include <llvm/Support/FileSystem.h>
36#include <llvm/Support/Path.h>
37
38#include "Arch.h"
39#include "DeclarationDatabase.h"
40#include "versioner.h"
41
42using namespace std::string_literals;
43
44static DeclarationAvailability calculateRequiredGuard(const Declaration& declaration) {
45  // To avoid redundant macro guards, the availability calculated by this function is the set
46  // difference of 'targets marked-available' from 'targets the declaration is visible in'.
47  // For example, a declaration that is visible always and introduced in 9 would return introduced
48  // in 9, but the same declaration, except only visible in 9+ would return an empty
49  // DeclarationAvailability.
50
51  // This currently only handles __INTRODUCED_IN.
52  // TODO: Do the same for __REMOVED_IN.
53  int global_min_api_visible = 0;
54  ArchMap<int> arch_visibility;
55
56  for (const auto& it : declaration.availability) {
57    const CompilationType& type = it.first;
58
59    if (global_min_api_visible == 0 || global_min_api_visible > type.api_level) {
60      global_min_api_visible = type.api_level;
61    }
62
63    if (arch_visibility[type.arch] == 0 || arch_visibility[type.arch] > type.api_level) {
64      arch_visibility[type.arch] = type.api_level;
65    }
66  }
67
68  DeclarationAvailability decl_av;
69  if (!declaration.calculateAvailability(&decl_av)) {
70    fprintf(stderr, "versioner: failed to calculate availability while preprocessing:\n");
71    declaration.dump("", stderr, 2);
72    exit(1);
73  }
74
75  D("Calculating required guard for %s:\n", declaration.name.c_str());
76  D("  Declaration availability: %s\n", to_string(decl_av).c_str());
77
78  if (verbose) {
79    std::string arch_visibility_str;
80    for (Arch arch : supported_archs) {
81      if (arch_visibility[arch] != 0) {
82        arch_visibility_str += to_string(arch);
83        arch_visibility_str += ": ";
84        arch_visibility_str += std::to_string(arch_visibility[arch]);
85        arch_visibility_str += ", ";
86      }
87    }
88    if (!arch_visibility_str.empty()) {
89      arch_visibility_str.resize(arch_visibility_str.size() - 2);
90    }
91    D("  Declaration visibility: global = %d, arch = %s\n", global_min_api_visible,
92      arch_visibility_str.c_str());
93  }
94
95  DeclarationAvailability result = decl_av;
96  if (result.global_availability.introduced <= global_min_api_visible) {
97    result.global_availability.introduced = 0;
98  }
99
100  for (Arch arch : supported_archs) {
101    if (result.arch_availability[arch].introduced <= arch_visibility[arch]) {
102      result.arch_availability[arch].introduced = 0;
103    }
104  }
105
106  D("  Calculated result: %s\n", to_string(result).c_str());
107  D("\n");
108
109  return result;
110}
111
// Reads the file at `path` and returns its contents split into lines, with the line terminators
// removed.
//
// Terminates the process via err() if the file cannot be opened or if reading fails, instead of
// returning an empty or partial result that would only surface later as an unrelated error.
static std::deque<std::string> readFileLines(const std::string& path) {
  std::ifstream is(path.c_str());
  if (!is) {
    err(1, "failed to open '%s' for reading", path.c_str());
  }

  std::deque<std::string> result;
  std::string line;

  while (std::getline(is, line)) {
    // getline() replaces the contents of `line` on the next iteration, so the buffer can be
    // moved into the result rather than copied.
    result.push_back(std::move(line));
  }

  // Reaching end-of-file sets eofbit/failbit; badbit is what signals a real I/O error.
  if (is.bad()) {
    err(1, "failed to read '%s'", path.c_str());
  }

  return result;
}
123
// Returns the directory portion of `path`, as computed by dirname(3).
static std::string dirname(const std::string& path) {
  // dirname(3) is allowed to modify the buffer it is given, so it gets a private,
  // NUL-terminated copy. Using a std::string for the copy means an allocation failure throws
  // instead of handing a null pointer to dirname(3).
  std::string scratch(path);

  // The char* argument selects the libgen dirname(3) rather than this overload. The result is
  // copied into the returned std::string before `scratch` is destroyed.
  return dirname(&scratch[0]);
}
128
129static bool mkdirs(const std::string& path) {
130  struct stat st;
131  if (stat(path.c_str(), &st) == 0 && S_ISDIR(st.st_mode)) {
132    return true;
133  }
134
135  std::string parent = dirname(path);
136  if (parent == path) {
137    return false;
138  }
139
140  if (!mkdirs(parent)) {
141    return false;
142  }
143
144  if (mkdir(path.c_str(), 0700) != 0) {
145    return false;
146  }
147
148  return true;
149}
150
151static void writeFileLines(const std::string& path, const std::deque<std::string>& lines) {
152  if (!mkdirs(dirname(path))) {
153    err(1, "failed to create directory '%s'", dirname(path).c_str());
154  }
155
156  std::ofstream os(path.c_str(), std::ios_base::out | std::ios_base::trunc);
157
158  for (const std::string& line : lines) {
159    os << line << "\n";
160  }
161}
162
163using GuardMap = std::map<Location, DeclarationAvailability>;
164
165static std::string generateGuardCondition(const DeclarationAvailability& avail) {
166  // Logically orred expressions that constitute the macro guard.
167  std::vector<std::string> expressions;
168  static const std::vector<std::pair<std::string, std::set<Arch>>> arch_sets = {
169    { "", supported_archs },
170    { "!defined(__LP64__)", { Arch::arm, Arch::mips, Arch::x86 } },
171    { "defined(__LP64__)", { Arch::arm64, Arch::mips64, Arch::x86_64 } },
172    { "defined(__mips__)", { Arch::mips, Arch::mips64 } },
173  };
174  std::map<Arch, std::string> individual_archs = {
175    { Arch::arm, "defined(__arm__)" },
176    { Arch::arm64, "defined(__aarch64__)" },
177    { Arch::mips, "defined(__mips__) && !defined(__LP64__)" },
178    { Arch::mips64, "defined(__mips__) && defined(__LP64__)" },
179    { Arch::x86, "defined(__i386__)" },
180    { Arch::x86_64, "defined(__x86_64__)" },
181  };
182
183  auto generate_guard = [](const std::string& arch_expr, int min_version) {
184    if (min_version == 0) {
185      return arch_expr;
186    }
187    return arch_expr + " && __ANDROID_API__ >= " + std::to_string(min_version);
188  };
189
190  D("Generating guard for availability: %s\n", to_string(avail).c_str());
191  if (!avail.global_availability.empty()) {
192    for (Arch arch : supported_archs) {
193      if (!avail.arch_availability[arch].empty()) {
194        errx(1, "attempted to generate guard with global and per-arch values: %s",
195             to_string(avail).c_str());
196      }
197    }
198
199    if (avail.global_availability.introduced == 0) {
200      fprintf(stderr, "warning: attempted to generate guard with empty availability: %s\n",
201              to_string(avail).c_str());
202      return "";
203    }
204
205    if (avail.global_availability.introduced <= 9) {
206      return "";
207    }
208
209    return "__ANDROID_API__ >= "s + std::to_string(avail.global_availability.introduced);
210  }
211
212  for (const auto& it : arch_sets) {
213    const std::string& arch_expr = it.first;
214    const std::set<Arch>& archs = it.second;
215
216    D("  Checking arch set '%s'\n", arch_expr.c_str());
217
218    int version = avail.arch_availability[*it.second.begin()].introduced;
219
220    // Assume that the entire declaration is declared __INTRODUCED_IN_FUTURE if one arch is.
221    bool future = avail.arch_availability[*it.second.begin()].future;
222
223    if (future) {
224      return "__ANDROID_API__ >= __ANDROID_API_FUTURE__";
225    }
226
227    // The maximum min_version of the set.
228    int max_min_version = 0;
229    for (Arch arch : archs) {
230      if (arch_min_api[arch] > max_min_version) {
231        max_min_version = arch_min_api[arch];
232      }
233
234      if (avail.arch_availability[arch].introduced != version) {
235        D("    Skipping arch set, availability for %s doesn't match %s\n",
236          to_string(*it.second.begin()).c_str(), to_string(arch).c_str());
237        goto skip;
238      }
239    }
240
241    // If all of the archs in the set have a min_api that satifies version, elide the check.
242    if (max_min_version >= version) {
243      version = 0;
244    }
245
246    expressions.emplace_back(generate_guard(arch_expr, version));
247
248    D("    Generated expression '%s'\n", expressions.rbegin()->c_str());
249
250    for (Arch arch : archs) {
251      individual_archs.erase(arch);
252    }
253
254  skip:
255    continue;
256  }
257
258  for (const auto& it : individual_archs) {
259    const std::string& arch_expr = it.second;
260    int introduced = avail.arch_availability[it.first].introduced;
261    if (introduced == 0) {
262      expressions.emplace_back(arch_expr);
263    } else {
264      expressions.emplace_back(generate_guard(arch_expr, introduced));
265    }
266  }
267
268  if (expressions.size() == 0) {
269    errx(1, "generated empty guard for availability %s", to_string(avail).c_str());
270  } else if (expressions.size() == 1) {
271    return expressions[0];
272  }
273
274  return "("s + Join(expressions, ") || (") + ")";
275}
276
277// Assumes that nothing crazy is happening (e.g. having the semicolon be in a macro)
278static FileLocation findNextSemicolon(const std::deque<std::string>& lines, FileLocation start) {
279  unsigned current_line = start.line;
280  unsigned current_column = start.column;
281  while (current_line <= lines.size()) {
282    size_t result = lines[current_line - 1].find_first_of(';', current_column - 1);
283
284    if (result != std::string::npos) {
285      FileLocation loc = {
286        .line = current_line,
287        .column = unsigned(result) + 1,
288      };
289
290      return loc;
291    }
292
293    ++current_line;
294    current_column = 0;
295  }
296
297  errx(1, "failed to find semicolon starting from %u:%u", start.line, start.column);
298}
299
300// Merge adjacent blocks with identical guards.
301static void mergeGuards(std::deque<std::string>& file_lines, GuardMap& guard_map) {
302  if (guard_map.size() < 2) {
303    return;
304  }
305
306  auto current = guard_map.begin();
307  auto next = current;
308  ++next;
309
310  while (next != guard_map.end()) {
311    if (current->second != next->second) {
312      ++current;
313      ++next;
314      continue;
315    }
316
317    // Scan from the end of current to the beginning of next.
318    bool in_block_comment = false;
319    bool valid = true;
320
321    FileLocation current_location = current->first.end;
322    FileLocation end_location = next->first.start;
323
324    auto nextLine = [&current_location]() {
325      ++current_location.line;
326      current_location.column = 1;
327    };
328
329    auto nextCol = [&file_lines, &current_location, &nextLine]() {
330      if (current_location.column == file_lines[current_location.column - 1].length()) {
331        nextLine();
332      } else {
333        ++current_location.column;
334      }
335    };
336
337    // The end location will point to the semicolon, which we don't want to read, so skip it.
338    nextCol();
339
340    while (current_location < end_location) {
341      const std::string& line = file_lines[current_location.line - 1];
342      size_t line_index = current_location.column - 1;
343
344      if (in_block_comment) {
345        size_t pos = line.find("*/", line_index);
346        if (pos == std::string::npos) {
347          D("Didn't find block comment terminator, skipping line\n");
348          nextLine();
349          continue;
350        } else {
351          D("Found block comment terminator\n");
352          in_block_comment = false;
353          current_location.column = pos + 2;
354          nextCol();
355          continue;
356        }
357      } else {
358        size_t pos = line.find_first_not_of(" \t", line_index);
359        if (pos == std::string::npos) {
360          nextLine();
361          continue;
362        }
363
364        current_location.column = pos + 1;
365        if (line[pos] != '/') {
366          D("Trailing character '%c' is not a slash: %s\n", line[pos], line.substr(pos).c_str());
367          valid = false;
368          break;
369        }
370
371        nextCol();
372        if (line.length() <= pos + 1) {
373          // Trailing slash at the end of a line?
374          D("Trailing slash at end of line\n");
375          valid = false;
376          break;
377        }
378
379        if (line[pos + 1] == '/') {
380          // C++ style comment
381          nextLine();
382        } else if (line[pos + 1] == '*') {
383          // Block comment
384          nextCol();
385          in_block_comment = true;
386          D("In a block comment\n");
387        } else {
388          // Garbage?
389          D("Unexpected output after /: %s\n", line.substr(pos).c_str());
390          valid = false;
391          break;
392        }
393      }
394    }
395
396    if (!valid) {
397      D("Not merging blocks %s and %s\n", to_string(current->first).c_str(),
398        to_string(next->first).c_str());
399      ++current;
400      ++next;
401      continue;
402    }
403
404    D("Merging blocks %s and %s\n", to_string(current->first).c_str(),
405      to_string(next->first).c_str());
406
407    Location merged = current->first;
408    merged.end = next->first.end;
409
410    DeclarationAvailability avail = current->second;
411
412    guard_map.erase(current);
413    guard_map.erase(next);
414    bool dummy;
415    std::tie(current, dummy) = guard_map.insert(std::make_pair(merged, avail));
416    next = current;
417    ++next;
418  }
419}
420
421static void rewriteFile(const std::string& output_path, std::deque<std::string>& file_lines,
422                        const GuardMap& guard_map) {
423  for (auto it = guard_map.rbegin(); it != guard_map.rend(); ++it) {
424    const Location& loc = it->first;
425    const DeclarationAvailability& avail = it->second;
426
427    std::string condition = generateGuardCondition(avail);
428    if (condition.empty()) {
429      continue;
430    }
431
432    std::string prologue = "\n#if "s + condition + "\n";
433    std::string epilogue = "\n#endif /* " + condition + " */\n";
434
435    file_lines[loc.end.line - 1].insert(loc.end.column, epilogue);
436    file_lines[loc.start.line - 1].insert(loc.start.column - 1, prologue);
437  }
438
439  printf("Preprocessing %s...\n", output_path.c_str());
440  writeFileLines(output_path, file_lines);
441}
442
443bool preprocessHeaders(const std::string& dst_dir, const std::string& src_dir,
444                       HeaderDatabase* database) {
445  std::unordered_map<std::string, GuardMap> guards;
446  std::unordered_map<std::string, std::deque<std::string>> file_lines;
447
448  for (const auto& symbol_it : database->symbols) {
449    const Symbol& symbol = symbol_it.second;
450
451    for (const auto& decl_it : symbol.declarations) {
452      const Location& location = decl_it.first;
453      const Declaration& decl = decl_it.second;
454
455      if (decl.no_guard) {
456        // No guard required.
457        continue;
458      }
459
460      DeclarationAvailability macro_guard = calculateRequiredGuard(decl);
461      if (!macro_guard.empty()) {
462        guards[location.filename][location] = macro_guard;
463      }
464    }
465  }
466
467  // Copy over the original headers before preprocessing.
468  char* fts_paths[2] = { const_cast<char*>(src_dir.c_str()), nullptr };
469  FTS* fts = fts_open(fts_paths, FTS_LOGICAL, nullptr);
470  while (FTSENT* ent = fts_read(fts)) {
471    llvm::StringRef path = ent->fts_path;
472    if (!path.startswith(src_dir)) {
473      err(1, "path '%s' doesn't start with source dir '%s'", ent->fts_path, src_dir.c_str());
474    }
475
476    if (ent->fts_info != FTS_F) {
477      continue;
478    }
479
480    std::string rel_path = path.substr(src_dir.length() + 1);
481    std::string dst_path = dst_dir + "/" + rel_path;
482    llvm::StringRef parent_path = llvm::sys::path::parent_path(dst_path);
483    if (llvm::sys::fs::create_directories(parent_path)) {
484      errx(1, "failed to ensure existence of directory '%s'", parent_path.str().c_str());
485    }
486    if (llvm::sys::fs::copy_file(path, dst_path)) {
487      errx(1, "failed to copy '%s/%s' to '%s'", src_dir.c_str(), path.str().c_str(),
488           dst_path.c_str());
489    }
490  }
491  fts_close(fts);
492
493  for (const auto& file_it : guards) {
494    file_lines[file_it.first] = readFileLines(file_it.first);
495  }
496
497  for (auto& file_it : guards) {
498    llvm::StringRef file_path = file_it.first;
499    GuardMap& orig_guard_map = file_it.second;
500
501    // The end positions given to us are the end of the declaration, which is some point before the
502    // semicolon. Fix up the end positions by scanning for the next semicolon.
503    GuardMap guard_map;
504    for (const auto& it : orig_guard_map) {
505      Location loc = it.first;
506      loc.end = findNextSemicolon(file_lines[file_path], loc.end);
507      guard_map[loc] = it.second;
508    }
509
510    // TODO: Make sure that the Locations don't overlap.
511    // TODO: Merge adjacent non-identical guards.
512    mergeGuards(file_lines[file_path], guard_map);
513
514    if (!file_path.startswith(src_dir)) {
515      errx(1, "input file %s is not in %s\n", file_path.str().c_str(), src_dir.c_str());
516    }
517
518    // rel_path has a leading slash.
519    llvm::StringRef rel_path = file_path.substr(src_dir.size(), file_path.size() - src_dir.size());
520    std::string output_path = (llvm::Twine(dst_dir) + rel_path).str();
521
522    rewriteFile(output_path, file_lines[file_path], guard_map);
523  }
524
525  return true;
526}
527