1d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//===--- FileMatchTrie.cpp - ----------------------------------------------===//
2d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//
3d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//                     The LLVM Compiler Infrastructure
4d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//
5d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// This file is distributed under the University of Illinois Open Source
6d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// License. See LICENSE.TXT for details.
7d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//
8d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//===----------------------------------------------------------------------===//
9d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//
10d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//  This file contains the implementation of a FileMatchTrie.
11d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//
12d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//===----------------------------------------------------------------------===//
13d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
14d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper#include "clang/Tooling/FileMatchTrie.h"
15d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper#include "llvm/ADT/StringMap.h"
16d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper#include "llvm/Support/FileSystem.h"
178229d22e6449851b89361bf2f41804557328be63Rafael Espindola#include "llvm/Support/Path.h"
1861a50adfd9a095cfa017751d41af409c5bc26edfDaniel Jasper#include "llvm/Support/raw_ostream.h"
1955fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include <sstream>
20d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
21d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jaspernamespace clang {
22d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jaspernamespace tooling {
23d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
24d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// \brief Default \c PathComparator using \c llvm::sys::fs::equivalent().
25d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasperstruct DefaultPathComparator : public PathComparator {
26d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  virtual ~DefaultPathComparator() {}
27651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  bool equivalent(StringRef FileA, StringRef FileB) const override {
282dbe2fa7eb3ccf8eabae884a177b4570d3fb260dDaniel Jasper    return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
29d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  }
30d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper};
31d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
32d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// \brief A node of the \c FileMatchTrie.
33d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper///
34d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// Each node has storage for up to one path and a map mapping a path segment to
35d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// child nodes. The trie starts with an empty root node.
36d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasperclass FileMatchTrieNode {
37d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasperpublic:
38d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// \brief Inserts 'NewPath' into this trie. \c ConsumedLength denotes
39d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// the number of \c NewPath's trailing characters already consumed during
40d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// recursion.
41d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///
42d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// An insert of a path
43d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// 'p'starts at the root node and does the following:
44d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// - If the node is empty, insert 'p' into its storage and abort.
45d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// - If the node has a path 'p2' but no children, take the last path segment
46d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
47d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   'p2' there.
48d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// - Insert a new child for the last segment of 'p' and insert the rest of
49d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   'p' there.
50d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///
51d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// An insert operation is linear in the number of a path's segments.
52d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
53d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    // We cannot put relative paths into the FileMatchTrie as then a path can be
54d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    // a postfix of another path, violating a core assumption of the trie.
55d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    if (llvm::sys::path::is_relative(NewPath))
56d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      return;
57d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    if (Path.empty()) {
58d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      // This is an empty leaf. Store NewPath and return.
59d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      Path = NewPath;
60d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      return;
61d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    }
62d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    if (Children.empty()) {
63d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
64d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      if (NewPath == Path)
65d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper          return;
66d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      // Make this a node and create a child-leaf with 'Path'.
67d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      StringRef Element(llvm::sys::path::filename(
68d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper          StringRef(Path).drop_back(ConsumedLength)));
69d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      Children[Element].Path = Path;
70d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    }
71d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    StringRef Element(llvm::sys::path::filename(
72d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper          StringRef(NewPath).drop_back(ConsumedLength)));
73d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
74d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  }
75d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
76d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// \brief Tries to find the node under this \c FileMatchTrieNode that best
77d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// matches 'FileName'.
78d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///
79d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
80d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// \c true and an empty string is returned. If no path fits 'FileName', an
81d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// empty string is returned. \c ConsumedLength denotes the number of
82d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// \c Filename's trailing characters already consumed during recursion.
83d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///
84d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// To find the best matching node for a given path 'p', the
85d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// \c findEquivalent() function is called recursively for each path segment
86d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// (back to fron) of 'p' until a node 'n' is reached that does not ..
87d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// - .. have children. In this case it is checked
88d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   whether the stored path is equivalent to 'p'. If yes, the best match is
89d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   found. Otherwise continue with the parent node as if this node did not
90d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   exist.
91d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// - .. a child matching the next path segment. In this case, all children of
92d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   'n' are an equally good match for 'p'. All children are of 'n' are found
93d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   recursively and their equivalence to 'p' is determined. If none are
94d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
95d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
96d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  ///   error.
97d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  StringRef findEquivalent(const PathComparator& Comparator,
98d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper                           StringRef FileName,
99d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper                           bool &IsAmbiguous,
100d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper                           unsigned ConsumedLength = 0) const {
101d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    if (Children.empty()) {
102d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      if (Comparator.equivalent(StringRef(Path), FileName))
103d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper        return StringRef(Path);
104d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      return StringRef();
105d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    }
106d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    StringRef Element(llvm::sys::path::filename(FileName.drop_back(
107d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper        ConsumedLength)));
108d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
109d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper        Children.find(Element);
110d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    if (MatchingChild != Children.end()) {
111d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      StringRef Result = MatchingChild->getValue().findEquivalent(
112d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper          Comparator, FileName, IsAmbiguous,
113d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper          ConsumedLength + Element.size() + 1);
114d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      if (!Result.empty() || IsAmbiguous)
115d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper        return Result;
116d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    }
117d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    std::vector<StringRef> AllChildren;
118d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    getAll(AllChildren, MatchingChild);
119d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    StringRef Result;
120d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    for (unsigned i = 0; i < AllChildren.size(); i++) {
121d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      if (Comparator.equivalent(AllChildren[i], FileName)) {
122d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper        if (Result.empty()) {
123d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper          Result = AllChildren[i];
124d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper        } else {
125d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper          IsAmbiguous = true;
126d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper          return StringRef();
127d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper        }
128d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      }
129d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    }
130d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    return Result;
131d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  }
132d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
133d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasperprivate:
134d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  /// \brief Gets all paths under this FileMatchTrieNode.
135d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  void getAll(std::vector<StringRef> &Results,
136d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper              llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
137d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    if (Path.empty())
138d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      return;
139d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    if (Children.empty()) {
140d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      Results.push_back(StringRef(Path));
141d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      return;
142d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    }
143d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    for (llvm::StringMap<FileMatchTrieNode>::const_iterator
144d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper         It = Children.begin(), E = Children.end();
145d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper         It != E; ++It) {
146d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      if (It == Except)
147d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper        continue;
148d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper      It->getValue().getAll(Results, Children.end());
149d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    }
150d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  }
151d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
152d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  // The stored absolute path in this node. Only valid for leaf nodes, i.e.
153d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  // nodes where Children.empty().
154d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  std::string Path;
155d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
156d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  // The children of this node stored in a map based on the next path segment.
157d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  llvm::StringMap<FileMatchTrieNode> Children;
158d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper};
159d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
160d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel JasperFileMatchTrie::FileMatchTrie()
161d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
162d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
163d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel JasperFileMatchTrie::FileMatchTrie(PathComparator *Comparator)
164d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  : Root(new FileMatchTrieNode), Comparator(Comparator) {}
165d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
166d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel JasperFileMatchTrie::~FileMatchTrie() {
167d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  delete Root;
168d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper}
169d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
170d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jaspervoid FileMatchTrie::insert(StringRef NewPath) {
171d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  Root->insert(NewPath);
172d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper}
173d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
174d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel JasperStringRef FileMatchTrie::findEquivalent(StringRef FileName,
175cfa88f893915ceb8ae4ce2f17c46c24a4d67502fDmitri Gribenko                                        raw_ostream &Error) const {
176d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  if (llvm::sys::path::is_relative(FileName)) {
177d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    Error << "Cannot resolve relative paths";
178d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    return StringRef();
179d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  }
180d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  bool IsAmbiguous = false;
181d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
182d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  if (IsAmbiguous)
183d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper    Error << "Path is ambiguous";
184d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper  return Result;
185d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper}
186d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper
187d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper} // end namespace tooling
188d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper} // end namespace clang
189