1d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//===--- FileMatchTrie.cpp - ----------------------------------------------===// 2d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// 3d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// The LLVM Compiler Infrastructure 4d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// 5d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// This file is distributed under the University of Illinois Open Source 6d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// License. See LICENSE.TXT for details. 7d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// 8d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//===----------------------------------------------------------------------===// 9d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// 10d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// This file contains the implementation of a FileMatchTrie. 11d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper// 12d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper//===----------------------------------------------------------------------===// 13d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 14d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper#include "clang/Tooling/FileMatchTrie.h" 15d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper#include "llvm/ADT/StringMap.h" 16d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper#include "llvm/Support/FileSystem.h" 178229d22e6449851b89361bf2f41804557328be63Rafael Espindola#include "llvm/Support/Path.h" 1861a50adfd9a095cfa017751d41af409c5bc26edfDaniel Jasper#include "llvm/Support/raw_ostream.h" 1955fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include <sstream> 20d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 21d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jaspernamespace clang { 22d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jaspernamespace tooling { 23d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 24d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// \brief Default \c PathComparator using \c llvm::sys::fs::equivalent(). 25d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasperstruct DefaultPathComparator : public PathComparator { 26d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper virtual ~DefaultPathComparator() {} 27651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines bool equivalent(StringRef FileA, StringRef FileB) const override { 282dbe2fa7eb3ccf8eabae884a177b4570d3fb260dDaniel Jasper return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB); 29d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 30d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper}; 31d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 32d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// \brief A node of the \c FileMatchTrie. 33d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// 34d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// Each node has storage for up to one path and a map mapping a path segment to 35d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper/// child nodes. The trie starts with an empty root node. 36d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasperclass FileMatchTrieNode { 37d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasperpublic: 38d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// \brief Inserts 'NewPath' into this trie. \c ConsumedLength denotes 39d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// the number of \c NewPath's trailing characters already consumed during 40d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// recursion. 41d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 42d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// An insert of a path 43d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 'p'starts at the root node and does the following: 44d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// - If the node is empty, insert 'p' into its storage and abort. 45d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// - If the node has a path 'p2' but no children, take the last path segment 46d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 's' of 'p2', put a new child into the map at 's' an insert the rest of 47d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 'p2' there. 48d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// - Insert a new child for the last segment of 'p' and insert the rest of 49d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 'p' there. 50d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 51d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// An insert operation is linear in the number of a path's segments. 52d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper void insert(StringRef NewPath, unsigned ConsumedLength = 0) { 53d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper // We cannot put relative paths into the FileMatchTrie as then a path can be 54d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper // a postfix of another path, violating a core assumption of the trie. 55d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (llvm::sys::path::is_relative(NewPath)) 56d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return; 57d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (Path.empty()) { 58d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper // This is an empty leaf. Store NewPath and return. 59d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Path = NewPath; 60d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return; 61d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 62d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (Children.empty()) { 63d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. 64d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (NewPath == Path) 65d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return; 66d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper // Make this a node and create a child-leaf with 'Path'. 67d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef Element(llvm::sys::path::filename( 68d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef(Path).drop_back(ConsumedLength))); 69d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Children[Element].Path = Path; 70d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 71d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef Element(llvm::sys::path::filename( 72d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef(NewPath).drop_back(ConsumedLength))); 73d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); 74d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 75d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 76d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// \brief Tries to find the node under this \c FileMatchTrieNode that best 77d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// matches 'FileName'. 78d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 79d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to 80d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// \c true and an empty string is returned. If no path fits 'FileName', an 81d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// empty string is returned. \c ConsumedLength denotes the number of 82d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// \c Filename's trailing characters already consumed during recursion. 83d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 84d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// To find the best matching node for a given path 'p', the 85d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// \c findEquivalent() function is called recursively for each path segment 86d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// (back to fron) of 'p' until a node 'n' is reached that does not .. 87d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// - .. have children. In this case it is checked 88d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// whether the stored path is equivalent to 'p'. If yes, the best match is 89d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// found. Otherwise continue with the parent node as if this node did not 90d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// exist. 91d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// - .. a child matching the next path segment. In this case, all children of 92d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// 'n' are an equally good match for 'p'. All children are of 'n' are found 93d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// recursively and their equivalence to 'p' is determined. If none are 94d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// equivalent, continue with the parent node as if 'n' didn't exist. If one 95d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// is equivalent, the best match is found. Otherwise, report and ambigiuity 96d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// error. 97d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef findEquivalent(const PathComparator& Comparator, 98d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef FileName, 99d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper bool &IsAmbiguous, 100d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper unsigned ConsumedLength = 0) const { 101d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (Children.empty()) { 102d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (Comparator.equivalent(StringRef(Path), FileName)) 103d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return StringRef(Path); 104d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return StringRef(); 105d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 106d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef Element(llvm::sys::path::filename(FileName.drop_back( 107d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper ConsumedLength))); 108d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild = 109d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Children.find(Element); 110d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (MatchingChild != Children.end()) { 111d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef Result = MatchingChild->getValue().findEquivalent( 112d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Comparator, FileName, IsAmbiguous, 113d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper ConsumedLength + Element.size() + 1); 114d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (!Result.empty() || IsAmbiguous) 115d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return Result; 116d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 117d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper std::vector<StringRef> AllChildren; 118d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper getAll(AllChildren, MatchingChild); 119d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef Result; 120d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper for (unsigned i = 0; i < AllChildren.size(); i++) { 121d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (Comparator.equivalent(AllChildren[i], FileName)) { 122d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (Result.empty()) { 123d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Result = AllChildren[i]; 124d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } else { 125d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper IsAmbiguous = true; 126d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return StringRef(); 127d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 128d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 129d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 130d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return Result; 131d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 132d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 133d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasperprivate: 134d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper /// \brief Gets all paths under this FileMatchTrieNode. 135d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper void getAll(std::vector<StringRef> &Results, 136d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const { 137d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (Path.empty()) 138d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return; 139d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (Children.empty()) { 140d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Results.push_back(StringRef(Path)); 141d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return; 142d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 143d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper for (llvm::StringMap<FileMatchTrieNode>::const_iterator 144d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper It = Children.begin(), E = Children.end(); 145d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper It != E; ++It) { 146d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (It == Except) 147d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper continue; 148d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper It->getValue().getAll(Results, Children.end()); 149d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 150d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 151d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 152d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper // The stored absolute path in this node. Only valid for leaf nodes, i.e. 153d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper // nodes where Children.empty(). 154d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper std::string Path; 155d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 156d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper // The children of this node stored in a map based on the next path segment. 157d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper llvm::StringMap<FileMatchTrieNode> Children; 158d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper}; 159d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 160d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel JasperFileMatchTrie::FileMatchTrie() 161d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {} 162d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 163d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel JasperFileMatchTrie::FileMatchTrie(PathComparator *Comparator) 164d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper : Root(new FileMatchTrieNode), Comparator(Comparator) {} 165d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 166d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel JasperFileMatchTrie::~FileMatchTrie() { 167d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper delete Root; 168d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper} 169d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 170d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jaspervoid FileMatchTrie::insert(StringRef NewPath) { 171d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Root->insert(NewPath); 172d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper} 173d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 174d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel JasperStringRef FileMatchTrie::findEquivalent(StringRef FileName, 175cfa88f893915ceb8ae4ce2f17c46c24a4d67502fDmitri Gribenko raw_ostream &Error) const { 176d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (llvm::sys::path::is_relative(FileName)) { 177d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Error << "Cannot resolve relative paths"; 178d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return StringRef(); 179d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper } 180d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper bool IsAmbiguous = false; 181d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous); 182d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper if (IsAmbiguous) 183d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper Error << "Path is ambiguous"; 184d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper return Result; 185d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper} 186d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper 187d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper} // end namespace tooling 188d3420c906e3605d94c084e8b8b1f3fa490093c86Daniel Jasper} // end namespace clang 189