1//===- StringMatcher.cpp - Generate a matcher for input strings -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the StringMatcher class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/TableGen/StringMatcher.h" 15#include "llvm/Support/raw_ostream.h" 16#include <map> 17using namespace llvm; 18 19/// FindFirstNonCommonLetter - Find the first character in the keys of the 20/// string pairs that is not shared across the whole set of strings. All 21/// strings are assumed to have the same length. 22static unsigned 23FindFirstNonCommonLetter(const std::vector<const 24 StringMatcher::StringPair*> &Matches) { 25 assert(!Matches.empty()); 26 for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { 27 // Check to see if letter i is the same across the set. 28 char Letter = Matches[0]->first[i]; 29 30 for (unsigned str = 0, e = Matches.size(); str != e; ++str) 31 if (Matches[str]->first[i] != Letter) 32 return i; 33 } 34 35 return Matches[0]->first.size(); 36} 37 38/// EmitStringMatcherForChar - Given a set of strings that are known to be the 39/// same length and whose characters leading up to CharNo are the same, emit 40/// code to verify that CharNo and later are the same. 41/// 42/// \return - True if control can leave the emitted code fragment. 43bool StringMatcher:: 44EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches, 45 unsigned CharNo, unsigned IndentCount) const { 46 assert(!Matches.empty() && "Must have at least one string to match!"); 47 std::string Indent(IndentCount*2+4, ' '); 48 49 // If we have verified that the entire string matches, we're done: output the 50 // matching code. 51 if (CharNo == Matches[0]->first.size()) { 52 assert(Matches.size() == 1 && "Had duplicate keys to match on"); 53 54 // If the to-execute code has \n's in it, indent each subsequent line. 55 StringRef Code = Matches[0]->second; 56 57 std::pair<StringRef, StringRef> Split = Code.split('\n'); 58 OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; 59 60 Code = Split.second; 61 while (!Code.empty()) { 62 Split = Code.split('\n'); 63 OS << Indent << Split.first << "\n"; 64 Code = Split.second; 65 } 66 return false; 67 } 68 69 // Bucket the matches by the character we are comparing. 70 std::map<char, std::vector<const StringPair*> > MatchesByLetter; 71 72 for (unsigned i = 0, e = Matches.size(); i != e; ++i) 73 MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); 74 75 76 // If we have exactly one bucket to match, see how many characters are common 77 // across the whole set and match all of them at once. 78 if (MatchesByLetter.size() == 1) { 79 unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); 80 unsigned NumChars = FirstNonCommonLetter-CharNo; 81 82 // Emit code to break out if the prefix doesn't match. 83 if (NumChars == 1) { 84 // Do the comparison with if (Str[1] != 'f') 85 // FIXME: Need to escape general characters. 86 OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '" 87 << Matches[0]->first[CharNo] << "')\n"; 88 OS << Indent << " break;\n"; 89 } else { 90 // Do the comparison with if memcmp(Str.data()+1, "foo", 3). 91 // FIXME: Need to escape general strings. 92 OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo 93 << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", " 94 << NumChars << "))\n"; 95 OS << Indent << " break;\n"; 96 } 97 98 return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount); 99 } 100 101 // Otherwise, we have multiple possible things, emit a switch on the 102 // character. 103 OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; 104 OS << Indent << "default: break;\n"; 105 106 for (std::map<char, std::vector<const StringPair*> >::iterator LI = 107 MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { 108 // TODO: escape hard stuff (like \n) if we ever care about it. 109 OS << Indent << "case '" << LI->first << "':\t // " 110 << LI->second.size() << " string"; 111 if (LI->second.size() != 1) OS << 's'; 112 OS << " to match.\n"; 113 if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1)) 114 OS << Indent << " break;\n"; 115 } 116 117 OS << Indent << "}\n"; 118 return true; 119} 120 121 122/// Emit - Top level entry point. 123/// 124void StringMatcher::Emit(unsigned Indent) const { 125 // If nothing to match, just fall through. 126 if (Matches.empty()) return; 127 128 // First level categorization: group strings by length. 129 std::map<unsigned, std::vector<const StringPair*> > MatchesByLength; 130 131 for (unsigned i = 0, e = Matches.size(); i != e; ++i) 132 MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); 133 134 // Output a switch statement on length and categorize the elements within each 135 // bin. 136 OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; 137 OS.indent(Indent*2+2) << "default: break;\n"; 138 139 for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI = 140 MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { 141 OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " 142 << LI->second.size() 143 << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n"; 144 if (EmitStringMatcherForChar(LI->second, 0, Indent)) 145 OS.indent(Indent*2+4) << "break;\n"; 146 } 147 148 OS.indent(Indent*2+2) << "}\n"; 149} 150