15845e5c62b42d025557765006515156691a6a8b1Chris Lattner//===- StringMatcher.cpp - Generate a matcher for input strings -----------===// 25845e5c62b42d025557765006515156691a6a8b1Chris Lattner// 35845e5c62b42d025557765006515156691a6a8b1Chris Lattner// The LLVM Compiler Infrastructure 45845e5c62b42d025557765006515156691a6a8b1Chris Lattner// 55845e5c62b42d025557765006515156691a6a8b1Chris Lattner// This file is distributed under the University of Illinois Open Source 65845e5c62b42d025557765006515156691a6a8b1Chris Lattner// License. See LICENSE.TXT for details. 75845e5c62b42d025557765006515156691a6a8b1Chris Lattner// 85845e5c62b42d025557765006515156691a6a8b1Chris Lattner//===----------------------------------------------------------------------===// 95845e5c62b42d025557765006515156691a6a8b1Chris Lattner// 105845e5c62b42d025557765006515156691a6a8b1Chris Lattner// This file implements the StringMatcher class. 115845e5c62b42d025557765006515156691a6a8b1Chris Lattner// 125845e5c62b42d025557765006515156691a6a8b1Chris Lattner//===----------------------------------------------------------------------===// 135845e5c62b42d025557765006515156691a6a8b1Chris Lattner 14f657da2e4896732f306a9e62261418112e7337ceDouglas Gregor#include "llvm/TableGen/StringMatcher.h" 155845e5c62b42d025557765006515156691a6a8b1Chris Lattner#include "llvm/Support/raw_ostream.h" 165845e5c62b42d025557765006515156691a6a8b1Chris Lattner#include <map> 175845e5c62b42d025557765006515156691a6a8b1Chris Lattnerusing namespace llvm; 185845e5c62b42d025557765006515156691a6a8b1Chris Lattner 195845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// FindFirstNonCommonLetter - Find the first character in the keys of the 205845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// string pairs that is not shared across the whole set of strings. All 215845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// strings are assumed to have the same length. 225845e5c62b42d025557765006515156691a6a8b1Chris Lattnerstatic unsigned 235845e5c62b42d025557765006515156691a6a8b1Chris LattnerFindFirstNonCommonLetter(const std::vector<const 245845e5c62b42d025557765006515156691a6a8b1Chris Lattner StringMatcher::StringPair*> &Matches) { 255845e5c62b42d025557765006515156691a6a8b1Chris Lattner assert(!Matches.empty()); 265845e5c62b42d025557765006515156691a6a8b1Chris Lattner for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { 275845e5c62b42d025557765006515156691a6a8b1Chris Lattner // Check to see if letter i is the same across the set. 285845e5c62b42d025557765006515156691a6a8b1Chris Lattner char Letter = Matches[0]->first[i]; 295845e5c62b42d025557765006515156691a6a8b1Chris Lattner 305845e5c62b42d025557765006515156691a6a8b1Chris Lattner for (unsigned str = 0, e = Matches.size(); str != e; ++str) 315845e5c62b42d025557765006515156691a6a8b1Chris Lattner if (Matches[str]->first[i] != Letter) 325845e5c62b42d025557765006515156691a6a8b1Chris Lattner return i; 335845e5c62b42d025557765006515156691a6a8b1Chris Lattner } 345845e5c62b42d025557765006515156691a6a8b1Chris Lattner 355845e5c62b42d025557765006515156691a6a8b1Chris Lattner return Matches[0]->first.size(); 365845e5c62b42d025557765006515156691a6a8b1Chris Lattner} 375845e5c62b42d025557765006515156691a6a8b1Chris Lattner 385845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// EmitStringMatcherForChar - Given a set of strings that are known to be the 395845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// same length and whose characters leading up to CharNo are the same, emit 405845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// code to verify that CharNo and later are the same. 415845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// 425845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// \return - True if control can leave the emitted code fragment. 435845e5c62b42d025557765006515156691a6a8b1Chris Lattnerbool StringMatcher:: 445845e5c62b42d025557765006515156691a6a8b1Chris LattnerEmitStringMatcherForChar(const std::vector<const StringPair*> &Matches, 455845e5c62b42d025557765006515156691a6a8b1Chris Lattner unsigned CharNo, unsigned IndentCount) const { 465845e5c62b42d025557765006515156691a6a8b1Chris Lattner assert(!Matches.empty() && "Must have at least one string to match!"); 475845e5c62b42d025557765006515156691a6a8b1Chris Lattner std::string Indent(IndentCount*2+4, ' '); 485845e5c62b42d025557765006515156691a6a8b1Chris Lattner 495845e5c62b42d025557765006515156691a6a8b1Chris Lattner // If we have verified that the entire string matches, we're done: output the 505845e5c62b42d025557765006515156691a6a8b1Chris Lattner // matching code. 515845e5c62b42d025557765006515156691a6a8b1Chris Lattner if (CharNo == Matches[0]->first.size()) { 525845e5c62b42d025557765006515156691a6a8b1Chris Lattner assert(Matches.size() == 1 && "Had duplicate keys to match on"); 535845e5c62b42d025557765006515156691a6a8b1Chris Lattner 54d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner // If the to-execute code has \n's in it, indent each subsequent line. 55d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner StringRef Code = Matches[0]->second; 56d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner 57d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner std::pair<StringRef, StringRef> Split = Code.split('\n'); 58d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; 59d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner 60d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner Code = Split.second; 61d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner while (!Code.empty()) { 62d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner Split = Code.split('\n'); 63d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner OS << Indent << Split.first << "\n"; 64d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner Code = Split.second; 65d7e409da6db6e1d2d9e44dd8de5303c5cfd5bd29Chris Lattner } 665845e5c62b42d025557765006515156691a6a8b1Chris Lattner return false; 675845e5c62b42d025557765006515156691a6a8b1Chris Lattner } 685845e5c62b42d025557765006515156691a6a8b1Chris Lattner 695845e5c62b42d025557765006515156691a6a8b1Chris Lattner // Bucket the matches by the character we are comparing. 705845e5c62b42d025557765006515156691a6a8b1Chris Lattner std::map<char, std::vector<const StringPair*> > MatchesByLetter; 715845e5c62b42d025557765006515156691a6a8b1Chris Lattner 725845e5c62b42d025557765006515156691a6a8b1Chris Lattner for (unsigned i = 0, e = Matches.size(); i != e; ++i) 735845e5c62b42d025557765006515156691a6a8b1Chris Lattner MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); 745845e5c62b42d025557765006515156691a6a8b1Chris Lattner 755845e5c62b42d025557765006515156691a6a8b1Chris Lattner 765845e5c62b42d025557765006515156691a6a8b1Chris Lattner // If we have exactly one bucket to match, see how many characters are common 775845e5c62b42d025557765006515156691a6a8b1Chris Lattner // across the whole set and match all of them at once. 785845e5c62b42d025557765006515156691a6a8b1Chris Lattner if (MatchesByLetter.size() == 1) { 795845e5c62b42d025557765006515156691a6a8b1Chris Lattner unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); 805845e5c62b42d025557765006515156691a6a8b1Chris Lattner unsigned NumChars = FirstNonCommonLetter-CharNo; 815845e5c62b42d025557765006515156691a6a8b1Chris Lattner 825845e5c62b42d025557765006515156691a6a8b1Chris Lattner // Emit code to break out if the prefix doesn't match. 835845e5c62b42d025557765006515156691a6a8b1Chris Lattner if (NumChars == 1) { 845845e5c62b42d025557765006515156691a6a8b1Chris Lattner // Do the comparison with if (Str[1] != 'f') 855845e5c62b42d025557765006515156691a6a8b1Chris Lattner // FIXME: Need to escape general characters. 865845e5c62b42d025557765006515156691a6a8b1Chris Lattner OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '" 875845e5c62b42d025557765006515156691a6a8b1Chris Lattner << Matches[0]->first[CharNo] << "')\n"; 885845e5c62b42d025557765006515156691a6a8b1Chris Lattner OS << Indent << " break;\n"; 895845e5c62b42d025557765006515156691a6a8b1Chris Lattner } else { 905875c33746a86c4596b3039768889680ff4f5752Benjamin Kramer // Do the comparison with if memcmp(Str.data()+1, "foo", 3). 915845e5c62b42d025557765006515156691a6a8b1Chris Lattner // FIXME: Need to escape general strings. 925875c33746a86c4596b3039768889680ff4f5752Benjamin Kramer OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo 935875c33746a86c4596b3039768889680ff4f5752Benjamin Kramer << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", " 945875c33746a86c4596b3039768889680ff4f5752Benjamin Kramer << NumChars << "))\n"; 955845e5c62b42d025557765006515156691a6a8b1Chris Lattner OS << Indent << " break;\n"; 965845e5c62b42d025557765006515156691a6a8b1Chris Lattner } 975845e5c62b42d025557765006515156691a6a8b1Chris Lattner 985845e5c62b42d025557765006515156691a6a8b1Chris Lattner return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount); 995845e5c62b42d025557765006515156691a6a8b1Chris Lattner } 1005845e5c62b42d025557765006515156691a6a8b1Chris Lattner 1015845e5c62b42d025557765006515156691a6a8b1Chris Lattner // Otherwise, we have multiple possible things, emit a switch on the 1025845e5c62b42d025557765006515156691a6a8b1Chris Lattner // character. 1035845e5c62b42d025557765006515156691a6a8b1Chris Lattner OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; 1045845e5c62b42d025557765006515156691a6a8b1Chris Lattner OS << Indent << "default: break;\n"; 1055845e5c62b42d025557765006515156691a6a8b1Chris Lattner 1065845e5c62b42d025557765006515156691a6a8b1Chris Lattner for (std::map<char, std::vector<const StringPair*> >::iterator LI = 1075845e5c62b42d025557765006515156691a6a8b1Chris Lattner MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { 1085845e5c62b42d025557765006515156691a6a8b1Chris Lattner // TODO: escape hard stuff (like \n) if we ever care about it. 1095845e5c62b42d025557765006515156691a6a8b1Chris Lattner OS << Indent << "case '" << LI->first << "':\t // " 1106d7c307a088664c007a0b7e801d2376cd15a9da0Chris Lattner << LI->second.size() << " string"; 1116d7c307a088664c007a0b7e801d2376cd15a9da0Chris Lattner if (LI->second.size() != 1) OS << 's'; 1126d7c307a088664c007a0b7e801d2376cd15a9da0Chris Lattner OS << " to match.\n"; 1135845e5c62b42d025557765006515156691a6a8b1Chris Lattner if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1)) 1145845e5c62b42d025557765006515156691a6a8b1Chris Lattner OS << Indent << " break;\n"; 1155845e5c62b42d025557765006515156691a6a8b1Chris Lattner } 1165845e5c62b42d025557765006515156691a6a8b1Chris Lattner 1175845e5c62b42d025557765006515156691a6a8b1Chris Lattner OS << Indent << "}\n"; 1185845e5c62b42d025557765006515156691a6a8b1Chris Lattner return true; 1195845e5c62b42d025557765006515156691a6a8b1Chris Lattner} 1205845e5c62b42d025557765006515156691a6a8b1Chris Lattner 1215845e5c62b42d025557765006515156691a6a8b1Chris Lattner 1225845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// Emit - Top level entry point. 1235845e5c62b42d025557765006515156691a6a8b1Chris Lattner/// 124902edf21668f48a279389bc3a585bcce40487c56Chris Lattnervoid StringMatcher::Emit(unsigned Indent) const { 125902edf21668f48a279389bc3a585bcce40487c56Chris Lattner // If nothing to match, just fall through. 126902edf21668f48a279389bc3a585bcce40487c56Chris Lattner if (Matches.empty()) return; 127902edf21668f48a279389bc3a585bcce40487c56Chris Lattner 1285845e5c62b42d025557765006515156691a6a8b1Chris Lattner // First level categorization: group strings by length. 1295845e5c62b42d025557765006515156691a6a8b1Chris Lattner std::map<unsigned, std::vector<const StringPair*> > MatchesByLength; 1305845e5c62b42d025557765006515156691a6a8b1Chris Lattner 1315845e5c62b42d025557765006515156691a6a8b1Chris Lattner for (unsigned i = 0, e = Matches.size(); i != e; ++i) 1325845e5c62b42d025557765006515156691a6a8b1Chris Lattner MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); 1335845e5c62b42d025557765006515156691a6a8b1Chris Lattner 1345845e5c62b42d025557765006515156691a6a8b1Chris Lattner // Output a switch statement on length and categorize the elements within each 1355845e5c62b42d025557765006515156691a6a8b1Chris Lattner // bin. 136902edf21668f48a279389bc3a585bcce40487c56Chris Lattner OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; 137902edf21668f48a279389bc3a585bcce40487c56Chris Lattner OS.indent(Indent*2+2) << "default: break;\n"; 1385845e5c62b42d025557765006515156691a6a8b1Chris Lattner 1395845e5c62b42d025557765006515156691a6a8b1Chris Lattner for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI = 1405845e5c62b42d025557765006515156691a6a8b1Chris Lattner MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { 141902edf21668f48a279389bc3a585bcce40487c56Chris Lattner OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " 142902edf21668f48a279389bc3a585bcce40487c56Chris Lattner << LI->second.size() 1438e4fdef6ccb12deeb22e75b666094e1154f721a6Chris Lattner << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n"; 144902edf21668f48a279389bc3a585bcce40487c56Chris Lattner if (EmitStringMatcherForChar(LI->second, 0, Indent)) 145902edf21668f48a279389bc3a585bcce40487c56Chris Lattner OS.indent(Indent*2+4) << "break;\n"; 1465845e5c62b42d025557765006515156691a6a8b1Chris Lattner } 1475845e5c62b42d025557765006515156691a6a8b1Chris Lattner 148902edf21668f48a279389bc3a585bcce40487c56Chris Lattner OS.indent(Indent*2+2) << "}\n"; 1495845e5c62b42d025557765006515156691a6a8b1Chris Lattner} 150