1//===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// FileCheck does a line-by line check of a file that validates whether it 11// contains the expected content. This is useful for regression tests etc. 12// 13// This program exits with an error status of 2 on error, exit status of 0 if 14// the file matched the expected contents, and exit status of 1 if it did not 15// contain the expected contents. 16// 17//===----------------------------------------------------------------------===// 18 19#include "llvm/ADT/SmallString.h" 20#include "llvm/ADT/StringExtras.h" 21#include "llvm/ADT/StringMap.h" 22#include "llvm/ADT/StringSet.h" 23#include "llvm/Support/CommandLine.h" 24#include "llvm/Support/MemoryBuffer.h" 25#include "llvm/Support/PrettyStackTrace.h" 26#include "llvm/Support/Regex.h" 27#include "llvm/Support/Signals.h" 28#include "llvm/Support/SourceMgr.h" 29#include "llvm/Support/raw_ostream.h" 30#include <algorithm> 31#include <cctype> 32#include <map> 33#include <string> 34#include <system_error> 35#include <vector> 36using namespace llvm; 37 38static cl::opt<std::string> 39CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41static cl::opt<std::string> 42InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45static cl::list<std::string> 46CheckPrefixes("check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48static cl::alias CheckPrefixesAlias( 49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50 cl::NotHidden, 51 cl::desc( 52 "Alias for -check-prefix permitting multiple comma separated values")); 53 54static cl::opt<bool> 55NoCanonicalizeWhiteSpace("strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65static cl::opt<bool> AllowEmptyInput( 66 "allow-empty", cl::init(false), 67 cl::desc("Allow the input file to be empty. This is useful when making\n" 68 "checks that some error message does not occur, for example.")); 69 70static cl::opt<bool> MatchFullLines( 71 "match-full-lines", cl::init(false), 72 cl::desc("Require all positive matches to cover an entire input line.\n" 73 "Allows leading and trailing whitespace if --strict-whitespace\n" 74 "is not also passed.")); 75 76typedef cl::list<std::string>::const_iterator prefix_iterator; 77 78//===----------------------------------------------------------------------===// 79// Pattern Handling Code. 80//===----------------------------------------------------------------------===// 81 82namespace Check { 83 enum CheckType { 84 CheckNone = 0, 85 CheckPlain, 86 CheckNext, 87 CheckSame, 88 CheckNot, 89 CheckDAG, 90 CheckLabel, 91 92 /// MatchEOF - When set, this pattern only matches the end of file. This is 93 /// used for trailing CHECK-NOTs. 94 CheckEOF, 95 /// CheckBadNot - Found -NOT combined with another CHECK suffix. 96 CheckBadNot 97 }; 98} 99 100class Pattern { 101 SMLoc PatternLoc; 102 103 Check::CheckType CheckTy; 104 105 /// FixedStr - If non-empty, this pattern is a fixed string match with the 106 /// specified fixed string. 107 StringRef FixedStr; 108 109 /// RegEx - If non-empty, this is a regex pattern. 110 std::string RegExStr; 111 112 /// \brief Contains the number of line this pattern is in. 113 unsigned LineNumber; 114 115 /// VariableUses - Entries in this vector map to uses of a variable in the 116 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 117 /// "foobaz" and we'll get an entry in this vector that tells us to insert the 118 /// value of bar at offset 3. 119 std::vector<std::pair<StringRef, unsigned> > VariableUses; 120 121 /// VariableDefs - Maps definitions of variables to their parenthesized 122 /// capture numbers. 123 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 124 std::map<StringRef, unsigned> VariableDefs; 125 126public: 127 128 Pattern(Check::CheckType Ty) 129 : CheckTy(Ty) { } 130 131 /// getLoc - Return the location in source code. 132 SMLoc getLoc() const { return PatternLoc; } 133 134 /// ParsePattern - Parse the given string into the Pattern. Prefix provides 135 /// which prefix is being matched, SM provides the SourceMgr used for error 136 /// reports, and LineNumber is the line number in the input file from which 137 /// the pattern string was read. Returns true in case of an error, false 138 /// otherwise. 139 bool ParsePattern(StringRef PatternStr, 140 StringRef Prefix, 141 SourceMgr &SM, 142 unsigned LineNumber); 143 144 /// Match - Match the pattern string against the input buffer Buffer. This 145 /// returns the position that is matched or npos if there is no match. If 146 /// there is a match, the size of the matched string is returned in MatchLen. 147 /// 148 /// The VariableTable StringMap provides the current values of filecheck 149 /// variables and is updated if this match defines new values. 150 size_t Match(StringRef Buffer, size_t &MatchLen, 151 StringMap<StringRef> &VariableTable) const; 152 153 /// PrintFailureInfo - Print additional information about a failure to match 154 /// involving this pattern. 155 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 156 const StringMap<StringRef> &VariableTable) const; 157 158 bool hasVariable() const { return !(VariableUses.empty() && 159 VariableDefs.empty()); } 160 161 Check::CheckType getCheckTy() const { return CheckTy; } 162 163private: 164 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 165 void AddBackrefToRegEx(unsigned BackrefNum); 166 167 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 168 /// matching this pattern at the start of \arg Buffer; a distance of zero 169 /// should correspond to a perfect match. 170 unsigned ComputeMatchDistance(StringRef Buffer, 171 const StringMap<StringRef> &VariableTable) const; 172 173 /// \brief Evaluates expression and stores the result to \p Value. 174 /// \return true on success. false when the expression has invalid syntax. 175 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 176 177 /// \brief Finds the closing sequence of a regex variable usage or 178 /// definition. Str has to point in the beginning of the definition 179 /// (right after the opening sequence). 180 /// \return offset of the closing sequence within Str, or npos if it was not 181 /// found. 182 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 183}; 184 185 186bool Pattern::ParsePattern(StringRef PatternStr, 187 StringRef Prefix, 188 SourceMgr &SM, 189 unsigned LineNumber) { 190 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 191 192 this->LineNumber = LineNumber; 193 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 194 195 // Ignore trailing whitespace. 196 while (!PatternStr.empty() && 197 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 198 PatternStr = PatternStr.substr(0, PatternStr.size()-1); 199 200 // Check that there is something on the line. 201 if (PatternStr.empty()) { 202 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 203 "found empty check string with prefix '" + 204 Prefix + ":'"); 205 return true; 206 } 207 208 // Check to see if this is a fixed string, or if it has regex pieces. 209 if (!MatchFullLinesHere && 210 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 211 PatternStr.find("[[") == StringRef::npos))) { 212 FixedStr = PatternStr; 213 return false; 214 } 215 216 if (MatchFullLinesHere) { 217 RegExStr += '^'; 218 if (!NoCanonicalizeWhiteSpace) 219 RegExStr += " *"; 220 } 221 222 // Paren value #0 is for the fully matched string. Any new parenthesized 223 // values add from there. 224 unsigned CurParen = 1; 225 226 // Otherwise, there is at least one regex piece. Build up the regex pattern 227 // by escaping scary characters in fixed strings, building up one big regex. 228 while (!PatternStr.empty()) { 229 // RegEx matches. 230 if (PatternStr.startswith("{{")) { 231 // This is the start of a regex match. Scan for the }}. 232 size_t End = PatternStr.find("}}"); 233 if (End == StringRef::npos) { 234 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 235 SourceMgr::DK_Error, 236 "found start of regex string with no end '}}'"); 237 return true; 238 } 239 240 // Enclose {{}} patterns in parens just like [[]] even though we're not 241 // capturing the result for any purpose. This is required in case the 242 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 243 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 244 RegExStr += '('; 245 ++CurParen; 246 247 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 248 return true; 249 RegExStr += ')'; 250 251 PatternStr = PatternStr.substr(End+2); 252 continue; 253 } 254 255 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 256 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 257 // second form is [[foo]] which is a reference to foo. The variable name 258 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 259 // it. This is to catch some common errors. 260 if (PatternStr.startswith("[[")) { 261 // Find the closing bracket pair ending the match. End is going to be an 262 // offset relative to the beginning of the match string. 263 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 264 265 if (End == StringRef::npos) { 266 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 267 SourceMgr::DK_Error, 268 "invalid named regex reference, no ]] found"); 269 return true; 270 } 271 272 StringRef MatchStr = PatternStr.substr(2, End); 273 PatternStr = PatternStr.substr(End+4); 274 275 // Get the regex name (e.g. "foo"). 276 size_t NameEnd = MatchStr.find(':'); 277 StringRef Name = MatchStr.substr(0, NameEnd); 278 279 if (Name.empty()) { 280 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 281 "invalid name in named regex: empty name"); 282 return true; 283 } 284 285 // Verify that the name/expression is well formed. FileCheck currently 286 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 287 // is relaxed, more strict check is performed in \c EvaluateExpression. 288 bool IsExpression = false; 289 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 290 if (i == 0 && Name[i] == '@') { 291 if (NameEnd != StringRef::npos) { 292 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 293 SourceMgr::DK_Error, 294 "invalid name in named regex definition"); 295 return true; 296 } 297 IsExpression = true; 298 continue; 299 } 300 if (Name[i] != '_' && !isalnum(Name[i]) && 301 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 302 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 303 SourceMgr::DK_Error, "invalid name in named regex"); 304 return true; 305 } 306 } 307 308 // Name can't start with a digit. 309 if (isdigit(static_cast<unsigned char>(Name[0]))) { 310 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 311 "invalid name in named regex"); 312 return true; 313 } 314 315 // Handle [[foo]]. 316 if (NameEnd == StringRef::npos) { 317 // Handle variables that were defined earlier on the same line by 318 // emitting a backreference. 319 if (VariableDefs.find(Name) != VariableDefs.end()) { 320 unsigned VarParenNum = VariableDefs[Name]; 321 if (VarParenNum < 1 || VarParenNum > 9) { 322 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 323 SourceMgr::DK_Error, 324 "Can't back-reference more than 9 variables"); 325 return true; 326 } 327 AddBackrefToRegEx(VarParenNum); 328 } else { 329 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 330 } 331 continue; 332 } 333 334 // Handle [[foo:.*]]. 335 VariableDefs[Name] = CurParen; 336 RegExStr += '('; 337 ++CurParen; 338 339 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 340 return true; 341 342 RegExStr += ')'; 343 } 344 345 // Handle fixed string matches. 346 // Find the end, which is the start of the next regex. 347 size_t FixedMatchEnd = PatternStr.find("{{"); 348 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 349 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 350 PatternStr = PatternStr.substr(FixedMatchEnd); 351 } 352 353 if (MatchFullLinesHere) { 354 if (!NoCanonicalizeWhiteSpace) 355 RegExStr += " *"; 356 RegExStr += '$'; 357 } 358 359 return false; 360} 361 362bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 363 SourceMgr &SM) { 364 Regex R(RS); 365 std::string Error; 366 if (!R.isValid(Error)) { 367 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 368 "invalid regex: " + Error); 369 return true; 370 } 371 372 RegExStr += RS.str(); 373 CurParen += R.getNumMatches(); 374 return false; 375} 376 377void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 378 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 379 std::string Backref = std::string("\\") + 380 std::string(1, '0' + BackrefNum); 381 RegExStr += Backref; 382} 383 384bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 385 // The only supported expression is @LINE([\+-]\d+)? 386 if (!Expr.startswith("@LINE")) 387 return false; 388 Expr = Expr.substr(StringRef("@LINE").size()); 389 int Offset = 0; 390 if (!Expr.empty()) { 391 if (Expr[0] == '+') 392 Expr = Expr.substr(1); 393 else if (Expr[0] != '-') 394 return false; 395 if (Expr.getAsInteger(10, Offset)) 396 return false; 397 } 398 Value = llvm::itostr(LineNumber + Offset); 399 return true; 400} 401 402/// Match - Match the pattern string against the input buffer Buffer. This 403/// returns the position that is matched or npos if there is no match. If 404/// there is a match, the size of the matched string is returned in MatchLen. 405size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 406 StringMap<StringRef> &VariableTable) const { 407 // If this is the EOF pattern, match it immediately. 408 if (CheckTy == Check::CheckEOF) { 409 MatchLen = 0; 410 return Buffer.size(); 411 } 412 413 // If this is a fixed string pattern, just match it now. 414 if (!FixedStr.empty()) { 415 MatchLen = FixedStr.size(); 416 return Buffer.find(FixedStr); 417 } 418 419 // Regex match. 420 421 // If there are variable uses, we need to create a temporary string with the 422 // actual value. 423 StringRef RegExToMatch = RegExStr; 424 std::string TmpStr; 425 if (!VariableUses.empty()) { 426 TmpStr = RegExStr; 427 428 unsigned InsertOffset = 0; 429 for (const auto &VariableUse : VariableUses) { 430 std::string Value; 431 432 if (VariableUse.first[0] == '@') { 433 if (!EvaluateExpression(VariableUse.first, Value)) 434 return StringRef::npos; 435 } else { 436 StringMap<StringRef>::iterator it = 437 VariableTable.find(VariableUse.first); 438 // If the variable is undefined, return an error. 439 if (it == VariableTable.end()) 440 return StringRef::npos; 441 442 // Look up the value and escape it so that we can put it into the regex. 443 Value += Regex::escape(it->second); 444 } 445 446 // Plop it into the regex at the adjusted offset. 447 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 448 Value.begin(), Value.end()); 449 InsertOffset += Value.size(); 450 } 451 452 // Match the newly constructed regex. 453 RegExToMatch = TmpStr; 454 } 455 456 457 SmallVector<StringRef, 4> MatchInfo; 458 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 459 return StringRef::npos; 460 461 // Successful regex match. 462 assert(!MatchInfo.empty() && "Didn't get any match"); 463 StringRef FullMatch = MatchInfo[0]; 464 465 // If this defines any variables, remember their values. 466 for (const auto &VariableDef : VariableDefs) { 467 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 468 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 469 } 470 471 MatchLen = FullMatch.size(); 472 return FullMatch.data()-Buffer.data(); 473} 474 475unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 476 const StringMap<StringRef> &VariableTable) const { 477 // Just compute the number of matching characters. For regular expressions, we 478 // just compare against the regex itself and hope for the best. 479 // 480 // FIXME: One easy improvement here is have the regex lib generate a single 481 // example regular expression which matches, and use that as the example 482 // string. 483 StringRef ExampleString(FixedStr); 484 if (ExampleString.empty()) 485 ExampleString = RegExStr; 486 487 // Only compare up to the first line in the buffer, or the string size. 488 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 489 BufferPrefix = BufferPrefix.split('\n').first; 490 return BufferPrefix.edit_distance(ExampleString); 491} 492 493void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 494 const StringMap<StringRef> &VariableTable) const{ 495 // If this was a regular expression using variables, print the current 496 // variable values. 497 if (!VariableUses.empty()) { 498 for (const auto &VariableUse : VariableUses) { 499 SmallString<256> Msg; 500 raw_svector_ostream OS(Msg); 501 StringRef Var = VariableUse.first; 502 if (Var[0] == '@') { 503 std::string Value; 504 if (EvaluateExpression(Var, Value)) { 505 OS << "with expression \""; 506 OS.write_escaped(Var) << "\" equal to \""; 507 OS.write_escaped(Value) << "\""; 508 } else { 509 OS << "uses incorrect expression \""; 510 OS.write_escaped(Var) << "\""; 511 } 512 } else { 513 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 514 515 // Check for undefined variable references. 516 if (it == VariableTable.end()) { 517 OS << "uses undefined variable \""; 518 OS.write_escaped(Var) << "\""; 519 } else { 520 OS << "with variable \""; 521 OS.write_escaped(Var) << "\" equal to \""; 522 OS.write_escaped(it->second) << "\""; 523 } 524 } 525 526 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 527 OS.str()); 528 } 529 } 530 531 // Attempt to find the closest/best fuzzy match. Usually an error happens 532 // because some string in the output didn't exactly match. In these cases, we 533 // would like to show the user a best guess at what "should have" matched, to 534 // save them having to actually check the input manually. 535 size_t NumLinesForward = 0; 536 size_t Best = StringRef::npos; 537 double BestQuality = 0; 538 539 // Use an arbitrary 4k limit on how far we will search. 540 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 541 if (Buffer[i] == '\n') 542 ++NumLinesForward; 543 544 // Patterns have leading whitespace stripped, so skip whitespace when 545 // looking for something which looks like a pattern. 546 if (Buffer[i] == ' ' || Buffer[i] == '\t') 547 continue; 548 549 // Compute the "quality" of this match as an arbitrary combination of the 550 // match distance and the number of lines skipped to get to this match. 551 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 552 double Quality = Distance + (NumLinesForward / 100.); 553 554 if (Quality < BestQuality || Best == StringRef::npos) { 555 Best = i; 556 BestQuality = Quality; 557 } 558 } 559 560 // Print the "possible intended match here" line if we found something 561 // reasonable and not equal to what we showed in the "scanning from here" 562 // line. 563 if (Best && Best != StringRef::npos && BestQuality < 50) { 564 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 565 SourceMgr::DK_Note, "possible intended match here"); 566 567 // FIXME: If we wanted to be really friendly we would show why the match 568 // failed, as it can be hard to spot simple one character differences. 569 } 570} 571 572size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 573 // Offset keeps track of the current offset within the input Str 574 size_t Offset = 0; 575 // [...] Nesting depth 576 size_t BracketDepth = 0; 577 578 while (!Str.empty()) { 579 if (Str.startswith("]]") && BracketDepth == 0) 580 return Offset; 581 if (Str[0] == '\\') { 582 // Backslash escapes the next char within regexes, so skip them both. 583 Str = Str.substr(2); 584 Offset += 2; 585 } else { 586 switch (Str[0]) { 587 default: 588 break; 589 case '[': 590 BracketDepth++; 591 break; 592 case ']': 593 if (BracketDepth == 0) { 594 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 595 SourceMgr::DK_Error, 596 "missing closing \"]\" for regex variable"); 597 exit(1); 598 } 599 BracketDepth--; 600 break; 601 } 602 Str = Str.substr(1); 603 Offset++; 604 } 605 } 606 607 return StringRef::npos; 608} 609 610 611//===----------------------------------------------------------------------===// 612// Check Strings. 613//===----------------------------------------------------------------------===// 614 615/// CheckString - This is a check that we found in the input file. 616struct CheckString { 617 /// Pat - The pattern to match. 618 Pattern Pat; 619 620 /// Prefix - Which prefix name this check matched. 621 StringRef Prefix; 622 623 /// Loc - The location in the match file that the check string was specified. 624 SMLoc Loc; 625 626 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 627 /// as opposed to a CHECK: directive. 628 // Check::CheckType CheckTy; 629 630 /// DagNotStrings - These are all of the strings that are disallowed from 631 /// occurring between this match string and the previous one (or start of 632 /// file). 633 std::vector<Pattern> DagNotStrings; 634 635 CheckString(const Pattern &P, StringRef S, SMLoc L) 636 : Pat(P), Prefix(S), Loc(L) {} 637 638 /// Check - Match check string and its "not strings" and/or "dag strings". 639 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 640 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 641 642 /// CheckNext - Verify there is a single line in the given buffer. 643 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 644 645 /// CheckSame - Verify there is no newline in the given buffer. 646 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 647 648 /// CheckNot - Verify there's no "not strings" in the given buffer. 649 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 650 const std::vector<const Pattern *> &NotStrings, 651 StringMap<StringRef> &VariableTable) const; 652 653 /// CheckDag - Match "dag strings" and their mixed "not strings". 654 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 655 std::vector<const Pattern *> &NotStrings, 656 StringMap<StringRef> &VariableTable) const; 657}; 658 659/// Canonicalize whitespaces in the input file. Line endings are replaced 660/// with UNIX-style '\n'. 661/// 662/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 663/// characters to a single space. 664static std::unique_ptr<MemoryBuffer> 665CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB, 666 bool PreserveHorizontal) { 667 SmallString<128> NewFile; 668 NewFile.reserve(MB->getBufferSize()); 669 670 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 671 Ptr != End; ++Ptr) { 672 // Eliminate trailing dosish \r. 673 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 674 continue; 675 } 676 677 // If current char is not a horizontal whitespace or if horizontal 678 // whitespace canonicalization is disabled, dump it to output as is. 679 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 680 NewFile.push_back(*Ptr); 681 continue; 682 } 683 684 // Otherwise, add one space and advance over neighboring space. 685 NewFile.push_back(' '); 686 while (Ptr+1 != End && 687 (Ptr[1] == ' ' || Ptr[1] == '\t')) 688 ++Ptr; 689 } 690 691 return std::unique_ptr<MemoryBuffer>( 692 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier())); 693} 694 695static bool IsPartOfWord(char c) { 696 return (isalnum(c) || c == '-' || c == '_'); 697} 698 699// Get the size of the prefix extension. 700static size_t CheckTypeSize(Check::CheckType Ty) { 701 switch (Ty) { 702 case Check::CheckNone: 703 case Check::CheckBadNot: 704 return 0; 705 706 case Check::CheckPlain: 707 return sizeof(":") - 1; 708 709 case Check::CheckNext: 710 return sizeof("-NEXT:") - 1; 711 712 case Check::CheckSame: 713 return sizeof("-SAME:") - 1; 714 715 case Check::CheckNot: 716 return sizeof("-NOT:") - 1; 717 718 case Check::CheckDAG: 719 return sizeof("-DAG:") - 1; 720 721 case Check::CheckLabel: 722 return sizeof("-LABEL:") - 1; 723 724 case Check::CheckEOF: 725 llvm_unreachable("Should not be using EOF size"); 726 } 727 728 llvm_unreachable("Bad check type"); 729} 730 731static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 732 char NextChar = Buffer[Prefix.size()]; 733 734 // Verify that the : is present after the prefix. 735 if (NextChar == ':') 736 return Check::CheckPlain; 737 738 if (NextChar != '-') 739 return Check::CheckNone; 740 741 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 742 if (Rest.startswith("NEXT:")) 743 return Check::CheckNext; 744 745 if (Rest.startswith("SAME:")) 746 return Check::CheckSame; 747 748 if (Rest.startswith("NOT:")) 749 return Check::CheckNot; 750 751 if (Rest.startswith("DAG:")) 752 return Check::CheckDAG; 753 754 if (Rest.startswith("LABEL:")) 755 return Check::CheckLabel; 756 757 // You can't combine -NOT with another suffix. 758 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 759 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 760 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 761 return Check::CheckBadNot; 762 763 return Check::CheckNone; 764} 765 766// From the given position, find the next character after the word. 767static size_t SkipWord(StringRef Str, size_t Loc) { 768 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 769 ++Loc; 770 return Loc; 771} 772 773// Try to find the first match in buffer for any prefix. If a valid match is 774// found, return that prefix and set its type and location. If there are almost 775// matches (e.g. the actual prefix string is found, but is not an actual check 776// string), but no valid match, return an empty string and set the position to 777// resume searching from. If no partial matches are found, return an empty 778// string and the location will be StringRef::npos. If one prefix is a substring 779// of another, the maximal match should be found. e.g. if "A" and "AA" are 780// prefixes then AA-CHECK: should match the second one. 781static StringRef FindFirstCandidateMatch(StringRef &Buffer, 782 Check::CheckType &CheckTy, 783 size_t &CheckLoc) { 784 StringRef FirstPrefix; 785 size_t FirstLoc = StringRef::npos; 786 size_t SearchLoc = StringRef::npos; 787 Check::CheckType FirstTy = Check::CheckNone; 788 789 CheckTy = Check::CheckNone; 790 CheckLoc = StringRef::npos; 791 792 for (StringRef Prefix : CheckPrefixes) { 793 size_t PrefixLoc = Buffer.find(Prefix); 794 795 if (PrefixLoc == StringRef::npos) 796 continue; 797 798 // Track where we are searching for invalid prefixes that look almost right. 799 // We need to only advance to the first partial match on the next attempt 800 // since a partial match could be a substring of a later, valid prefix. 801 // Need to skip to the end of the word, otherwise we could end up 802 // matching a prefix in a substring later. 803 if (PrefixLoc < SearchLoc) 804 SearchLoc = SkipWord(Buffer, PrefixLoc); 805 806 // We only want to find the first match to avoid skipping some. 807 if (PrefixLoc > FirstLoc) 808 continue; 809 // If one matching check-prefix is a prefix of another, choose the 810 // longer one. 811 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 812 continue; 813 814 StringRef Rest = Buffer.drop_front(PrefixLoc); 815 // Make sure we have actually found the prefix, and not a word containing 816 // it. This should also prevent matching the wrong prefix when one is a 817 // substring of another. 818 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 819 FirstTy = Check::CheckNone; 820 else 821 FirstTy = FindCheckType(Rest, Prefix); 822 823 FirstLoc = PrefixLoc; 824 FirstPrefix = Prefix; 825 } 826 827 // If the first prefix is invalid, we should continue the search after it. 828 if (FirstTy == Check::CheckNone) { 829 CheckLoc = SearchLoc; 830 return ""; 831 } 832 833 CheckTy = FirstTy; 834 CheckLoc = FirstLoc; 835 return FirstPrefix; 836} 837 838static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 839 unsigned &LineNumber, 840 Check::CheckType &CheckTy, 841 size_t &CheckLoc) { 842 while (!Buffer.empty()) { 843 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 844 // If we found a real match, we are done. 845 if (!Prefix.empty()) { 846 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 847 return Prefix; 848 } 849 850 // We didn't find any almost matches either, we are also done. 851 if (CheckLoc == StringRef::npos) 852 return StringRef(); 853 854 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 855 856 // Advance to the last possible match we found and try again. 857 Buffer = Buffer.drop_front(CheckLoc + 1); 858 } 859 860 return StringRef(); 861} 862 863/// ReadCheckFile - Read the check file, which specifies the sequence of 864/// expected strings. The strings are added to the CheckStrings vector. 865/// Returns true in case of an error, false otherwise. 866static bool ReadCheckFile(SourceMgr &SM, 867 std::vector<CheckString> &CheckStrings) { 868 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 869 MemoryBuffer::getFileOrSTDIN(CheckFilename); 870 if (std::error_code EC = FileOrErr.getError()) { 871 errs() << "Could not open check file '" << CheckFilename 872 << "': " << EC.message() << '\n'; 873 return true; 874 } 875 876 // If we want to canonicalize whitespace, strip excess whitespace from the 877 // buffer containing the CHECK lines. Remove DOS style line endings. 878 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile( 879 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace); 880 881 // Find all instances of CheckPrefix followed by : in the file. 882 StringRef Buffer = F->getBuffer(); 883 884 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 885 886 std::vector<Pattern> ImplicitNegativeChecks; 887 for (const auto &PatternString : ImplicitCheckNot) { 888 // Create a buffer with fake command line content in order to display the 889 // command line option responsible for the specific implicit CHECK-NOT. 890 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 891 std::string Suffix = "'"; 892 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 893 Prefix + PatternString + Suffix, "command line"); 894 895 StringRef PatternInBuffer = 896 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 897 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 898 899 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 900 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 901 "IMPLICIT-CHECK", SM, 0); 902 } 903 904 905 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 906 907 // LineNumber keeps track of the line on which CheckPrefix instances are 908 // found. 909 unsigned LineNumber = 1; 910 911 while (1) { 912 Check::CheckType CheckTy; 913 size_t PrefixLoc; 914 915 // See if a prefix occurs in the memory buffer. 916 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 917 LineNumber, 918 CheckTy, 919 PrefixLoc); 920 if (UsedPrefix.empty()) 921 break; 922 923 Buffer = Buffer.drop_front(PrefixLoc); 924 925 // Location to use for error messages. 926 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 927 928 // PrefixLoc is to the start of the prefix. Skip to the end. 929 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 930 931 // Complain about useful-looking but unsupported suffixes. 932 if (CheckTy == Check::CheckBadNot) { 933 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 934 SourceMgr::DK_Error, 935 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 936 return true; 937 } 938 939 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 940 // leading and trailing whitespace. 941 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 942 943 // Scan ahead to the end of line. 944 size_t EOL = Buffer.find_first_of("\n\r"); 945 946 // Remember the location of the start of the pattern, for diagnostics. 947 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 948 949 // Parse the pattern. 950 Pattern P(CheckTy); 951 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 952 return true; 953 954 // Verify that CHECK-LABEL lines do not define or use variables 955 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 956 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 957 SourceMgr::DK_Error, 958 "found '" + UsedPrefix + "-LABEL:'" 959 " with variable definition or use"); 960 return true; 961 } 962 963 Buffer = Buffer.substr(EOL); 964 965 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 966 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 967 CheckStrings.empty()) { 968 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 969 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 970 SourceMgr::DK_Error, 971 "found '" + UsedPrefix + "-" + Type + "' without previous '" 972 + UsedPrefix + ": line"); 973 return true; 974 } 975 976 // Handle CHECK-DAG/-NOT. 977 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 978 DagNotMatches.push_back(P); 979 continue; 980 } 981 982 // Okay, add the string we captured to the output vector and move on. 983 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 984 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 985 DagNotMatches = ImplicitNegativeChecks; 986 } 987 988 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 989 // prefix as a filler for the error message. 990 if (!DagNotMatches.empty()) { 991 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 992 SMLoc::getFromPointer(Buffer.data())); 993 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 994 } 995 996 if (CheckStrings.empty()) { 997 errs() << "error: no check strings found with prefix" 998 << (CheckPrefixes.size() > 1 ? "es " : " "); 999 prefix_iterator I = CheckPrefixes.begin(); 1000 prefix_iterator E = CheckPrefixes.end(); 1001 if (I != E) { 1002 errs() << "\'" << *I << ":'"; 1003 ++I; 1004 } 1005 for (; I != E; ++I) 1006 errs() << ", \'" << *I << ":'"; 1007 1008 errs() << '\n'; 1009 return true; 1010 } 1011 1012 return false; 1013} 1014 1015static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, 1016 const Pattern &Pat, StringRef Buffer, 1017 StringMap<StringRef> &VariableTable) { 1018 // Otherwise, we have an error, emit an error message. 1019 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1020 "expected string not found in input"); 1021 1022 // Print the "scanning from here" line. If the current position is at the 1023 // end of a line, advance to the start of the next line. 1024 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 1025 1026 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1027 "scanning from here"); 1028 1029 // Allow the pattern to print additional information if desired. 1030 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 1031} 1032 1033static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 1034 StringRef Buffer, 1035 StringMap<StringRef> &VariableTable) { 1036 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1037} 1038 1039/// CountNumNewlinesBetween - Count the number of newlines in the specified 1040/// range. 1041static unsigned CountNumNewlinesBetween(StringRef Range, 1042 const char *&FirstNewLine) { 1043 unsigned NumNewLines = 0; 1044 while (1) { 1045 // Scan for newline. 1046 Range = Range.substr(Range.find_first_of("\n\r")); 1047 if (Range.empty()) return NumNewLines; 1048 1049 ++NumNewLines; 1050 1051 // Handle \n\r and \r\n as a single newline. 1052 if (Range.size() > 1 && 1053 (Range[1] == '\n' || Range[1] == '\r') && 1054 (Range[0] != Range[1])) 1055 Range = Range.substr(1); 1056 Range = Range.substr(1); 1057 1058 if (NumNewLines == 1) 1059 FirstNewLine = Range.begin(); 1060 } 1061} 1062 1063size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1064 bool IsLabelScanMode, size_t &MatchLen, 1065 StringMap<StringRef> &VariableTable) const { 1066 size_t LastPos = 0; 1067 std::vector<const Pattern *> NotStrings; 1068 1069 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1070 // bounds; we have not processed variable definitions within the bounded block 1071 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1072 // over the block again (including the last CHECK-LABEL) in normal mode. 1073 if (!IsLabelScanMode) { 1074 // Match "dag strings" (with mixed "not strings" if any). 1075 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1076 if (LastPos == StringRef::npos) 1077 return StringRef::npos; 1078 } 1079 1080 // Match itself from the last position after matching CHECK-DAG. 1081 StringRef MatchBuffer = Buffer.substr(LastPos); 1082 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1083 if (MatchPos == StringRef::npos) { 1084 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1085 return StringRef::npos; 1086 } 1087 1088 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1089 // or CHECK-NOT 1090 if (!IsLabelScanMode) { 1091 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1092 1093 // If this check is a "CHECK-NEXT", verify that the previous match was on 1094 // the previous line (i.e. that there is one newline between them). 1095 if (CheckNext(SM, SkippedRegion)) 1096 return StringRef::npos; 1097 1098 // If this check is a "CHECK-SAME", verify that the previous match was on 1099 // the same line (i.e. that there is no newline between them). 1100 if (CheckSame(SM, SkippedRegion)) 1101 return StringRef::npos; 1102 1103 // If this match had "not strings", verify that they don't exist in the 1104 // skipped region. 1105 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1106 return StringRef::npos; 1107 } 1108 1109 return LastPos + MatchPos; 1110} 1111 1112bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1113 if (Pat.getCheckTy() != Check::CheckNext) 1114 return false; 1115 1116 // Count the number of newlines between the previous match and this one. 1117 assert(Buffer.data() != 1118 SM.getMemoryBuffer( 1119 SM.FindBufferContainingLoc( 1120 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1121 "CHECK-NEXT can't be the first check in a file"); 1122 1123 const char *FirstNewLine = nullptr; 1124 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1125 1126 if (NumNewLines == 0) { 1127 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1128 "-NEXT: is on the same line as previous match"); 1129 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1130 SourceMgr::DK_Note, "'next' match was here"); 1131 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1132 "previous match ended here"); 1133 return true; 1134 } 1135 1136 if (NumNewLines != 1) { 1137 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1138 "-NEXT: is not on the line after the previous match"); 1139 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1140 SourceMgr::DK_Note, "'next' match was here"); 1141 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1142 "previous match ended here"); 1143 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1144 "non-matching line after previous match is here"); 1145 return true; 1146 } 1147 1148 return false; 1149} 1150 1151bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1152 if (Pat.getCheckTy() != Check::CheckSame) 1153 return false; 1154 1155 // Count the number of newlines between the previous match and this one. 1156 assert(Buffer.data() != 1157 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1158 SMLoc::getFromPointer(Buffer.data()))) 1159 ->getBufferStart() && 1160 "CHECK-SAME can't be the first check in a file"); 1161 1162 const char *FirstNewLine = nullptr; 1163 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1164 1165 if (NumNewLines != 0) { 1166 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1167 Prefix + 1168 "-SAME: is not on the same line as the previous match"); 1169 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1170 "'next' match was here"); 1171 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1172 "previous match ended here"); 1173 return true; 1174 } 1175 1176 return false; 1177} 1178 1179bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1180 const std::vector<const Pattern *> &NotStrings, 1181 StringMap<StringRef> &VariableTable) const { 1182 for (const Pattern *Pat : NotStrings) { 1183 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1184 1185 size_t MatchLen = 0; 1186 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1187 1188 if (Pos == StringRef::npos) continue; 1189 1190 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1191 SourceMgr::DK_Error, 1192 Prefix + "-NOT: string occurred!"); 1193 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1194 Prefix + "-NOT: pattern specified here"); 1195 return true; 1196 } 1197 1198 return false; 1199} 1200 1201size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1202 std::vector<const Pattern *> &NotStrings, 1203 StringMap<StringRef> &VariableTable) const { 1204 if (DagNotStrings.empty()) 1205 return 0; 1206 1207 size_t LastPos = 0; 1208 size_t StartPos = LastPos; 1209 1210 for (const Pattern &Pat : DagNotStrings) { 1211 assert((Pat.getCheckTy() == Check::CheckDAG || 1212 Pat.getCheckTy() == Check::CheckNot) && 1213 "Invalid CHECK-DAG or CHECK-NOT!"); 1214 1215 if (Pat.getCheckTy() == Check::CheckNot) { 1216 NotStrings.push_back(&Pat); 1217 continue; 1218 } 1219 1220 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1221 1222 size_t MatchLen = 0, MatchPos; 1223 1224 // CHECK-DAG always matches from the start. 1225 StringRef MatchBuffer = Buffer.substr(StartPos); 1226 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1227 // With a group of CHECK-DAGs, a single mismatching means the match on 1228 // that group of CHECK-DAGs fails immediately. 1229 if (MatchPos == StringRef::npos) { 1230 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1231 return StringRef::npos; 1232 } 1233 // Re-calc it as the offset relative to the start of the original string. 1234 MatchPos += StartPos; 1235 1236 if (!NotStrings.empty()) { 1237 if (MatchPos < LastPos) { 1238 // Reordered? 1239 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1240 SourceMgr::DK_Error, 1241 Prefix + "-DAG: found a match of CHECK-DAG" 1242 " reordering across a CHECK-NOT"); 1243 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1244 SourceMgr::DK_Note, 1245 Prefix + "-DAG: the farthest match of CHECK-DAG" 1246 " is found here"); 1247 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1248 Prefix + "-NOT: the crossed pattern specified" 1249 " here"); 1250 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1251 Prefix + "-DAG: the reordered pattern specified" 1252 " here"); 1253 return StringRef::npos; 1254 } 1255 // All subsequent CHECK-DAGs should be matched from the farthest 1256 // position of all precedent CHECK-DAGs (including this one.) 1257 StartPos = LastPos; 1258 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1259 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1260 // region. 1261 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1262 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1263 return StringRef::npos; 1264 // Clear "not strings". 1265 NotStrings.clear(); 1266 } 1267 1268 // Update the last position with CHECK-DAG matches. 1269 LastPos = std::max(MatchPos + MatchLen, LastPos); 1270 } 1271 1272 return LastPos; 1273} 1274 1275// A check prefix must contain only alphanumeric, hyphens and underscores. 1276static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1277 Regex Validator("^[a-zA-Z0-9_-]*$"); 1278 return Validator.match(CheckPrefix); 1279} 1280 1281static bool ValidateCheckPrefixes() { 1282 StringSet<> PrefixSet; 1283 1284 for (StringRef Prefix : CheckPrefixes) { 1285 // Reject empty prefixes. 1286 if (Prefix == "") 1287 return false; 1288 1289 if (!PrefixSet.insert(Prefix).second) 1290 return false; 1291 1292 if (!ValidateCheckPrefix(Prefix)) 1293 return false; 1294 } 1295 1296 return true; 1297} 1298 1299// I don't think there's a way to specify an initial value for cl::list, 1300// so if nothing was specified, add the default 1301static void AddCheckPrefixIfNeeded() { 1302 if (CheckPrefixes.empty()) 1303 CheckPrefixes.push_back("CHECK"); 1304} 1305 1306static void DumpCommandLine(int argc, char **argv) { 1307 errs() << "FileCheck command line: "; 1308 for (int I = 0; I < argc; I++) 1309 errs() << " " << argv[I]; 1310 errs() << "\n"; 1311} 1312 1313int main(int argc, char **argv) { 1314 sys::PrintStackTraceOnErrorSignal(argv[0]); 1315 PrettyStackTraceProgram X(argc, argv); 1316 cl::ParseCommandLineOptions(argc, argv); 1317 1318 if (!ValidateCheckPrefixes()) { 1319 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1320 "start with a letter and contain only alphanumeric characters, " 1321 "hyphens and underscores\n"; 1322 return 2; 1323 } 1324 1325 AddCheckPrefixIfNeeded(); 1326 1327 SourceMgr SM; 1328 1329 // Read the expected strings from the check file. 1330 std::vector<CheckString> CheckStrings; 1331 if (ReadCheckFile(SM, CheckStrings)) 1332 return 2; 1333 1334 // Open the file to check and add it to SourceMgr. 1335 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 1336 MemoryBuffer::getFileOrSTDIN(InputFilename); 1337 if (std::error_code EC = FileOrErr.getError()) { 1338 errs() << "Could not open input file '" << InputFilename 1339 << "': " << EC.message() << '\n'; 1340 return 2; 1341 } 1342 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get(); 1343 1344 if (File->getBufferSize() == 0 && !AllowEmptyInput) { 1345 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1346 DumpCommandLine(argc, argv); 1347 return 2; 1348 } 1349 1350 // Remove duplicate spaces in the input file if requested. 1351 // Remove DOS style line endings. 1352 std::unique_ptr<MemoryBuffer> F = 1353 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace); 1354 1355 // Check that we have all of the expected strings, in order, in the input 1356 // file. 1357 StringRef Buffer = F->getBuffer(); 1358 1359 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 1360 1361 /// VariableTable - This holds all the current filecheck variables. 1362 StringMap<StringRef> VariableTable; 1363 1364 bool hasError = false; 1365 1366 unsigned i = 0, j = 0, e = CheckStrings.size(); 1367 1368 while (true) { 1369 StringRef CheckRegion; 1370 if (j == e) { 1371 CheckRegion = Buffer; 1372 } else { 1373 const CheckString &CheckLabelStr = CheckStrings[j]; 1374 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1375 ++j; 1376 continue; 1377 } 1378 1379 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1380 size_t MatchLabelLen = 0; 1381 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1382 MatchLabelLen, VariableTable); 1383 if (MatchLabelPos == StringRef::npos) { 1384 hasError = true; 1385 break; 1386 } 1387 1388 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1389 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1390 ++j; 1391 } 1392 1393 for ( ; i != j; ++i) { 1394 const CheckString &CheckStr = CheckStrings[i]; 1395 1396 // Check each string within the scanned region, including a second check 1397 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1398 size_t MatchLen = 0; 1399 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1400 VariableTable); 1401 1402 if (MatchPos == StringRef::npos) { 1403 hasError = true; 1404 i = j; 1405 break; 1406 } 1407 1408 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1409 } 1410 1411 if (j == e) 1412 break; 1413 } 1414 1415 return hasError ? 1 : 0; 1416} 1417