1//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// FileCheck does a line-by line check of a file that validates whether it
11// contains the expected content.  This is useful for regression tests etc.
12//
13// This program exits with an error status of 2 on error, exit status of 0 if
14// the file matched the expected contents, and exit status of 1 if it did not
15// contain the expected contents.
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/ADT/StringMap.h"
22#include "llvm/ADT/StringSet.h"
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/PrettyStackTrace.h"
26#include "llvm/Support/Regex.h"
27#include "llvm/Support/Signals.h"
28#include "llvm/Support/SourceMgr.h"
29#include "llvm/Support/raw_ostream.h"
30#include <algorithm>
31#include <cctype>
32#include <map>
33#include <string>
34#include <system_error>
35#include <vector>
36using namespace llvm;
37
38static cl::opt<std::string>
39CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40
41static cl::opt<std::string>
42InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43              cl::init("-"), cl::value_desc("filename"));
44
45static cl::list<std::string>
46CheckPrefixes("check-prefix",
47              cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48static cl::alias CheckPrefixesAlias(
49    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50    cl::NotHidden,
51    cl::desc(
52        "Alias for -check-prefix permitting multiple comma separated values"));
53
54static cl::opt<bool>
55NoCanonicalizeWhiteSpace("strict-whitespace",
56              cl::desc("Do not treat all horizontal whitespace as equivalent"));
57
58static cl::list<std::string> ImplicitCheckNot(
59    "implicit-check-not",
60    cl::desc("Add an implicit negative check with this pattern to every\n"
61             "positive check. This can be used to ensure that no instances of\n"
62             "this pattern occur which are not matched by a positive pattern"),
63    cl::value_desc("pattern"));
64
65static cl::opt<bool> AllowEmptyInput(
66    "allow-empty", cl::init(false),
67    cl::desc("Allow the input file to be empty. This is useful when making\n"
68             "checks that some error message does not occur, for example."));
69
70static cl::opt<bool> MatchFullLines(
71    "match-full-lines", cl::init(false),
72    cl::desc("Require all positive matches to cover an entire input line.\n"
73             "Allows leading and trailing whitespace if --strict-whitespace\n"
74             "is not also passed."));
75
76typedef cl::list<std::string>::const_iterator prefix_iterator;
77
78//===----------------------------------------------------------------------===//
79// Pattern Handling Code.
80//===----------------------------------------------------------------------===//
81
82namespace Check {
83  enum CheckType {
84    CheckNone = 0,
85    CheckPlain,
86    CheckNext,
87    CheckSame,
88    CheckNot,
89    CheckDAG,
90    CheckLabel,
91
92    /// MatchEOF - When set, this pattern only matches the end of file. This is
93    /// used for trailing CHECK-NOTs.
94    CheckEOF,
95    /// CheckBadNot - Found -NOT combined with another CHECK suffix.
96    CheckBadNot
97  };
98}
99
100class Pattern {
101  SMLoc PatternLoc;
102
103  Check::CheckType CheckTy;
104
105  /// FixedStr - If non-empty, this pattern is a fixed string match with the
106  /// specified fixed string.
107  StringRef FixedStr;
108
109  /// RegEx - If non-empty, this is a regex pattern.
110  std::string RegExStr;
111
112  /// \brief Contains the number of line this pattern is in.
113  unsigned LineNumber;
114
115  /// VariableUses - Entries in this vector map to uses of a variable in the
116  /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
117  /// "foobaz" and we'll get an entry in this vector that tells us to insert the
118  /// value of bar at offset 3.
119  std::vector<std::pair<StringRef, unsigned> > VariableUses;
120
121  /// VariableDefs - Maps definitions of variables to their parenthesized
122  /// capture numbers.
123  /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
124  std::map<StringRef, unsigned> VariableDefs;
125
126public:
127
128  Pattern(Check::CheckType Ty)
129    : CheckTy(Ty) { }
130
131  /// getLoc - Return the location in source code.
132  SMLoc getLoc() const { return PatternLoc; }
133
134  /// ParsePattern - Parse the given string into the Pattern. Prefix provides
135  /// which prefix is being matched, SM provides the SourceMgr used for error
136  /// reports, and LineNumber is the line number in the input file from which
137  /// the pattern string was read.  Returns true in case of an error, false
138  /// otherwise.
139  bool ParsePattern(StringRef PatternStr,
140                    StringRef Prefix,
141                    SourceMgr &SM,
142                    unsigned LineNumber);
143
144  /// Match - Match the pattern string against the input buffer Buffer.  This
145  /// returns the position that is matched or npos if there is no match.  If
146  /// there is a match, the size of the matched string is returned in MatchLen.
147  ///
148  /// The VariableTable StringMap provides the current values of filecheck
149  /// variables and is updated if this match defines new values.
150  size_t Match(StringRef Buffer, size_t &MatchLen,
151               StringMap<StringRef> &VariableTable) const;
152
153  /// PrintFailureInfo - Print additional information about a failure to match
154  /// involving this pattern.
155  void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
156                        const StringMap<StringRef> &VariableTable) const;
157
158  bool hasVariable() const { return !(VariableUses.empty() &&
159                                      VariableDefs.empty()); }
160
161  Check::CheckType getCheckTy() const { return CheckTy; }
162
163private:
164  bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
165  void AddBackrefToRegEx(unsigned BackrefNum);
166
167  /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
168  /// matching this pattern at the start of \arg Buffer; a distance of zero
169  /// should correspond to a perfect match.
170  unsigned ComputeMatchDistance(StringRef Buffer,
171                               const StringMap<StringRef> &VariableTable) const;
172
173  /// \brief Evaluates expression and stores the result to \p Value.
174  /// \return true on success. false when the expression has invalid syntax.
175  bool EvaluateExpression(StringRef Expr, std::string &Value) const;
176
177  /// \brief Finds the closing sequence of a regex variable usage or
178  /// definition. Str has to point in the beginning of the definition
179  /// (right after the opening sequence).
180  /// \return offset of the closing sequence within Str, or npos if it was not
181  /// found.
182  size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
183};
184
185
186bool Pattern::ParsePattern(StringRef PatternStr,
187                           StringRef Prefix,
188                           SourceMgr &SM,
189                           unsigned LineNumber) {
190  bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
191
192  this->LineNumber = LineNumber;
193  PatternLoc = SMLoc::getFromPointer(PatternStr.data());
194
195  // Ignore trailing whitespace.
196  while (!PatternStr.empty() &&
197         (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
198    PatternStr = PatternStr.substr(0, PatternStr.size()-1);
199
200  // Check that there is something on the line.
201  if (PatternStr.empty()) {
202    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
203                    "found empty check string with prefix '" +
204                    Prefix + ":'");
205    return true;
206  }
207
208  // Check to see if this is a fixed string, or if it has regex pieces.
209  if (!MatchFullLinesHere &&
210      (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
211                                 PatternStr.find("[[") == StringRef::npos))) {
212    FixedStr = PatternStr;
213    return false;
214  }
215
216  if (MatchFullLinesHere) {
217    RegExStr += '^';
218    if (!NoCanonicalizeWhiteSpace)
219      RegExStr += " *";
220  }
221
222  // Paren value #0 is for the fully matched string.  Any new parenthesized
223  // values add from there.
224  unsigned CurParen = 1;
225
226  // Otherwise, there is at least one regex piece.  Build up the regex pattern
227  // by escaping scary characters in fixed strings, building up one big regex.
228  while (!PatternStr.empty()) {
229    // RegEx matches.
230    if (PatternStr.startswith("{{")) {
231      // This is the start of a regex match.  Scan for the }}.
232      size_t End = PatternStr.find("}}");
233      if (End == StringRef::npos) {
234        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
235                        SourceMgr::DK_Error,
236                        "found start of regex string with no end '}}'");
237        return true;
238      }
239
240      // Enclose {{}} patterns in parens just like [[]] even though we're not
241      // capturing the result for any purpose.  This is required in case the
242      // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
243      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
244      RegExStr += '(';
245      ++CurParen;
246
247      if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
248        return true;
249      RegExStr += ')';
250
251      PatternStr = PatternStr.substr(End+2);
252      continue;
253    }
254
255    // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
256    // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
257    // second form is [[foo]] which is a reference to foo.  The variable name
258    // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
259    // it.  This is to catch some common errors.
260    if (PatternStr.startswith("[[")) {
261      // Find the closing bracket pair ending the match.  End is going to be an
262      // offset relative to the beginning of the match string.
263      size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
264
265      if (End == StringRef::npos) {
266        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
267                        SourceMgr::DK_Error,
268                        "invalid named regex reference, no ]] found");
269        return true;
270      }
271
272      StringRef MatchStr = PatternStr.substr(2, End);
273      PatternStr = PatternStr.substr(End+4);
274
275      // Get the regex name (e.g. "foo").
276      size_t NameEnd = MatchStr.find(':');
277      StringRef Name = MatchStr.substr(0, NameEnd);
278
279      if (Name.empty()) {
280        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
281                        "invalid name in named regex: empty name");
282        return true;
283      }
284
285      // Verify that the name/expression is well formed. FileCheck currently
286      // supports @LINE, @LINE+number, @LINE-number expressions. The check here
287      // is relaxed, more strict check is performed in \c EvaluateExpression.
288      bool IsExpression = false;
289      for (unsigned i = 0, e = Name.size(); i != e; ++i) {
290        if (i == 0 && Name[i] == '@') {
291          if (NameEnd != StringRef::npos) {
292            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
293                            SourceMgr::DK_Error,
294                            "invalid name in named regex definition");
295            return true;
296          }
297          IsExpression = true;
298          continue;
299        }
300        if (Name[i] != '_' && !isalnum(Name[i]) &&
301            (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
302          SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
303                          SourceMgr::DK_Error, "invalid name in named regex");
304          return true;
305        }
306      }
307
308      // Name can't start with a digit.
309      if (isdigit(static_cast<unsigned char>(Name[0]))) {
310        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
311                        "invalid name in named regex");
312        return true;
313      }
314
315      // Handle [[foo]].
316      if (NameEnd == StringRef::npos) {
317        // Handle variables that were defined earlier on the same line by
318        // emitting a backreference.
319        if (VariableDefs.find(Name) != VariableDefs.end()) {
320          unsigned VarParenNum = VariableDefs[Name];
321          if (VarParenNum < 1 || VarParenNum > 9) {
322            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
323                            SourceMgr::DK_Error,
324                            "Can't back-reference more than 9 variables");
325            return true;
326          }
327          AddBackrefToRegEx(VarParenNum);
328        } else {
329          VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
330        }
331        continue;
332      }
333
334      // Handle [[foo:.*]].
335      VariableDefs[Name] = CurParen;
336      RegExStr += '(';
337      ++CurParen;
338
339      if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
340        return true;
341
342      RegExStr += ')';
343    }
344
345    // Handle fixed string matches.
346    // Find the end, which is the start of the next regex.
347    size_t FixedMatchEnd = PatternStr.find("{{");
348    FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
349    RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
350    PatternStr = PatternStr.substr(FixedMatchEnd);
351  }
352
353  if (MatchFullLinesHere) {
354    if (!NoCanonicalizeWhiteSpace)
355      RegExStr += " *";
356    RegExStr += '$';
357  }
358
359  return false;
360}
361
362bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
363                              SourceMgr &SM) {
364  Regex R(RS);
365  std::string Error;
366  if (!R.isValid(Error)) {
367    SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
368                    "invalid regex: " + Error);
369    return true;
370  }
371
372  RegExStr += RS.str();
373  CurParen += R.getNumMatches();
374  return false;
375}
376
377void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
378  assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
379  std::string Backref = std::string("\\") +
380                        std::string(1, '0' + BackrefNum);
381  RegExStr += Backref;
382}
383
384bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
385  // The only supported expression is @LINE([\+-]\d+)?
386  if (!Expr.startswith("@LINE"))
387    return false;
388  Expr = Expr.substr(StringRef("@LINE").size());
389  int Offset = 0;
390  if (!Expr.empty()) {
391    if (Expr[0] == '+')
392      Expr = Expr.substr(1);
393    else if (Expr[0] != '-')
394      return false;
395    if (Expr.getAsInteger(10, Offset))
396      return false;
397  }
398  Value = llvm::itostr(LineNumber + Offset);
399  return true;
400}
401
402/// Match - Match the pattern string against the input buffer Buffer.  This
403/// returns the position that is matched or npos if there is no match.  If
404/// there is a match, the size of the matched string is returned in MatchLen.
405size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
406                      StringMap<StringRef> &VariableTable) const {
407  // If this is the EOF pattern, match it immediately.
408  if (CheckTy == Check::CheckEOF) {
409    MatchLen = 0;
410    return Buffer.size();
411  }
412
413  // If this is a fixed string pattern, just match it now.
414  if (!FixedStr.empty()) {
415    MatchLen = FixedStr.size();
416    return Buffer.find(FixedStr);
417  }
418
419  // Regex match.
420
421  // If there are variable uses, we need to create a temporary string with the
422  // actual value.
423  StringRef RegExToMatch = RegExStr;
424  std::string TmpStr;
425  if (!VariableUses.empty()) {
426    TmpStr = RegExStr;
427
428    unsigned InsertOffset = 0;
429    for (const auto &VariableUse : VariableUses) {
430      std::string Value;
431
432      if (VariableUse.first[0] == '@') {
433        if (!EvaluateExpression(VariableUse.first, Value))
434          return StringRef::npos;
435      } else {
436        StringMap<StringRef>::iterator it =
437            VariableTable.find(VariableUse.first);
438        // If the variable is undefined, return an error.
439        if (it == VariableTable.end())
440          return StringRef::npos;
441
442        // Look up the value and escape it so that we can put it into the regex.
443        Value += Regex::escape(it->second);
444      }
445
446      // Plop it into the regex at the adjusted offset.
447      TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
448                    Value.begin(), Value.end());
449      InsertOffset += Value.size();
450    }
451
452    // Match the newly constructed regex.
453    RegExToMatch = TmpStr;
454  }
455
456
457  SmallVector<StringRef, 4> MatchInfo;
458  if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
459    return StringRef::npos;
460
461  // Successful regex match.
462  assert(!MatchInfo.empty() && "Didn't get any match");
463  StringRef FullMatch = MatchInfo[0];
464
465  // If this defines any variables, remember their values.
466  for (const auto &VariableDef : VariableDefs) {
467    assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
468    VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
469  }
470
471  MatchLen = FullMatch.size();
472  return FullMatch.data()-Buffer.data();
473}
474
475unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
476                              const StringMap<StringRef> &VariableTable) const {
477  // Just compute the number of matching characters. For regular expressions, we
478  // just compare against the regex itself and hope for the best.
479  //
480  // FIXME: One easy improvement here is have the regex lib generate a single
481  // example regular expression which matches, and use that as the example
482  // string.
483  StringRef ExampleString(FixedStr);
484  if (ExampleString.empty())
485    ExampleString = RegExStr;
486
487  // Only compare up to the first line in the buffer, or the string size.
488  StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
489  BufferPrefix = BufferPrefix.split('\n').first;
490  return BufferPrefix.edit_distance(ExampleString);
491}
492
493void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
494                               const StringMap<StringRef> &VariableTable) const{
495  // If this was a regular expression using variables, print the current
496  // variable values.
497  if (!VariableUses.empty()) {
498    for (const auto &VariableUse : VariableUses) {
499      SmallString<256> Msg;
500      raw_svector_ostream OS(Msg);
501      StringRef Var = VariableUse.first;
502      if (Var[0] == '@') {
503        std::string Value;
504        if (EvaluateExpression(Var, Value)) {
505          OS << "with expression \"";
506          OS.write_escaped(Var) << "\" equal to \"";
507          OS.write_escaped(Value) << "\"";
508        } else {
509          OS << "uses incorrect expression \"";
510          OS.write_escaped(Var) << "\"";
511        }
512      } else {
513        StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
514
515        // Check for undefined variable references.
516        if (it == VariableTable.end()) {
517          OS << "uses undefined variable \"";
518          OS.write_escaped(Var) << "\"";
519        } else {
520          OS << "with variable \"";
521          OS.write_escaped(Var) << "\" equal to \"";
522          OS.write_escaped(it->second) << "\"";
523        }
524      }
525
526      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
527                      OS.str());
528    }
529  }
530
531  // Attempt to find the closest/best fuzzy match.  Usually an error happens
532  // because some string in the output didn't exactly match. In these cases, we
533  // would like to show the user a best guess at what "should have" matched, to
534  // save them having to actually check the input manually.
535  size_t NumLinesForward = 0;
536  size_t Best = StringRef::npos;
537  double BestQuality = 0;
538
539  // Use an arbitrary 4k limit on how far we will search.
540  for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
541    if (Buffer[i] == '\n')
542      ++NumLinesForward;
543
544    // Patterns have leading whitespace stripped, so skip whitespace when
545    // looking for something which looks like a pattern.
546    if (Buffer[i] == ' ' || Buffer[i] == '\t')
547      continue;
548
549    // Compute the "quality" of this match as an arbitrary combination of the
550    // match distance and the number of lines skipped to get to this match.
551    unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
552    double Quality = Distance + (NumLinesForward / 100.);
553
554    if (Quality < BestQuality || Best == StringRef::npos) {
555      Best = i;
556      BestQuality = Quality;
557    }
558  }
559
560  // Print the "possible intended match here" line if we found something
561  // reasonable and not equal to what we showed in the "scanning from here"
562  // line.
563  if (Best && Best != StringRef::npos && BestQuality < 50) {
564      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
565                      SourceMgr::DK_Note, "possible intended match here");
566
567    // FIXME: If we wanted to be really friendly we would show why the match
568    // failed, as it can be hard to spot simple one character differences.
569  }
570}
571
572size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
573  // Offset keeps track of the current offset within the input Str
574  size_t Offset = 0;
575  // [...] Nesting depth
576  size_t BracketDepth = 0;
577
578  while (!Str.empty()) {
579    if (Str.startswith("]]") && BracketDepth == 0)
580      return Offset;
581    if (Str[0] == '\\') {
582      // Backslash escapes the next char within regexes, so skip them both.
583      Str = Str.substr(2);
584      Offset += 2;
585    } else {
586      switch (Str[0]) {
587        default:
588          break;
589        case '[':
590          BracketDepth++;
591          break;
592        case ']':
593          if (BracketDepth == 0) {
594            SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
595                            SourceMgr::DK_Error,
596                            "missing closing \"]\" for regex variable");
597            exit(1);
598          }
599          BracketDepth--;
600          break;
601      }
602      Str = Str.substr(1);
603      Offset++;
604    }
605  }
606
607  return StringRef::npos;
608}
609
610
611//===----------------------------------------------------------------------===//
612// Check Strings.
613//===----------------------------------------------------------------------===//
614
615/// CheckString - This is a check that we found in the input file.
616struct CheckString {
617  /// Pat - The pattern to match.
618  Pattern Pat;
619
620  /// Prefix - Which prefix name this check matched.
621  StringRef Prefix;
622
623  /// Loc - The location in the match file that the check string was specified.
624  SMLoc Loc;
625
626  /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
627  /// as opposed to a CHECK: directive.
628  //  Check::CheckType CheckTy;
629
630  /// DagNotStrings - These are all of the strings that are disallowed from
631  /// occurring between this match string and the previous one (or start of
632  /// file).
633  std::vector<Pattern> DagNotStrings;
634
635  CheckString(const Pattern &P, StringRef S, SMLoc L)
636      : Pat(P), Prefix(S), Loc(L) {}
637
638  /// Check - Match check string and its "not strings" and/or "dag strings".
639  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
640               size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
641
642  /// CheckNext - Verify there is a single line in the given buffer.
643  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
644
645  /// CheckSame - Verify there is no newline in the given buffer.
646  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
647
648  /// CheckNot - Verify there's no "not strings" in the given buffer.
649  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
650                const std::vector<const Pattern *> &NotStrings,
651                StringMap<StringRef> &VariableTable) const;
652
653  /// CheckDag - Match "dag strings" and their mixed "not strings".
654  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
655                  std::vector<const Pattern *> &NotStrings,
656                  StringMap<StringRef> &VariableTable) const;
657};
658
659/// Canonicalize whitespaces in the input file. Line endings are replaced
660/// with UNIX-style '\n'.
661///
662/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
663/// characters to a single space.
664static std::unique_ptr<MemoryBuffer>
665CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
666                      bool PreserveHorizontal) {
667  SmallString<128> NewFile;
668  NewFile.reserve(MB->getBufferSize());
669
670  for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
671       Ptr != End; ++Ptr) {
672    // Eliminate trailing dosish \r.
673    if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
674      continue;
675    }
676
677    // If current char is not a horizontal whitespace or if horizontal
678    // whitespace canonicalization is disabled, dump it to output as is.
679    if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
680      NewFile.push_back(*Ptr);
681      continue;
682    }
683
684    // Otherwise, add one space and advance over neighboring space.
685    NewFile.push_back(' ');
686    while (Ptr+1 != End &&
687           (Ptr[1] == ' ' || Ptr[1] == '\t'))
688      ++Ptr;
689  }
690
691  return std::unique_ptr<MemoryBuffer>(
692      MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
693}
694
695static bool IsPartOfWord(char c) {
696  return (isalnum(c) || c == '-' || c == '_');
697}
698
699// Get the size of the prefix extension.
700static size_t CheckTypeSize(Check::CheckType Ty) {
701  switch (Ty) {
702  case Check::CheckNone:
703  case Check::CheckBadNot:
704    return 0;
705
706  case Check::CheckPlain:
707    return sizeof(":") - 1;
708
709  case Check::CheckNext:
710    return sizeof("-NEXT:") - 1;
711
712  case Check::CheckSame:
713    return sizeof("-SAME:") - 1;
714
715  case Check::CheckNot:
716    return sizeof("-NOT:") - 1;
717
718  case Check::CheckDAG:
719    return sizeof("-DAG:") - 1;
720
721  case Check::CheckLabel:
722    return sizeof("-LABEL:") - 1;
723
724  case Check::CheckEOF:
725    llvm_unreachable("Should not be using EOF size");
726  }
727
728  llvm_unreachable("Bad check type");
729}
730
731static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
732  char NextChar = Buffer[Prefix.size()];
733
734  // Verify that the : is present after the prefix.
735  if (NextChar == ':')
736    return Check::CheckPlain;
737
738  if (NextChar != '-')
739    return Check::CheckNone;
740
741  StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
742  if (Rest.startswith("NEXT:"))
743    return Check::CheckNext;
744
745  if (Rest.startswith("SAME:"))
746    return Check::CheckSame;
747
748  if (Rest.startswith("NOT:"))
749    return Check::CheckNot;
750
751  if (Rest.startswith("DAG:"))
752    return Check::CheckDAG;
753
754  if (Rest.startswith("LABEL:"))
755    return Check::CheckLabel;
756
757  // You can't combine -NOT with another suffix.
758  if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
759      Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
760      Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
761    return Check::CheckBadNot;
762
763  return Check::CheckNone;
764}
765
766// From the given position, find the next character after the word.
767static size_t SkipWord(StringRef Str, size_t Loc) {
768  while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
769    ++Loc;
770  return Loc;
771}
772
773// Try to find the first match in buffer for any prefix. If a valid match is
774// found, return that prefix and set its type and location.  If there are almost
775// matches (e.g. the actual prefix string is found, but is not an actual check
776// string), but no valid match, return an empty string and set the position to
777// resume searching from. If no partial matches are found, return an empty
778// string and the location will be StringRef::npos. If one prefix is a substring
779// of another, the maximal match should be found. e.g. if "A" and "AA" are
780// prefixes then AA-CHECK: should match the second one.
781static StringRef FindFirstCandidateMatch(StringRef &Buffer,
782                                         Check::CheckType &CheckTy,
783                                         size_t &CheckLoc) {
784  StringRef FirstPrefix;
785  size_t FirstLoc = StringRef::npos;
786  size_t SearchLoc = StringRef::npos;
787  Check::CheckType FirstTy = Check::CheckNone;
788
789  CheckTy = Check::CheckNone;
790  CheckLoc = StringRef::npos;
791
792  for (StringRef Prefix : CheckPrefixes) {
793    size_t PrefixLoc = Buffer.find(Prefix);
794
795    if (PrefixLoc == StringRef::npos)
796      continue;
797
798    // Track where we are searching for invalid prefixes that look almost right.
799    // We need to only advance to the first partial match on the next attempt
800    // since a partial match could be a substring of a later, valid prefix.
801    // Need to skip to the end of the word, otherwise we could end up
802    // matching a prefix in a substring later.
803    if (PrefixLoc < SearchLoc)
804      SearchLoc = SkipWord(Buffer, PrefixLoc);
805
806    // We only want to find the first match to avoid skipping some.
807    if (PrefixLoc > FirstLoc)
808      continue;
809    // If one matching check-prefix is a prefix of another, choose the
810    // longer one.
811    if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
812      continue;
813
814    StringRef Rest = Buffer.drop_front(PrefixLoc);
815    // Make sure we have actually found the prefix, and not a word containing
816    // it. This should also prevent matching the wrong prefix when one is a
817    // substring of another.
818    if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
819      FirstTy = Check::CheckNone;
820    else
821      FirstTy = FindCheckType(Rest, Prefix);
822
823    FirstLoc = PrefixLoc;
824    FirstPrefix = Prefix;
825  }
826
827  // If the first prefix is invalid, we should continue the search after it.
828  if (FirstTy == Check::CheckNone) {
829    CheckLoc = SearchLoc;
830    return "";
831  }
832
833  CheckTy = FirstTy;
834  CheckLoc = FirstLoc;
835  return FirstPrefix;
836}
837
838static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
839                                         unsigned &LineNumber,
840                                         Check::CheckType &CheckTy,
841                                         size_t &CheckLoc) {
842  while (!Buffer.empty()) {
843    StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
844    // If we found a real match, we are done.
845    if (!Prefix.empty()) {
846      LineNumber += Buffer.substr(0, CheckLoc).count('\n');
847      return Prefix;
848    }
849
850    // We didn't find any almost matches either, we are also done.
851    if (CheckLoc == StringRef::npos)
852      return StringRef();
853
854    LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
855
856    // Advance to the last possible match we found and try again.
857    Buffer = Buffer.drop_front(CheckLoc + 1);
858  }
859
860  return StringRef();
861}
862
863/// ReadCheckFile - Read the check file, which specifies the sequence of
864/// expected strings.  The strings are added to the CheckStrings vector.
865/// Returns true in case of an error, false otherwise.
866static bool ReadCheckFile(SourceMgr &SM,
867                          std::vector<CheckString> &CheckStrings) {
868  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
869      MemoryBuffer::getFileOrSTDIN(CheckFilename);
870  if (std::error_code EC = FileOrErr.getError()) {
871    errs() << "Could not open check file '" << CheckFilename
872           << "': " << EC.message() << '\n';
873    return true;
874  }
875
876  // If we want to canonicalize whitespace, strip excess whitespace from the
877  // buffer containing the CHECK lines. Remove DOS style line endings.
878  std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
879      std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
880
881  // Find all instances of CheckPrefix followed by : in the file.
882  StringRef Buffer = F->getBuffer();
883
884  SM.AddNewSourceBuffer(std::move(F), SMLoc());
885
886  std::vector<Pattern> ImplicitNegativeChecks;
887  for (const auto &PatternString : ImplicitCheckNot) {
888    // Create a buffer with fake command line content in order to display the
889    // command line option responsible for the specific implicit CHECK-NOT.
890    std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
891    std::string Suffix = "'";
892    std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
893        Prefix + PatternString + Suffix, "command line");
894
895    StringRef PatternInBuffer =
896        CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
897    SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
898
899    ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
900    ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
901                                               "IMPLICIT-CHECK", SM, 0);
902  }
903
904
905  std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
906
907  // LineNumber keeps track of the line on which CheckPrefix instances are
908  // found.
909  unsigned LineNumber = 1;
910
911  while (1) {
912    Check::CheckType CheckTy;
913    size_t PrefixLoc;
914
915    // See if a prefix occurs in the memory buffer.
916    StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
917                                                   LineNumber,
918                                                   CheckTy,
919                                                   PrefixLoc);
920    if (UsedPrefix.empty())
921      break;
922
923    Buffer = Buffer.drop_front(PrefixLoc);
924
925    // Location to use for error messages.
926    const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
927
928    // PrefixLoc is to the start of the prefix. Skip to the end.
929    Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
930
931    // Complain about useful-looking but unsupported suffixes.
932    if (CheckTy == Check::CheckBadNot) {
933      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
934                      SourceMgr::DK_Error,
935                      "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
936      return true;
937    }
938
939    // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
940    // leading and trailing whitespace.
941    Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
942
943    // Scan ahead to the end of line.
944    size_t EOL = Buffer.find_first_of("\n\r");
945
946    // Remember the location of the start of the pattern, for diagnostics.
947    SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
948
949    // Parse the pattern.
950    Pattern P(CheckTy);
951    if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
952      return true;
953
954    // Verify that CHECK-LABEL lines do not define or use variables
955    if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
956      SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
957                      SourceMgr::DK_Error,
958                      "found '" + UsedPrefix + "-LABEL:'"
959                      " with variable definition or use");
960      return true;
961    }
962
963    Buffer = Buffer.substr(EOL);
964
965    // Verify that CHECK-NEXT lines have at least one CHECK line before them.
966    if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
967        CheckStrings.empty()) {
968      StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
969      SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
970                      SourceMgr::DK_Error,
971                      "found '" + UsedPrefix + "-" + Type + "' without previous '"
972                      + UsedPrefix + ": line");
973      return true;
974    }
975
976    // Handle CHECK-DAG/-NOT.
977    if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
978      DagNotMatches.push_back(P);
979      continue;
980    }
981
982    // Okay, add the string we captured to the output vector and move on.
983    CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
984    std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
985    DagNotMatches = ImplicitNegativeChecks;
986  }
987
988  // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
989  // prefix as a filler for the error message.
990  if (!DagNotMatches.empty()) {
991    CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
992                              SMLoc::getFromPointer(Buffer.data()));
993    std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
994  }
995
996  if (CheckStrings.empty()) {
997    errs() << "error: no check strings found with prefix"
998           << (CheckPrefixes.size() > 1 ? "es " : " ");
999    prefix_iterator I = CheckPrefixes.begin();
1000    prefix_iterator E = CheckPrefixes.end();
1001    if (I != E) {
1002      errs() << "\'" << *I << ":'";
1003      ++I;
1004    }
1005    for (; I != E; ++I)
1006      errs() << ", \'" << *I << ":'";
1007
1008    errs() << '\n';
1009    return true;
1010  }
1011
1012  return false;
1013}
1014
1015static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
1016                             const Pattern &Pat, StringRef Buffer,
1017                             StringMap<StringRef> &VariableTable) {
1018  // Otherwise, we have an error, emit an error message.
1019  SM.PrintMessage(Loc, SourceMgr::DK_Error,
1020                  "expected string not found in input");
1021
1022  // Print the "scanning from here" line.  If the current position is at the
1023  // end of a line, advance to the start of the next line.
1024  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1025
1026  SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1027                  "scanning from here");
1028
1029  // Allow the pattern to print additional information if desired.
1030  Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1031}
1032
1033static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1034                             StringRef Buffer,
1035                             StringMap<StringRef> &VariableTable) {
1036  PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1037}
1038
1039/// CountNumNewlinesBetween - Count the number of newlines in the specified
1040/// range.
1041static unsigned CountNumNewlinesBetween(StringRef Range,
1042                                        const char *&FirstNewLine) {
1043  unsigned NumNewLines = 0;
1044  while (1) {
1045    // Scan for newline.
1046    Range = Range.substr(Range.find_first_of("\n\r"));
1047    if (Range.empty()) return NumNewLines;
1048
1049    ++NumNewLines;
1050
1051    // Handle \n\r and \r\n as a single newline.
1052    if (Range.size() > 1 &&
1053        (Range[1] == '\n' || Range[1] == '\r') &&
1054        (Range[0] != Range[1]))
1055      Range = Range.substr(1);
1056    Range = Range.substr(1);
1057
1058    if (NumNewLines == 1)
1059      FirstNewLine = Range.begin();
1060  }
1061}
1062
1063size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1064                          bool IsLabelScanMode, size_t &MatchLen,
1065                          StringMap<StringRef> &VariableTable) const {
1066  size_t LastPos = 0;
1067  std::vector<const Pattern *> NotStrings;
1068
1069  // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1070  // bounds; we have not processed variable definitions within the bounded block
1071  // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1072  // over the block again (including the last CHECK-LABEL) in normal mode.
1073  if (!IsLabelScanMode) {
1074    // Match "dag strings" (with mixed "not strings" if any).
1075    LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1076    if (LastPos == StringRef::npos)
1077      return StringRef::npos;
1078  }
1079
1080  // Match itself from the last position after matching CHECK-DAG.
1081  StringRef MatchBuffer = Buffer.substr(LastPos);
1082  size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1083  if (MatchPos == StringRef::npos) {
1084    PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1085    return StringRef::npos;
1086  }
1087
1088  // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1089  // or CHECK-NOT
1090  if (!IsLabelScanMode) {
1091    StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1092
1093    // If this check is a "CHECK-NEXT", verify that the previous match was on
1094    // the previous line (i.e. that there is one newline between them).
1095    if (CheckNext(SM, SkippedRegion))
1096      return StringRef::npos;
1097
1098    // If this check is a "CHECK-SAME", verify that the previous match was on
1099    // the same line (i.e. that there is no newline between them).
1100    if (CheckSame(SM, SkippedRegion))
1101      return StringRef::npos;
1102
1103    // If this match had "not strings", verify that they don't exist in the
1104    // skipped region.
1105    if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1106      return StringRef::npos;
1107  }
1108
1109  return LastPos + MatchPos;
1110}
1111
1112bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1113  if (Pat.getCheckTy() != Check::CheckNext)
1114    return false;
1115
1116  // Count the number of newlines between the previous match and this one.
1117  assert(Buffer.data() !=
1118         SM.getMemoryBuffer(
1119           SM.FindBufferContainingLoc(
1120             SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1121         "CHECK-NEXT can't be the first check in a file");
1122
1123  const char *FirstNewLine = nullptr;
1124  unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1125
1126  if (NumNewLines == 0) {
1127    SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1128                    "-NEXT: is on the same line as previous match");
1129    SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1130                    SourceMgr::DK_Note, "'next' match was here");
1131    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1132                    "previous match ended here");
1133    return true;
1134  }
1135
1136  if (NumNewLines != 1) {
1137    SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1138                    "-NEXT: is not on the line after the previous match");
1139    SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1140                    SourceMgr::DK_Note, "'next' match was here");
1141    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1142                    "previous match ended here");
1143    SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1144                    "non-matching line after previous match is here");
1145    return true;
1146  }
1147
1148  return false;
1149}
1150
1151bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1152  if (Pat.getCheckTy() != Check::CheckSame)
1153    return false;
1154
1155  // Count the number of newlines between the previous match and this one.
1156  assert(Buffer.data() !=
1157             SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1158                                    SMLoc::getFromPointer(Buffer.data())))
1159                 ->getBufferStart() &&
1160         "CHECK-SAME can't be the first check in a file");
1161
1162  const char *FirstNewLine = nullptr;
1163  unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1164
1165  if (NumNewLines != 0) {
1166    SM.PrintMessage(Loc, SourceMgr::DK_Error,
1167                    Prefix +
1168                        "-SAME: is not on the same line as the previous match");
1169    SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1170                    "'next' match was here");
1171    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1172                    "previous match ended here");
1173    return true;
1174  }
1175
1176  return false;
1177}
1178
1179bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1180                           const std::vector<const Pattern *> &NotStrings,
1181                           StringMap<StringRef> &VariableTable) const {
1182  for (const Pattern *Pat : NotStrings) {
1183    assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1184
1185    size_t MatchLen = 0;
1186    size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1187
1188    if (Pos == StringRef::npos) continue;
1189
1190    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1191                    SourceMgr::DK_Error,
1192                    Prefix + "-NOT: string occurred!");
1193    SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1194                    Prefix + "-NOT: pattern specified here");
1195    return true;
1196  }
1197
1198  return false;
1199}
1200
1201size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1202                             std::vector<const Pattern *> &NotStrings,
1203                             StringMap<StringRef> &VariableTable) const {
1204  if (DagNotStrings.empty())
1205    return 0;
1206
1207  size_t LastPos = 0;
1208  size_t StartPos = LastPos;
1209
1210  for (const Pattern &Pat : DagNotStrings) {
1211    assert((Pat.getCheckTy() == Check::CheckDAG ||
1212            Pat.getCheckTy() == Check::CheckNot) &&
1213           "Invalid CHECK-DAG or CHECK-NOT!");
1214
1215    if (Pat.getCheckTy() == Check::CheckNot) {
1216      NotStrings.push_back(&Pat);
1217      continue;
1218    }
1219
1220    assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1221
1222    size_t MatchLen = 0, MatchPos;
1223
1224    // CHECK-DAG always matches from the start.
1225    StringRef MatchBuffer = Buffer.substr(StartPos);
1226    MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1227    // With a group of CHECK-DAGs, a single mismatching means the match on
1228    // that group of CHECK-DAGs fails immediately.
1229    if (MatchPos == StringRef::npos) {
1230      PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1231      return StringRef::npos;
1232    }
1233    // Re-calc it as the offset relative to the start of the original string.
1234    MatchPos += StartPos;
1235
1236    if (!NotStrings.empty()) {
1237      if (MatchPos < LastPos) {
1238        // Reordered?
1239        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1240                        SourceMgr::DK_Error,
1241                        Prefix + "-DAG: found a match of CHECK-DAG"
1242                        " reordering across a CHECK-NOT");
1243        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1244                        SourceMgr::DK_Note,
1245                        Prefix + "-DAG: the farthest match of CHECK-DAG"
1246                        " is found here");
1247        SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1248                        Prefix + "-NOT: the crossed pattern specified"
1249                        " here");
1250        SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1251                        Prefix + "-DAG: the reordered pattern specified"
1252                        " here");
1253        return StringRef::npos;
1254      }
1255      // All subsequent CHECK-DAGs should be matched from the farthest
1256      // position of all precedent CHECK-DAGs (including this one.)
1257      StartPos = LastPos;
1258      // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1259      // CHECK-DAG, verify that there's no 'not' strings occurred in that
1260      // region.
1261      StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1262      if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1263        return StringRef::npos;
1264      // Clear "not strings".
1265      NotStrings.clear();
1266    }
1267
1268    // Update the last position with CHECK-DAG matches.
1269    LastPos = std::max(MatchPos + MatchLen, LastPos);
1270  }
1271
1272  return LastPos;
1273}
1274
1275// A check prefix must contain only alphanumeric, hyphens and underscores.
1276static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1277  Regex Validator("^[a-zA-Z0-9_-]*$");
1278  return Validator.match(CheckPrefix);
1279}
1280
1281static bool ValidateCheckPrefixes() {
1282  StringSet<> PrefixSet;
1283
1284  for (StringRef Prefix : CheckPrefixes) {
1285    // Reject empty prefixes.
1286    if (Prefix == "")
1287      return false;
1288
1289    if (!PrefixSet.insert(Prefix).second)
1290      return false;
1291
1292    if (!ValidateCheckPrefix(Prefix))
1293      return false;
1294  }
1295
1296  return true;
1297}
1298
1299// I don't think there's a way to specify an initial value for cl::list,
1300// so if nothing was specified, add the default
1301static void AddCheckPrefixIfNeeded() {
1302  if (CheckPrefixes.empty())
1303    CheckPrefixes.push_back("CHECK");
1304}
1305
1306static void DumpCommandLine(int argc, char **argv) {
1307  errs() << "FileCheck command line: ";
1308  for (int I = 0; I < argc; I++)
1309    errs() << " " << argv[I];
1310  errs() << "\n";
1311}
1312
1313int main(int argc, char **argv) {
1314  sys::PrintStackTraceOnErrorSignal(argv[0]);
1315  PrettyStackTraceProgram X(argc, argv);
1316  cl::ParseCommandLineOptions(argc, argv);
1317
1318  if (!ValidateCheckPrefixes()) {
1319    errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1320              "start with a letter and contain only alphanumeric characters, "
1321              "hyphens and underscores\n";
1322    return 2;
1323  }
1324
1325  AddCheckPrefixIfNeeded();
1326
1327  SourceMgr SM;
1328
1329  // Read the expected strings from the check file.
1330  std::vector<CheckString> CheckStrings;
1331  if (ReadCheckFile(SM, CheckStrings))
1332    return 2;
1333
1334  // Open the file to check and add it to SourceMgr.
1335  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1336      MemoryBuffer::getFileOrSTDIN(InputFilename);
1337  if (std::error_code EC = FileOrErr.getError()) {
1338    errs() << "Could not open input file '" << InputFilename
1339           << "': " << EC.message() << '\n';
1340    return 2;
1341  }
1342  std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1343
1344  if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1345    errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1346    DumpCommandLine(argc, argv);
1347    return 2;
1348  }
1349
1350  // Remove duplicate spaces in the input file if requested.
1351  // Remove DOS style line endings.
1352  std::unique_ptr<MemoryBuffer> F =
1353      CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1354
1355  // Check that we have all of the expected strings, in order, in the input
1356  // file.
1357  StringRef Buffer = F->getBuffer();
1358
1359  SM.AddNewSourceBuffer(std::move(F), SMLoc());
1360
1361  /// VariableTable - This holds all the current filecheck variables.
1362  StringMap<StringRef> VariableTable;
1363
1364  bool hasError = false;
1365
1366  unsigned i = 0, j = 0, e = CheckStrings.size();
1367
1368  while (true) {
1369    StringRef CheckRegion;
1370    if (j == e) {
1371      CheckRegion = Buffer;
1372    } else {
1373      const CheckString &CheckLabelStr = CheckStrings[j];
1374      if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1375        ++j;
1376        continue;
1377      }
1378
1379      // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1380      size_t MatchLabelLen = 0;
1381      size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1382                                                 MatchLabelLen, VariableTable);
1383      if (MatchLabelPos == StringRef::npos) {
1384        hasError = true;
1385        break;
1386      }
1387
1388      CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1389      Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1390      ++j;
1391    }
1392
1393    for ( ; i != j; ++i) {
1394      const CheckString &CheckStr = CheckStrings[i];
1395
1396      // Check each string within the scanned region, including a second check
1397      // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1398      size_t MatchLen = 0;
1399      size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1400                                       VariableTable);
1401
1402      if (MatchPos == StringRef::npos) {
1403        hasError = true;
1404        i = j;
1405        break;
1406      }
1407
1408      CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1409    }
1410
1411    if (j == e)
1412      break;
1413  }
1414
1415  return hasError ? 1 : 0;
1416}
1417