1//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// FileCheck does a line-by line check of a file that validates whether it
11// contains the expected content.  This is useful for regression tests etc.
12//
13// This program exits with an error status of 2 on error, exit status of 0 if
14// the file matched the expected contents, and exit status of 1 if it did not
15// contain the expected contents.
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/ADT/StringMap.h"
22#include "llvm/ADT/StringSet.h"
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/PrettyStackTrace.h"
26#include "llvm/Support/Regex.h"
27#include "llvm/Support/Signals.h"
28#include "llvm/Support/SourceMgr.h"
29#include "llvm/Support/raw_ostream.h"
30#include <algorithm>
31#include <cctype>
32#include <map>
33#include <string>
34#include <system_error>
35#include <vector>
36using namespace llvm;
37
38static cl::opt<std::string>
39CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40
41static cl::opt<std::string>
42InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43              cl::init("-"), cl::value_desc("filename"));
44
45static cl::list<std::string>
46CheckPrefixes("check-prefix",
47              cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48
49static cl::opt<bool>
50NoCanonicalizeWhiteSpace("strict-whitespace",
51              cl::desc("Do not treat all horizontal whitespace as equivalent"));
52
53static cl::list<std::string> ImplicitCheckNot(
54    "implicit-check-not",
55    cl::desc("Add an implicit negative check with this pattern to every\n"
56             "positive check. This can be used to ensure that no instances of\n"
57             "this pattern occur which are not matched by a positive pattern"),
58    cl::value_desc("pattern"));
59
60static cl::opt<bool> AllowEmptyInput(
61    "allow-empty", cl::init(false),
62    cl::desc("Allow the input file to be empty. This is useful when making\n"
63             "checks that some error message does not occur, for example."));
64
65typedef cl::list<std::string>::const_iterator prefix_iterator;
66
67//===----------------------------------------------------------------------===//
68// Pattern Handling Code.
69//===----------------------------------------------------------------------===//
70
71namespace Check {
72  enum CheckType {
73    CheckNone = 0,
74    CheckPlain,
75    CheckNext,
76    CheckSame,
77    CheckNot,
78    CheckDAG,
79    CheckLabel,
80
81    /// MatchEOF - When set, this pattern only matches the end of file. This is
82    /// used for trailing CHECK-NOTs.
83    CheckEOF
84  };
85}
86
87class Pattern {
88  SMLoc PatternLoc;
89
90  Check::CheckType CheckTy;
91
92  /// FixedStr - If non-empty, this pattern is a fixed string match with the
93  /// specified fixed string.
94  StringRef FixedStr;
95
96  /// RegEx - If non-empty, this is a regex pattern.
97  std::string RegExStr;
98
99  /// \brief Contains the number of line this pattern is in.
100  unsigned LineNumber;
101
102  /// VariableUses - Entries in this vector map to uses of a variable in the
103  /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
104  /// "foobaz" and we'll get an entry in this vector that tells us to insert the
105  /// value of bar at offset 3.
106  std::vector<std::pair<StringRef, unsigned> > VariableUses;
107
108  /// VariableDefs - Maps definitions of variables to their parenthesized
109  /// capture numbers.
110  /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
111  std::map<StringRef, unsigned> VariableDefs;
112
113public:
114
115  Pattern(Check::CheckType Ty)
116    : CheckTy(Ty) { }
117
118  /// getLoc - Return the location in source code.
119  SMLoc getLoc() const { return PatternLoc; }
120
121  /// ParsePattern - Parse the given string into the Pattern. Prefix provides
122  /// which prefix is being matched, SM provides the SourceMgr used for error
123  /// reports, and LineNumber is the line number in the input file from which
124  /// the pattern string was read.  Returns true in case of an error, false
125  /// otherwise.
126  bool ParsePattern(StringRef PatternStr,
127                    StringRef Prefix,
128                    SourceMgr &SM,
129                    unsigned LineNumber);
130
131  /// Match - Match the pattern string against the input buffer Buffer.  This
132  /// returns the position that is matched or npos if there is no match.  If
133  /// there is a match, the size of the matched string is returned in MatchLen.
134  ///
135  /// The VariableTable StringMap provides the current values of filecheck
136  /// variables and is updated if this match defines new values.
137  size_t Match(StringRef Buffer, size_t &MatchLen,
138               StringMap<StringRef> &VariableTable) const;
139
140  /// PrintFailureInfo - Print additional information about a failure to match
141  /// involving this pattern.
142  void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
143                        const StringMap<StringRef> &VariableTable) const;
144
145  bool hasVariable() const { return !(VariableUses.empty() &&
146                                      VariableDefs.empty()); }
147
148  Check::CheckType getCheckTy() const { return CheckTy; }
149
150private:
151  bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
152  void AddBackrefToRegEx(unsigned BackrefNum);
153
154  /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
155  /// matching this pattern at the start of \arg Buffer; a distance of zero
156  /// should correspond to a perfect match.
157  unsigned ComputeMatchDistance(StringRef Buffer,
158                               const StringMap<StringRef> &VariableTable) const;
159
160  /// \brief Evaluates expression and stores the result to \p Value.
161  /// \return true on success. false when the expression has invalid syntax.
162  bool EvaluateExpression(StringRef Expr, std::string &Value) const;
163
164  /// \brief Finds the closing sequence of a regex variable usage or
165  /// definition. Str has to point in the beginning of the definition
166  /// (right after the opening sequence).
167  /// \return offset of the closing sequence within Str, or npos if it was not
168  /// found.
169  size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
170};
171
172
173bool Pattern::ParsePattern(StringRef PatternStr,
174                           StringRef Prefix,
175                           SourceMgr &SM,
176                           unsigned LineNumber) {
177  this->LineNumber = LineNumber;
178  PatternLoc = SMLoc::getFromPointer(PatternStr.data());
179
180  // Ignore trailing whitespace.
181  while (!PatternStr.empty() &&
182         (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
183    PatternStr = PatternStr.substr(0, PatternStr.size()-1);
184
185  // Check that there is something on the line.
186  if (PatternStr.empty()) {
187    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
188                    "found empty check string with prefix '" +
189                    Prefix + ":'");
190    return true;
191  }
192
193  // Check to see if this is a fixed string, or if it has regex pieces.
194  if (PatternStr.size() < 2 ||
195      (PatternStr.find("{{") == StringRef::npos &&
196       PatternStr.find("[[") == StringRef::npos)) {
197    FixedStr = PatternStr;
198    return false;
199  }
200
201  // Paren value #0 is for the fully matched string.  Any new parenthesized
202  // values add from there.
203  unsigned CurParen = 1;
204
205  // Otherwise, there is at least one regex piece.  Build up the regex pattern
206  // by escaping scary characters in fixed strings, building up one big regex.
207  while (!PatternStr.empty()) {
208    // RegEx matches.
209    if (PatternStr.startswith("{{")) {
210      // This is the start of a regex match.  Scan for the }}.
211      size_t End = PatternStr.find("}}");
212      if (End == StringRef::npos) {
213        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
214                        SourceMgr::DK_Error,
215                        "found start of regex string with no end '}}'");
216        return true;
217      }
218
219      // Enclose {{}} patterns in parens just like [[]] even though we're not
220      // capturing the result for any purpose.  This is required in case the
221      // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
222      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
223      RegExStr += '(';
224      ++CurParen;
225
226      if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
227        return true;
228      RegExStr += ')';
229
230      PatternStr = PatternStr.substr(End+2);
231      continue;
232    }
233
234    // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
235    // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
236    // second form is [[foo]] which is a reference to foo.  The variable name
237    // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
238    // it.  This is to catch some common errors.
239    if (PatternStr.startswith("[[")) {
240      // Find the closing bracket pair ending the match.  End is going to be an
241      // offset relative to the beginning of the match string.
242      size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
243
244      if (End == StringRef::npos) {
245        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
246                        SourceMgr::DK_Error,
247                        "invalid named regex reference, no ]] found");
248        return true;
249      }
250
251      StringRef MatchStr = PatternStr.substr(2, End);
252      PatternStr = PatternStr.substr(End+4);
253
254      // Get the regex name (e.g. "foo").
255      size_t NameEnd = MatchStr.find(':');
256      StringRef Name = MatchStr.substr(0, NameEnd);
257
258      if (Name.empty()) {
259        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
260                        "invalid name in named regex: empty name");
261        return true;
262      }
263
264      // Verify that the name/expression is well formed. FileCheck currently
265      // supports @LINE, @LINE+number, @LINE-number expressions. The check here
266      // is relaxed, more strict check is performed in \c EvaluateExpression.
267      bool IsExpression = false;
268      for (unsigned i = 0, e = Name.size(); i != e; ++i) {
269        if (i == 0 && Name[i] == '@') {
270          if (NameEnd != StringRef::npos) {
271            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
272                            SourceMgr::DK_Error,
273                            "invalid name in named regex definition");
274            return true;
275          }
276          IsExpression = true;
277          continue;
278        }
279        if (Name[i] != '_' && !isalnum(Name[i]) &&
280            (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
281          SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
282                          SourceMgr::DK_Error, "invalid name in named regex");
283          return true;
284        }
285      }
286
287      // Name can't start with a digit.
288      if (isdigit(static_cast<unsigned char>(Name[0]))) {
289        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
290                        "invalid name in named regex");
291        return true;
292      }
293
294      // Handle [[foo]].
295      if (NameEnd == StringRef::npos) {
296        // Handle variables that were defined earlier on the same line by
297        // emitting a backreference.
298        if (VariableDefs.find(Name) != VariableDefs.end()) {
299          unsigned VarParenNum = VariableDefs[Name];
300          if (VarParenNum < 1 || VarParenNum > 9) {
301            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
302                            SourceMgr::DK_Error,
303                            "Can't back-reference more than 9 variables");
304            return true;
305          }
306          AddBackrefToRegEx(VarParenNum);
307        } else {
308          VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
309        }
310        continue;
311      }
312
313      // Handle [[foo:.*]].
314      VariableDefs[Name] = CurParen;
315      RegExStr += '(';
316      ++CurParen;
317
318      if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
319        return true;
320
321      RegExStr += ')';
322    }
323
324    // Handle fixed string matches.
325    // Find the end, which is the start of the next regex.
326    size_t FixedMatchEnd = PatternStr.find("{{");
327    FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
328    RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
329    PatternStr = PatternStr.substr(FixedMatchEnd);
330  }
331
332  return false;
333}
334
335bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
336                              SourceMgr &SM) {
337  Regex R(RS);
338  std::string Error;
339  if (!R.isValid(Error)) {
340    SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
341                    "invalid regex: " + Error);
342    return true;
343  }
344
345  RegExStr += RS.str();
346  CurParen += R.getNumMatches();
347  return false;
348}
349
350void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
351  assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
352  std::string Backref = std::string("\\") +
353                        std::string(1, '0' + BackrefNum);
354  RegExStr += Backref;
355}
356
357bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
358  // The only supported expression is @LINE([\+-]\d+)?
359  if (!Expr.startswith("@LINE"))
360    return false;
361  Expr = Expr.substr(StringRef("@LINE").size());
362  int Offset = 0;
363  if (!Expr.empty()) {
364    if (Expr[0] == '+')
365      Expr = Expr.substr(1);
366    else if (Expr[0] != '-')
367      return false;
368    if (Expr.getAsInteger(10, Offset))
369      return false;
370  }
371  Value = llvm::itostr(LineNumber + Offset);
372  return true;
373}
374
375/// Match - Match the pattern string against the input buffer Buffer.  This
376/// returns the position that is matched or npos if there is no match.  If
377/// there is a match, the size of the matched string is returned in MatchLen.
378size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
379                      StringMap<StringRef> &VariableTable) const {
380  // If this is the EOF pattern, match it immediately.
381  if (CheckTy == Check::CheckEOF) {
382    MatchLen = 0;
383    return Buffer.size();
384  }
385
386  // If this is a fixed string pattern, just match it now.
387  if (!FixedStr.empty()) {
388    MatchLen = FixedStr.size();
389    return Buffer.find(FixedStr);
390  }
391
392  // Regex match.
393
394  // If there are variable uses, we need to create a temporary string with the
395  // actual value.
396  StringRef RegExToMatch = RegExStr;
397  std::string TmpStr;
398  if (!VariableUses.empty()) {
399    TmpStr = RegExStr;
400
401    unsigned InsertOffset = 0;
402    for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
403      std::string Value;
404
405      if (VariableUses[i].first[0] == '@') {
406        if (!EvaluateExpression(VariableUses[i].first, Value))
407          return StringRef::npos;
408      } else {
409        StringMap<StringRef>::iterator it =
410          VariableTable.find(VariableUses[i].first);
411        // If the variable is undefined, return an error.
412        if (it == VariableTable.end())
413          return StringRef::npos;
414
415        // Look up the value and escape it so that we can put it into the regex.
416        Value += Regex::escape(it->second);
417      }
418
419      // Plop it into the regex at the adjusted offset.
420      TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
421                    Value.begin(), Value.end());
422      InsertOffset += Value.size();
423    }
424
425    // Match the newly constructed regex.
426    RegExToMatch = TmpStr;
427  }
428
429
430  SmallVector<StringRef, 4> MatchInfo;
431  if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
432    return StringRef::npos;
433
434  // Successful regex match.
435  assert(!MatchInfo.empty() && "Didn't get any match");
436  StringRef FullMatch = MatchInfo[0];
437
438  // If this defines any variables, remember their values.
439  for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
440                                                     E = VariableDefs.end();
441       I != E; ++I) {
442    assert(I->second < MatchInfo.size() && "Internal paren error");
443    VariableTable[I->first] = MatchInfo[I->second];
444  }
445
446  MatchLen = FullMatch.size();
447  return FullMatch.data()-Buffer.data();
448}
449
450unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
451                              const StringMap<StringRef> &VariableTable) const {
452  // Just compute the number of matching characters. For regular expressions, we
453  // just compare against the regex itself and hope for the best.
454  //
455  // FIXME: One easy improvement here is have the regex lib generate a single
456  // example regular expression which matches, and use that as the example
457  // string.
458  StringRef ExampleString(FixedStr);
459  if (ExampleString.empty())
460    ExampleString = RegExStr;
461
462  // Only compare up to the first line in the buffer, or the string size.
463  StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
464  BufferPrefix = BufferPrefix.split('\n').first;
465  return BufferPrefix.edit_distance(ExampleString);
466}
467
468void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
469                               const StringMap<StringRef> &VariableTable) const{
470  // If this was a regular expression using variables, print the current
471  // variable values.
472  if (!VariableUses.empty()) {
473    for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
474      SmallString<256> Msg;
475      raw_svector_ostream OS(Msg);
476      StringRef Var = VariableUses[i].first;
477      if (Var[0] == '@') {
478        std::string Value;
479        if (EvaluateExpression(Var, Value)) {
480          OS << "with expression \"";
481          OS.write_escaped(Var) << "\" equal to \"";
482          OS.write_escaped(Value) << "\"";
483        } else {
484          OS << "uses incorrect expression \"";
485          OS.write_escaped(Var) << "\"";
486        }
487      } else {
488        StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
489
490        // Check for undefined variable references.
491        if (it == VariableTable.end()) {
492          OS << "uses undefined variable \"";
493          OS.write_escaped(Var) << "\"";
494        } else {
495          OS << "with variable \"";
496          OS.write_escaped(Var) << "\" equal to \"";
497          OS.write_escaped(it->second) << "\"";
498        }
499      }
500
501      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
502                      OS.str());
503    }
504  }
505
506  // Attempt to find the closest/best fuzzy match.  Usually an error happens
507  // because some string in the output didn't exactly match. In these cases, we
508  // would like to show the user a best guess at what "should have" matched, to
509  // save them having to actually check the input manually.
510  size_t NumLinesForward = 0;
511  size_t Best = StringRef::npos;
512  double BestQuality = 0;
513
514  // Use an arbitrary 4k limit on how far we will search.
515  for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
516    if (Buffer[i] == '\n')
517      ++NumLinesForward;
518
519    // Patterns have leading whitespace stripped, so skip whitespace when
520    // looking for something which looks like a pattern.
521    if (Buffer[i] == ' ' || Buffer[i] == '\t')
522      continue;
523
524    // Compute the "quality" of this match as an arbitrary combination of the
525    // match distance and the number of lines skipped to get to this match.
526    unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
527    double Quality = Distance + (NumLinesForward / 100.);
528
529    if (Quality < BestQuality || Best == StringRef::npos) {
530      Best = i;
531      BestQuality = Quality;
532    }
533  }
534
535  // Print the "possible intended match here" line if we found something
536  // reasonable and not equal to what we showed in the "scanning from here"
537  // line.
538  if (Best && Best != StringRef::npos && BestQuality < 50) {
539      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
540                      SourceMgr::DK_Note, "possible intended match here");
541
542    // FIXME: If we wanted to be really friendly we would show why the match
543    // failed, as it can be hard to spot simple one character differences.
544  }
545}
546
547size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
548  // Offset keeps track of the current offset within the input Str
549  size_t Offset = 0;
550  // [...] Nesting depth
551  size_t BracketDepth = 0;
552
553  while (!Str.empty()) {
554    if (Str.startswith("]]") && BracketDepth == 0)
555      return Offset;
556    if (Str[0] == '\\') {
557      // Backslash escapes the next char within regexes, so skip them both.
558      Str = Str.substr(2);
559      Offset += 2;
560    } else {
561      switch (Str[0]) {
562        default:
563          break;
564        case '[':
565          BracketDepth++;
566          break;
567        case ']':
568          if (BracketDepth == 0) {
569            SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
570                            SourceMgr::DK_Error,
571                            "missing closing \"]\" for regex variable");
572            exit(1);
573          }
574          BracketDepth--;
575          break;
576      }
577      Str = Str.substr(1);
578      Offset++;
579    }
580  }
581
582  return StringRef::npos;
583}
584
585
586//===----------------------------------------------------------------------===//
587// Check Strings.
588//===----------------------------------------------------------------------===//
589
590/// CheckString - This is a check that we found in the input file.
591struct CheckString {
592  /// Pat - The pattern to match.
593  Pattern Pat;
594
595  /// Prefix - Which prefix name this check matched.
596  StringRef Prefix;
597
598  /// Loc - The location in the match file that the check string was specified.
599  SMLoc Loc;
600
601  /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
602  /// as opposed to a CHECK: directive.
603  Check::CheckType CheckTy;
604
605  /// DagNotStrings - These are all of the strings that are disallowed from
606  /// occurring between this match string and the previous one (or start of
607  /// file).
608  std::vector<Pattern> DagNotStrings;
609
610
611  CheckString(const Pattern &P,
612              StringRef S,
613              SMLoc L,
614              Check::CheckType Ty)
615    : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
616
617  /// Check - Match check string and its "not strings" and/or "dag strings".
618  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
619               size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
620
621  /// CheckNext - Verify there is a single line in the given buffer.
622  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
623
624  /// CheckSame - Verify there is no newline in the given buffer.
625  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
626
627  /// CheckNot - Verify there's no "not strings" in the given buffer.
628  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
629                const std::vector<const Pattern *> &NotStrings,
630                StringMap<StringRef> &VariableTable) const;
631
632  /// CheckDag - Match "dag strings" and their mixed "not strings".
633  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
634                  std::vector<const Pattern *> &NotStrings,
635                  StringMap<StringRef> &VariableTable) const;
636};
637
638/// Canonicalize whitespaces in the input file. Line endings are replaced
639/// with UNIX-style '\n'.
640///
641/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
642/// characters to a single space.
643static std::unique_ptr<MemoryBuffer>
644CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
645                      bool PreserveHorizontal) {
646  SmallString<128> NewFile;
647  NewFile.reserve(MB->getBufferSize());
648
649  for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
650       Ptr != End; ++Ptr) {
651    // Eliminate trailing dosish \r.
652    if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
653      continue;
654    }
655
656    // If current char is not a horizontal whitespace or if horizontal
657    // whitespace canonicalization is disabled, dump it to output as is.
658    if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
659      NewFile.push_back(*Ptr);
660      continue;
661    }
662
663    // Otherwise, add one space and advance over neighboring space.
664    NewFile.push_back(' ');
665    while (Ptr+1 != End &&
666           (Ptr[1] == ' ' || Ptr[1] == '\t'))
667      ++Ptr;
668  }
669
670  return std::unique_ptr<MemoryBuffer>(
671      MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
672}
673
674static bool IsPartOfWord(char c) {
675  return (isalnum(c) || c == '-' || c == '_');
676}
677
678// Get the size of the prefix extension.
679static size_t CheckTypeSize(Check::CheckType Ty) {
680  switch (Ty) {
681  case Check::CheckNone:
682    return 0;
683
684  case Check::CheckPlain:
685    return sizeof(":") - 1;
686
687  case Check::CheckNext:
688    return sizeof("-NEXT:") - 1;
689
690  case Check::CheckSame:
691    return sizeof("-SAME:") - 1;
692
693  case Check::CheckNot:
694    return sizeof("-NOT:") - 1;
695
696  case Check::CheckDAG:
697    return sizeof("-DAG:") - 1;
698
699  case Check::CheckLabel:
700    return sizeof("-LABEL:") - 1;
701
702  case Check::CheckEOF:
703    llvm_unreachable("Should not be using EOF size");
704  }
705
706  llvm_unreachable("Bad check type");
707}
708
709static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
710  char NextChar = Buffer[Prefix.size()];
711
712  // Verify that the : is present after the prefix.
713  if (NextChar == ':')
714    return Check::CheckPlain;
715
716  if (NextChar != '-')
717    return Check::CheckNone;
718
719  StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
720  if (Rest.startswith("NEXT:"))
721    return Check::CheckNext;
722
723  if (Rest.startswith("SAME:"))
724    return Check::CheckSame;
725
726  if (Rest.startswith("NOT:"))
727    return Check::CheckNot;
728
729  if (Rest.startswith("DAG:"))
730    return Check::CheckDAG;
731
732  if (Rest.startswith("LABEL:"))
733    return Check::CheckLabel;
734
735  return Check::CheckNone;
736}
737
738// From the given position, find the next character after the word.
739static size_t SkipWord(StringRef Str, size_t Loc) {
740  while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
741    ++Loc;
742  return Loc;
743}
744
745// Try to find the first match in buffer for any prefix. If a valid match is
746// found, return that prefix and set its type and location.  If there are almost
747// matches (e.g. the actual prefix string is found, but is not an actual check
748// string), but no valid match, return an empty string and set the position to
749// resume searching from. If no partial matches are found, return an empty
750// string and the location will be StringRef::npos. If one prefix is a substring
751// of another, the maximal match should be found. e.g. if "A" and "AA" are
752// prefixes then AA-CHECK: should match the second one.
753static StringRef FindFirstCandidateMatch(StringRef &Buffer,
754                                         Check::CheckType &CheckTy,
755                                         size_t &CheckLoc) {
756  StringRef FirstPrefix;
757  size_t FirstLoc = StringRef::npos;
758  size_t SearchLoc = StringRef::npos;
759  Check::CheckType FirstTy = Check::CheckNone;
760
761  CheckTy = Check::CheckNone;
762  CheckLoc = StringRef::npos;
763
764  for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
765       I != E; ++I) {
766    StringRef Prefix(*I);
767    size_t PrefixLoc = Buffer.find(Prefix);
768
769    if (PrefixLoc == StringRef::npos)
770      continue;
771
772    // Track where we are searching for invalid prefixes that look almost right.
773    // We need to only advance to the first partial match on the next attempt
774    // since a partial match could be a substring of a later, valid prefix.
775    // Need to skip to the end of the word, otherwise we could end up
776    // matching a prefix in a substring later.
777    if (PrefixLoc < SearchLoc)
778      SearchLoc = SkipWord(Buffer, PrefixLoc);
779
780    // We only want to find the first match to avoid skipping some.
781    if (PrefixLoc > FirstLoc)
782      continue;
783    // If one matching check-prefix is a prefix of another, choose the
784    // longer one.
785    if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
786      continue;
787
788    StringRef Rest = Buffer.drop_front(PrefixLoc);
789    // Make sure we have actually found the prefix, and not a word containing
790    // it. This should also prevent matching the wrong prefix when one is a
791    // substring of another.
792    if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
793      FirstTy = Check::CheckNone;
794    else
795      FirstTy = FindCheckType(Rest, Prefix);
796
797    FirstLoc = PrefixLoc;
798    FirstPrefix = Prefix;
799  }
800
801  // If the first prefix is invalid, we should continue the search after it.
802  if (FirstTy == Check::CheckNone) {
803    CheckLoc = SearchLoc;
804    return "";
805  }
806
807  CheckTy = FirstTy;
808  CheckLoc = FirstLoc;
809  return FirstPrefix;
810}
811
812static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
813                                         unsigned &LineNumber,
814                                         Check::CheckType &CheckTy,
815                                         size_t &CheckLoc) {
816  while (!Buffer.empty()) {
817    StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
818    // If we found a real match, we are done.
819    if (!Prefix.empty()) {
820      LineNumber += Buffer.substr(0, CheckLoc).count('\n');
821      return Prefix;
822    }
823
824    // We didn't find any almost matches either, we are also done.
825    if (CheckLoc == StringRef::npos)
826      return StringRef();
827
828    LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
829
830    // Advance to the last possible match we found and try again.
831    Buffer = Buffer.drop_front(CheckLoc + 1);
832  }
833
834  return StringRef();
835}
836
837/// ReadCheckFile - Read the check file, which specifies the sequence of
838/// expected strings.  The strings are added to the CheckStrings vector.
839/// Returns true in case of an error, false otherwise.
840static bool ReadCheckFile(SourceMgr &SM,
841                          std::vector<CheckString> &CheckStrings) {
842  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
843      MemoryBuffer::getFileOrSTDIN(CheckFilename);
844  if (std::error_code EC = FileOrErr.getError()) {
845    errs() << "Could not open check file '" << CheckFilename
846           << "': " << EC.message() << '\n';
847    return true;
848  }
849
850  // If we want to canonicalize whitespace, strip excess whitespace from the
851  // buffer containing the CHECK lines. Remove DOS style line endings.
852  std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
853      std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
854
855  // Find all instances of CheckPrefix followed by : in the file.
856  StringRef Buffer = F->getBuffer();
857
858  SM.AddNewSourceBuffer(std::move(F), SMLoc());
859
860  std::vector<Pattern> ImplicitNegativeChecks;
861  for (const auto &PatternString : ImplicitCheckNot) {
862    // Create a buffer with fake command line content in order to display the
863    // command line option responsible for the specific implicit CHECK-NOT.
864    std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
865    std::string Suffix = "'";
866    std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
867        Prefix + PatternString + Suffix, "command line");
868
869    StringRef PatternInBuffer =
870        CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
871    SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
872
873    ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
874    ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
875                                               "IMPLICIT-CHECK", SM, 0);
876  }
877
878
879  std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
880
881  // LineNumber keeps track of the line on which CheckPrefix instances are
882  // found.
883  unsigned LineNumber = 1;
884
885  while (1) {
886    Check::CheckType CheckTy;
887    size_t PrefixLoc;
888
889    // See if a prefix occurs in the memory buffer.
890    StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
891                                                   LineNumber,
892                                                   CheckTy,
893                                                   PrefixLoc);
894    if (UsedPrefix.empty())
895      break;
896
897    Buffer = Buffer.drop_front(PrefixLoc);
898
899    // Location to use for error messages.
900    const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
901
902    // PrefixLoc is to the start of the prefix. Skip to the end.
903    Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
904
905    // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
906    // leading and trailing whitespace.
907    Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
908
909    // Scan ahead to the end of line.
910    size_t EOL = Buffer.find_first_of("\n\r");
911
912    // Remember the location of the start of the pattern, for diagnostics.
913    SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
914
915    // Parse the pattern.
916    Pattern P(CheckTy);
917    if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
918      return true;
919
920    // Verify that CHECK-LABEL lines do not define or use variables
921    if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
922      SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
923                      SourceMgr::DK_Error,
924                      "found '" + UsedPrefix + "-LABEL:'"
925                      " with variable definition or use");
926      return true;
927    }
928
929    Buffer = Buffer.substr(EOL);
930
931    // Verify that CHECK-NEXT lines have at least one CHECK line before them.
932    if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
933        CheckStrings.empty()) {
934      StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
935      SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
936                      SourceMgr::DK_Error,
937                      "found '" + UsedPrefix + "-" + Type + "' without previous '"
938                      + UsedPrefix + ": line");
939      return true;
940    }
941
942    // Handle CHECK-DAG/-NOT.
943    if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
944      DagNotMatches.push_back(P);
945      continue;
946    }
947
948    // Okay, add the string we captured to the output vector and move on.
949    CheckStrings.push_back(CheckString(P,
950                                       UsedPrefix,
951                                       PatternLoc,
952                                       CheckTy));
953    std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
954    DagNotMatches = ImplicitNegativeChecks;
955  }
956
957  // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
958  // prefix as a filler for the error message.
959  if (!DagNotMatches.empty()) {
960    CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
961                                       CheckPrefixes[0],
962                                       SMLoc::getFromPointer(Buffer.data()),
963                                       Check::CheckEOF));
964    std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
965  }
966
967  if (CheckStrings.empty()) {
968    errs() << "error: no check strings found with prefix"
969           << (CheckPrefixes.size() > 1 ? "es " : " ");
970    for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
971      StringRef Prefix(CheckPrefixes[I]);
972      errs() << '\'' << Prefix << ":'";
973      if (I != N - 1)
974        errs() << ", ";
975    }
976
977    errs() << '\n';
978    return true;
979  }
980
981  return false;
982}
983
984static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
985                             const Pattern &Pat, StringRef Buffer,
986                             StringMap<StringRef> &VariableTable) {
987  // Otherwise, we have an error, emit an error message.
988  SM.PrintMessage(Loc, SourceMgr::DK_Error,
989                  "expected string not found in input");
990
991  // Print the "scanning from here" line.  If the current position is at the
992  // end of a line, advance to the start of the next line.
993  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
994
995  SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
996                  "scanning from here");
997
998  // Allow the pattern to print additional information if desired.
999  Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1000}
1001
1002static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1003                             StringRef Buffer,
1004                             StringMap<StringRef> &VariableTable) {
1005  PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1006}
1007
1008/// CountNumNewlinesBetween - Count the number of newlines in the specified
1009/// range.
1010static unsigned CountNumNewlinesBetween(StringRef Range,
1011                                        const char *&FirstNewLine) {
1012  unsigned NumNewLines = 0;
1013  while (1) {
1014    // Scan for newline.
1015    Range = Range.substr(Range.find_first_of("\n\r"));
1016    if (Range.empty()) return NumNewLines;
1017
1018    ++NumNewLines;
1019
1020    // Handle \n\r and \r\n as a single newline.
1021    if (Range.size() > 1 &&
1022        (Range[1] == '\n' || Range[1] == '\r') &&
1023        (Range[0] != Range[1]))
1024      Range = Range.substr(1);
1025    Range = Range.substr(1);
1026
1027    if (NumNewLines == 1)
1028      FirstNewLine = Range.begin();
1029  }
1030}
1031
1032size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1033                          bool IsLabelScanMode, size_t &MatchLen,
1034                          StringMap<StringRef> &VariableTable) const {
1035  size_t LastPos = 0;
1036  std::vector<const Pattern *> NotStrings;
1037
1038  // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1039  // bounds; we have not processed variable definitions within the bounded block
1040  // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1041  // over the block again (including the last CHECK-LABEL) in normal mode.
1042  if (!IsLabelScanMode) {
1043    // Match "dag strings" (with mixed "not strings" if any).
1044    LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1045    if (LastPos == StringRef::npos)
1046      return StringRef::npos;
1047  }
1048
1049  // Match itself from the last position after matching CHECK-DAG.
1050  StringRef MatchBuffer = Buffer.substr(LastPos);
1051  size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1052  if (MatchPos == StringRef::npos) {
1053    PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1054    return StringRef::npos;
1055  }
1056
1057  // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1058  // or CHECK-NOT
1059  if (!IsLabelScanMode) {
1060    StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1061
1062    // If this check is a "CHECK-NEXT", verify that the previous match was on
1063    // the previous line (i.e. that there is one newline between them).
1064    if (CheckNext(SM, SkippedRegion))
1065      return StringRef::npos;
1066
1067    // If this check is a "CHECK-SAME", verify that the previous match was on
1068    // the same line (i.e. that there is no newline between them).
1069    if (CheckSame(SM, SkippedRegion))
1070      return StringRef::npos;
1071
1072    // If this match had "not strings", verify that they don't exist in the
1073    // skipped region.
1074    if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1075      return StringRef::npos;
1076  }
1077
1078  return LastPos + MatchPos;
1079}
1080
1081bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1082  if (CheckTy != Check::CheckNext)
1083    return false;
1084
1085  // Count the number of newlines between the previous match and this one.
1086  assert(Buffer.data() !=
1087         SM.getMemoryBuffer(
1088           SM.FindBufferContainingLoc(
1089             SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1090         "CHECK-NEXT can't be the first check in a file");
1091
1092  const char *FirstNewLine = nullptr;
1093  unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1094
1095  if (NumNewLines == 0) {
1096    SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1097                    "-NEXT: is on the same line as previous match");
1098    SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1099                    SourceMgr::DK_Note, "'next' match was here");
1100    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1101                    "previous match ended here");
1102    return true;
1103  }
1104
1105  if (NumNewLines != 1) {
1106    SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1107                    "-NEXT: is not on the line after the previous match");
1108    SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1109                    SourceMgr::DK_Note, "'next' match was here");
1110    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1111                    "previous match ended here");
1112    SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1113                    "non-matching line after previous match is here");
1114    return true;
1115  }
1116
1117  return false;
1118}
1119
1120bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1121  if (CheckTy != Check::CheckSame)
1122    return false;
1123
1124  // Count the number of newlines between the previous match and this one.
1125  assert(Buffer.data() !=
1126             SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1127                                    SMLoc::getFromPointer(Buffer.data())))
1128                 ->getBufferStart() &&
1129         "CHECK-SAME can't be the first check in a file");
1130
1131  const char *FirstNewLine = nullptr;
1132  unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1133
1134  if (NumNewLines != 0) {
1135    SM.PrintMessage(Loc, SourceMgr::DK_Error,
1136                    Prefix +
1137                        "-SAME: is not on the same line as the previous match");
1138    SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1139                    "'next' match was here");
1140    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1141                    "previous match ended here");
1142    return true;
1143  }
1144
1145  return false;
1146}
1147
1148bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1149                           const std::vector<const Pattern *> &NotStrings,
1150                           StringMap<StringRef> &VariableTable) const {
1151  for (unsigned ChunkNo = 0, e = NotStrings.size();
1152       ChunkNo != e; ++ChunkNo) {
1153    const Pattern *Pat = NotStrings[ChunkNo];
1154    assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1155
1156    size_t MatchLen = 0;
1157    size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1158
1159    if (Pos == StringRef::npos) continue;
1160
1161    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1162                    SourceMgr::DK_Error,
1163                    Prefix + "-NOT: string occurred!");
1164    SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1165                    Prefix + "-NOT: pattern specified here");
1166    return true;
1167  }
1168
1169  return false;
1170}
1171
1172size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1173                             std::vector<const Pattern *> &NotStrings,
1174                             StringMap<StringRef> &VariableTable) const {
1175  if (DagNotStrings.empty())
1176    return 0;
1177
1178  size_t LastPos = 0;
1179  size_t StartPos = LastPos;
1180
1181  for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1182       ChunkNo != e; ++ChunkNo) {
1183    const Pattern &Pat = DagNotStrings[ChunkNo];
1184
1185    assert((Pat.getCheckTy() == Check::CheckDAG ||
1186            Pat.getCheckTy() == Check::CheckNot) &&
1187           "Invalid CHECK-DAG or CHECK-NOT!");
1188
1189    if (Pat.getCheckTy() == Check::CheckNot) {
1190      NotStrings.push_back(&Pat);
1191      continue;
1192    }
1193
1194    assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1195
1196    size_t MatchLen = 0, MatchPos;
1197
1198    // CHECK-DAG always matches from the start.
1199    StringRef MatchBuffer = Buffer.substr(StartPos);
1200    MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1201    // With a group of CHECK-DAGs, a single mismatching means the match on
1202    // that group of CHECK-DAGs fails immediately.
1203    if (MatchPos == StringRef::npos) {
1204      PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1205      return StringRef::npos;
1206    }
1207    // Re-calc it as the offset relative to the start of the original string.
1208    MatchPos += StartPos;
1209
1210    if (!NotStrings.empty()) {
1211      if (MatchPos < LastPos) {
1212        // Reordered?
1213        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1214                        SourceMgr::DK_Error,
1215                        Prefix + "-DAG: found a match of CHECK-DAG"
1216                        " reordering across a CHECK-NOT");
1217        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1218                        SourceMgr::DK_Note,
1219                        Prefix + "-DAG: the farthest match of CHECK-DAG"
1220                        " is found here");
1221        SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1222                        Prefix + "-NOT: the crossed pattern specified"
1223                        " here");
1224        SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1225                        Prefix + "-DAG: the reordered pattern specified"
1226                        " here");
1227        return StringRef::npos;
1228      }
1229      // All subsequent CHECK-DAGs should be matched from the farthest
1230      // position of all precedent CHECK-DAGs (including this one.)
1231      StartPos = LastPos;
1232      // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1233      // CHECK-DAG, verify that there's no 'not' strings occurred in that
1234      // region.
1235      StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1236      if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1237        return StringRef::npos;
1238      // Clear "not strings".
1239      NotStrings.clear();
1240    }
1241
1242    // Update the last position with CHECK-DAG matches.
1243    LastPos = std::max(MatchPos + MatchLen, LastPos);
1244  }
1245
1246  return LastPos;
1247}
1248
1249// A check prefix must contain only alphanumeric, hyphens and underscores.
1250static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1251  Regex Validator("^[a-zA-Z0-9_-]*$");
1252  return Validator.match(CheckPrefix);
1253}
1254
1255static bool ValidateCheckPrefixes() {
1256  StringSet<> PrefixSet;
1257
1258  for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1259       I != E; ++I) {
1260    StringRef Prefix(*I);
1261
1262    // Reject empty prefixes.
1263    if (Prefix == "")
1264      return false;
1265
1266    if (!PrefixSet.insert(Prefix).second)
1267      return false;
1268
1269    if (!ValidateCheckPrefix(Prefix))
1270      return false;
1271  }
1272
1273  return true;
1274}
1275
1276// I don't think there's a way to specify an initial value for cl::list,
1277// so if nothing was specified, add the default
1278static void AddCheckPrefixIfNeeded() {
1279  if (CheckPrefixes.empty())
1280    CheckPrefixes.push_back("CHECK");
1281}
1282
1283int main(int argc, char **argv) {
1284  sys::PrintStackTraceOnErrorSignal();
1285  PrettyStackTraceProgram X(argc, argv);
1286  cl::ParseCommandLineOptions(argc, argv);
1287
1288  if (!ValidateCheckPrefixes()) {
1289    errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1290              "start with a letter and contain only alphanumeric characters, "
1291              "hyphens and underscores\n";
1292    return 2;
1293  }
1294
1295  AddCheckPrefixIfNeeded();
1296
1297  SourceMgr SM;
1298
1299  // Read the expected strings from the check file.
1300  std::vector<CheckString> CheckStrings;
1301  if (ReadCheckFile(SM, CheckStrings))
1302    return 2;
1303
1304  // Open the file to check and add it to SourceMgr.
1305  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1306      MemoryBuffer::getFileOrSTDIN(InputFilename);
1307  if (std::error_code EC = FileOrErr.getError()) {
1308    errs() << "Could not open input file '" << InputFilename
1309           << "': " << EC.message() << '\n';
1310    return 2;
1311  }
1312  std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1313
1314  if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1315    errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1316    return 2;
1317  }
1318
1319  // Remove duplicate spaces in the input file if requested.
1320  // Remove DOS style line endings.
1321  std::unique_ptr<MemoryBuffer> F =
1322      CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1323
1324  // Check that we have all of the expected strings, in order, in the input
1325  // file.
1326  StringRef Buffer = F->getBuffer();
1327
1328  SM.AddNewSourceBuffer(std::move(F), SMLoc());
1329
1330  /// VariableTable - This holds all the current filecheck variables.
1331  StringMap<StringRef> VariableTable;
1332
1333  bool hasError = false;
1334
1335  unsigned i = 0, j = 0, e = CheckStrings.size();
1336
1337  while (true) {
1338    StringRef CheckRegion;
1339    if (j == e) {
1340      CheckRegion = Buffer;
1341    } else {
1342      const CheckString &CheckLabelStr = CheckStrings[j];
1343      if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1344        ++j;
1345        continue;
1346      }
1347
1348      // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1349      size_t MatchLabelLen = 0;
1350      size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1351                                                 MatchLabelLen, VariableTable);
1352      if (MatchLabelPos == StringRef::npos) {
1353        hasError = true;
1354        break;
1355      }
1356
1357      CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1358      Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1359      ++j;
1360    }
1361
1362    for ( ; i != j; ++i) {
1363      const CheckString &CheckStr = CheckStrings[i];
1364
1365      // Check each string within the scanned region, including a second check
1366      // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1367      size_t MatchLen = 0;
1368      size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1369                                       VariableTable);
1370
1371      if (MatchPos == StringRef::npos) {
1372        hasError = true;
1373        i = j;
1374        break;
1375      }
1376
1377      CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1378    }
1379
1380    if (j == e)
1381      break;
1382  }
1383
1384  return hasError ? 1 : 0;
1385}
1386