FileCheck.cpp revision d9485ddfddc6c5342141703e39399850200bf746
1//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
12//
13// This program exits with an error status of 2 on error, exit status of 0 if
14// the file matched the expected contents, and exit status of 1 if it did not
15// contain the expected contents.
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/Support/CommandLine.h"
20#include "llvm/Support/MemoryBuffer.h"
21#include "llvm/Support/PrettyStackTrace.h"
22#include "llvm/Support/Regex.h"
23#include "llvm/Support/SourceMgr.h"
24#include "llvm/Support/raw_ostream.h"
25#include "llvm/System/Signals.h"
26using namespace llvm;
27
28static cl::opt<std::string>
29CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
30
31static cl::opt<std::string>
32InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
33              cl::init("-"), cl::value_desc("filename"));
34
35static cl::opt<std::string>
36CheckPrefix("check-prefix", cl::init("CHECK"),
37            cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
38
39static cl::opt<bool>
40NoCanonicalizeWhiteSpace("strict-whitespace",
41              cl::desc("Do not treat all horizontal whitespace as equivalent"));
42
43//===----------------------------------------------------------------------===//
44// Pattern Handling Code.
45//===----------------------------------------------------------------------===//
46
47class PatternChunk {
48  StringRef Str;
49  bool isRegEx;
50public:
51  PatternChunk(StringRef S, bool isRE) : Str(S), isRegEx(isRE) {}
52
53  size_t Match(StringRef Buffer, size_t &MatchLen) const {
54    if (!isRegEx) {
55      // Fixed string match.
56      MatchLen = Str.size();
57      return Buffer.find(Str);
58    }
59
60    // Regex match.
61    SmallVector<StringRef, 4> MatchInfo;
62    if (!Regex(Str, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo))
63      return StringRef::npos;
64
65    // Successful regex match.
66    assert(!MatchInfo.empty() && "Didn't get any match");
67    StringRef FullMatch = MatchInfo[0];
68
69    MatchLen = FullMatch.size();
70    return FullMatch.data()-Buffer.data();
71  }
72};
73
74class Pattern {
75  /// Chunks - The pattern chunks to match.  If the bool is false, it is a fixed
76  /// string match, if it is true, it is a regex match.
77  SmallVector<PatternChunk, 4> Chunks;
78public:
79
80  Pattern() { }
81
82  bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
83
84  /// Match - Match the pattern string against the input buffer Buffer.  This
85  /// returns the position that is matched or npos if there is no match.  If
86  /// there is a match, the size of the matched string is returned in MatchLen.
87  size_t Match(StringRef Buffer, size_t &MatchLen) const;
88};
89
90bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
91  // Ignore trailing whitespace.
92  while (!PatternStr.empty() &&
93         (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
94    PatternStr = PatternStr.substr(0, PatternStr.size()-1);
95
96  // Check that there is something on the line.
97  if (PatternStr.empty()) {
98    SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
99                    "found empty check string with prefix '"+CheckPrefix+":'",
100                    "error");
101    return true;
102  }
103
104  // Scan the pattern to break it into regex and non-regex pieces.
105  while (!PatternStr.empty()) {
106    // Handle fixed string matches.
107    if (PatternStr.size() < 2 ||
108        PatternStr[0] != '{' || PatternStr[1] != '{') {
109      // Find the end, which is the start of the next regex.
110      size_t FixedMatchEnd = PatternStr.find("{{");
111
112      Chunks.push_back(PatternChunk(PatternStr.substr(0, FixedMatchEnd),false));
113      PatternStr = PatternStr.substr(FixedMatchEnd);
114      continue;
115    }
116
117    // Otherwise, this is the start of a regex match.  Scan for the }}.
118    size_t End = PatternStr.find("}}");
119    if (End == StringRef::npos) {
120      SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
121                      "found start of regex string with no end '}}'", "error");
122      return true;
123    }
124
125    Regex R(PatternStr.substr(2, End-2));
126    std::string Error;
127    if (!R.isValid(Error)) {
128      SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2),
129                      "invalid regex: " + Error, "error");
130      return true;
131    }
132
133    Chunks.push_back(PatternChunk(PatternStr.substr(2, End-2), true));
134    PatternStr = PatternStr.substr(End+2);
135  }
136
137  return false;
138}
139
140/// Match - Match the pattern string against the input buffer Buffer.  This
141/// returns the position that is matched or npos if there is no match.  If
142/// there is a match, the size of the matched string is returned in MatchLen.
143size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const {
144  size_t FirstMatch = StringRef::npos;
145  MatchLen = 0;
146
147  while (!Buffer.empty()) {
148    StringRef MatchAttempt = Buffer;
149
150    unsigned ChunkNo = 0, e = Chunks.size();
151    for (; ChunkNo != e; ++ChunkNo) {
152      size_t ThisMatch, ThisLength = StringRef::npos;
153      ThisMatch = Chunks[ChunkNo].Match(MatchAttempt, ThisLength);
154
155      // Otherwise, what we do depends on if this is the first match or not.  If
156      // this is the first match, it doesn't match to match at the start of
157      // MatchAttempt.
158      if (ChunkNo == 0) {
159        // If the first match fails then this pattern will never match in
160        // Buffer.
161        if (ThisMatch == StringRef::npos)
162          return ThisMatch;
163
164        FirstMatch = ThisMatch;
165        MatchAttempt = MatchAttempt.substr(FirstMatch);
166        ThisMatch = 0;
167      }
168
169      // If this chunk didn't match, then the entire pattern didn't match from
170      // FirstMatch, try later in the buffer.
171      if (ThisMatch == StringRef::npos)
172        break;
173
174      // Ok, if the match didn't match at the beginning of MatchAttempt, then we
175      // have something like "ABC{{DEF}} and something was in-between.  Reject
176      // the match.
177      if (ThisMatch != 0)
178        break;
179
180      // Otherwise, match the string and move to the next chunk.
181      MatchLen += ThisLength;
182      MatchAttempt = MatchAttempt.substr(ThisLength);
183    }
184
185    // If the whole thing matched, we win.
186    if (ChunkNo == e)
187      return FirstMatch;
188
189    // Otherwise, try matching again after FirstMatch to see if this pattern
190    // matches later in the buffer.
191    Buffer = Buffer.substr(FirstMatch+1);
192  }
193
194  // If we ran out of stuff to scan, then we didn't match.
195  return StringRef::npos;
196}
197
198
199//===----------------------------------------------------------------------===//
200// Check Strings.
201//===----------------------------------------------------------------------===//
202
203/// CheckString - This is a check that we found in the input file.
204struct CheckString {
205  /// Pat - The pattern to match.
206  Pattern Pat;
207
208  /// Loc - The location in the match file that the check string was specified.
209  SMLoc Loc;
210
211  /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed
212  /// to a CHECK: directive.
213  bool IsCheckNext;
214
215  /// NotStrings - These are all of the strings that are disallowed from
216  /// occurring between this match string and the previous one (or start of
217  /// file).
218  std::vector<std::pair<SMLoc, Pattern> > NotStrings;
219
220  CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
221    : Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
222};
223
224/// CanonicalizeInputFile - Remove duplicate horizontal space from the specified
225/// memory buffer, free it, and return a new one.
226static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
227  SmallVector<char, 16> NewFile;
228  NewFile.reserve(MB->getBufferSize());
229
230  for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
231       Ptr != End; ++Ptr) {
232    // If C is not a horizontal whitespace, skip it.
233    if (*Ptr != ' ' && *Ptr != '\t') {
234      NewFile.push_back(*Ptr);
235      continue;
236    }
237
238    // Otherwise, add one space and advance over neighboring space.
239    NewFile.push_back(' ');
240    while (Ptr+1 != End &&
241           (Ptr[1] == ' ' || Ptr[1] == '\t'))
242      ++Ptr;
243  }
244
245  // Free the old buffer and return a new one.
246  MemoryBuffer *MB2 =
247    MemoryBuffer::getMemBufferCopy(NewFile.data(),
248                                   NewFile.data() + NewFile.size(),
249                                   MB->getBufferIdentifier());
250
251  delete MB;
252  return MB2;
253}
254
255
256/// ReadCheckFile - Read the check file, which specifies the sequence of
257/// expected strings.  The strings are added to the CheckStrings vector.
258static bool ReadCheckFile(SourceMgr &SM,
259                          std::vector<CheckString> &CheckStrings) {
260  // Open the check file, and tell SourceMgr about it.
261  std::string ErrorStr;
262  MemoryBuffer *F =
263    MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr);
264  if (F == 0) {
265    errs() << "Could not open check file '" << CheckFilename << "': "
266           << ErrorStr << '\n';
267    return true;
268  }
269
270  // If we want to canonicalize whitespace, strip excess whitespace from the
271  // buffer containing the CHECK lines.
272  if (!NoCanonicalizeWhiteSpace)
273    F = CanonicalizeInputFile(F);
274
275  SM.AddNewSourceBuffer(F, SMLoc());
276
277  // Find all instances of CheckPrefix followed by : in the file.
278  StringRef Buffer = F->getBuffer();
279
280  std::vector<std::pair<SMLoc, Pattern> > NotMatches;
281
282  while (1) {
283    // See if Prefix occurs in the memory buffer.
284    Buffer = Buffer.substr(Buffer.find(CheckPrefix));
285
286    // If we didn't find a match, we're done.
287    if (Buffer.empty())
288      break;
289
290    const char *CheckPrefixStart = Buffer.data();
291
292    // When we find a check prefix, keep track of whether we find CHECK: or
293    // CHECK-NEXT:
294    bool IsCheckNext = false, IsCheckNot = false;
295
296    // Verify that the : is present after the prefix.
297    if (Buffer[CheckPrefix.size()] == ':') {
298      Buffer = Buffer.substr(CheckPrefix.size()+1);
299    } else if (Buffer.size() > CheckPrefix.size()+6 &&
300               memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
301      Buffer = Buffer.substr(CheckPrefix.size()+7);
302      IsCheckNext = true;
303    } else if (Buffer.size() > CheckPrefix.size()+5 &&
304               memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
305      Buffer = Buffer.substr(CheckPrefix.size()+6);
306      IsCheckNot = true;
307    } else {
308      Buffer = Buffer.substr(1);
309      continue;
310    }
311
312    // Okay, we found the prefix, yay.  Remember the rest of the line, but
313    // ignore leading and trailing whitespace.
314    Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
315
316    // Scan ahead to the end of line.
317    size_t EOL = Buffer.find_first_of("\n\r");
318
319    // Parse the pattern.
320    Pattern P;
321    if (P.ParsePattern(Buffer.substr(0, EOL), SM))
322      return true;
323
324    Buffer = Buffer.substr(EOL);
325
326
327    // Verify that CHECK-NEXT lines have at least one CHECK line before them.
328    if (IsCheckNext && CheckStrings.empty()) {
329      SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
330                      "found '"+CheckPrefix+"-NEXT:' without previous '"+
331                      CheckPrefix+ ": line", "error");
332      return true;
333    }
334
335    // Handle CHECK-NOT.
336    if (IsCheckNot) {
337      NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()),
338                                          P));
339      continue;
340    }
341
342
343    // Okay, add the string we captured to the output vector and move on.
344    CheckStrings.push_back(CheckString(P,
345                                       SMLoc::getFromPointer(Buffer.data()),
346                                       IsCheckNext));
347    std::swap(NotMatches, CheckStrings.back().NotStrings);
348  }
349
350  if (CheckStrings.empty()) {
351    errs() << "error: no check strings found with prefix '" << CheckPrefix
352           << ":'\n";
353    return true;
354  }
355
356  if (!NotMatches.empty()) {
357    errs() << "error: '" << CheckPrefix
358           << "-NOT:' not supported after last check line.\n";
359    return true;
360  }
361
362  return false;
363}
364
365static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
366                             StringRef Buffer) {
367  // Otherwise, we have an error, emit an error message.
368  SM.PrintMessage(CheckStr.Loc, "expected string not found in input",
369                  "error");
370
371  // Print the "scanning from here" line.  If the current position is at the
372  // end of a line, advance to the start of the next line.
373  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
374
375  SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here",
376                  "note");
377}
378
379/// CountNumNewlinesBetween - Count the number of newlines in the specified
380/// range.
381static unsigned CountNumNewlinesBetween(StringRef Range) {
382  unsigned NumNewLines = 0;
383  while (1) {
384    // Scan for newline.
385    Range = Range.substr(Range.find_first_of("\n\r"));
386    if (Range.empty()) return NumNewLines;
387
388    ++NumNewLines;
389
390    // Handle \n\r and \r\n as a single newline.
391    if (Range.size() > 1 &&
392        (Range[1] == '\n' || Range[1] == '\r') &&
393        (Range[0] != Range[1]))
394      Range = Range.substr(1);
395    Range = Range.substr(1);
396  }
397}
398
399int main(int argc, char **argv) {
400  sys::PrintStackTraceOnErrorSignal();
401  PrettyStackTraceProgram X(argc, argv);
402  cl::ParseCommandLineOptions(argc, argv);
403
404  SourceMgr SM;
405
406  // Read the expected strings from the check file.
407  std::vector<CheckString> CheckStrings;
408  if (ReadCheckFile(SM, CheckStrings))
409    return 2;
410
411  // Open the file to check and add it to SourceMgr.
412  std::string ErrorStr;
413  MemoryBuffer *F =
414    MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
415  if (F == 0) {
416    errs() << "Could not open input file '" << InputFilename << "': "
417           << ErrorStr << '\n';
418    return true;
419  }
420
421  // Remove duplicate spaces in the input file if requested.
422  if (!NoCanonicalizeWhiteSpace)
423    F = CanonicalizeInputFile(F);
424
425  SM.AddNewSourceBuffer(F, SMLoc());
426
427  // Check that we have all of the expected strings, in order, in the input
428  // file.
429  StringRef Buffer = F->getBuffer();
430
431  const char *LastMatch = Buffer.data();
432
433  for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
434    const CheckString &CheckStr = CheckStrings[StrNo];
435
436    StringRef SearchFrom = Buffer;
437
438    // Find StrNo in the file.
439    size_t MatchLen = 0;
440    Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen));
441
442    // If we didn't find a match, reject the input.
443    if (Buffer.empty()) {
444      PrintCheckFailed(SM, CheckStr, SearchFrom);
445      return 1;
446    }
447
448    StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch);
449
450    // If this check is a "CHECK-NEXT", verify that the previous match was on
451    // the previous line (i.e. that there is one newline between them).
452    if (CheckStr.IsCheckNext) {
453      // Count the number of newlines between the previous match and this one.
454      assert(LastMatch != F->getBufferStart() &&
455             "CHECK-NEXT can't be the first check in a file");
456
457      unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
458      if (NumNewLines == 0) {
459        SM.PrintMessage(CheckStr.Loc,
460                    CheckPrefix+"-NEXT: is on the same line as previous match",
461                        "error");
462        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
463                        "'next' match was here", "note");
464        SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
465                        "previous match was here", "note");
466        return 1;
467      }
468
469      if (NumNewLines != 1) {
470        SM.PrintMessage(CheckStr.Loc,
471                        CheckPrefix+
472                        "-NEXT: is not on the line after the previous match",
473                        "error");
474        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
475                        "'next' match was here", "note");
476        SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
477                        "previous match was here", "note");
478        return 1;
479      }
480    }
481
482    // If this match had "not strings", verify that they don't exist in the
483    // skipped region.
484    for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) {
485      size_t MatchLen = 0;
486      size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen);
487      if (Pos == StringRef::npos) continue;
488
489      SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
490                      CheckPrefix+"-NOT: string occurred!", "error");
491      SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
492                      CheckPrefix+"-NOT: pattern specified here", "note");
493      return 1;
494    }
495
496
497    // Otherwise, everything is good.  Step over the matched text and remember
498    // the position after the match as the end of the last match.
499    Buffer = Buffer.substr(MatchLen);
500    LastMatch = Buffer.data();
501  }
502
503  return 0;
504}
505