1//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Example simple parser implementation for the MC assembly markup language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/CommandLine.h"
15#include "llvm/Support/Format.h"
16#include "llvm/Support/ManagedStatic.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/Support/PrettyStackTrace.h"
19#include "llvm/Support/Signals.h"
20#include "llvm/Support/SourceMgr.h"
21#include "llvm/Support/raw_ostream.h"
22#include <system_error>
23using namespace llvm;
24
25static cl::list<std::string>
26       InputFilenames(cl::Positional, cl::desc("<input files>"),
27                      cl::ZeroOrMore);
28static cl::opt<bool>
29DumpTags("dump-tags", cl::desc("List all tags encountered in input"));
30
31static StringRef ToolName;
32
33/// Trivial lexer for the markup parser. Input is always handled a character
34/// at a time. The lexer just encapsulates EOF and lookahead handling.
35class MarkupLexer {
36  StringRef::const_iterator Start;
37  StringRef::const_iterator CurPtr;
38  StringRef::const_iterator End;
39public:
40  MarkupLexer(StringRef Source)
41    : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {}
42  // When processing non-markup, input is consumed a character at a time.
43  bool isEOF() { return CurPtr == End; }
44  int getNextChar() {
45    if (CurPtr == End) return EOF;
46    return *CurPtr++;
47  }
48  int peekNextChar() {
49    if (CurPtr == End) return EOF;
50    return *CurPtr;
51  }
52  StringRef::const_iterator getPosition() const { return CurPtr; }
53};
54
55/// A markup tag is a name and a (usually empty) list of modifiers.
56class MarkupTag {
57  StringRef Name;
58  StringRef Modifiers;
59  SMLoc StartLoc;
60public:
61  MarkupTag(StringRef n, StringRef m, SMLoc Loc)
62    : Name(n), Modifiers(m), StartLoc(Loc) {}
63  StringRef getName() const { return Name; }
64  StringRef getModifiers() const { return Modifiers; }
65  SMLoc getLoc() const { return StartLoc; }
66};
67
68/// A simple parser implementation for creating MarkupTags from input text.
69class MarkupParser {
70  MarkupLexer &Lex;
71  SourceMgr &SM;
72public:
73  MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}
74  /// Create a MarkupTag from the current position in the MarkupLexer.
75  /// The parseTag() method should be called when the lexer has processed
76  /// the opening '<' character. Input will be consumed up to and including
77  /// the ':' which terminates the tag open.
78  MarkupTag parseTag();
79  /// Issue a diagnostic and terminate program execution.
80  void FatalError(SMLoc Loc, StringRef Msg);
81};
82
83void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
84  SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
85  exit(1);
86}
87
88// Example handler for when a tag is recognized.
89static void processStartTag(MarkupTag &Tag) {
90  // If we're just printing the tags, do that, otherwise do some simple
91  // colorization.
92  if (DumpTags) {
93    outs() << Tag.getName();
94    if (Tag.getModifiers().size())
95      outs() << " " << Tag.getModifiers();
96    outs() << "\n";
97    return;
98  }
99
100  if (!outs().has_colors())
101    return;
102  // Color registers as red and immediates as cyan. Those don't have nested
103  // tags, so don't bother keeping a stack of colors to reset to.
104  if (Tag.getName() == "reg")
105    outs().changeColor(raw_ostream::RED);
106  else if (Tag.getName() == "imm")
107    outs().changeColor(raw_ostream::CYAN);
108}
109
110// Example handler for when the end of a tag is recognized.
111static void processEndTag(MarkupTag &Tag) {
112  // If we're printing the tags, there's nothing more to do here. Otherwise,
113  // set the color back the normal.
114  if (DumpTags)
115    return;
116  if (!outs().has_colors())
117    return;
118  // Just reset to basic white.
119  outs().changeColor(raw_ostream::WHITE, false);
120}
121
122MarkupTag MarkupParser::parseTag() {
123  // First off, extract the tag into it's own StringRef so we can look at it
124  // outside of the context of consuming input.
125  StringRef::const_iterator Start = Lex.getPosition();
126  SMLoc Loc = SMLoc::getFromPointer(Start - 1);
127  while(Lex.getNextChar() != ':') {
128    // EOF is an error.
129    if (Lex.isEOF())
130      FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
131  }
132  StringRef RawTag(Start, Lex.getPosition() - Start - 1);
133  std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
134  return MarkupTag(SplitTag.first, SplitTag.second, Loc);
135}
136
137static void parseMCMarkup(StringRef Filename) {
138  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr =
139      MemoryBuffer::getFileOrSTDIN(Filename);
140  if (std::error_code EC = BufferPtr.getError()) {
141    errs() << ToolName << ": " << EC.message() << '\n';
142    return;
143  }
144  MemoryBuffer *Buffer = BufferPtr->release();
145
146  SourceMgr SrcMgr;
147
148  // Tell SrcMgr about this buffer, which is what the parser will pick up.
149  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
150
151  StringRef InputSource = Buffer->getBuffer();
152  MarkupLexer Lex(InputSource);
153  MarkupParser Parser(Lex, SrcMgr);
154
155  SmallVector<MarkupTag, 4> TagStack;
156
157  for (int CurChar = Lex.getNextChar();
158       CurChar != EOF;
159       CurChar = Lex.getNextChar()) {
160    switch (CurChar) {
161    case '<': {
162      // A "<<" is output as a literal '<' and does not start a markup tag.
163      if (Lex.peekNextChar() == '<') {
164        (void)Lex.getNextChar();
165        break;
166      }
167      // Parse the markup entry.
168      TagStack.push_back(Parser.parseTag());
169
170      // Do any special handling for the start of a tag.
171      processStartTag(TagStack.back());
172      continue;
173    }
174    case '>': {
175      SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
176      // A ">>" is output as a literal '>' and does not end a markup tag.
177      if (Lex.peekNextChar() == '>') {
178        (void)Lex.getNextChar();
179        break;
180      }
181      // Close out the innermost tag.
182      if (TagStack.empty())
183        Parser.FatalError(Loc, "'>' without matching '<'");
184
185      // Do any special handling for the end of a tag.
186      processEndTag(TagStack.back());
187
188      TagStack.pop_back();
189      continue;
190    }
191    default:
192      break;
193    }
194    // For anything else, just echo the character back out.
195    if (!DumpTags && CurChar != EOF)
196      outs() << (char)CurChar;
197  }
198
199  // If there are any unterminated markup tags, issue diagnostics for them.
200  while (!TagStack.empty()) {
201    MarkupTag &Tag = TagStack.back();
202    SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
203                        "unterminated markup tag");
204    TagStack.pop_back();
205  }
206}
207
208int main(int argc, char **argv) {
209  // Print a stack trace if we signal out.
210  sys::PrintStackTraceOnErrorSignal();
211  PrettyStackTraceProgram X(argc, argv);
212
213  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
214  cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
215
216  ToolName = argv[0];
217
218  // If no input files specified, read from stdin.
219  if (InputFilenames.size() == 0)
220    InputFilenames.push_back("-");
221
222  std::for_each(InputFilenames.begin(), InputFilenames.end(),
223                parseMCMarkup);
224  return 0;
225}
226