1//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file declares the TGLexer class, the lexer for TableGen files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
15#define LLVM_LIB_TABLEGEN_TGLEXER_H
16
17#include "llvm/ADT/StringRef.h"
18#include "llvm/Support/DataTypes.h"
19#include "llvm/Support/SMLoc.h"
20#include <cassert>
21#include <map>
22#include <string>
23
24namespace llvm {
25class SourceMgr;
26class SMLoc;
27class Twine;
28
namespace tgtok {
  /// TokKind - The kinds of token produced by the TableGen lexer.  The
  /// enumerators are listed one per line; their order (and therefore their
  /// numeric values) is significant and must not be changed.
  enum TokKind {
    // Markers
    Eof,
    Error,

    // Tokens with no info.
    minus,     // -
    plus,      // +
    l_square,  // [
    r_square,  // ]
    l_brace,   // {
    r_brace,   // }
    l_paren,   // (
    r_paren,   // )
    less,      // <
    greater,   // >
    colon,     // :
    semi,      // ;
    comma,     // ,
    period,    // .
    equal,     // =
    question,  // ?
    paste,     // #

    // Keywords.
    Bit,
    Bits,
    Class,
    Code,
    Dag,
    Def,
    Foreach,
    Defm,
    Field,
    In,
    Int,
    Let,
    List,
    MultiClass,
    String,

    // !keywords.
    XConcat,
    XADD,
    XAND,
    XSRA,
    XSRL,
    XSHL,
    XListConcat,
    XStrConcat,
    XCast,
    XSubst,
    XForEach,
    XHead,
    XTail,
    XEmpty,
    XIf,
    XEq,

    // Integer value.
    IntVal,

    // Binary constant.  Note that these are sized according to the number of
    // bits given.
    BinaryIntVal,

    // String valued tokens.
    Id,
    StrVal,
    VarName,
    CodeFragment
  };
} // end namespace tgtok
64
65/// TGLexer - TableGen Lexer class.
66class TGLexer {
67  SourceMgr &SrcMgr;
68
69  const char *CurPtr;
70  StringRef CurBuf;
71
72  // Information about the current token.
73  const char *TokStart;
74  tgtok::TokKind CurCode;
75  std::string CurStrVal;  // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
76  int64_t CurIntVal;      // This is valid for INTVAL.
77
78  /// CurBuffer - This is the current buffer index we're lexing from as managed
79  /// by the SourceMgr object.
80  unsigned CurBuffer;
81
82public:
83  typedef std::map<std::string, SMLoc> DependenciesMapTy;
84private:
85  /// Dependencies - This is the list of all included files.
86  DependenciesMapTy Dependencies;
87
88public:
89  TGLexer(SourceMgr &SrcMgr);
90
91  tgtok::TokKind Lex() {
92    return CurCode = LexToken();
93  }
94
95  const DependenciesMapTy &getDependencies() const {
96    return Dependencies;
97  }
98
99  tgtok::TokKind getCode() const { return CurCode; }
100
101  const std::string &getCurStrVal() const {
102    assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
103            CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
104           "This token doesn't have a string value");
105    return CurStrVal;
106  }
107  int64_t getCurIntVal() const {
108    assert(CurCode == tgtok::IntVal && "This token isn't an integer");
109    return CurIntVal;
110  }
111  std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
112    assert(CurCode == tgtok::BinaryIntVal &&
113           "This token isn't a binary integer");
114    return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
115  }
116
117  SMLoc getLoc() const;
118
119private:
120  /// LexToken - Read the next token and return its code.
121  tgtok::TokKind LexToken();
122
123  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
124
125  int getNextChar();
126  int peekNextChar(int Index);
127  void SkipBCPLComment();
128  bool SkipCComment();
129  tgtok::TokKind LexIdentifier();
130  bool LexInclude();
131  tgtok::TokKind LexString();
132  tgtok::TokKind LexVarName();
133  tgtok::TokKind LexNumber();
134  tgtok::TokKind LexBracket();
135  tgtok::TokKind LexExclaim();
136};
137
138} // end namespace llvm
139
140#endif
141