Token.h revision 39a8de10c18365bde7062d8959b7ed525449c561
1//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the Token interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_TOKEN_H
15#define LLVM_CLANG_TOKEN_H
16
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/TemplateKinds.h"
#include "clang/Basic/TokenKinds.h"
#include <cassert>
#include <cstdlib>
20
21namespace clang {
22
23class IdentifierInfo;
24
25/// Token - This structure provides full information about a lexed token.
26/// It is not intended to be space efficient, it is intended to return as much
27/// information as possible about each returned token.  This is expected to be
28/// compressed into a smaller form if memory footprint is important.
29///
30/// The parser can create a special "annotation token" representing a stream of
31/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
32/// can be represented by a single typename annotation token that carries
33/// information about the SourceRange of the tokens and the type object.
34class Token {
35  /// The location of the token.
36  SourceLocation Loc;
37
38  // Conceptually these next two fields could be in a union.  However, this
39  // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
40  // routine. Keeping as separate members with casts until a more beautiful fix
41  // presents itself.
42
43  /// UintData - This holds either the length of the token text, when
44  /// a normal token, or the end of the SourceRange when an annotation
45  /// token.
46  unsigned UintData;
47
48  /// PtrData - This is a union of four different pointer types, which depends
49  /// on what type of token this is:
50  ///  Identifiers, keywords, etc:
51  ///    This is an IdentifierInfo*, which contains the uniqued identifier
52  ///    spelling.
53  ///  Literals:  isLiteral() returns true.
54  ///    This is a pointer to the start of the token in a text buffer, which
55  ///    may be dirty (have trigraphs / escaped newlines).
56  ///  Annotations (resolved type names, C++ scopes, etc): isAnnotation().
57  ///    This is a pointer to sema-specific data for the annotation token.
58  ///  Other:
59  ///    This is null.
60  void *PtrData;
61
62  /// Kind - The actual flavor of token this is.
63  ///
64  unsigned Kind : 8;  // DON'T make Kind a 'tok::TokenKind';
65                      // MSVC will treat it as a signed char and
66                      // TokenKinds > 127 won't be handled correctly.
67
68  /// Flags - Bits we track about this token, members of the TokenFlags enum.
69  unsigned Flags : 8;
70public:
71
72  // Various flags set per token:
73  enum TokenFlags {
74    StartOfLine   = 0x01,  // At start of line or only after whitespace.
75    LeadingSpace  = 0x02,  // Whitespace exists before this token.
76    DisableExpand = 0x04,  // This identifier may never be macro expanded.
77    NeedsCleaning = 0x08   // Contained an escaped newline or trigraph.
78  };
79
80  tok::TokenKind getKind() const { return (tok::TokenKind)Kind; }
81  void setKind(tok::TokenKind K) { Kind = K; }
82
83  /// is/isNot - Predicates to check if this token is a specific kind, as in
84  /// "if (Tok.is(tok::l_brace)) {...}".
85  bool is(tok::TokenKind K) const { return Kind == (unsigned) K; }
86  bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; }
87
88  /// isLiteral - Return true if this is a "literal", like a numeric
89  /// constant, string, etc.
90  bool isLiteral() const {
91    return is(tok::numeric_constant) || is(tok::char_constant) ||
92           is(tok::string_literal) || is(tok::wide_string_literal) ||
93           is(tok::angle_string_literal);
94  }
95
96  bool isAnnotation() const {
97    return is(tok::annot_typename) ||
98           is(tok::annot_cxxscope) ||
99           is(tok::annot_template_id);
100  }
101
102  /// getLocation - Return a source location identifier for the specified
103  /// offset in the current file.
104  SourceLocation getLocation() const { return Loc; }
105  unsigned getLength() const {
106    assert(!isAnnotation() && "Annotation tokens have no length field");
107    return UintData;
108  }
109
110  void setLocation(SourceLocation L) { Loc = L; }
111  void setLength(unsigned Len) {
112    assert(!isAnnotation() && "Annotation tokens have no length field");
113    UintData = Len;
114  }
115
116  SourceLocation getAnnotationEndLoc() const {
117    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
118    return SourceLocation::getFromRawEncoding(UintData);
119  }
120  void setAnnotationEndLoc(SourceLocation L) {
121    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
122    UintData = L.getRawEncoding();
123  }
124
125  /// getAnnotationRange - SourceRange of the group of tokens that this
126  /// annotation token represents.
127  SourceRange getAnnotationRange() const {
128    return SourceRange(getLocation(), getAnnotationEndLoc());
129  }
130  void setAnnotationRange(SourceRange R) {
131    setLocation(R.getBegin());
132    setAnnotationEndLoc(R.getEnd());
133  }
134
135  const char *getName() const {
136    return tok::getTokenName( (tok::TokenKind) Kind);
137  }
138
139  /// startToken - Reset all flags to cleared.
140  ///
141  void startToken() {
142    Kind = tok::unknown;
143    Flags = 0;
144    PtrData = 0;
145    Loc = SourceLocation();
146  }
147
148  IdentifierInfo *getIdentifierInfo() const {
149    assert(!isAnnotation() && "Used IdentInfo on annotation token!");
150    if (isLiteral()) return 0;
151    return (IdentifierInfo*) PtrData;
152  }
153  void setIdentifierInfo(IdentifierInfo *II) {
154    PtrData = (void*) II;
155  }
156
157  /// getLiteralData - For a literal token (numeric constant, string, etc), this
158  /// returns a pointer to the start of it in the text buffer if known, null
159  /// otherwise.
160  const char *getLiteralData() const {
161    assert(isLiteral() && "Cannot get literal data of non-literal");
162    return reinterpret_cast<const char*>(PtrData);
163  }
164  void setLiteralData(const char *Ptr) {
165    assert(isLiteral() && "Cannot set literal data of non-literal");
166    PtrData = (void*)Ptr;
167  }
168
169  void *getAnnotationValue() const {
170    assert(isAnnotation() && "Used AnnotVal on non-annotation token");
171    return PtrData;
172  }
173  void setAnnotationValue(void *val) {
174    assert(isAnnotation() && "Used AnnotVal on non-annotation token");
175    PtrData = val;
176  }
177
178  /// setFlag - Set the specified flag.
179  void setFlag(TokenFlags Flag) {
180    Flags |= Flag;
181  }
182
183  /// clearFlag - Unset the specified flag.
184  void clearFlag(TokenFlags Flag) {
185    Flags &= ~Flag;
186  }
187
188  /// getFlags - Return the internal represtation of the flags.
189  ///  Only intended for low-level operations such as writing tokens to
190  //   disk.
191  unsigned getFlags() const {
192    return Flags;
193  }
194
195  /// setFlagValue - Set a flag to either true or false.
196  void setFlagValue(TokenFlags Flag, bool Val) {
197    if (Val)
198      setFlag(Flag);
199    else
200      clearFlag(Flag);
201  }
202
203  /// isAtStartOfLine - Return true if this token is at the start of a line.
204  ///
205  bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; }
206
207  /// hasLeadingSpace - Return true if this token has whitespace before it.
208  ///
209  bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; }
210
211  /// isExpandDisabled - Return true if this identifier token should never
212  /// be expanded in the future, due to C99 6.10.3.4p2.
213  bool isExpandDisabled() const {
214    return (Flags & DisableExpand) ? true : false;
215  }
216
217  /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
218  bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
219
220  /// getObjCKeywordID - Return the ObjC keyword kind.
221  tok::ObjCKeywordKind getObjCKeywordID() const;
222
223  /// needsCleaning - Return true if this token has trigraphs or escaped
224  /// newlines in it.
225  ///
226  bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; }
227};
228
229/// PPConditionalInfo - Information about the conditional stack (#if directives)
230/// currently active.
231struct PPConditionalInfo {
232  /// IfLoc - Location where the conditional started.
233  ///
234  SourceLocation IfLoc;
235
236  /// WasSkipping - True if this was contained in a skipping directive, e.g.
237  /// in a "#if 0" block.
238  bool WasSkipping;
239
240  /// FoundNonSkip - True if we have emitted tokens already, and now we're in
241  /// an #else block or something.  Only useful in Skipping blocks.
242  bool FoundNonSkip;
243
244  /// FoundElse - True if we've seen a #else in this block.  If so,
245  /// #elif/#else directives are not allowed.
246  bool FoundElse;
247};
248
249/// TemplateIdAnnotation - Information about a template-id annotation
250/// token, which contains the template declaration, template
251/// arguments, whether those template arguments were types or
252/// expressions, and the source locations for important tokens. All of
253/// the information about template arguments is allocated directly
254/// after this structure.
255struct TemplateIdAnnotation {
256  /// TemplateNameLoc - The location of the template name within the
257  /// source.
258  SourceLocation TemplateNameLoc;
259
260  /// FIXME: Temporarily stores the name of a specialization
261  IdentifierInfo *Name;
262
263  /// The declaration of the template corresponding to the
264  /// template-name. This is an Action::DeclTy*.
265  void *Template;
266
267  /// The kind of template that Template refers to.
268  TemplateNameKind Kind;
269
270  /// The location of the '<' before the template argument
271  /// list.
272  SourceLocation LAngleLoc;
273
274  /// The location of the '>' after the template argument
275  /// list.
276  SourceLocation RAngleLoc;
277
278  /// NumArgs - The number of template arguments.
279  unsigned NumArgs;
280
281  /// \brief Retrieves a pointer to the template arguments
282  void **getTemplateArgs() { return (void **)(this + 1); }
283
284  /// \brief Retrieves a pointer to the array of template argument
285  /// locations.
286  SourceLocation *getTemplateArgLocations() {
287    return (SourceLocation *)(getTemplateArgs() + NumArgs);
288  }
289
290  /// \brief Retrieves a pointer to the array of flags that states
291  /// whether the template arguments are types.
292  bool *getTemplateArgIsType() {
293    return (bool *)(getTemplateArgLocations() + NumArgs);
294  }
295
296  static TemplateIdAnnotation* Allocate(unsigned NumArgs) {
297    TemplateIdAnnotation *TemplateId
298      = (TemplateIdAnnotation *)malloc(sizeof(TemplateIdAnnotation) +
299                                       sizeof(void*) * NumArgs +
300                                       sizeof(SourceLocation) * NumArgs +
301                                       sizeof(bool) * NumArgs);
302    TemplateId->NumArgs = NumArgs;
303    return TemplateId;
304  }
305
306  void Destroy() { free(this); }
307};
308
309}  // end namespace clang
310
311#endif
312