Lexer.cpp revision e91e93225db2e66906878513c6ef4dd6a7ee2b6a
1//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the Lexer and Token interfaces.
11//
12//===----------------------------------------------------------------------===//
13//
14// TODO: GCC Diagnostics emitted by the lexer:
15// PEDWARN: (form feed|vertical tab) in preprocessing directive
16//
17// Universal characters, unicode, char mapping:
18// WARNING: `%.*s' is not in NFKC
19// WARNING: `%.*s' is not in NFC
20//
21// Other:
22// TODO: Options to support:
23//    -fexec-charset,-fwide-exec-charset
24//
25//===----------------------------------------------------------------------===//
26
27#include "clang/Lex/Lexer.h"
28#include "clang/Lex/Preprocessor.h"
29#include "clang/Lex/LexDiagnostic.h"
30#include "clang/Basic/SourceManager.h"
31#include "llvm/Support/Compiler.h"
32#include "llvm/Support/MemoryBuffer.h"
33#include <cctype>
34using namespace clang;
35
36static void InitCharacterInfo();
37
38//===----------------------------------------------------------------------===//
39// Token Class Implementation
40//===----------------------------------------------------------------------===//
41
42/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
43bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
44  if (IdentifierInfo *II = getIdentifierInfo())
45    return II->getObjCKeywordID() == objcKey;
46  return false;
47}
48
49/// getObjCKeywordID - Return the ObjC keyword kind.
50tok::ObjCKeywordKind Token::getObjCKeywordID() const {
51  IdentifierInfo *specId = getIdentifierInfo();
52  return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
53}
54
55
56//===----------------------------------------------------------------------===//
57// Lexer Class Implementation
58//===----------------------------------------------------------------------===//
59
60void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
61                      const char *BufEnd) {
62  InitCharacterInfo();
63
64  BufferStart = BufStart;
65  BufferPtr = BufPtr;
66  BufferEnd = BufEnd;
67
68  assert(BufEnd[0] == 0 &&
69         "We assume that the input buffer has a null character at the end"
70         " to simplify lexing!");
71
72  Is_PragmaLexer = false;
73
74  // Start of the file is a start of line.
75  IsAtStartOfLine = true;
76
77  // We are not after parsing a #.
78  ParsingPreprocessorDirective = false;
79
80  // We are not after parsing #include.
81  ParsingFilename = false;
82
83  // We are not in raw mode.  Raw mode disables diagnostics and interpretation
84  // of tokens (e.g. identifiers, thus disabling macro expansion).  It is used
85  // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
86  // or otherwise skipping over tokens.
87  LexingRawMode = false;
88
89  // Default to not keeping comments.
90  ExtendedTokenMode = 0;
91}
92
93/// Lexer constructor - Create a new lexer object for the specified buffer
94/// with the specified preprocessor managing the lexing process.  This lexer
95/// assumes that the associated file buffer and Preprocessor objects will
96/// outlive it, so it doesn't take ownership of either of them.
97Lexer::Lexer(FileID FID, Preprocessor &PP)
98  : PreprocessorLexer(&PP, FID),
99    FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
100    Features(PP.getLangOptions()) {
101
102  const llvm::MemoryBuffer *InputFile = PP.getSourceManager().getBuffer(FID);
103
104  InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
105            InputFile->getBufferEnd());
106
107  // Default to keeping comments if the preprocessor wants them.
108  SetCommentRetentionState(PP.getCommentRetentionState());
109}
110
111/// Lexer constructor - Create a new raw lexer object.  This object is only
112/// suitable for calls to 'LexRawToken'.  This lexer assumes that the text
113/// range will outlive it, so it doesn't take ownership of it.
114Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
115             const char *BufStart, const char *BufPtr, const char *BufEnd)
116  : FileLoc(fileloc), Features(features) {
117
118  InitLexer(BufStart, BufPtr, BufEnd);
119
120  // We *are* in raw mode.
121  LexingRawMode = true;
122}
123
124/// Lexer constructor - Create a new raw lexer object.  This object is only
125/// suitable for calls to 'LexRawToken'.  This lexer assumes that the text
126/// range will outlive it, so it doesn't take ownership of it.
127Lexer::Lexer(FileID FID, const SourceManager &SM, const LangOptions &features)
128  : FileLoc(SM.getLocForStartOfFile(FID)), Features(features) {
129  const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
130
131  InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(),
132            FromFile->getBufferEnd());
133
134  // We *are* in raw mode.
135  LexingRawMode = true;
136}
137
138/// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
139/// _Pragma expansion.  This has a variety of magic semantics that this method
140/// sets up.  It returns a new'd Lexer that must be delete'd when done.
141///
142/// On entrance to this routine, TokStartLoc is a macro location which has a
143/// spelling loc that indicates the bytes to be lexed for the token and an
144/// instantiation location that indicates where all lexed tokens should be
145/// "expanded from".
146///
147/// FIXME: It would really be nice to make _Pragma just be a wrapper around a
148/// normal lexer that remaps tokens as they fly by.  This would require making
149/// Preprocessor::Lex virtual.  Given that, we could just dump in a magic lexer
150/// interface that could handle this stuff.  This would pull GetMappedTokenLoc
151/// out of the critical path of the lexer!
152///
153Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
154                                 SourceLocation InstantiationLocStart,
155                                 SourceLocation InstantiationLocEnd,
156                                 unsigned TokLen, Preprocessor &PP) {
157  SourceManager &SM = PP.getSourceManager();
158
159  // Create the lexer as if we were going to lex the file normally.
160  FileID SpellingFID = SM.getFileID(SpellingLoc);
161  Lexer *L = new Lexer(SpellingFID, PP);
162
163  // Now that the lexer is created, change the start/end locations so that we
164  // just lex the subsection of the file that we want.  This is lexing from a
165  // scratch buffer.
166  const char *StrData = SM.getCharacterData(SpellingLoc);
167
168  L->BufferPtr = StrData;
169  L->BufferEnd = StrData+TokLen;
170  assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!");
171
172  // Set the SourceLocation with the remapping information.  This ensures that
173  // GetMappedTokenLoc will remap the tokens as they are lexed.
174  L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID),
175                                         InstantiationLocStart,
176                                         InstantiationLocEnd, TokLen);
177
178  // Ensure that the lexer thinks it is inside a directive, so that end \n will
179  // return an EOM token.
180  L->ParsingPreprocessorDirective = true;
181
182  // This lexer really is for _Pragma.
183  L->Is_PragmaLexer = true;
184  return L;
185}
186
187
188/// Stringify - Convert the specified string into a C string, with surrounding
189/// ""'s, and with escaped \ and " characters.
190std::string Lexer::Stringify(const std::string &Str, bool Charify) {
191  std::string Result = Str;
192  char Quote = Charify ? '\'' : '"';
193  for (unsigned i = 0, e = Result.size(); i != e; ++i) {
194    if (Result[i] == '\\' || Result[i] == Quote) {
195      Result.insert(Result.begin()+i, '\\');
196      ++i; ++e;
197    }
198  }
199  return Result;
200}
201
202/// Stringify - Convert the specified string into a C string by escaping '\'
203/// and " characters.  This does not add surrounding ""'s to the string.
204void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
205  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
206    if (Str[i] == '\\' || Str[i] == '"') {
207      Str.insert(Str.begin()+i, '\\');
208      ++i; ++e;
209    }
210  }
211}
212
213
214/// MeasureTokenLength - Relex the token at the specified location and return
215/// its length in bytes in the input file.  If the token needs cleaning (e.g.
216/// includes a trigraph or an escaped newline) then this count includes bytes
217/// that are part of that.
218unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
219                                   const SourceManager &SM) {
220  // TODO: this could be special cased for common tokens like identifiers, ')',
221  // etc to make this faster, if it mattered.  Just look at StrData[0] to handle
222  // all obviously single-char tokens.  This could use
223  // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
224  // something.
225
226  // If this comes from a macro expansion, we really do want the macro name, not
227  // the token this macro expanded to.
228  Loc = SM.getInstantiationLoc(Loc);
229  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
230  std::pair<const char *,const char *> Buffer = SM.getBufferData(LocInfo.first);
231  const char *StrData = Buffer.first+LocInfo.second;
232
233  // Create a langops struct and enable trigraphs.  This is sufficient for
234  // measuring tokens.
235  LangOptions LangOpts;
236  LangOpts.Trigraphs = true;
237
238  // Create a lexer starting at the beginning of this token.
239  Lexer TheLexer(Loc, LangOpts, Buffer.first, StrData, Buffer.second);
240  Token TheTok;
241  TheLexer.LexFromRawLexer(TheTok);
242  return TheTok.getLength();
243}
244
245//===----------------------------------------------------------------------===//
246// Character information.
247//===----------------------------------------------------------------------===//
248
// CharInfo - Maps each of the 256 possible 'unsigned char' values to a
// bitmask of the CHAR_* categories below.  Zero-initialized by static
// storage; populated lazily by InitCharacterInfo().
static unsigned char CharInfo[256];

// Category bits stored in CharInfo entries.
enum {
  CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'.  Note, no '\0'
  CHAR_VERT_WS  = 0x02,  // '\r', '\n'
  CHAR_LETTER   = 0x04,  // a-z,A-Z
  CHAR_NUMBER   = 0x08,  // 0-9
  CHAR_UNDER    = 0x10,  // _
  CHAR_PERIOD   = 0x20   // .
};
259
260static void InitCharacterInfo() {
261  static bool isInited = false;
262  if (isInited) return;
263  isInited = true;
264
265  // Intiialize the CharInfo table.
266  // TODO: statically initialize this.
267  CharInfo[(int)' '] = CharInfo[(int)'\t'] =
268  CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
269  CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
270
271  CharInfo[(int)'_'] = CHAR_UNDER;
272  CharInfo[(int)'.'] = CHAR_PERIOD;
273  for (unsigned i = 'a'; i <= 'z'; ++i)
274    CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
275  for (unsigned i = '0'; i <= '9'; ++i)
276    CharInfo[i] = CHAR_NUMBER;
277}
278
279/// isIdentifierBody - Return true if this is the body character of an
280/// identifier, which is [a-zA-Z0-9_].
281static inline bool isIdentifierBody(unsigned char c) {
282  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false;
283}
284
285/// isHorizontalWhitespace - Return true if this character is horizontal
286/// whitespace: ' ', '\t', '\f', '\v'.  Note that this returns false for '\0'.
287static inline bool isHorizontalWhitespace(unsigned char c) {
288  return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
289}
290
291/// isWhitespace - Return true if this character is horizontal or vertical
292/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.  Note that this returns false
293/// for '\0'.
294static inline bool isWhitespace(unsigned char c) {
295  return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
296}
297
298/// isNumberBody - Return true if this is the body character of an
299/// preprocessing number, which is [a-zA-Z0-9_.].
300static inline bool isNumberBody(unsigned char c) {
301  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ?
302    true : false;
303}
304
305
306//===----------------------------------------------------------------------===//
307// Diagnostics forwarding code.
308//===----------------------------------------------------------------------===//
309
310/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
311/// lexer buffer was all instantiated at a single point, perform the mapping.
312/// This is currently only used for _Pragma implementation, so it is the slow
313/// path of the hot getSourceLocation method.  Do not allow it to be inlined.
314static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
315                                        SourceLocation FileLoc,
316                                        unsigned CharNo,
317                                        unsigned TokLen) DISABLE_INLINE;
318static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
319                                        SourceLocation FileLoc,
320                                        unsigned CharNo, unsigned TokLen) {
321  assert(FileLoc.isMacroID() && "Must be an instantiation");
322
323  // Otherwise, we're lexing "mapped tokens".  This is used for things like
324  // _Pragma handling.  Combine the instantiation location of FileLoc with the
325  // spelling location.
326  SourceManager &SM = PP.getSourceManager();
327
328  // Create a new SLoc which is expanded from Instantiation(FileLoc) but whose
329  // characters come from spelling(FileLoc)+Offset.
330  SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc);
331  SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo);
332
333  // Figure out the expansion loc range, which is the range covered by the
334  // original _Pragma(...) sequence.
335  std::pair<SourceLocation,SourceLocation> II =
336    SM.getImmediateInstantiationRange(FileLoc);
337
338  return SM.createInstantiationLoc(SpellingLoc, II.first, II.second, TokLen);
339}
340
341/// getSourceLocation - Return a source location identifier for the specified
342/// offset in the current file.
343SourceLocation Lexer::getSourceLocation(const char *Loc,
344                                        unsigned TokLen) const {
345  assert(Loc >= BufferStart && Loc <= BufferEnd &&
346         "Location out of range for this buffer!");
347
348  // In the normal case, we're just lexing from a simple file buffer, return
349  // the file id from FileLoc with the offset specified.
350  unsigned CharNo = Loc-BufferStart;
351  if (FileLoc.isFileID())
352    return FileLoc.getFileLocWithOffset(CharNo);
353
354  // Otherwise, this is the _Pragma lexer case, which pretends that all of the
355  // tokens are lexed from where the _Pragma was defined.
356  assert(PP && "This doesn't work on raw lexers");
357  return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen);
358}
359
360/// Diag - Forwarding function for diagnostics.  This translate a source
361/// position in the current buffer into a SourceLocation object for rendering.
362DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const {
363  return PP->Diag(getSourceLocation(Loc), DiagID);
364}
365
366//===----------------------------------------------------------------------===//
367// Trigraph and Escaped Newline Handling Code.
368//===----------------------------------------------------------------------===//
369
/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
static char GetTrigraphCharForLetter(char Letter) {
  // "??X" decodes to the character at the same index in Replacements as X has
  // in Letters.
  static const char Letters[]      = "=)(!'>/<-";
  static const char Replacements[] = "#][|^}\\{~";
  for (unsigned i = 0; Letters[i] != 0; ++i)
    if (Letters[i] == Letter)
      return Replacements[i];
  return 0;
}
386
387/// DecodeTrigraphChar - If the specified character is a legal trigraph when
388/// prefixed with ??, emit a trigraph warning.  If trigraphs are enabled,
389/// return the result character.  Finally, emit a warning about trigraph use
390/// whether trigraphs are enabled or not.
391static char DecodeTrigraphChar(const char *CP, Lexer *L) {
392  char Res = GetTrigraphCharForLetter(*CP);
393  if (!Res || !L) return Res;
394
395  if (!L->getFeatures().Trigraphs) {
396    if (!L->isLexingRawMode())
397      L->Diag(CP-2, diag::trigraph_ignored);
398    return 0;
399  }
400
401  if (!L->isLexingRawMode())
402    L->Diag(CP-2, diag::trigraph_converted) << std::string()+Res;
403  return Res;
404}
405
/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
/// get its size, and return it.  This is tricky in several cases:
///   1. If currently at the start of a trigraph, we warn about the trigraph,
///      then either return the trigraph (skipping 3 chars) or the '?',
///      depending on whether trigraphs are enabled or not.
///   2. If this is an escaped newline (potentially with whitespace between
///      the backslash and newline), implicitly skip the newline and return
///      the char after it.
///   3. If this is a UCN, return it.  FIXME: C++ UCN's?
///
/// This handles the slow/uncommon case of the getCharAndSize method.  Here we
/// know that we can accumulate into Size, and that we have already incremented
/// Ptr by Size bytes.
///
/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
/// be updated to match.
///
char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
                               Token *Tok) {
  // If we have a slash, look for an escaped newline.
  if (Ptr[0] == '\\') {
    ++Size;
    ++Ptr;
Slash:
    // Also reached via goto when a trigraph decodes to '\\' below.
    // Common case, backslash-char where the char is not whitespace.
    if (!isWhitespace(Ptr[0])) return '\\';

    // See if we have optional whitespace characters followed by a newline.
    {
      unsigned SizeTmp = 0;
      do {
        ++SizeTmp;
        if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {
          // Remember that this token needs to be cleaned.
          if (Tok) Tok->setFlag(Token::NeedsCleaning);

          // Warn if there was whitespace between the backslash and newline.
          // Only warn when forming a real token (Tok non-null) and not in
          // raw mode.
          if (SizeTmp != 1 && Tok && !isLexingRawMode())
            Diag(Ptr, diag::backslash_newline_space);

          // If this is a \r\n or \n\r, skip the newlines.
          if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
              Ptr[SizeTmp-1] != Ptr[SizeTmp])
            ++SizeTmp;

          // Found backslash<whitespace><newline>.  Parse the char after it.
          Size += SizeTmp;
          Ptr  += SizeTmp;
          // Use slow version to accumulate a correct size field.
          return getCharAndSizeSlow(Ptr, Size, Tok);
        }
      } while (isWhitespace(Ptr[SizeTmp]));
    }

    // Otherwise, this is not an escaped newline, just return the slash.
    return '\\';
  }

  // If this is a trigraph, process it.
  if (Ptr[0] == '?' && Ptr[1] == '?') {
    // If this is actually a legal trigraph (not something like "??x"), emit
    // a trigraph warning.  If so, and if trigraphs are enabled, return it.
    // A null Lexer is passed when Tok is null so no diagnostics are emitted.
    if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
      // Remember that this token needs to be cleaned.
      if (Tok) Tok->setFlag(Token::NeedsCleaning);

      Ptr += 3;
      Size += 3;
      // A trigraph may decode to '\\', which can itself begin an escaped
      // newline; re-run the slash logic.
      if (C == '\\') goto Slash;
      return C;
    }
  }

  // If this is neither, return a single character.
  ++Size;
  return *Ptr;
}
483
484
/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
/// getCharAndSizeNoWarn method.  Here we know that we can accumulate into
/// Size, and that we have already incremented Ptr by Size bytes.  Unlike
/// getCharAndSizeSlow, this never emits diagnostics or sets token flags, so
/// it is usable as a static method without a Lexer instance.
///
/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
/// be updated to match.
char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
                                     const LangOptions &Features) {
  // If we have a slash, look for an escaped newline.
  if (Ptr[0] == '\\') {
    ++Size;
    ++Ptr;
Slash:
    // Also reached via goto when a trigraph decodes to '\\' below.
    // Common case, backslash-char where the char is not whitespace.
    if (!isWhitespace(Ptr[0])) return '\\';

    // See if we have optional whitespace characters followed by a newline.
    {
      unsigned SizeTmp = 0;
      do {
        ++SizeTmp;
        if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {

          // If this is a \r\n or \n\r, skip the newlines.
          if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
              Ptr[SizeTmp-1] != Ptr[SizeTmp])
            ++SizeTmp;

          // Found backslash<whitespace><newline>.  Parse the char after it.
          Size += SizeTmp;
          Ptr  += SizeTmp;

          // Use slow version to accumulate a correct size field.
          return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
        }
      } while (isWhitespace(Ptr[SizeTmp]));
    }

    // Otherwise, this is not an escaped newline, just return the slash.
    return '\\';
  }

  // If this is a trigraph, process it.
  if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
    // If this is actually a legal trigraph (not something like "??x"), return
    // it.
    if (char C = GetTrigraphCharForLetter(Ptr[2])) {
      Ptr += 3;
      Size += 3;
      // A trigraph may decode to '\\', which can itself begin an escaped
      // newline; re-run the slash logic.
      if (C == '\\') goto Slash;
      return C;
    }
  }

  // If this is neither, return a single character.
  ++Size;
  return *Ptr;
}
543
544//===----------------------------------------------------------------------===//
545// Helper methods for lexing.
546//===----------------------------------------------------------------------===//
547
/// LexIdentifier - Lex the rest of an identifier whose first character has
/// already been consumed (BufferPtr points at the token start, CurPtr just
/// past the first char).  Forms an identifier token, and outside raw mode
/// resolves it via the identifier table (turning keywords into their token
/// kinds) and hands it to the preprocessor for possible macro expansion.
void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
  // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
  unsigned Size;
  unsigned char C = *CurPtr++;
  while (isIdentifierBody(C)) {
    C = *CurPtr++;
  }
  --CurPtr;   // Back up over the skipped character.

  // Fast path, no $,\,? in identifier found.  '\' might be an escaped newline
  // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
  // FIXME: UCNs.
  if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
FinishIdentifier:
    // Reached directly on the fast path, and via goto from the slow path
    // below once the identifier's end is found.
    const char *IdStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, tok::identifier);

    // If we are in raw mode, return this identifier raw.  There is no need to
    // look up identifier information or attempt to macro expand it.
    if (LexingRawMode) return;

    // Fill in Result.IdentifierInfo, looking up the identifier in the
    // identifier table.
    IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart);

    // Change the kind of this identifier to the appropriate token kind, e.g.
    // turning "for" into a keyword.
    Result.setKind(II->getTokenID());

    // Finally, now that we know we have an identifier, pass this off to the
    // preprocessor, which may macro expand it or something.
    if (II->isHandleIdentifierCase())
      PP->HandleIdentifier(Result);
    return;
  }

  // Otherwise, $,\,? in identifier found.  Enter slower path.
  // This path uses getCharAndSize/ConsumeChar so that trigraphs and escaped
  // newlines inside the identifier are decoded correctly.

  C = getCharAndSize(CurPtr, Size);
  while (1) {
    if (C == '$') {
      // If we hit a $ and they are not supported in identifiers, we are done.
      if (!Features.DollarIdents) goto FinishIdentifier;

      // Otherwise, emit a diagnostic and continue.
      if (!isLexingRawMode())
        Diag(CurPtr, diag::ext_dollar_in_identifier);
      CurPtr = ConsumeChar(CurPtr, Size, Result);
      C = getCharAndSize(CurPtr, Size);
      continue;
    } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
      // Found end of identifier.
      goto FinishIdentifier;
    }

    // Otherwise, this character is good, consume it.
    CurPtr = ConsumeChar(CurPtr, Size, Result);

    // Consume any further plain identifier-body characters before rechecking
    // for '$' or end of identifier.
    C = getCharAndSize(CurPtr, Size);
    while (isIdentifierBody(C)) { // FIXME: UCNs.
      CurPtr = ConsumeChar(CurPtr, Size, Result);
      C = getCharAndSize(CurPtr, Size);
    }
  }
}
613
614
615/// LexNumericConstant - Lex the remainder of a integer or floating point
616/// constant. From[-1] is the first character lexed.  Return the end of the
617/// constant.
618void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
619  unsigned Size;
620  char C = getCharAndSize(CurPtr, Size);
621  char PrevCh = 0;
622  while (isNumberBody(C)) { // FIXME: UCNs?
623    CurPtr = ConsumeChar(CurPtr, Size, Result);
624    PrevCh = C;
625    C = getCharAndSize(CurPtr, Size);
626  }
627
628  // If we fell out, check for a sign, due to 1e+12.  If we have one, continue.
629  if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e'))
630    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
631
632  // If we have a hex FP constant, continue.
633  if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p') &&
634      (Features.HexFloats || !Features.NoExtensions))
635    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
636
637  // Update the location of token as well as BufferPtr.
638  const char *TokStart = BufferPtr;
639  FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
640  Result.setLiteralData(TokStart);
641}
642
643/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
644/// either " or L".
645void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
646  const char *NulCharacter = 0; // Does this string contain the \0 character?
647
648  char C = getAndAdvanceChar(CurPtr, Result);
649  while (C != '"') {
650    // Skip escaped characters.
651    if (C == '\\') {
652      // Skip the escaped character.
653      C = getAndAdvanceChar(CurPtr, Result);
654    } else if (C == '\n' || C == '\r' ||             // Newline.
655               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
656      if (!isLexingRawMode())
657        Diag(BufferPtr, diag::err_unterminated_string);
658      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
659      return;
660    } else if (C == 0) {
661      NulCharacter = CurPtr-1;
662    }
663    C = getAndAdvanceChar(CurPtr, Result);
664  }
665
666  // If a nul character existed in the string, warn about it.
667  if (NulCharacter && !isLexingRawMode())
668    Diag(NulCharacter, diag::null_in_string);
669
670  // Update the location of the token as well as the BufferPtr instance var.
671  const char *TokStart = BufferPtr;
672  FormTokenWithChars(Result, CurPtr,
673                     Wide ? tok::wide_string_literal : tok::string_literal);
674  Result.setLiteralData(TokStart);
675}
676
677/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
678/// after having lexed the '<' character.  This is used for #include filenames.
679void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
680  const char *NulCharacter = 0; // Does this string contain the \0 character?
681
682  char C = getAndAdvanceChar(CurPtr, Result);
683  while (C != '>') {
684    // Skip escaped characters.
685    if (C == '\\') {
686      // Skip the escaped character.
687      C = getAndAdvanceChar(CurPtr, Result);
688    } else if (C == '\n' || C == '\r' ||             // Newline.
689               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
690      if (!isLexingRawMode())
691        Diag(BufferPtr, diag::err_unterminated_angled_string);
692      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
693      return;
694    } else if (C == 0) {
695      NulCharacter = CurPtr-1;
696    }
697    C = getAndAdvanceChar(CurPtr, Result);
698  }
699
700  // If a nul character existed in the string, warn about it.
701  if (NulCharacter && !isLexingRawMode())
702    Diag(NulCharacter, diag::null_in_string);
703
704  // Update the location of token as well as BufferPtr.
705  const char *TokStart = BufferPtr;
706  FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
707  Result.setLiteralData(TokStart);
708}
709
710
/// LexCharConstant - Lex the remainder of a character constant, after having
/// lexed either ' or L'.  Forms tok::char_constant on success; empty or
/// unterminated constants produce tok::unknown.
void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
  const char *NulCharacter = 0; // Does this character contain the \0 character?

  // Handle the common case of 'x' and '\y' efficiently.
  char C = getAndAdvanceChar(CurPtr, Result);
  if (C == '\'') {
    // Immediately-closing quote: '' is an empty character constant.
    if (!isLexingRawMode())
      Diag(BufferPtr, diag::err_empty_character);
    FormTokenWithChars(Result, CurPtr, tok::unknown);
    return;
  } else if (C == '\\') {
    // Skip the escaped character.
    // FIXME: UCN's.
    C = getAndAdvanceChar(CurPtr, Result);
  }

  // Fast path: a single (possibly escaped) character followed directly by
  // the closing quote.
  if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
    ++CurPtr;
  } else {
    // Fall back on generic code for embedded nulls, newlines, wide chars.
    do {
      // Skip escaped characters.
      if (C == '\\') {
        // Skip the escaped character.
        C = getAndAdvanceChar(CurPtr, Result);
      } else if (C == '\n' || C == '\r' ||               // Newline.
                 (C == 0 && CurPtr-1 == BufferEnd)) {    // End of file.
        // Constant never closed; diagnose and produce an unknown token.
        if (!isLexingRawMode())
          Diag(BufferPtr, diag::err_unterminated_char);
        FormTokenWithChars(Result, CurPtr-1, tok::unknown);
        return;
      } else if (C == 0) {
        NulCharacter = CurPtr-1;
      }
      C = getAndAdvanceChar(CurPtr, Result);
    } while (C != '\'');
  }

  // Warn about any embedded nul character.
  if (NulCharacter && !isLexingRawMode())
    Diag(NulCharacter, diag::null_in_char);

  // Update the location of token as well as BufferPtr.
  const char *TokStart = BufferPtr;
  FormTokenWithChars(Result, CurPtr, tok::char_constant);
  Result.setLiteralData(TokStart);
}
759
760/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
761/// Update BufferPtr to point to the next non-whitespace character and return.
762///
763/// This method forms a token and returns true if KeepWhitespaceMode is enabled.
764///
765bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
766  // Whitespace - Skip it, then return the token after the whitespace.
767  unsigned char Char = *CurPtr;  // Skip consequtive spaces efficiently.
768  while (1) {
769    // Skip horizontal whitespace very aggressively.
770    while (isHorizontalWhitespace(Char))
771      Char = *++CurPtr;
772
773    // Otherwise if we have something other than whitespace, we're done.
774    if (Char != '\n' && Char != '\r')
775      break;
776
777    if (ParsingPreprocessorDirective) {
778      // End of preprocessor directive line, let LexTokenInternal handle this.
779      BufferPtr = CurPtr;
780      return false;
781    }
782
783    // ok, but handle newline.
784    // The returned token is at the start of the line.
785    Result.setFlag(Token::StartOfLine);
786    // No leading whitespace seen so far.
787    Result.clearFlag(Token::LeadingSpace);
788    Char = *++CurPtr;
789  }
790
791  // If this isn't immediately after a newline, there is leading space.
792  char PrevChar = CurPtr[-1];
793  if (PrevChar != '\n' && PrevChar != '\r')
794    Result.setFlag(Token::LeadingSpace);
795
796  // If the client wants us to return whitespace, return it now.
797  if (isKeepWhitespaceMode()) {
798    FormTokenWithChars(Result, CurPtr, tok::unknown);
799    return true;
800  }
801
802  BufferPtr = CurPtr;
803  return false;
804}
805
/// SkipBCPLComment - We have just read the // characters from input.  Skip
/// until we find the newline character that terminates the comment.  Then
/// update BufferPtr and return.  If we're in KeepCommentMode, this will form
/// the token and return true.  Otherwise returns false so the caller lexes
/// the next token.
bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
  // If BCPL comments aren't explicitly enabled for this language, emit an
  // extension warning.
  if (!Features.BCPLComment && !isLexingRawMode()) {
    Diag(BufferPtr, diag::ext_bcpl_comment);

    // Mark them enabled so we only emit one warning for this translation
    // unit.
    Features.BCPLComment = true;
  }

  // Scan over the body of the comment.  The common case, when scanning, is that
  // the comment contains normal ascii characters with nothing interesting in
  // them.  As such, optimize for this case with the inner loop.
  char C;
  do {
    C = *CurPtr;
    // FIXME: Speedup BCPL comment lexing.  Just scan for a \n or \r character.
    // If we find a \n character, scan backwards, checking to see if it's an
    // escaped newline, like we do for block comments.

    // Skip over characters in the fast loop.  Stops on anything that needs
    // slow-path decoding (trigraphs, escaped newlines) or ends the comment.
    while (C != 0 &&                // Potentially EOF.
           C != '\\' &&             // Potentially escaped newline.
           C != '?' &&              // Potentially trigraph.
           C != '\n' && C != '\r')  // Newline or DOS-style newline.
      C = *++CurPtr;

    // If this is a newline, we're done.
    if (C == '\n' || C == '\r')
      break;  // Found the newline? Break out!

    // Otherwise, this is a hard case.  Fall back on getAndAdvanceChar to
    // properly decode the character.  Read it in raw mode to avoid emitting
    // diagnostics about things like trigraphs.  If we see an escaped newline,
    // we'll handle it below.
    const char *OldPtr = CurPtr;
    bool OldRawMode = isLexingRawMode();
    LexingRawMode = true;
    C = getAndAdvanceChar(CurPtr, Result);
    LexingRawMode = OldRawMode;

    // If we read multiple characters, and one of those characters was a \r or
    // \n, then we had an escaped newline within the comment.  Emit diagnostic
    // unless the next line is also a // comment.
    if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
      for (; OldPtr != CurPtr; ++OldPtr)
        if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
          // Okay, we found a // comment that ends in a newline, if the next
          // line is also a // comment, but has spaces, don't emit a diagnostic.
          if (isspace(C)) {
            const char *ForwardPtr = CurPtr;
            while (isspace(*ForwardPtr))  // Skip whitespace.
              ++ForwardPtr;
            if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
              break;
          }

          if (!isLexingRawMode())
            Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
          break;
        }
    }

    // CurPtr == BufferEnd+1 means getAndAdvanceChar consumed the sentinel
    // nul at end of buffer: back up onto it and stop.
    if (CurPtr == BufferEnd+1) { --CurPtr; break; }
  } while (C != '\n' && C != '\r');

  // Found but did not consume the newline.

  // If we are returning comments as tokens, return this comment as a token.
  if (inKeepCommentMode())
    return SaveBCPLComment(Result, CurPtr);

  // If we are inside a preprocessor directive and we see the end of line,
  // return immediately, so that the lexer can return this as an EOM token.
  if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
    BufferPtr = CurPtr;
    return false;
  }

  // Otherwise, eat the \n character.  We don't care if this is a \n\r or
  // \r\n sequence.  This is an efficiency hack (because we know the \n can't
  // contribute to another token), it isn't needed for correctness.  Note that
  // this is ok even in KeepWhitespaceMode, because we would have returned the
  // comment above in that mode.
  ++CurPtr;

  // The next returned token is at the start of the line.
  Result.setFlag(Token::StartOfLine);
  // No leading whitespace seen so far.
  Result.clearFlag(Token::LeadingSpace);
  BufferPtr = CurPtr;
  return false;
}
904
905/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
906/// an appropriate way and return it.
907bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
908  // If we're not in a preprocessor directive, just return the // comment
909  // directly.
910  FormTokenWithChars(Result, CurPtr, tok::comment);
911
912  if (!ParsingPreprocessorDirective)
913    return true;
914
915  // If this BCPL-style comment is in a macro definition, transmogrify it into
916  // a C-style block comment.
917  std::string Spelling = PP->getSpelling(Result);
918  assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
919  Spelling[1] = '*';   // Change prefix to "/*".
920  Spelling += "*/";    // add suffix.
921
922  Result.setKind(tok::comment);
923  PP->CreateString(&Spelling[0], Spelling.size(), Result,
924                   Result.getLocation());
925  return true;
926}
927
/// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified
/// newline character (either \n or \r) is part of an escaped newline sequence
/// that immediately follows a '*' (so the comment really ends at the '/' after
/// the newline).  Issue a diagnostic if so.  We know that the newline is
/// inside of a block comment.  CurPtr points at the newline; this walks
/// backwards from it.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
                                                  Lexer *L) {
  assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');

  // Back up off the newline.
  --CurPtr;

  // If this is a two-character newline sequence, skip the other character.
  if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
    // \n\n or \r\r -> not escaped newline.
    if (CurPtr[0] == CurPtr[1])
      return false;
    // \n\r or \r\n -> skip the newline.
    --CurPtr;
  }

  // If we have horizontal whitespace, skip over it.  We allow whitespace
  // between the slash and newline.
  // NOTE(review): nul bytes are skipped here as well — presumably treated as
  // whitespace-like filler before the newline; confirm against the lexer's
  // nul handling.
  bool HasSpace = false;
  while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
    --CurPtr;
    HasSpace = true;
  }

  // If we have a slash, we know this is an escaped newline.
  if (*CurPtr == '\\') {
    // Only an escaped newline directly after a '*' can end the comment.
    if (CurPtr[-1] != '*') return false;
  } else {
    // It isn't a slash, is it the ?? / trigraph (which spells '\\')?
    if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
        CurPtr[-3] != '*')
      return false;

    // This is the trigraph ending the comment.  Emit a stern warning!
    CurPtr -= 2;

    // If no trigraphs are enabled, warn that we ignored this trigraph and
    // ignore this * character.
    if (!L->getFeatures().Trigraphs) {
      if (!L->isLexingRawMode())
        L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
      return false;
    }
    if (!L->isLexingRawMode())
      L->Diag(CurPtr, diag::trigraph_ends_block_comment);
  }

  // Warn about having an escaped newline between the */ characters.
  if (!L->isLexingRawMode())
    L->Diag(CurPtr, diag::escaped_newline_block_comment_end);

  // If there was space between the backslash and newline, warn about it.
  if (HasSpace && !L->isLexingRawMode())
    L->Diag(CurPtr, diag::backslash_newline_space);

  return true;
}
988
989#ifdef __SSE2__
990#include <emmintrin.h>
991#elif __ALTIVEC__
992#include <altivec.h>
993#undef bool
994#endif
995
/// SkipBlockComment - We have just read the /* characters from input.  Read
/// until we find the */ characters that terminate the comment.  Note that we
/// don't bother decoding trigraphs or escaped newlines in block comments,
/// because they cannot cause the comment to end.  The only thing that can
/// happen is the comment could end with an escaped newline between the */ end
/// of comment.
///
/// If KeepCommentMode is enabled, this forms a token from the comment and
/// returns true.
bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
  // Scan one character past where we should, looking for a '/' character.  Once
  // we find it, check to see if it was preceded by a *.  This common
  // optimization helps people who like to put a lot of * characters in their
  // comments.

  // The first character we get with newlines and trigraphs skipped to handle
  // the degenerate /*/ case below correctly if the * has an escaped newline
  // after it.
  unsigned CharSize;
  unsigned char C = getCharAndSize(CurPtr, CharSize);
  CurPtr += CharSize;
  // A nul at BufferEnd+1 means we consumed the end-of-buffer sentinel: the
  // comment was never terminated.
  if (C == 0 && CurPtr == BufferEnd+1) {
    if (!isLexingRawMode())
      Diag(BufferPtr, diag::err_unterminated_block_comment);
    --CurPtr;

    // KeepWhitespaceMode should return this broken comment as a token.  Since
    // it isn't a well formed comment, just return it as an 'unknown' token.
    if (isKeepWhitespaceMode()) {
      FormTokenWithChars(Result, CurPtr, tok::unknown);
      return true;
    }

    BufferPtr = CurPtr;
    return false;
  }

  // Check to see if the first character after the '/*' is another /.  If so,
  // then this slash does not end the block comment, it is part of it.
  if (C == '/')
    C = *CurPtr++;

  while (1) {
    // Skip over all non-interesting characters until we find end of buffer or a
    // (probably ending) '/' character.  Only take the vectorized fast path
    // when there is enough runway before BufferEnd.
    if (CurPtr + 24 < BufferEnd) {
      // While not aligned to a 16-byte boundary.
      while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
        C = *CurPtr++;

      if (C == '/') goto FoundSlash;

#ifdef __SSE2__
      // Compare 16 bytes at a time against '/' until a slash is seen.
      __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/',
                                     '/', '/', '/', '/', '/', '/', '/', '/');
      while (CurPtr+16 <= BufferEnd &&
             _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0)
        CurPtr += 16;
#elif __ALTIVEC__
      __vector unsigned char Slashes = {
        '/', '/', '/', '/',  '/', '/', '/', '/',
        '/', '/', '/', '/',  '/', '/', '/', '/'
      };
      while (CurPtr+16 <= BufferEnd &&
             !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes))
        CurPtr += 16;
#else
      // Scan for '/' quickly.  Many block comments are very large.
      while (CurPtr[0] != '/' &&
             CurPtr[1] != '/' &&
             CurPtr[2] != '/' &&
             CurPtr[3] != '/' &&
             CurPtr+4 < BufferEnd) {
        CurPtr += 4;
      }
#endif

      // It has to be one of the bytes scanned, increment to it and read one.
      C = *CurPtr++;
    }

    // Loop to scan the remainder.
    while (C != '/' && C != '\0')
      C = *CurPtr++;

  FoundSlash:
    if (C == '/') {
      if (CurPtr[-2] == '*')  // We found the final */.  We're done!
        break;

      // A newline right before the '/' may hide "*\<newline>/" — check for an
      // escaped newline between the '*' and '/'.
      if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
        if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
          // We found the final */, though it had an escaped newline between the
          // * and /.  We're done!
          break;
        }
      }
      if (CurPtr[0] == '*' && CurPtr[1] != '/') {
        // If this is a /* inside of the comment, emit a warning.  Don't do this
        // if this is a /*/, which will end the comment.  This misses cases with
        // embedded escaped newlines, but oh well.
        if (!isLexingRawMode())
          Diag(CurPtr-1, diag::warn_nested_block_comment);
      }
    } else if (C == 0 && CurPtr == BufferEnd+1) {
      // Consumed the end-of-buffer sentinel: unterminated comment.
      if (!isLexingRawMode())
        Diag(BufferPtr, diag::err_unterminated_block_comment);
      // Note: the user probably forgot a */.  We could continue immediately
      // after the /*, but this would involve lexing a lot of what really is the
      // comment, which surely would confuse the parser.
      --CurPtr;

      // KeepWhitespaceMode should return this broken comment as a token.  Since
      // it isn't a well formed comment, just return it as an 'unknown' token.
      if (isKeepWhitespaceMode()) {
        FormTokenWithChars(Result, CurPtr, tok::unknown);
        return true;
      }

      BufferPtr = CurPtr;
      return false;
    }
    C = *CurPtr++;
  }

  // If we are returning comments as tokens, return this comment as a token.
  if (inKeepCommentMode()) {
    FormTokenWithChars(Result, CurPtr, tok::comment);
    return true;
  }

  // It is common for the tokens immediately after a /**/ comment to be
  // whitespace.  Instead of going through the big switch, handle it
  // efficiently now.  This is safe even in KeepWhitespaceMode because we would
  // have already returned above with the comment as a token.
  if (isHorizontalWhitespace(*CurPtr)) {
    Result.setFlag(Token::LeadingSpace);
    SkipWhitespace(Result, CurPtr+1);
    return false;
  }

  // Otherwise, just return so that the next character will be lexed as a token.
  BufferPtr = CurPtr;
  Result.setFlag(Token::LeadingSpace);
  return false;
}
1142
1143//===----------------------------------------------------------------------===//
1144// Primary Lexing Entry Points
1145//===----------------------------------------------------------------------===//
1146
1147/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
1148/// uninterpreted string.  This switches the lexer out of directive mode.
1149std::string Lexer::ReadToEndOfLine() {
1150  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
1151         "Must be in a preprocessing directive!");
1152  std::string Result;
1153  Token Tmp;
1154
1155  // CurPtr - Cache BufferPtr in an automatic variable.
1156  const char *CurPtr = BufferPtr;
1157  while (1) {
1158    char Char = getAndAdvanceChar(CurPtr, Tmp);
1159    switch (Char) {
1160    default:
1161      Result += Char;
1162      break;
1163    case 0:  // Null.
1164      // Found end of file?
1165      if (CurPtr-1 != BufferEnd) {
1166        // Nope, normal character, continue.
1167        Result += Char;
1168        break;
1169      }
1170      // FALL THROUGH.
1171    case '\r':
1172    case '\n':
1173      // Okay, we found the end of the line. First, back up past the \0, \r, \n.
1174      assert(CurPtr[-1] == Char && "Trigraphs for newline?");
1175      BufferPtr = CurPtr-1;
1176
1177      // Next, lex the character, which should handle the EOM transition.
1178      Lex(Tmp);
1179      assert(Tmp.is(tok::eom) && "Unexpected token!");
1180
1181      // Finally, we're done, return the string we found.
1182      return Result;
1183    }
1184  }
1185}
1186
/// LexEndOfFile - CurPtr points to the end of this file.  Handle this
/// condition, reporting diagnostics and handling other edge cases as required.
/// This returns true if Result contains a token, false if PP.Lex should be
/// called again.
bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
  // If we hit the end of the file while parsing a preprocessor directive,
  // end the preprocessor directive first.  The next token returned will
  // then be the end of file.
  if (ParsingPreprocessorDirective) {
    // Done parsing the "line".
    ParsingPreprocessorDirective = false;
    // Update the location of token as well as BufferPtr.
    FormTokenWithChars(Result, CurPtr, tok::eom);

    // Restore comment saving mode, in case it was disabled for directive.
    SetCommentRetentionState(PP->getCommentRetentionState());
    return true;  // Have a token.
  }

  // If we are in raw mode, return this event as an EOF token.  Let the caller
  // that put us in raw mode handle the event.
  if (isLexingRawMode()) {
    Result.startToken();
    BufferPtr = BufferEnd;
    FormTokenWithChars(Result, BufferEnd, tok::eof);
    return true;
  }

  // Otherwise, issue diagnostics for unterminated #if and missing newline.

  // If we are in a #if directive, emit an error for each conditional still
  // open on the stack.
  while (!ConditionalStack.empty()) {
    PP->Diag(ConditionalStack.back().IfLoc,
             diag::err_pp_unterminated_conditional);
    ConditionalStack.pop_back();
  }

  // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
  // a pedwarn.
  if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r'))
    Diag(BufferEnd, diag::ext_no_newline_eof);

  BufferPtr = CurPtr;

  // Finally, let the preprocessor handle this.  NOTE(review): per the caller
  // in LexTokenInternal, this call may pop the include stack and delete this
  // lexer — nothing may touch members after it.
  return PP->HandleEndOfFile(Result);
}
1234
1235/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
1236/// the specified lexer will return a tok::l_paren token, 0 if it is something
1237/// else and 2 if there are no more tokens in the buffer controlled by the
1238/// lexer.
1239unsigned Lexer::isNextPPTokenLParen() {
1240  assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
1241
1242  // Switch to 'skipping' mode.  This will ensure that we can lex a token
1243  // without emitting diagnostics, disables macro expansion, and will cause EOF
1244  // to return an EOF token instead of popping the include stack.
1245  LexingRawMode = true;
1246
1247  // Save state that can be changed while lexing so that we can restore it.
1248  const char *TmpBufferPtr = BufferPtr;
1249
1250  Token Tok;
1251  Tok.startToken();
1252  LexTokenInternal(Tok);
1253
1254  // Restore state that may have changed.
1255  BufferPtr = TmpBufferPtr;
1256
1257  // Restore the lexer back to non-skipping mode.
1258  LexingRawMode = false;
1259
1260  if (Tok.is(tok::eof))
1261    return 2;
1262  return Tok.is(tok::l_paren);
1263}
1264
1265
1266/// LexTokenInternal - This implements a simple C family lexer.  It is an
1267/// extremely performance critical piece of code.  This assumes that the buffer
1268/// has a null character at the end of the file.  Return true if an error
1269/// occurred and compilation should terminate, false if normal.  This returns a
1270/// preprocessing token, not a normal token, as such, it is an internal
1271/// interface.  It assumes that the Flags of result have been cleared before
1272/// calling this.
1273void Lexer::LexTokenInternal(Token &Result) {
1274LexNextToken:
1275  // New token, can't need cleaning yet.
1276  Result.clearFlag(Token::NeedsCleaning);
1277  Result.setIdentifierInfo(0);
1278
1279  // CurPtr - Cache BufferPtr in an automatic variable.
1280  const char *CurPtr = BufferPtr;
1281
1282  // Small amounts of horizontal whitespace is very common between tokens.
1283  if ((*CurPtr == ' ') || (*CurPtr == '\t')) {
1284    ++CurPtr;
1285    while ((*CurPtr == ' ') || (*CurPtr == '\t'))
1286      ++CurPtr;
1287
1288    // If we are keeping whitespace and other tokens, just return what we just
1289    // skipped.  The next lexer invocation will return the token after the
1290    // whitespace.
1291    if (isKeepWhitespaceMode()) {
1292      FormTokenWithChars(Result, CurPtr, tok::unknown);
1293      return;
1294    }
1295
1296    BufferPtr = CurPtr;
1297    Result.setFlag(Token::LeadingSpace);
1298  }
1299
1300  unsigned SizeTmp, SizeTmp2;   // Temporaries for use in cases below.
1301
1302  // Read a character, advancing over it.
1303  char Char = getAndAdvanceChar(CurPtr, Result);
1304  tok::TokenKind Kind;
1305
1306  switch (Char) {
1307  case 0:  // Null.
1308    // Found end of file?
1309    if (CurPtr-1 == BufferEnd) {
1310      // Read the PP instance variable into an automatic variable, because
1311      // LexEndOfFile will often delete 'this'.
1312      Preprocessor *PPCache = PP;
1313      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
1314        return;   // Got a token to return.
1315      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
1316      return PPCache->Lex(Result);
1317    }
1318
1319    if (!isLexingRawMode())
1320      Diag(CurPtr-1, diag::null_in_file);
1321    Result.setFlag(Token::LeadingSpace);
1322    if (SkipWhitespace(Result, CurPtr))
1323      return; // KeepWhitespaceMode
1324
1325    goto LexNextToken;   // GCC isn't tail call eliminating.
1326  case '\n':
1327  case '\r':
1328    // If we are inside a preprocessor directive and we see the end of line,
1329    // we know we are done with the directive, so return an EOM token.
1330    if (ParsingPreprocessorDirective) {
1331      // Done parsing the "line".
1332      ParsingPreprocessorDirective = false;
1333
1334      // Restore comment saving mode, in case it was disabled for directive.
1335      SetCommentRetentionState(PP->getCommentRetentionState());
1336
1337      // Since we consumed a newline, we are back at the start of a line.
1338      IsAtStartOfLine = true;
1339
1340      Kind = tok::eom;
1341      break;
1342    }
1343    // The returned token is at the start of the line.
1344    Result.setFlag(Token::StartOfLine);
1345    // No leading whitespace seen so far.
1346    Result.clearFlag(Token::LeadingSpace);
1347
1348    if (SkipWhitespace(Result, CurPtr))
1349      return; // KeepWhitespaceMode
1350    goto LexNextToken;   // GCC isn't tail call eliminating.
1351  case ' ':
1352  case '\t':
1353  case '\f':
1354  case '\v':
1355  SkipHorizontalWhitespace:
1356    Result.setFlag(Token::LeadingSpace);
1357    if (SkipWhitespace(Result, CurPtr))
1358      return; // KeepWhitespaceMode
1359
1360  SkipIgnoredUnits:
1361    CurPtr = BufferPtr;
1362
1363    // If the next token is obviously a // or /* */ comment, skip it efficiently
1364    // too (without going through the big switch stmt).
1365    if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
1366        Features.BCPLComment) {
1367      SkipBCPLComment(Result, CurPtr+2);
1368      goto SkipIgnoredUnits;
1369    } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
1370      SkipBlockComment(Result, CurPtr+2);
1371      goto SkipIgnoredUnits;
1372    } else if (isHorizontalWhitespace(*CurPtr)) {
1373      goto SkipHorizontalWhitespace;
1374    }
1375    goto LexNextToken;   // GCC isn't tail call eliminating.
1376
1377  // C99 6.4.4.1: Integer Constants.
1378  // C99 6.4.4.2: Floating Constants.
1379  case '0': case '1': case '2': case '3': case '4':
1380  case '5': case '6': case '7': case '8': case '9':
1381    // Notify MIOpt that we read a non-whitespace/non-comment token.
1382    MIOpt.ReadToken();
1383    return LexNumericConstant(Result, CurPtr);
1384
1385  case 'L':   // Identifier (Loony) or wide literal (L'x' or L"xyz").
1386    // Notify MIOpt that we read a non-whitespace/non-comment token.
1387    MIOpt.ReadToken();
1388    Char = getCharAndSize(CurPtr, SizeTmp);
1389
1390    // Wide string literal.
1391    if (Char == '"')
1392      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
1393                              true);
1394
1395    // Wide character constant.
1396    if (Char == '\'')
1397      return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
1398    // FALL THROUGH, treating L like the start of an identifier.
1399
1400  // C99 6.4.2: Identifiers.
1401  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1402  case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N':
1403  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1404  case 'V': case 'W': case 'X': case 'Y': case 'Z':
1405  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1406  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1407  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1408  case 'v': case 'w': case 'x': case 'y': case 'z':
1409  case '_':
1410    // Notify MIOpt that we read a non-whitespace/non-comment token.
1411    MIOpt.ReadToken();
1412    return LexIdentifier(Result, CurPtr);
1413
1414  case '$':   // $ in identifiers.
1415    if (Features.DollarIdents) {
1416      if (!isLexingRawMode())
1417        Diag(CurPtr-1, diag::ext_dollar_in_identifier);
1418      // Notify MIOpt that we read a non-whitespace/non-comment token.
1419      MIOpt.ReadToken();
1420      return LexIdentifier(Result, CurPtr);
1421    }
1422
1423    Kind = tok::unknown;
1424    break;
1425
1426  // C99 6.4.4: Character Constants.
1427  case '\'':
1428    // Notify MIOpt that we read a non-whitespace/non-comment token.
1429    MIOpt.ReadToken();
1430    return LexCharConstant(Result, CurPtr);
1431
1432  // C99 6.4.5: String Literals.
1433  case '"':
1434    // Notify MIOpt that we read a non-whitespace/non-comment token.
1435    MIOpt.ReadToken();
1436    return LexStringLiteral(Result, CurPtr, false);
1437
1438  // C99 6.4.6: Punctuators.
1439  case '?':
1440    Kind = tok::question;
1441    break;
1442  case '[':
1443    Kind = tok::l_square;
1444    break;
1445  case ']':
1446    Kind = tok::r_square;
1447    break;
1448  case '(':
1449    Kind = tok::l_paren;
1450    break;
1451  case ')':
1452    Kind = tok::r_paren;
1453    break;
1454  case '{':
1455    Kind = tok::l_brace;
1456    break;
1457  case '}':
1458    Kind = tok::r_brace;
1459    break;
1460  case '.':
1461    Char = getCharAndSize(CurPtr, SizeTmp);
1462    if (Char >= '0' && Char <= '9') {
1463      // Notify MIOpt that we read a non-whitespace/non-comment token.
1464      MIOpt.ReadToken();
1465
1466      return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
1467    } else if (Features.CPlusPlus && Char == '*') {
1468      Kind = tok::periodstar;
1469      CurPtr += SizeTmp;
1470    } else if (Char == '.' &&
1471               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
1472      Kind = tok::ellipsis;
1473      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
1474                           SizeTmp2, Result);
1475    } else {
1476      Kind = tok::period;
1477    }
1478    break;
1479  case '&':
1480    Char = getCharAndSize(CurPtr, SizeTmp);
1481    if (Char == '&') {
1482      Kind = tok::ampamp;
1483      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1484    } else if (Char == '=') {
1485      Kind = tok::ampequal;
1486      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1487    } else {
1488      Kind = tok::amp;
1489    }
1490    break;
1491  case '*':
1492    if (getCharAndSize(CurPtr, SizeTmp) == '=') {
1493      Kind = tok::starequal;
1494      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1495    } else {
1496      Kind = tok::star;
1497    }
1498    break;
1499  case '+':
1500    Char = getCharAndSize(CurPtr, SizeTmp);
1501    if (Char == '+') {
1502      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1503      Kind = tok::plusplus;
1504    } else if (Char == '=') {
1505      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1506      Kind = tok::plusequal;
1507    } else {
1508      Kind = tok::plus;
1509    }
1510    break;
1511  case '-':
1512    Char = getCharAndSize(CurPtr, SizeTmp);
1513    if (Char == '-') {      // --
1514      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1515      Kind = tok::minusminus;
1516    } else if (Char == '>' && Features.CPlusPlus &&
1517               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {  // C++ ->*
1518      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
1519                           SizeTmp2, Result);
1520      Kind = tok::arrowstar;
1521    } else if (Char == '>') {   // ->
1522      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1523      Kind = tok::arrow;
1524    } else if (Char == '=') {   // -=
1525      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1526      Kind = tok::minusequal;
1527    } else {
1528      Kind = tok::minus;
1529    }
1530    break;
1531  case '~':
1532    Kind = tok::tilde;
1533    break;
1534  case '!':
1535    if (getCharAndSize(CurPtr, SizeTmp) == '=') {
1536      Kind = tok::exclaimequal;
1537      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1538    } else {
1539      Kind = tok::exclaim;
1540    }
1541    break;
1542  case '/':
1543    // 6.4.9: Comments
1544    Char = getCharAndSize(CurPtr, SizeTmp);
1545    if (Char == '/') {         // BCPL comment.
1546      // Even if BCPL comments are disabled (e.g. in C89 mode), we generally
1547      // want to lex this as a comment.  There is one problem with this though,
1548      // that in one particular corner case, this can change the behavior of the
1549      // resultant program.  For example, In  "foo //**/ bar", C89 would lex
1550      // this as "foo / bar" and langauges with BCPL comments would lex it as
1551      // "foo".  Check to see if the character after the second slash is a '*'.
1552      // If so, we will lex that as a "/" instead of the start of a comment.
1553      if (Features.BCPLComment ||
1554          getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') {
1555        if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
1556          return; // KeepCommentMode
1557
1558        // It is common for the tokens immediately after a // comment to be
1559        // whitespace (indentation for the next line).  Instead of going through
1560        // the big switch, handle it efficiently now.
1561        goto SkipIgnoredUnits;
1562      }
1563    }
1564
1565    if (Char == '*') {  // /**/ comment.
1566      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
1567        return; // KeepCommentMode
1568      goto LexNextToken;   // GCC isn't tail call eliminating.
1569    }
1570
1571    if (Char == '=') {
1572      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1573      Kind = tok::slashequal;
1574    } else {
1575      Kind = tok::slash;
1576    }
1577    break;
1578  case '%':
1579    Char = getCharAndSize(CurPtr, SizeTmp);
1580    if (Char == '=') {
1581      Kind = tok::percentequal;
1582      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1583    } else if (Features.Digraphs && Char == '>') {
1584      Kind = tok::r_brace;                             // '%>' -> '}'
1585      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1586    } else if (Features.Digraphs && Char == ':') {
1587      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1588      Char = getCharAndSize(CurPtr, SizeTmp);
1589      if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
1590        Kind = tok::hashhash;                          // '%:%:' -> '##'
1591        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
1592                             SizeTmp2, Result);
1593      } else if (Char == '@' && Features.Microsoft) {  // %:@ -> #@ -> Charize
1594        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1595        if (!isLexingRawMode())
1596          Diag(BufferPtr, diag::charize_microsoft_ext);
1597        Kind = tok::hashat;
1598      } else {                                         // '%:' -> '#'
1599        // We parsed a # character.  If this occurs at the start of the line,
1600        // it's actually the start of a preprocessing directive.  Callback to
1601        // the preprocessor to handle it.
1602        // FIXME: -fpreprocessed mode??
1603        if (Result.isAtStartOfLine() && !LexingRawMode) {
1604          FormTokenWithChars(Result, CurPtr, tok::hash);
1605          PP->HandleDirective(Result);
1606
1607          // As an optimization, if the preprocessor didn't switch lexers, tail
1608          // recurse.
1609          if (PP->isCurrentLexer(this)) {
1610            // Start a new token. If this is a #include or something, the PP may
1611            // want us starting at the beginning of the line again.  If so, set
1612            // the StartOfLine flag.
1613            if (IsAtStartOfLine) {
1614              Result.setFlag(Token::StartOfLine);
1615              IsAtStartOfLine = false;
1616            }
1617            goto LexNextToken;   // GCC isn't tail call eliminating.
1618          }
1619
1620          return PP->Lex(Result);
1621        }
1622
1623        Kind = tok::hash;
1624      }
1625    } else {
1626      Kind = tok::percent;
1627    }
1628    break;
1629  case '<':
1630    Char = getCharAndSize(CurPtr, SizeTmp);
1631    if (ParsingFilename) {
1632      return LexAngledStringLiteral(Result, CurPtr+SizeTmp);
1633    } else if (Char == '<' &&
1634               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
1635      Kind = tok::lesslessequal;
1636      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
1637                           SizeTmp2, Result);
1638    } else if (Char == '<') {
1639      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1640      Kind = tok::lessless;
1641    } else if (Char == '=') {
1642      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1643      Kind = tok::lessequal;
1644    } else if (Features.Digraphs && Char == ':') {     // '<:' -> '['
1645      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1646      Kind = tok::l_square;
1647    } else if (Features.Digraphs && Char == '%') {     // '<%' -> '{'
1648      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1649      Kind = tok::l_brace;
1650    } else {
1651      Kind = tok::less;
1652    }
1653    break;
1654  case '>':
1655    Char = getCharAndSize(CurPtr, SizeTmp);
1656    if (Char == '=') {
1657      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1658      Kind = tok::greaterequal;
1659    } else if (Char == '>' &&
1660               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
1661      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
1662                           SizeTmp2, Result);
1663      Kind = tok::greatergreaterequal;
1664    } else if (Char == '>') {
1665      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1666      Kind = tok::greatergreater;
1667    } else {
1668      Kind = tok::greater;
1669    }
1670    break;
1671  case '^':
1672    Char = getCharAndSize(CurPtr, SizeTmp);
1673    if (Char == '=') {
1674      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1675      Kind = tok::caretequal;
1676    } else {
1677      Kind = tok::caret;
1678    }
1679    break;
1680  case '|':
1681    Char = getCharAndSize(CurPtr, SizeTmp);
1682    if (Char == '=') {
1683      Kind = tok::pipeequal;
1684      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1685    } else if (Char == '|') {
1686      Kind = tok::pipepipe;
1687      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1688    } else {
1689      Kind = tok::pipe;
1690    }
1691    break;
1692  case ':':
1693    Char = getCharAndSize(CurPtr, SizeTmp);
1694    if (Features.Digraphs && Char == '>') {
1695      Kind = tok::r_square; // ':>' -> ']'
1696      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1697    } else if (Features.CPlusPlus && Char == ':') {
1698      Kind = tok::coloncolon;
1699      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1700    } else {
1701      Kind = tok::colon;
1702    }
1703    break;
1704  case ';':
1705    Kind = tok::semi;
1706    break;
1707  case '=':
1708    Char = getCharAndSize(CurPtr, SizeTmp);
1709    if (Char == '=') {
1710      Kind = tok::equalequal;
1711      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1712    } else {
1713      Kind = tok::equal;
1714    }
1715    break;
1716  case ',':
1717    Kind = tok::comma;
1718    break;
1719  case '#':
1720    Char = getCharAndSize(CurPtr, SizeTmp);
1721    if (Char == '#') {
1722      Kind = tok::hashhash;
1723      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1724    } else if (Char == '@' && Features.Microsoft) {  // #@ -> Charize
1725      Kind = tok::hashat;
1726      if (!isLexingRawMode())
1727        Diag(BufferPtr, diag::charize_microsoft_ext);
1728      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
1729    } else {
1730      // We parsed a # character.  If this occurs at the start of the line,
1731      // it's actually the start of a preprocessing directive.  Callback to
1732      // the preprocessor to handle it.
1733      // FIXME: -fpreprocessed mode??
1734      if (Result.isAtStartOfLine() && !LexingRawMode) {
1735        FormTokenWithChars(Result, CurPtr, tok::hash);
1736        PP->HandleDirective(Result);
1737
1738        // As an optimization, if the preprocessor didn't switch lexers, tail
1739        // recurse.
1740        if (PP->isCurrentLexer(this)) {
1741          // Start a new token.  If this is a #include or something, the PP may
1742          // want us starting at the beginning of the line again.  If so, set
1743          // the StartOfLine flag.
1744          if (IsAtStartOfLine) {
1745            Result.setFlag(Token::StartOfLine);
1746            IsAtStartOfLine = false;
1747          }
1748          goto LexNextToken;   // GCC isn't tail call eliminating.
1749        }
1750        return PP->Lex(Result);
1751      }
1752
1753      Kind = tok::hash;
1754    }
1755    break;
1756
1757  case '@':
1758    // Objective C support.
1759    if (CurPtr[-1] == '@' && Features.ObjC1)
1760      Kind = tok::at;
1761    else
1762      Kind = tok::unknown;
1763    break;
1764
1765  case '\\':
1766    // FIXME: UCN's.
1767    // FALL THROUGH.
1768  default:
1769    Kind = tok::unknown;
1770    break;
1771  }
1772
1773  // Notify MIOpt that we read a non-whitespace/non-comment token.
1774  MIOpt.ReadToken();
1775
1776  // Update the location of token as well as BufferPtr.
1777  FormTokenWithChars(Result, CurPtr, Kind);
1778}
1779