Lexer.h revision 0af574270d3be2b0e73a3379dfaa633746f8fc6f
131d157ae1ac2cd9c787dc3c1d28e64c682803844Jia Liu//===--- Lexer.h - C Language Family Lexer ----------------------*- C++ -*-===// 2b5f662fa0314f7e7e690aae8ebff7136cc3a5ab0Misha Brukman// 3f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman// The LLVM Compiler Infrastructure 4f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman// 54ee451de366474b9c228b4e5fa573795a715216dChris Lattner// This file is distributed under the University of Illinois Open Source 64ee451de366474b9c228b4e5fa573795a715216dChris Lattner// License. See LICENSE.TXT for details. 7b5f662fa0314f7e7e690aae8ebff7136cc3a5ab0Misha Brukman// 8f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman//===----------------------------------------------------------------------===// 9f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman// 106f0d024a534af18d9e60b3ea757376cd8a3a980eDan Gohman// This file defines the Lexer interface. 116f0d024a534af18d9e60b3ea757376cd8a3a980eDan Gohman// 12f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman//===----------------------------------------------------------------------===// 13f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman 14f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman#ifndef LLVM_CLANG_LEXER_H 1579aa3417eb6f58d668aadfedf075240a41d35a26Craig Topper#define LLVM_CLANG_LEXER_H 162668959b8879097db368aec7d76c455260abc75bChris Lattner 17d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "clang/Lex/Token.h" 1826bd0d48a164c419462133270e3ec1c2401a34d7Chris Lattner#include "clang/Lex/MultipleIncludeOpt.h" 192f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey#include "clang/Basic/LangOptions.h" 20804e06704261f233111913a047ef7f7dec1b8725Chris Lattner#include "llvm/ADT/SmallVector.h" 21d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include <string> 22d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include <vector> 23d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include <cassert> 24d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth 25f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukmannamespace clang { 2644c3b9fdd416c79f4b67cde1aecfced5921efd81Jim Laskeyclass Diagnostic; 2784bc5427d6883f73cfeae3da640acd011d35c006Chris Lattnerclass SourceManager; 287194aaf738a1b89441635340403f1c5b06ae18efBill Wendlingclass Preprocessor; 290b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth 300b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth/// Lexer - This provides a simple interface that turns a text buffer into a 310b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth/// stream of tokens. This provides no support for file reading or buffering, 320b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth/// or buffering/seeking of tokens, only forward lexing is supported. It relies 33551ccae044b0ff658fe629dd67edd5ffe75d10e8Reid Spencer/// on the specified Preprocessor object to handle preprocessor directives, etc. 34551ccae044b0ff658fe629dd67edd5ffe75d10e8Reid Spencerclass Lexer { 35dac237e18209b697a8ba122d0ddd9cad4dfba1f8Torok Edwin //===--------------------------------------------------------------------===// 36ae232e7a1055033436370c0b3aecf054fa44d5e7Nate Begeman // Constant configuration values for this lexer. 37dac237e18209b697a8ba122d0ddd9cad4dfba1f8Torok Edwin const char *BufferStart; // Start of the buffer. 38d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth const char *BufferEnd; // End of the buffer. 39d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth SourceLocation FileLoc; // Location for start of file. 40d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth Preprocessor *PP; // Preprocessor object controlling lexing. 41d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth LangOptions Features; // Features enabled by this language (cache). 42f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman bool Is_PragmaLexer; // True if lexer for _Pragma handling. 4373f50d9bc3bd46cc0abeba9bb0d46977ba1aea42Evan Cheng 44dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines //===--------------------------------------------------------------------===// 45dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Context-specific lexing flags set by the preprocessor. 46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // 47dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 4873f50d9bc3bd46cc0abeba9bb0d46977ba1aea42Evan Cheng /// ParsingPreprocessorDirective - This is true when parsing #XXX. This turns 49a347f85dbeee37a7f2bb68df1a7d4cdfbb7b576dEvan Cheng /// '\n' into a tok::eom token. 50f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman bool ParsingPreprocessorDirective; 51fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel 52fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel /// ParsingFilename - True after #include: this turns <xx> into a 53fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel /// tok::angle_string_literal token. 54fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel bool ParsingFilename; 55fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel 56fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel /// LexingRawMode - True if in raw mode: This flag disables interpretation of 57fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel /// tokens and is a far faster mode to lex in than non-raw-mode. This flag: 58fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel /// 1. If EOF of the current lexer is found, the include stack isn't popped. 5980ada583f3b40ffb201e54cd57c42f9518039c9eBill Wendling /// 2. Identifier information is not looked up for identifier tokens. As an 600e6a052331f674dd70e28af41f654a7874405eabEvan Cheng /// effect of this, implicit macro expansion is naturally disabled. 610e6a052331f674dd70e28af41f654a7874405eabEvan Cheng /// 3. "#" tokens at the start of a line are treated as normal tokens, not 620e6a052331f674dd70e28af41f654a7874405eabEvan Cheng /// implicitly transformed by the lexer. 6380ada583f3b40ffb201e54cd57c42f9518039c9eBill Wendling /// 4. All diagnostic messages are disabled. 64b5f662fa0314f7e7e690aae8ebff7136cc3a5ab0Misha Brukman /// 5. No callbacks are made into the preprocessor. 65f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman /// 66f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman /// Note that in raw mode that the PP pointer may be null. 67f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman bool LexingRawMode; 68f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman 69f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman /// KeepCommentMode - The lexer can optionally keep C & BCPL-style comments, 70f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman /// and return them as tokens. This is used for -C and -CC modes. 711d9d7427c4a4e3c7bdcfd1f725447f355e509c20Nate Begeman bool KeepCommentMode; 729bc94276e796d644cb425a7c7d38cc44dbf4e9c1Bill Schmidt 7382d25148a7aab0b7e048ab9b774207b3766d1bbfBill Wendling //===--------------------------------------------------------------------===// 7482d25148a7aab0b7e048ab9b774207b3766d1bbfBill Wendling // Context that changes as the file is lexed. 7582d25148a7aab0b7e048ab9b774207b3766d1bbfBill Wendling // NOTE: any state that mutates when in raw mode must have save/restore code 7682d25148a7aab0b7e048ab9b774207b3766d1bbfBill Wendling // in Lexer::isNextPPTokenLParen. 7782d25148a7aab0b7e048ab9b774207b3766d1bbfBill Wendling 7882d25148a7aab0b7e048ab9b774207b3766d1bbfBill Wendling // BufferPtr - Current pointer into the buffer. This is the next character 799ad0f4907b3ba0916a8b6cdb95d298d2ddb7d405Hal Finkel // to be lexed. 80f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman const char *BufferPtr; 81f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman 82770bcc7b15adbc978800db70dbb1c3c22913b52cEvan Cheng // IsAtStartOfLine - True if the next lexed token should get the "start of 83770bcc7b15adbc978800db70dbb1c3c22913b52cEvan Cheng // line" flag set on it. 842cfd52c507bd5790457a171eb9bcb39019cc6860Chris Lattner bool IsAtStartOfLine; 85397fc4874efe9c17e737d4c5c50bd19dc3bf27f5Jakob Stoklund Olesen 86397fc4874efe9c17e737d4c5c50bd19dc3bf27f5Jakob Stoklund Olesen /// MIOpt - This is a state machine that detects the #ifndef-wrapping a file 87839b9096538f790a2bb060547df24703807cb83bHal Finkel /// idiom for the multiple-include optimization. 88839b9096538f790a2bb060547df24703807cb83bHal Finkel MultipleIncludeOpt MIOpt; 89a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel 90a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel /// ConditionalStack - Information about the set of #if/#ifdef/#ifndef blocks 91a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel /// we are currently in. 92a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel std::vector<PPConditionalInfo> ConditionalStack; 93a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel 94a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel Lexer(const Lexer&); // DO NOT IMPLEMENT 95770bcc7b15adbc978800db70dbb1c3c22913b52cEvan Cheng void operator=(const Lexer&); // DO NOT IMPLEMENT 96770bcc7b15adbc978800db70dbb1c3c22913b52cEvan Cheng friend class Preprocessor; 972cfd52c507bd5790457a171eb9bcb39019cc6860Chris Lattnerpublic: 98770bcc7b15adbc978800db70dbb1c3c22913b52cEvan Cheng 99770bcc7b15adbc978800db70dbb1c3c22913b52cEvan Cheng /// Lexer constructor - Create a new lexer object for the specified buffer 100dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// with the specified preprocessor managing the lexing process. This lexer 10164d80e3387f328d21cd9cc06464b5de7861e3f27Evan Cheng /// assumes that the associated file buffer and Preprocessor objects will 102e46137f498fa81a088f13d24c79242eed3ff45a7Roman Divacky /// outlive it, so it doesn't take ownership of either of them. 10375dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel Lexer(SourceLocation FileLoc, Preprocessor &PP, 10475dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel const char *BufStart = 0, const char *BufEnd = 0); 10575dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel 10675dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// Lexer constructor - Create a new raw lexer object. This object is only 10775dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// suitable for calls to 'LexRawToken'. This lexer assumes that the text 10875dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// range will outlive it, so it doesn't take ownership of it. 10975dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel Lexer(SourceLocation FileLoc, const LangOptions &Features, 11075dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel const char *BufStart, const char *BufEnd, 11175dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel const llvm::MemoryBuffer *FromFile = 0); 11275dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel 11375dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// getFeatures - Return the language features currently enabled. NOTE: this 11475dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// lexer modifies features as a file is parsed! 11575dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel const LangOptions &getFeatures() const { return Features; } 116e46137f498fa81a088f13d24c79242eed3ff45a7Roman Divacky 1176b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller /// getFileLoc - Return the File Location for the file we are lexing out of. 1186ffb4024d875570a01d6b8db900a0385a491403bCraig Topper /// The physical location encodes the location where the characters come from, 119e46137f498fa81a088f13d24c79242eed3ff45a7Roman Divacky /// the virtual location encodes where we should *claim* the characters came 1202a9ddfb903ae3baede7282348afae1f750905248Tilmann Scheller /// from. Currently this is only used by _Pragma handling. 12175dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel SourceLocation getFileLoc() const { return FileLoc; } 12275dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel 12375dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// Lex - Return the next token in the file. If this is the end of file, it 12475dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// return the tok::eof token. Return true if an error occurred and 12575dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// compilation should terminate, false if normal. This implicitly involves 12675dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// the preprocessor. 12775dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel void Lex(Token &Result) { 12875dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel // Start a new token. 12975dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel Result.startToken(); 13075dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel 13175dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel // NOTE, any changes here should also change code after calls to 13275dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel // Preprocessor::HandleDirective 13375dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel if (IsAtStartOfLine) { 1340f3ac8d8d4ce23eb2ae6f9d850f389250874eea5Evan Cheng Result.setFlag(Token::StartOfLine); 1350f3ac8d8d4ce23eb2ae6f9d850f389250874eea5Evan Cheng IsAtStartOfLine = false; 1367ee74a663a3b4d4ee6b55d23362f347ed1d390c2Hal Finkel } 1377ee74a663a3b4d4ee6b55d23362f347ed1d390c2Hal Finkel 1387ee74a663a3b4d4ee6b55d23362f347ed1d390c2Hal Finkel // Get a token. Note that this may delete the current lexer if the end of 1397ee74a663a3b4d4ee6b55d23362f347ed1d390c2Hal Finkel // file is reached. 1407ee74a663a3b4d4ee6b55d23362f347ed1d390c2Hal Finkel LexTokenInternal(Result); 141b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng } 142b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng 14316c29b5f285f375be53dabaa73e3e91107485fe4Anton Korobeynikov /// LexFromRawLexer - Lex a token from a designated raw lexer (one with no 14416c29b5f285f375be53dabaa73e3e91107485fe4Anton Korobeynikov /// associated preprocessor object. Return true if the 'next character to 145d0c38176690e9602a93a20a43f1bd084564a8116Anton Korobeynikov /// read' pointer points and the end of the lexer buffer, false otherwise. 146a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel bool LexFromRawLexer(Token &Result) { 147a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel assert(LexingRawMode && "Not already in raw mode!"); 148a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel Lex(Result); 1497697370adff8983e2a3de493362f0d8c9f9b0e17Hal Finkel // Note that lexing to the end of the buffer doesn't implicitly delete the 150a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel // lexer when in raw mode. 151e9cc0a09ae38c87b1b26a44f5e32222ede4f84e6Hal Finkel return BufferPtr == BufferEnd; 152e9cc0a09ae38c87b1b26a44f5e32222ede4f84e6Hal Finkel } 153e9cc0a09ae38c87b1b26a44f5e32222ede4f84e6Hal Finkel 154e9cc0a09ae38c87b1b26a44f5e32222ede4f84e6Hal Finkel /// SetCommentRetentionMode - Change the comment retention mode of the lexer 155e9cc0a09ae38c87b1b26a44f5e32222ede4f84e6Hal Finkel /// to the specified mode. This is really only useful when lexing in raw 1560541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel /// mode, because otherwise the lexer needs to manage this. 1570541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel void SetCommentRetentionState(bool Mode) { 1580541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel KeepCommentMode = Mode; 1590541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel } 1600541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel 161b1fd3cd78f8acd21dbf514b75fef991827c343b6Hal Finkel /// ReadToEndOfLine - Read the rest of the current preprocessor line as an 162b1fd3cd78f8acd21dbf514b75fef991827c343b6Hal Finkel /// uninterpreted string. This switches the lexer out of directive mode. 163b1fd3cd78f8acd21dbf514b75fef991827c343b6Hal Finkel std::string ReadToEndOfLine(); 164b1fd3cd78f8acd21dbf514b75fef991827c343b6Hal Finkel 165b1fd3cd78f8acd21dbf514b75fef991827c343b6Hal Finkel 166b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng /// Diag - Forwarding function for diagnostics. This translate a source 167b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng /// position in the current buffer into a SourceLocation object for rendering. 1687194aaf738a1b89441635340403f1c5b06ae18efBill Wendling void Diag(const char *Loc, unsigned DiagID, 169b384ab9ea113ad22a9c7034b98060c7470f0dcc5Dale Johannesen const std::string &Msg = std::string()) const; 1707194aaf738a1b89441635340403f1c5b06ae18efBill Wendling void Diag(SourceLocation Loc, unsigned DiagID, 17175dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel const std::string &Msg = std::string()) const; 17275dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel 17375dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// getSourceLocation - Return a source location identifier for the specified 174ffd0200abfd63177257f949a3674b91dcf87bf23Tilmann Scheller /// offset in the current file. 1752a9ddfb903ae3baede7282348afae1f750905248Tilmann Scheller SourceLocation getSourceLocation(const char *Loc) const; 176ffd0200abfd63177257f949a3674b91dcf87bf23Tilmann Scheller 177ffd0200abfd63177257f949a3674b91dcf87bf23Tilmann Scheller /// Stringify - Convert the specified string into a C string by escaping '\' 178ffd0200abfd63177257f949a3674b91dcf87bf23Tilmann Scheller /// and " characters. This does not add surrounding ""'s to the string. 179ffd0200abfd63177257f949a3674b91dcf87bf23Tilmann Scheller /// If Charify is true, this escapes the ' character instead of ". 1806b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller static std::string Stringify(const std::string &Str, bool Charify = false); 181b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng 182b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng /// Stringify - Convert the specified string into a C string by escaping '\' 1837194aaf738a1b89441635340403f1c5b06ae18efBill Wendling /// and " characters. This does not add surrounding ""'s to the string. 1847194aaf738a1b89441635340403f1c5b06ae18efBill Wendling static void Stringify(llvm::SmallVectorImpl<char> &Str); 1857194aaf738a1b89441635340403f1c5b06ae18efBill Wendling 18654e57f8cb79bdc23ed8289cf2a558fa7c9602972Hal Finkel /// MeasureTokenLength - Relex the token at the specified location and return 1870541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel /// its length in bytes in the input file. If the token needs cleaning (e.g. 18854e57f8cb79bdc23ed8289cf2a558fa7c9602972Hal Finkel /// includes a trigraph or an escaped newline) then this count includes bytes 1896b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller /// that are part of that. 1900541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel static unsigned MeasureTokenLength(SourceLocation Loc, 1910541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel const SourceManager &SM); 192fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel 1936b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller //===--------------------------------------------------------------------===// 1946b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller // Internal implementation interfaces. 1956b16eff207f99bbde3c0f7340452a5287218772cTilmann Schellerprivate: 1966b16eff207f99bbde3c0f7340452a5287218772cTilmann Scheller 197b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng /// LexTokenInternal - Internal interface to lex a preprocessing token. Called 1987194aaf738a1b89441635340403f1c5b06ae18efBill Wendling /// by Lex. 1990541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel /// 200b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng void LexTokenInternal(Token &Result); 2017194aaf738a1b89441635340403f1c5b06ae18efBill Wendling 2020541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel /// FormTokenWithChars - When we lex a token, we have identified a span 2030541722de4beb2e53058dbf4ed1ebf0d96ddd6cbHal Finkel /// starting at BufferPtr, going to TokEnd that forms the token. This method 204fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753dHal Finkel /// takes that range and assigns it to the token as its location and size. In 20575dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// addition, since tokens cannot overlap, this also updates BufferPtr to be 20675dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel /// TokEnd. 20775dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel void FormTokenWithChars(Token &Result, const char *TokEnd) { 20875dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel Result.setLocation(getSourceLocation(BufferPtr)); 20975dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel Result.setLength(TokEnd-BufferPtr); 21075dd57a8f0407be32551cf695e63a106dd051a27Hal Finkel BufferPtr = TokEnd; 211b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng } 212b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng 213b371f457b0ea4a652a9f526ba4375c80ae542252Evan Cheng /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a 214768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel /// tok::l_paren token, 0 if it is something else and 2 if there are no more 215768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel /// tokens in the buffer controlled by this lexer. 216768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel unsigned isNextPPTokenLParen(); 217768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel 218768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel //===--------------------------------------------------------------------===// 219768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // Lexer character reading interfaces. 220768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkelpublic: 221768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel 222768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // This lexer is built on two interfaces for reading characters, both of which 223a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel // automatically provide phase 1/2 translation. getAndAdvanceChar is used 224a548afc98fd4c61a8dfdd550ba57c37f2cfe3ed9Hal Finkel // when we know that we will be reading a character from the input buffer and 225768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // that this character will be part of the result token. This occurs in (f.e.) 226768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // string processing, because we know we need to read until we find the 227768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // closing '"' character. 228768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // 229768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // The second interface is the combination of PeekCharAndSize with 230768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // ConsumeChar. PeekCharAndSize reads a phase 1/2 translated character, 231768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // returning it and its size. If the lexer decides that this character is 232768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel // part of the current token, it calls ConsumeChar on it. This two stage 23336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // approach allows us to emit diagnostics for characters (e.g. warnings about 23436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // trigraphs), knowing that they only are emitted if the character is 23536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // consumed. 236768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel 23736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// isObviouslySimpleCharacter - Return true if the specified character is 23836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// obviously the same in translation phase 1 and translation phase 3. This 23936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// can return false for characters that end up being the same, but it will 2402e313caa3631e2c94bb6d81429b77a30b116b520Hal Finkel /// never return true for something that needs to be mapped. 2412e313caa3631e2c94bb6d81429b77a30b116b520Hal Finkel static bool isObviouslySimpleCharacter(char C) { 242768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel return C != '?' && C != '\\'; 243768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel } 244768c65f677af3f05c2e94982043f90a1bfaceda5Hal Finkel 24536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// getAndAdvanceChar - Read a single 'character' from the specified buffer, 24636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// advance over it, and return it. This is tricky in several cases. Here we 24736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// just handle the trivial case and fall-back to the non-inlined 24836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// getCharAndSizeSlow method to handle the hard case. 24936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) { 25036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If this is not a trigraph and not a UCN or escaped newline, return 25136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // quickly. 25236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++; 25336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 25436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned Size = 0; 25536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines char C = getCharAndSizeSlow(Ptr, Size, &Tok); 25636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Ptr += Size; 25736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return C; 25836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 25936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 260f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukmanprivate: 261f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman /// ConsumeChar - When a character (identified by PeekCharAndSize) is consumed 262f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman /// and added to a given token, check to see if there are diagnostics that 263f2ccb77ee9d8ab35866dae111fa36929689c7511Misha Brukman /// need to be emitted or flags that need to be set on the token. If so, do 2647194aaf738a1b89441635340403f1c5b06ae18efBill Wendling /// it. 2652f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) { 2662f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey // Normal case, we consumed exactly one token. Just return it. 2670f8b53f19d29013ab18f3d444cea1e6305405611Dan Gohman if (Size == 1) 2682f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey return Ptr+Size; 2690f8b53f19d29013ab18f3d444cea1e6305405611Dan Gohman 2702f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey // Otherwise, re-lex the character with a current token, allowing 27102327fefd8a4b7d9f4dc90e066ba70b1d6253c27Hal Finkel // diagnostics to be emitted and flags to be set. 2722f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey Size = 0; 2732f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey getCharAndSizeSlow(Ptr, Size, &Tok); 2742f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey return Ptr+Size; 2752f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey } 2762f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey 2772f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey /// getCharAndSize - Peek a single 'character' from the specified buffer, 2782f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey /// get its size, and return it. This is tricky in several cases. Here we 2792f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey /// just handle the trivial case and fall-back to the non-inlined 28080ada583f3b40ffb201e54cd57c42f9518039c9eBill Wendling /// getCharAndSizeSlow method to handle the hard case. 28180ada583f3b40ffb201e54cd57c42f9518039c9eBill Wendling inline char getCharAndSize(const char *Ptr, unsigned &Size) { 2822f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey // If this is not a trigraph and not a UCN or escaped newline, return 2832f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey // quickly. 284536a2f1f8467a17f6d145bd83f25faae1f689839Dale Johannesen if (isObviouslySimpleCharacter(Ptr[0])) { 2852f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey Size = 1; 286fab0439c62984b3dc851eb99c31c4f6edda092a1Evan Cheng return *Ptr; 2872f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey } 2882f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey 2892f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey Size = 0; 2902f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey return getCharAndSizeSlow(Ptr, Size); 2912f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey } 29216c29b5f285f375be53dabaa73e3e91107485fe4Anton Korobeynikov 2932f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey /// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize 294aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// method. 295aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel char getCharAndSizeSlow(const char *Ptr, unsigned &Size, Token *Tok = 0); 2962f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey 2972f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever 2982f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey /// emit a warning. 2992f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, 3002f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey const LangOptions &Features) { 3012f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey // If this is not a trigraph and not a UCN or escaped newline, return 3022f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey // quickly. 3037194aaf738a1b89441635340403f1c5b06ae18efBill Wendling if (isObviouslySimpleCharacter(Ptr[0])) { 3047194aaf738a1b89441635340403f1c5b06ae18efBill Wendling Size = 1; 3051c6c61a6089fb2bef47de5ee9a5f4acc34047600Hal Finkel return *Ptr; 3067194aaf738a1b89441635340403f1c5b06ae18efBill Wendling } 30734247a0f356edf45ae3ad9ce04e1f90a77c6dba7Benjamin Kramer 308536a2f1f8467a17f6d145bd83f25faae1f689839Dale Johannesen Size = 0; 3092f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey return getCharAndSizeSlowNoWarn(Ptr, Size, Features); 3102f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey } 3112f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey 3127285e8d98c9a44b7efe792462188cfe713dd9641Hal Finkel /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a 3137285e8d98c9a44b7efe792462188cfe713dd9641Hal Finkel /// diagnostic. 3147285e8d98c9a44b7efe792462188cfe713dd9641Hal Finkel static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, 3152f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey const LangOptions &Features); 316536a2f1f8467a17f6d145bd83f25faae1f689839Dale Johannesen 3172f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey //===--------------------------------------------------------------------===// 3182f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey // #if directive handling. 3192f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey 320aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// pushConditionalLevel - When we enter a #if directive, this keeps track of 321aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// what we are currently in for diagnostic emission (e.g. #if with missing 322aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// #endif). 323aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void pushConditionalLevel(SourceLocation DirectiveStart, bool WasSkipping, 3247194aaf738a1b89441635340403f1c5b06ae18efBill Wendling bool FoundNonSkip, bool FoundElse) { 3257194aaf738a1b89441635340403f1c5b06ae18efBill Wendling PPConditionalInfo CI; 3262f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey CI.IfLoc = DirectiveStart; 327aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel CI.WasSkipping = WasSkipping; 328aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel CI.FoundNonSkip = FoundNonSkip; 329aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel CI.FoundElse = FoundElse; 330aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel ConditionalStack.push_back(CI); 331aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel } 332aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void pushConditionalLevel(const PPConditionalInfo &CI) { 333aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel ConditionalStack.push_back(CI); 334aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel } 335aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel 336aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// popConditionalLevel - Remove an entry off the top of the conditional 337aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// stack, returning information about it. If the conditional stack is empty, 338aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// this returns true and does not fill in the arguments. 339aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel bool popConditionalLevel(PPConditionalInfo &CI) { 340aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel if (ConditionalStack.empty()) return true; 341aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel CI = ConditionalStack.back(); 342aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel ConditionalStack.pop_back(); 343aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel return false; 3447285e8d98c9a44b7efe792462188cfe713dd9641Hal Finkel } 3457285e8d98c9a44b7efe792462188cfe713dd9641Hal Finkel 3467285e8d98c9a44b7efe792462188cfe713dd9641Hal Finkel /// peekConditionalLevel - Return the top of the conditional stack. This 347aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// requires that there be a conditional active. 348aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel PPConditionalInfo &peekConditionalLevel() { 349aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel assert(!ConditionalStack.empty() && "No conditionals active!"); 350aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel return ConditionalStack.back(); 3512f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey } 352aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel 353aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel unsigned getConditionalStackDepth() const { return ConditionalStack.size(); } 354aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel 355aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel //===--------------------------------------------------------------------===// 356aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel // Other lexer functions. 357aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel 358aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel // Helper functions to lex the remainder of a token of the specific type. 359aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void LexIdentifier (Token &Result, const char *CurPtr); 360aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void LexNumericConstant (Token &Result, const char *CurPtr); 361aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void LexStringLiteral (Token &Result, const char *CurPtr,bool Wide); 362aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void LexAngledStringLiteral(Token &Result, const char *CurPtr); 363aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void LexCharConstant (Token &Result, const char *CurPtr); 364aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel bool LexEndOfFile (Token &Result, const char *CurPtr); 365aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel 366aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void SkipWhitespace (Token &Result, const char *CurPtr); 367aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel bool SkipBCPLComment (Token &Result, const char *CurPtr); 368aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel bool SkipBlockComment (Token &Result, const char *CurPtr); 369ac81cc3282750d724f824547bc519caec0a01bceHal Finkel bool SaveBCPLComment (Token &Result, const char *CurPtr); 370587daedce2d6c2b2d380b6a5843a6f8b6cfc79e4Bill Wendling 371ac81cc3282750d724f824547bc519caec0a01bceHal Finkel /// LexIncludeFilename - After the preprocessor has parsed a #include, lex and 372aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// (potentially) macro expand the filename. If the sequence parsed is not 373aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel /// lexically legal, emit a diagnostic and return a result EOM token. 374aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel void LexIncludeFilename(Token &Result); 375aad2a72c285a48e34d89ba69d24eb624f2b09b0eHal Finkel}; 3762f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey 3772f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey 3782f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey} // end namespace clang 3792f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey 3802f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey#endif 3812f616bff7ef1e2e08d6d23c2a8b42ec2bfebb173Jim Laskey