Parser.cpp revision 8a77c20375874c0759a5cd5b4a34e83465d821b2
//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//

#include <string>
#include <vector>

#include "clang/ASTMatchers/Dynamic/Parser.h"
#include "clang/ASTMatchers/Dynamic/Registry.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/Twine.h"

namespace clang {
24f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimeknamespace ast_matchers { 25f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimeknamespace dynamic { 26f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 27f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \brief Simple structure to hold information for one token from the parser. 28f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekstruct Parser::TokenInfo { 29f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Different possible tokens. 30f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek enum TokenKind { 31f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TK_Eof = 0, 32f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TK_OpenParen = 1, 33f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TK_CloseParen = 2, 34f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TK_Comma = 3, 354f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen TK_Period = 4, 364f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen TK_Literal = 5, 374f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen TK_Ident = 6, 384f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen TK_InvalidChar = 7, 394f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen TK_Error = 8 40f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek }; 41f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 424f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen /// \brief Some known identifiers. 
434f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen static const char* const ID_Bind; 444f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen 45f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {} 46f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 47f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek StringRef Text; 48f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenKind Kind; 49f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek SourceRange Range; 50f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek VariantValue Value; 51f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek}; 52f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 534f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquenconst char* const Parser::TokenInfo::ID_Bind = "bind"; 544f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen 55f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \brief Simple tokenizer for the parser. 56f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekclass Parser::CodeTokenizer { 57f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekpublic: 58f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error) 59f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) { 60f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek NextToken = getNextToken(); 61f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 62f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 63f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Returns but doesn't consume the next token. 64f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek const TokenInfo &peekNextToken() const { return NextToken; } 65f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 66f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Consumes and returns the next token. 
67f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenInfo consumeNextToken() { 68f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenInfo ThisToken = NextToken; 69f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek NextToken = getNextToken(); 70f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return ThisToken; 71f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 72f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 73f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } 74f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 75f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekprivate: 76f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenInfo getNextToken() { 77f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek consumeWhitespace(); 78f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenInfo Result; 79f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Range.Start = currentLocation(); 80f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 81f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (Code.empty()) { 82f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Kind = TokenInfo::TK_Eof; 83f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Text = ""; 84f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return Result; 85f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 86f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 87f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek switch (Code[0]) { 88f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case ',': 89f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Kind = TokenInfo::TK_Comma; 90f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Text = Code.substr(0, 1); 91f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Code = Code.drop_front(); 92f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek break; 
934f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen case '.': 944f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen Result.Kind = TokenInfo::TK_Period; 954f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen Result.Text = Code.substr(0, 1); 964f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen Code = Code.drop_front(); 974f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen break; 98f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case '(': 99f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Kind = TokenInfo::TK_OpenParen; 100f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Text = Code.substr(0, 1); 101f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Code = Code.drop_front(); 102f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek break; 103f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case ')': 104f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Kind = TokenInfo::TK_CloseParen; 105f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Text = Code.substr(0, 1); 106f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Code = Code.drop_front(); 107f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek break; 108f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 109f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case '"': 110f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case '\'': 111f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek // Parse a string literal. 
112f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek consumeStringLiteral(&Result); 113f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek break; 114f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 1157a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen case '0': case '1': case '2': case '3': case '4': 1167a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen case '5': case '6': case '7': case '8': case '9': 1177a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen // Parse an unsigned literal. 1187a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen consumeUnsignedLiteral(&Result); 1197a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen break; 1207a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen 121f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek default: 122f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (isAlphanumeric(Code[0])) { 123f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek // Parse an identifier 124f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek size_t TokenLength = 1; 125f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength])) 126f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek ++TokenLength; 127f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Kind = TokenInfo::TK_Ident; 128f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Text = Code.substr(0, TokenLength); 129f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Code = Code.drop_front(TokenLength); 130f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } else { 131f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Kind = TokenInfo::TK_InvalidChar; 132f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Text = Code.substr(0, 1); 133f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Code = Code.drop_front(1); 134f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 135f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 
break; 136f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 137f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 138f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result.Range.End = currentLocation(); 139f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return Result; 140f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 141f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 1427a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen /// \brief Consume an unsigned literal. 1437a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen void consumeUnsignedLiteral(TokenInfo *Result) { 1447a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen unsigned Length = 1; 1457a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen if (Code.size() > 1) { 1467a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen // Consume the 'x' or 'b' radix modifier, if present. 1477a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen switch (toLowercase(Code[1])) { 1487a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen case 'x': case 'b': Length = 2; 1497a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen } 1507a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen } 1517a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen while (Length < Code.size() && isHexDigit(Code[Length])) 1527a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen ++Length; 1537a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen 1547a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen Result->Text = Code.substr(0, Length); 1557a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen Code = Code.drop_front(Length); 1567a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen 1577a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen unsigned Value; 1587a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen if (!Result->Text.getAsInteger(0, Value)) { 1597a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen Result->Kind = TokenInfo::TK_Literal; 
1607a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen Result->Value = Value; 1617a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen } else { 1627a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen SourceRange Range; 1637a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen Range.Start = Result->Range.Start; 1647a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen Range.End = currentLocation(); 1658a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text; 1667a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen Result->Kind = TokenInfo::TK_Error; 1677a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen } 1687a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen } 1697a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen 170f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Consume a string literal. 171f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 172f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \c Code must be positioned at the start of the literal (the opening 173f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// quote). Consumed until it finds the same closing quote character. 
174f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek void consumeStringLiteral(TokenInfo *Result) { 175f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek bool InEscape = false; 176f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek const char Marker = Code[0]; 177f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { 178f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (InEscape) { 179f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek InEscape = false; 180f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek continue; 181f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 182f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (Code[Length] == '\\') { 183f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek InEscape = true; 184f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek continue; 185f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 186f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (Code[Length] == Marker) { 187f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result->Kind = TokenInfo::TK_Literal; 188f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result->Text = Code.substr(0, Length + 1); 189f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result->Value = Code.substr(1, Length - 1).str(); 190f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Code = Code.drop_front(Length + 1); 191f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return; 192f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 193f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 194f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 195f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek StringRef ErrorText = Code; 196f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Code = Code.drop_front(Code.size()); 197f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek SourceRange Range; 198f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 
Range.Start = Result->Range.Start; 199f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Range.End = currentLocation(); 2008a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(Range, Error->ET_ParserStringError) << ErrorText; 201f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Result->Kind = TokenInfo::TK_Error; 202f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 203f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 204f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Consume all leading whitespace from \c Code. 205f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek void consumeWhitespace() { 206f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek while (!Code.empty() && isWhitespace(Code[0])) { 207f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (Code[0] == '\n') { 208f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek ++Line; 209f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek StartOfLine = Code.drop_front(); 210f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 211f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Code = Code.drop_front(); 212f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 213f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 214f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 215f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek SourceLocation currentLocation() { 216f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek SourceLocation Location; 217f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Location.Line = Line; 218f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Location.Column = Code.data() - StartOfLine.data() + 1; 219f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return Location; 220f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 221f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 222f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek StringRef Code; 223f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 
StringRef StartOfLine; 224f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek unsigned Line; 225f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Diagnostics *Error; 226f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenInfo NextToken; 227f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek}; 228f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 229f7f295f321fd434e1e542844a71f538a56f2f8fbManuel KlimekParser::Sema::~Sema() {} 230f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 231f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \brief Parse and validate a matcher expression. 232f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \return \c true on success, in which case \c Value has the matcher parsed. 233f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// If the input is malformed, or some argument has an error, it 234f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// returns \c false. 235f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekbool Parser::parseMatcherExpressionImpl(VariantValue *Value) { 236f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek const TokenInfo NameToken = Tokenizer->consumeNextToken(); 237f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek assert(NameToken.Kind == TokenInfo::TK_Ident); 238f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 239f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 2408a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) 241f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek << OpenToken.Text; 242f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return false; 243f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 244f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 245f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek std::vector<ParserValue> Args; 
246f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek TokenInfo EndToken; 247f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 248f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 249f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek // End of args. 250f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek EndToken = Tokenizer->consumeNextToken(); 251f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek break; 252f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 253f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (Args.size() > 0) { 254f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek // We must find a , token to continue. 255f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek const TokenInfo CommaToken = Tokenizer->consumeNextToken(); 256f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (CommaToken.Kind != TokenInfo::TK_Comma) { 2578a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 258f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek << CommaToken.Text; 259f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return false; 260f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 261f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 262f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 2638a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 2648a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen NameToken.Text, NameToken.Range, Args.size() + 1); 265f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek ParserValue ArgValue; 266f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek ArgValue.Text = Tokenizer->peekNextToken().Text; 267f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek ArgValue.Range = Tokenizer->peekNextToken().Range; 
2688a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen if (!parseExpressionImpl(&ArgValue.Value)) return false; 269f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 270f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Args.push_back(ArgValue); 271f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 272f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 273f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if (EndToken.Kind == TokenInfo::TK_Eof) { 2748a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 275f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return false; 276f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 277f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 2784f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen std::string BindID; 2794f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 2804f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen // Parse .bind("foo") 2814f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen Tokenizer->consumeNextToken(); // consume the period. 2824f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen const TokenInfo BindToken = Tokenizer->consumeNextToken(); 2834f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 2844f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen const TokenInfo IDToken = Tokenizer->consumeNextToken(); 2854f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen const TokenInfo CloseToken = Tokenizer->consumeNextToken(); 2864f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen 2874f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen // TODO: We could use different error codes for each/some to be more 2884f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen // explicit about the syntax error. 
2894f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen if (BindToken.Kind != TokenInfo::TK_Ident || 2904f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen BindToken.Text != TokenInfo::ID_Bind) { 2918a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr); 2924f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen return false; 2934f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen } 2944f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 2958a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); 2964f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen return false; 2974f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen } 2984f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { 2998a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); 3004f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen return false; 3014f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen } 3024f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen if (CloseToken.Kind != TokenInfo::TK_CloseParen) { 3038a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); 3044f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen return false; 3054f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen } 3064f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen BindID = IDToken.Value.getString(); 3074f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen } 3084f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen 309f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek // Merge the start and end infos. 
3108a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 3118a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen NameToken.Text, NameToken.Range); 312f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek SourceRange MatcherRange = NameToken.Range; 313f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek MatcherRange.End = EndToken.Range.End; 314ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen MatcherList Result = S->actOnMatcherExpression( 3154f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen NameToken.Text, MatcherRange, BindID, Args, Error); 3168a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen if (Result.empty()) return false; 317f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 318ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen *Value = Result; 319f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return true; 320f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} 321f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 322f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \brief Parse an <Expresssion> 323f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekbool Parser::parseExpressionImpl(VariantValue *Value) { 324f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek switch (Tokenizer->nextTokenKind()) { 325f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case TokenInfo::TK_Literal: 326f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek *Value = Tokenizer->consumeNextToken().Value; 327f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return true; 328f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 329f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case TokenInfo::TK_Ident: 330f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return parseMatcherExpressionImpl(Value); 331f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 332f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case TokenInfo::TK_Eof: 
3338a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(Tokenizer->consumeNextToken().Range, 3348a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->ET_ParserNoCode); 335f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return false; 336f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 337f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case TokenInfo::TK_Error: 338f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek // This error was already reported by the tokenizer. 339f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return false; 340f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 341f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case TokenInfo::TK_OpenParen: 342f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case TokenInfo::TK_CloseParen: 343f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case TokenInfo::TK_Comma: 3444f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen case TokenInfo::TK_Period: 345f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek case TokenInfo::TK_InvalidChar: 346f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek const TokenInfo Token = Tokenizer->consumeNextToken(); 3478a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text; 348f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return false; 349f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 350f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 351f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek llvm_unreachable("Unknown token kind."); 352f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} 353f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 354f7f295f321fd434e1e542844a71f538a56f2f8fbManuel KlimekParser::Parser(CodeTokenizer *Tokenizer, Sema *S, 355f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Diagnostics *Error) 356f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek : Tokenizer(Tokenizer), S(S), 
Error(Error) {} 357f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 358f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekclass RegistrySema : public Parser::Sema { 359f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekpublic: 360f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek virtual ~RegistrySema() {} 361ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen MatcherList actOnMatcherExpression(StringRef MatcherName, 362ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen const SourceRange &NameRange, 363ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen StringRef BindID, 364ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen ArrayRef<ParserValue> Args, 365ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen Diagnostics *Error) { 3664f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen if (BindID.empty()) { 3674f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen return Registry::constructMatcher(MatcherName, NameRange, Args, Error); 3684f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen } else { 3694f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen return Registry::constructBoundMatcher(MatcherName, NameRange, BindID, 3704f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen Args, Error); 3714f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen } 372f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 373f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek}; 374f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 375f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekbool Parser::parseExpression(StringRef Code, VariantValue *Value, 376f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Diagnostics *Error) { 377f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek RegistrySema S; 378f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return parseExpression(Code, &S, Value, Error); 379f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} 380f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 
381f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekbool Parser::parseExpression(StringRef Code, Sema *S, 382f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek VariantValue *Value, Diagnostics *Error) { 383f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek CodeTokenizer Tokenizer(Code, Error); 3844f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false; 3854f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) { 3868a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(Tokenizer.peekNextToken().Range, 3878a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->ET_ParserTrailingCode); 3884f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen return false; 3894f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen } 3904f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen return true; 391f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} 392f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 393f7f295f321fd434e1e542844a71f538a56f2f8fbManuel KlimekDynTypedMatcher *Parser::parseMatcherExpression(StringRef Code, 394f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Diagnostics *Error) { 395f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek RegistrySema S; 396f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return parseMatcherExpression(Code, &S, Error); 397f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} 398f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 399f7f295f321fd434e1e542844a71f538a56f2f8fbManuel KlimekDynTypedMatcher *Parser::parseMatcherExpression(StringRef Code, 400f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Parser::Sema *S, 401f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Diagnostics *Error) { 402f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek VariantValue Value; 403f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek if 
(!parseExpression(Code, S, &Value, Error)) 404f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return NULL; 405ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen if (!Value.isMatchers()) { 4068a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); 407f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek return NULL; 408f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek } 409ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen if (Value.getMatchers().matchers().size() != 1) { 4108a77c20375874c0759a5cd5b4a34e83465d821b2Samuel Benzaquen Error->addError(SourceRange(), Error->ET_ParserOverloadedType) 411ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen << Value.getTypeAsString(); 412ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen return NULL; 413ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen } 414ef7eb024397a6a9d1455b31bc7b10288a817ac3bSamuel Benzaquen return Value.getMatchers().matchers()[0]->clone(); 415f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} 416f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 417f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} // namespace dynamic 418f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} // namespace ast_matchers 419f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} // namespace clang 420