1f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek//===--- Parser.h - Matcher expression parser -----*- C++ -*-===// 2f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek// 3f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek// The LLVM Compiler Infrastructure 4f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek// 5f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek// This file is distributed under the University of Illinois Open Source 6f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek// License. See LICENSE.TXT for details. 7f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek// 8f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek//===----------------------------------------------------------------------===// 9f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// 10f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \file 11f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \brief Simple matcher expression parser. 12f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// 13f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// The parser understands matcher expressions of the form: 14f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// MatcherName(Arg0, Arg1, ..., ArgN) 15f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// as well as simple types like strings. 16f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// The parser does not know how to process the matchers. It delegates this task 17f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// to a Sema object received as an argument. 18f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// 19f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \code 20f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// Grammar for the expressions supported: 216bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines/// <Expression> := <Literal> | <NamedValue> | <MatcherExpression> 227a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen/// <Literal> := <StringLiteral> | <Unsigned> 23f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// <StringLiteral> := "quoted string" 247a337af9e8bc752a2d3b227e4058ed2baf7a19d1Samuel Benzaquen/// <Unsigned> := [0-9]+ 256bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines/// <NamedValue> := <Identifier> 266bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines/// <MatcherExpression> := <Identifier>(<ArgumentList>) | 276bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines/// <Identifier>(<ArgumentList>).bind(<StringLiteral>) 286bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines/// <Identifier> := [a-zA-Z]+ 29f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// <ArgumentList> := <Expression> | <Expression>,<ArgumentList> 30f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \endcode 31f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// 32f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek//===----------------------------------------------------------------------===// 33f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 34f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek#ifndef LLVM_CLANG_AST_MATCHERS_DYNAMIC_PARSER_H 35f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek#define LLVM_CLANG_AST_MATCHERS_DYNAMIC_PARSER_H 36f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 37f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek#include "clang/ASTMatchers/Dynamic/Diagnostics.h" 38651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#include "clang/ASTMatchers/Dynamic/Registry.h" 39f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek#include "clang/ASTMatchers/Dynamic/VariantValue.h" 40f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek#include "clang/Basic/LLVM.h" 41f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek#include "llvm/ADT/ArrayRef.h" 42b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen#include "llvm/ADT/Optional.h" 43f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek#include "llvm/ADT/StringRef.h" 44f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 45f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimeknamespace clang { 46f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimeknamespace ast_matchers { 47f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimeknamespace dynamic { 48f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 49f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek/// \brief Matcher expression parser. 50f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekclass Parser { 51f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekpublic: 52f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Interface to connect the parser with the registry and more. 53f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 54f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// The parser uses the Sema instance passed into 55f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// parseMatcherExpression() to handle all matcher tokens. The simplest 56f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// processor implementation would simply call into the registry to create 57f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// the matchers. 58f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// However, a more complex processor might decide to intercept the matcher 59f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// creation and do some extra work. For example, it could apply some 60f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// transformation to the matcher by adding some id() nodes, or could detect 61f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// specific matcher nodes for more efficient lookup. 62f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek class Sema { 63f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek public: 64f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek virtual ~Sema(); 65f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 666bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// \brief Lookup a value by name. 676bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// 686bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// This can be used in the Sema layer to declare known constants or to 696bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// allow to split an expression in pieces. 706bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// 716bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// \param Name The name of the value to lookup. 726bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// 736bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// \return The named value. It could be any type that VariantValue 746bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// supports. An empty value means that the name is not recognized. 756bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines virtual VariantValue getNamedValue(StringRef Name); 766bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines 77f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Process a matcher expression. 78f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 79f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// All the arguments passed here have already been processed. 80f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 81651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// \param Ctor A matcher constructor looked up by lookupMatcherCtor. 82f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 83f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \param NameRange The location of the name in the matcher source. 84f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// Useful for error reporting. 85f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 864f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen /// \param BindID The ID to use to bind the matcher, or a null \c StringRef 874f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen /// if no ID is specified. 884f37d925927dfdd0c770702ffb22de38fc2007dcSamuel Benzaquen /// 89f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \param Args The argument list for the matcher. 90f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 919d02807c3ea9782442b98201df68294cd7cd7313Samuel Benzaquen /// \return The matcher objects constructed by the processor, or a null 929d02807c3ea9782442b98201df68294cd7cd7313Samuel Benzaquen /// matcher if an error occurred. In that case, \c Error will contain a 93f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// description of the error. 94651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, 959d02807c3ea9782442b98201df68294cd7cd7313Samuel Benzaquen const SourceRange &NameRange, 969d02807c3ea9782442b98201df68294cd7cd7313Samuel Benzaquen StringRef BindID, 979d02807c3ea9782442b98201df68294cd7cd7313Samuel Benzaquen ArrayRef<ParserValue> Args, 989d02807c3ea9782442b98201df68294cd7cd7313Samuel Benzaquen Diagnostics *Error) = 0; 99651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 100651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// \brief Look up a matcher by name. 101651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// 102651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// \param MatcherName The matcher name found by the parser. 103651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// 1046bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// \return The matcher constructor, or Optional<MatcherCtor>() if not 1056bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// found. 106651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines virtual llvm::Optional<MatcherCtor> 1076bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines lookupMatcherCtor(StringRef MatcherName) = 0; 1086bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines }; 1096bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines 1106bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// \brief Sema implementation that uses the matcher registry to process the 1116bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines /// tokens. 1126bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines class RegistrySema : public Parser::Sema { 1136bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines public: 1146bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines virtual ~RegistrySema(); 1156bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines 1166bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines llvm::Optional<MatcherCtor> 1176bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines lookupMatcherCtor(StringRef MatcherName) override; 1186bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines 1196bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, 1206bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines const SourceRange &NameRange, 1216bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines StringRef BindID, 1226bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines ArrayRef<ParserValue> Args, 1236bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines Diagnostics *Error) override; 124f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek }; 125f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 126f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Parse a matcher expression, creating matchers from the registry. 127f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 128f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// This overload creates matchers calling directly into the registry. If the 129f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// caller needs more control over how the matchers are created, then it can 130f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// use the overload below that takes a Sema. 131f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 132f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \param MatcherCode The matcher expression to parse. 133f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 134b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen /// \return The matcher object constructed, or an empty Optional if an error 135b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen /// occurred. 136b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen /// In that case, \c Error will contain a description of the error. 137f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// The caller takes ownership of the DynTypedMatcher object returned. 138b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen static llvm::Optional<DynTypedMatcher> 139b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen parseMatcherExpression(StringRef MatcherCode, Diagnostics *Error); 140f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 141f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Parse a matcher expression. 142f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 143f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \param MatcherCode The matcher expression to parse. 144f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 145f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \param S The Sema instance that will help the parser 146f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// construct the matchers. 147b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen /// \return The matcher object constructed by the processor, or an empty 148b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen /// Optional if an error occurred. In that case, \c Error will contain a 149f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// description of the error. 150f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// The caller takes ownership of the DynTypedMatcher object returned. 151b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen static llvm::Optional<DynTypedMatcher> 152b7488d77414b000ce2506b520a6b29f845fb3950Samuel Benzaquen parseMatcherExpression(StringRef MatcherCode, Sema *S, Diagnostics *Error); 153f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 154f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Parse an expression, creating matchers from the registry. 155f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 156f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// Parses any expression supported by this parser. In general, the 157f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \c parseMatcherExpression function is a better approach to get a matcher 158f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// object. 159f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek static bool parseExpression(StringRef Code, VariantValue *Value, 160f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Diagnostics *Error); 161f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 162f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \brief Parse an expression. 163f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// 164f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// Parses any expression supported by this parser. In general, the 165f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// \c parseMatcherExpression function is a better approach to get a matcher 166f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek /// object. 167f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek static bool parseExpression(StringRef Code, Sema *S, 168f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek VariantValue *Value, Diagnostics *Error); 169f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 170651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// \brief Complete an expression at the given offset. 171651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// 172651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// \return The list of completions, which may be empty if there are no 173651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /// available completions or if an error occurred. 174651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines static std::vector<MatcherCompletion> 175651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines completeExpression(StringRef Code, unsigned CompletionOffset); 176651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 177f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimekprivate: 178f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek class CodeTokenizer; 179651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines struct ScopedContextEntry; 180f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek struct TokenInfo; 181f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 182f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Parser(CodeTokenizer *Tokenizer, Sema *S, 183f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Diagnostics *Error); 184f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 185f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek bool parseExpressionImpl(VariantValue *Value); 1866bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines bool parseMatcherExpressionImpl(const TokenInfo &NameToken, 1876bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines VariantValue *Value); 1886bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines bool parseIdentifierPrefixImpl(VariantValue *Value); 189f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 190651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines void addCompletion(const TokenInfo &CompToken, StringRef TypedText, 191651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines StringRef Decl); 192651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines void addExpressionCompletions(); 193651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 194f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek CodeTokenizer *const Tokenizer; 195f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Sema *const S; 196f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek Diagnostics *const Error; 197651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 198651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines typedef std::vector<std::pair<MatcherCtor, unsigned> > ContextStackTy; 199651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines ContextStackTy ContextStack; 200651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines std::vector<MatcherCompletion> Completions; 201f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek}; 202f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 203f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} // namespace dynamic 204f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} // namespace ast_matchers 205f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek} // namespace clang 206f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek 207f7f295f321fd434e1e542844a71f538a56f2f8fbManuel Klimek#endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_PARSER_H 208