1//===--- Parser.h - Matcher expression parser -----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Simple matcher expression parser.
12///
13/// The parser understands matcher expressions of the form:
14///   MatcherName(Arg0, Arg1, ..., ArgN)
15/// as well as simple types like strings.
16/// The parser does not know how to process the matchers. It delegates this task
17/// to a Sema object received as an argument.
18///
19/// \code
20/// Grammar for the expressions supported:
21/// <Expression>        := <Literal> | <NamedValue> | <MatcherExpression>
22/// <Literal>           := <StringLiteral> | <Unsigned>
23/// <StringLiteral>     := "quoted string"
24/// <Unsigned>          := [0-9]+
25/// <NamedValue>        := <Identifier>
26/// <MatcherExpression> := <Identifier>(<ArgumentList>) |
27///                        <Identifier>(<ArgumentList>).bind(<StringLiteral>)
28/// <Identifier>        := [a-zA-Z]+
29/// <ArgumentList>      := <Expression> | <Expression>,<ArgumentList>
30/// \endcode
31///
32//===----------------------------------------------------------------------===//
33
34#ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
35#define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
36
#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
#include "clang/ASTMatchers/Dynamic/Registry.h"
#include "clang/ASTMatchers/Dynamic/VariantValue.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <utility>
#include <vector>
44
45namespace clang {
46namespace ast_matchers {
47namespace dynamic {
48
49/// \brief Matcher expression parser.
50class Parser {
51public:
52  /// \brief Interface to connect the parser with the registry and more.
53  ///
54  /// The parser uses the Sema instance passed into
55  /// parseMatcherExpression() to handle all matcher tokens. The simplest
56  /// processor implementation would simply call into the registry to create
57  /// the matchers.
58  /// However, a more complex processor might decide to intercept the matcher
59  /// creation and do some extra work. For example, it could apply some
60  /// transformation to the matcher by adding some id() nodes, or could detect
61  /// specific matcher nodes for more efficient lookup.
62  class Sema {
63  public:
64    virtual ~Sema();
65
66    /// \brief Process a matcher expression.
67    ///
68    /// All the arguments passed here have already been processed.
69    ///
70    /// \param Ctor A matcher constructor looked up by lookupMatcherCtor.
71    ///
72    /// \param NameRange The location of the name in the matcher source.
73    ///   Useful for error reporting.
74    ///
75    /// \param BindID The ID to use to bind the matcher, or a null \c StringRef
76    ///   if no ID is specified.
77    ///
78    /// \param Args The argument list for the matcher.
79    ///
80    /// \return The matcher objects constructed by the processor, or a null
81    ///   matcher if an error occurred. In that case, \c Error will contain a
82    ///   description of the error.
83    virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
84                                                  SourceRange NameRange,
85                                                  StringRef BindID,
86                                                  ArrayRef<ParserValue> Args,
87                                                  Diagnostics *Error) = 0;
88
89    /// \brief Look up a matcher by name.
90    ///
91    /// \param MatcherName The matcher name found by the parser.
92    ///
93    /// \return The matcher constructor, or Optional<MatcherCtor>() if not
94    /// found.
95    virtual llvm::Optional<MatcherCtor>
96    lookupMatcherCtor(StringRef MatcherName) = 0;
97
98    /// \brief Compute the list of completion types for \p Context.
99    ///
100    /// Each element of \p Context represents a matcher invocation, going from
101    /// outermost to innermost. Elements are pairs consisting of a reference to
102    /// the matcher constructor and the index of the next element in the
103    /// argument list of that matcher (or for the last element, the index of
104    /// the completion point in the argument list). An empty list requests
105    /// completion for the root matcher.
106    virtual std::vector<ArgKind> getAcceptedCompletionTypes(
107        llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context);
108
109    /// \brief Compute the list of completions that match any of
110    /// \p AcceptedTypes.
111    ///
112    /// \param AcceptedTypes All types accepted for this completion.
113    ///
114    /// \return All completions for the specified types.
115    /// Completions should be valid when used in \c lookupMatcherCtor().
116    /// The matcher constructed from the return of \c lookupMatcherCtor()
117    /// should be convertible to some type in \p AcceptedTypes.
118    virtual std::vector<MatcherCompletion>
119    getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes);
120  };
121
122  /// \brief Sema implementation that uses the matcher registry to process the
123  ///   tokens.
124  class RegistrySema : public Parser::Sema {
125   public:
126     ~RegistrySema() override;
127
128    llvm::Optional<MatcherCtor>
129    lookupMatcherCtor(StringRef MatcherName) override;
130
131    VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
132                                          SourceRange NameRange,
133                                          StringRef BindID,
134                                          ArrayRef<ParserValue> Args,
135                                          Diagnostics *Error) override;
136
137    std::vector<ArgKind> getAcceptedCompletionTypes(
138        llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override;
139
140    std::vector<MatcherCompletion>
141    getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override;
142  };
143
144  typedef llvm::StringMap<VariantValue> NamedValueMap;
145
146  /// \brief Parse a matcher expression.
147  ///
148  /// \param MatcherCode The matcher expression to parse.
149  ///
150  /// \param S The Sema instance that will help the parser
151  ///   construct the matchers. If null, it uses the default registry.
152  ///
153  /// \param NamedValues A map of precomputed named values.  This provides
154  ///   the dictionary for the <NamedValue> rule of the grammar.
155  ///   If null, it is ignored.
156  ///
157  /// \return The matcher object constructed by the processor, or an empty
158  ///   Optional if an error occurred. In that case, \c Error will contain a
159  ///   description of the error.
160  ///   The caller takes ownership of the DynTypedMatcher object returned.
161  static llvm::Optional<DynTypedMatcher>
162  parseMatcherExpression(StringRef MatcherCode, Sema *S,
163                         const NamedValueMap *NamedValues,
164                         Diagnostics *Error);
165  static llvm::Optional<DynTypedMatcher>
166  parseMatcherExpression(StringRef MatcherCode, Sema *S,
167                         Diagnostics *Error) {
168    return parseMatcherExpression(MatcherCode, S, nullptr, Error);
169  }
170  static llvm::Optional<DynTypedMatcher>
171  parseMatcherExpression(StringRef MatcherCode, Diagnostics *Error) {
172    return parseMatcherExpression(MatcherCode, nullptr, Error);
173  }
174
175  /// \brief Parse an expression.
176  ///
177  /// Parses any expression supported by this parser. In general, the
178  /// \c parseMatcherExpression function is a better approach to get a matcher
179  /// object.
180  ///
181  /// \param S The Sema instance that will help the parser
182  ///   construct the matchers. If null, it uses the default registry.
183  ///
184  /// \param NamedValues A map of precomputed named values.  This provides
185  ///   the dictionary for the <NamedValue> rule of the grammar.
186  ///   If null, it is ignored.
187  static bool parseExpression(StringRef Code, Sema *S,
188                              const NamedValueMap *NamedValues,
189                              VariantValue *Value, Diagnostics *Error);
190  static bool parseExpression(StringRef Code, Sema *S,
191                              VariantValue *Value, Diagnostics *Error) {
192    return parseExpression(Code, S, nullptr, Value, Error);
193  }
194  static bool parseExpression(StringRef Code, VariantValue *Value,
195                              Diagnostics *Error) {
196    return parseExpression(Code, nullptr, Value, Error);
197  }
198
199  /// \brief Complete an expression at the given offset.
200  ///
201  /// \param S The Sema instance that will help the parser
202  ///   construct the matchers. If null, it uses the default registry.
203  ///
204  /// \param NamedValues A map of precomputed named values.  This provides
205  ///   the dictionary for the <NamedValue> rule of the grammar.
206  ///   If null, it is ignored.
207  ///
208  /// \return The list of completions, which may be empty if there are no
209  /// available completions or if an error occurred.
210  static std::vector<MatcherCompletion>
211  completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
212                     const NamedValueMap *NamedValues);
213  static std::vector<MatcherCompletion>
214  completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S) {
215    return completeExpression(Code, CompletionOffset, S, nullptr);
216  }
217  static std::vector<MatcherCompletion>
218  completeExpression(StringRef Code, unsigned CompletionOffset) {
219    return completeExpression(Code, CompletionOffset, nullptr);
220  }
221
222private:
223  class CodeTokenizer;
224  struct ScopedContextEntry;
225  struct TokenInfo;
226
227  Parser(CodeTokenizer *Tokenizer, Sema *S,
228         const NamedValueMap *NamedValues,
229         Diagnostics *Error);
230
231  bool parseExpressionImpl(VariantValue *Value);
232  bool parseMatcherExpressionImpl(const TokenInfo &NameToken,
233                                  VariantValue *Value);
234  bool parseIdentifierPrefixImpl(VariantValue *Value);
235
236  void addCompletion(const TokenInfo &CompToken,
237                     const MatcherCompletion &Completion);
238  void addExpressionCompletions();
239
240  std::vector<MatcherCompletion>
241  getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes);
242
243  CodeTokenizer *const Tokenizer;
244  Sema *const S;
245  const NamedValueMap *const NamedValues;
246  Diagnostics *const Error;
247
248  typedef std::vector<std::pair<MatcherCtor, unsigned> > ContextStackTy;
249  ContextStackTy ContextStack;
250  std::vector<MatcherCompletion> Completions;
251};
252
253}  // namespace dynamic
254}  // namespace ast_matchers
255}  // namespace clang
256
#endif  // LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
258