1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// This implements a Clang tool to rewrite all instances of
6// scoped_refptr<T>'s implicit cast to T (operator T*) to an explicit call to
7// the .get() method.
8
9#include <assert.h>
10#include <algorithm>
11#include <memory>
12#include <string>
13
14#include "clang/AST/ASTContext.h"
15#include "clang/ASTMatchers/ASTMatchers.h"
16#include "clang/ASTMatchers/ASTMatchersMacros.h"
17#include "clang/ASTMatchers/ASTMatchFinder.h"
18#include "clang/Basic/SourceManager.h"
19#include "clang/Frontend/FrontendActions.h"
20#include "clang/Lex/Lexer.h"
21#include "clang/Tooling/CommonOptionsParser.h"
22#include "clang/Tooling/Refactoring.h"
23#include "clang/Tooling/Tooling.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/TargetSelect.h"
26
27using namespace clang::ast_matchers;
28using clang::tooling::CommonOptionsParser;
29using clang::tooling::Replacement;
30using clang::tooling::Replacements;
31using llvm::StringRef;
32
33namespace clang {
34namespace ast_matchers {
35
36const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
37    conversionDecl;
38
39AST_MATCHER(QualType, isBoolean) {
40  return Node->isBooleanType();
41}
42
43}  // namespace ast_matchers
44}  // namespace clang
45
46namespace {
47
48// Returns true if expr needs to be put in parens (eg: when it is an operator
49// syntactically).
50bool NeedsParens(const clang::Expr* expr) {
51  if (llvm::dyn_cast<clang::UnaryOperator>(expr) ||
52      llvm::dyn_cast<clang::BinaryOperator>(expr) ||
53      llvm::dyn_cast<clang::ConditionalOperator>(expr)) {
54    return true;
55  }
56  // Calls to an overloaded operator also need parens, except for foo(...) and
57  // foo[...] expressions.
58  if (const clang::CXXOperatorCallExpr* op =
59          llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
60    return op->getOperator() != clang::OO_Call &&
61           op->getOperator() != clang::OO_Subscript;
62  }
63  return false;
64}
65
66Replacement RewriteImplicitToExplicitConversion(
67    const MatchFinder::MatchResult& result,
68    const clang::Expr* expr) {
69  clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
70      result.SourceManager->getSpellingLoc(expr->getLocStart()),
71      result.SourceManager->getSpellingLoc(expr->getLocEnd()));
72  assert(range.isValid() && "Invalid range!");
73
74  // Handle cases where an implicit cast is being done by dereferencing a
75  // pointer to a scoped_refptr<> (sadly, it happens...)
76  //
77  // This rewrites both "*foo" and "*(foo)" as "foo->get()".
78  if (const clang::UnaryOperator* op =
79          llvm::dyn_cast<clang::UnaryOperator>(expr)) {
80    if (op->getOpcode() == clang::UO_Deref) {
81      const clang::Expr* const sub_expr =
82          op->getSubExpr()->IgnoreParenImpCasts();
83      clang::CharSourceRange sub_expr_range =
84          clang::CharSourceRange::getTokenRange(
85              result.SourceManager->getSpellingLoc(sub_expr->getLocStart()),
86              result.SourceManager->getSpellingLoc(sub_expr->getLocEnd()));
87      assert(sub_expr_range.isValid() && "Invalid subexpression range!");
88
89      std::string inner_text = clang::Lexer::getSourceText(
90          sub_expr_range, *result.SourceManager, result.Context->getLangOpts());
91      assert(!inner_text.empty() && "No text for subexpression!");
92      if (NeedsParens(sub_expr)) {
93        inner_text.insert(0, "(");
94        inner_text.append(")");
95      }
96      inner_text.append("->get()");
97      return Replacement(*result.SourceManager, range, inner_text);
98    }
99  }
100
101  std::string text = clang::Lexer::getSourceText(
102      range, *result.SourceManager, result.Context->getLangOpts());
103  assert(!text.empty() && "No text for expression!");
104
105  // Unwrap any temporaries - for example, custom iterators that return
106  // scoped_refptr<T> as part of operator*. Any such iterators should also
107  // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72)
108  if (const clang::CXXBindTemporaryExpr* op =
109          llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) {
110    expr = op->getSubExpr();
111  }
112
113  // Handle iterators (which are operator* calls, followed by implicit
114  // conversions) by rewriting *it as it->get()
115  if (const clang::CXXOperatorCallExpr* op =
116          llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
117    if (op->getOperator() == clang::OO_Star) {
118      // Note that this doesn't rewrite **it correctly, since it should be
119      // rewritten using parens, e.g. (*it)->get(). However, this shouldn't
120      // happen frequently, if at all, since it would likely indicate code is
121      // storing pointers to a scoped_refptr in a container.
122      text.erase(0, 1);
123      text.append("->get()");
124      return Replacement(*result.SourceManager, range, text);
125    }
126  }
127
128  // The only remaining calls should be non-dereferencing calls (eg: member
129  // calls), so a simple ".get()" appending should suffice.
130  if (NeedsParens(expr)) {
131    text.insert(0, "(");
132    text.append(")");
133  }
134  text.append(".get()");
135  return Replacement(*result.SourceManager, range, text);
136}
137
138Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result,
139                                        clang::SourceLocation begin,
140                                        clang::SourceLocation end) {
141  clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
142      result.SourceManager->getSpellingLoc(begin),
143      result.SourceManager->getSpellingLoc(end));
144  assert(range.isValid() && "Invalid range!");
145
146  std::string text = clang::Lexer::getSourceText(
147      range, *result.SourceManager, result.Context->getLangOpts());
148  text.erase(text.rfind('*'));
149
150  std::string replacement_text("scoped_refptr<");
151  replacement_text += text;
152  replacement_text += ">";
153
154  return Replacement(*result.SourceManager, range, replacement_text);
155}
156
157class GetRewriterCallback : public MatchFinder::MatchCallback {
158 public:
159  explicit GetRewriterCallback(Replacements* replacements)
160      : replacements_(replacements) {}
161  virtual void run(const MatchFinder::MatchResult& result) override;
162
163 private:
164  Replacements* const replacements_;
165};
166
167void GetRewriterCallback::run(const MatchFinder::MatchResult& result) {
168  const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg");
169  assert(arg && "Unexpected match! No Expr captured!");
170  auto err =
171      replacements_->add(RewriteImplicitToExplicitConversion(result, arg));
172  assert(!err);
173}
174
175class VarRewriterCallback : public MatchFinder::MatchCallback {
176 public:
177  explicit VarRewriterCallback(Replacements* replacements)
178      : replacements_(replacements) {}
179  virtual void run(const MatchFinder::MatchResult& result) override;
180
181 private:
182  Replacements* const replacements_;
183};
184
185void VarRewriterCallback::run(const MatchFinder::MatchResult& result) {
186  const clang::DeclaratorDecl* const var_decl =
187      result.Nodes.getNodeAs<clang::DeclaratorDecl>("var");
188  assert(var_decl && "Unexpected match! No VarDecl captured!");
189
190  const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo();
191
192  // TODO(dcheng): This mishandles a case where a variable has multiple
193  // declarations, e.g.:
194  //
195  // in .h:
196  // Foo* my_global_magical_foo;
197  //
198  // in .cc:
199  // Foo* my_global_magical_foo = CreateFoo();
200  //
201  // In this case, it will only rewrite the .cc definition. Oh well. This should
202  // be rare enough that these cases can be manually handled, since the style
203  // guide prohibits globals of non-POD type.
204  auto err = replacements_->add(RewriteRawPtrToScopedRefptr(
205      result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc()));
206  assert(!err);
207}
208
209class FunctionRewriterCallback : public MatchFinder::MatchCallback {
210 public:
211  explicit FunctionRewriterCallback(Replacements* replacements)
212      : replacements_(replacements) {}
213  virtual void run(const MatchFinder::MatchResult& result) override;
214
215 private:
216  Replacements* const replacements_;
217};
218
219void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) {
220  const clang::FunctionDecl* const function_decl =
221      result.Nodes.getNodeAs<clang::FunctionDecl>("fn");
222  assert(function_decl && "Unexpected match! No FunctionDecl captured!");
223
224  // If matched against an implicit conversion to a DeclRefExpr, make sure the
225  // referenced declaration is of class type, e.g. the tool skips trying to
226  // chase pointers/references to determine if the pointee is a scoped_refptr<T>
227  // with local storage. Instead, let a human manually handle those cases.
228  const clang::VarDecl* const var_decl =
229      result.Nodes.getNodeAs<clang::VarDecl>("var");
230  if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) {
231    return;
232  }
233
234  for (clang::FunctionDecl* f : function_decl->redecls()) {
235    clang::SourceRange range = f->getReturnTypeSourceRange();
236    auto err = replacements_->add(
237        RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd()));
238    assert(!err);
239  }
240}
241
242class MacroRewriterCallback : public MatchFinder::MatchCallback {
243 public:
244  explicit MacroRewriterCallback(Replacements* replacements)
245      : replacements_(replacements) {}
246  virtual void run(const MatchFinder::MatchResult& result) override;
247
248 private:
249  Replacements* const replacements_;
250};
251
252void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) {
253  const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr");
254  assert(expr && "Unexpected match! No Expr captured!");
255  auto err =
256      replacements_->add(RewriteImplicitToExplicitConversion(result, expr));
257  assert(!err);
258}
259
260}  // namespace
261
262static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage);
263
264int main(int argc, const char* argv[]) {
265  // TODO(dcheng): Clang tooling should do this itself.
266  // http://llvm.org/bugs/show_bug.cgi?id=21627
267  llvm::InitializeNativeTarget();
268  llvm::InitializeNativeTargetAsmParser();
269  llvm::cl::OptionCategory category("Remove scoped_refptr conversions");
270  CommonOptionsParser options(argc, argv, category);
271  clang::tooling::ClangTool tool(options.getCompilations(),
272                                 options.getSourcePathList());
273
274  MatchFinder match_finder;
275  Replacements replacements;
276
277  auto is_scoped_refptr = cxxRecordDecl(isSameOrDerivedFrom("::scoped_refptr"),
278                                        isTemplateInstantiation());
279
280  // Finds all calls to conversion operator member function. This catches calls
281  // to "operator T*", "operator Testable", and "operator bool" equally.
282  auto base_matcher =
283      cxxMemberCallExpr(thisPointerType(is_scoped_refptr),
284                        callee(conversionDecl()), on(id("arg", expr())));
285
286  // The heuristic for whether or not converting a temporary is 'unsafe'. An
287  // unsafe conversion is one where a temporary scoped_refptr<T> is converted to
288  // another type. The matcher provides an exception for a temporary
289  // scoped_refptr that is the result of an operator call. In this case, assume
290  // that it's the result of an iterator dereference, and the container itself
291  // retains the necessary reference, since this is a common idiom to see in
292  // loop bodies.
293  auto is_unsafe_temporary_conversion =
294      on(cxxBindTemporaryExpr(unless(has(cxxOperatorCallExpr()))));
295
296  // Returning a scoped_refptr<T> as a T* is considered unsafe if either are
297  // true:
298  // - The scoped_refptr<T> is a temporary.
299  // - The scoped_refptr<T> has local lifetime.
300  auto returned_as_raw_ptr = hasParent(
301      returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType()))))));
302  // This matcher intentionally matches more than it should. For example, this
303  // will match:
304  //   scoped_refptr<Foo>& foo = some_other_foo;
305  //   return foo;
306  // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>,
307  // so those cases can be manually handled.
308  auto is_local_variable =
309      on(declRefExpr(to(id("var", varDecl(hasLocalStorage())))));
310  auto is_unsafe_return =
311      anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)),
312                  is_local_variable),
313            allOf(hasParent(implicitCastExpr(
314                      hasParent(exprWithCleanups(returned_as_raw_ptr)))),
315                  is_unsafe_temporary_conversion));
316
317  // This catches both user-defined conversions (eg: "operator bool") and
318  // standard conversion sequence (C++03 13.3.3.1.1), such as converting a
319  // pointer to a bool.
320  auto implicit_to_bool =
321      implicitCastExpr(hasImplicitDestinationType(isBoolean()));
322
323  // Avoid converting calls to of "operator Testable" -> "bool" and calls of
324  // "operator T*" -> "bool".
325  auto bool_conversion_matcher = hasParent(
326      expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool)))));
327
328  auto is_logging_helper =
329      functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl")));
330  auto is_gtest_helper = functionDecl(
331      anyOf(cxxMethodDecl(ofClass(cxxRecordDecl(isSameOrDerivedFrom(
332                              hasName("::testing::internal::EqHelper")))),
333                          hasName("Compare")),
334            hasName("::testing::internal::CmpHelperNE")));
335  auto is_gtest_assertion_result_ctor =
336      cxxConstructorDecl(ofClass(cxxRecordDecl(
337          isSameOrDerivedFrom(hasName("::testing::AssertionResult")))));
338
339  // Find all calls to an operator overload that are 'safe'.
340  //
341  // All bool conversions will be handled with the Testable trick, but that
342  // can only be used once "operator T*" is removed, since otherwise it leaves
343  // the call ambiguous.
344  GetRewriterCallback get_callback(&replacements);
345  match_finder.addMatcher(
346      cxxMemberCallExpr(
347          base_matcher,
348          // Excluded since the conversion may be unsafe.
349          unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)),
350          // Excluded since the conversion occurs inside a helper function that
351          // the macro wraps. Letting this callback handle the rewrite would
352          // result in an incorrect replacement that changes the helper function
353          // itself. Instead, the right replacement is to rewrite the macro's
354          // arguments.
355          unless(hasAncestor(decl(anyOf(is_logging_helper, is_gtest_helper,
356                                        is_gtest_assertion_result_ctor))))),
357      &get_callback);
358
359  // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*.
360  VarRewriterCallback var_callback(&replacements);
361  auto initialized_with_temporary = has(ignoringImpCasts(
362      cxxMemberCallExpr(base_matcher, is_unsafe_temporary_conversion)));
363  match_finder.addMatcher(
364      id("var", varDecl(hasInitializer(initialized_with_temporary),
365                        hasType(pointerType()))),
366      &var_callback);
367  match_finder.addMatcher(
368      cxxConstructorDecl(forEachConstructorInitializer(
369          allOf(withInitializer(initialized_with_temporary),
370                forField(id("var", fieldDecl(hasType(pointerType()))))))),
371      &var_callback);
372
373  // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when
374  // returning a value.
375  FunctionRewriterCallback fn_callback(&replacements);
376  match_finder.addMatcher(cxxMemberCallExpr(base_matcher, is_unsafe_return),
377                          &fn_callback);
378
379  // Rewrite logging / gtest expressions that result in an implicit conversion.
380  // Luckily, the matchers don't need to handle the case where one of the macro
381  // arguments is NULL, such as:
382  // CHECK_EQ(my_scoped_refptr, NULL)
383  // because it simply doesn't compile--since NULL is actually of integral type,
384  // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is
385  // no comparison overload for scoped_refptr<T> and int, this fails to compile.
386  MacroRewriterCallback macro_callback(&replacements);
387  // CHECK_EQ/CHECK_NE helpers.
388  match_finder.addMatcher(
389      callExpr(callee(is_logging_helper), argumentCountIs(3),
390               hasAnyArgument(ignoringParenImpCasts(
391                   id("expr", expr(hasType(is_scoped_refptr))))),
392               hasAnyArgument(ignoringParenImpCasts(hasType(pointerType()))),
393               hasArgument(2, stringLiteral())),
394      &macro_callback);
395  // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_EQ, which use the same underlying
396  // helper functions. Even though gtest has special handling for pointer to
397  // NULL comparisons, it doesn't trigger in this case, so no special handling
398  // is needed for the replacements.
399  match_finder.addMatcher(
400      callExpr(callee(is_gtest_helper),
401               argumentCountIs(4),
402               hasArgument(0, stringLiteral()),
403               hasArgument(1, stringLiteral()),
404               hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))),
405               hasAnyArgument(hasType(pointerType()))),
406      &macro_callback);
407  // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to
408  // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before
409  // being passed as an argument to AssertionResult's constructor. As a result,
410  // GetRewriterCallback handles this case properly since the conversion isn't
411  // hidden inside AssertionResult, and the generated replacement properly
412  // rewrites the macro argument.
413  // However, the tool does need to handle the _TRUE counterparts, since the
414  // conversion occurs inside the constructor in those cases.
415  match_finder.addMatcher(
416      cxxConstructExpr(
417          argumentCountIs(2),
418          hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))),
419          hasDeclaration(is_gtest_assertion_result_ctor)),
420      &macro_callback);
421
422  std::unique_ptr<clang::tooling::FrontendActionFactory> factory =
423      clang::tooling::newFrontendActionFactory(&match_finder);
424  int result = tool.run(factory.get());
425  if (result != 0)
426    return result;
427
428  // Serialization format is documented in tools/clang/scripts/run_tool.py
429  llvm::outs() << "==== BEGIN EDITS ====\n";
430  for (const auto& r : replacements) {
431    std::string replacement_text = r.getReplacementText().str();
432    std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0');
433    llvm::outs() << "r:::" << r.getFilePath() << ":::" << r.getOffset() << ":::"
434                 << r.getLength() << ":::" << replacement_text << "\n";
435  }
436  llvm::outs() << "==== END EDITS ====\n";
437
438  return 0;
439}
440