//== CStringSyntaxChecker.cpp - C string API syntax checks -----*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// An AST checker that looks for common pitfalls when using C string APIs.
//  - Identifies erroneous patterns in the last argument to strncat - the number
//    of bytes to copy.
//
//===----------------------------------------------------------------------===//
15e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "ClangSACheckers.h"
16e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "clang/AST/Expr.h"
17e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "clang/AST/OperationKinds.h"
18e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "clang/AST/StmtVisitor.h"
1955fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Analysis/AnalysisContext.h"
20e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "clang/Basic/TargetInfo.h"
21e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "clang/Basic/TypeTraits.h"
22e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
2355fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/StaticAnalyzer/Core/Checker.h"
24e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
25e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
268fe83e1df954d72c0f4ffc15d20a5222ec151c21Benjamin Kramer#include "llvm/ADT/SmallString.h"
27e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks#include "llvm/Support/raw_ostream.h"
28e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
29e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksusing namespace clang;
30e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksusing namespace ento;
31e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
32e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksnamespace {
33e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksclass WalkAST: public StmtVisitor<WalkAST> {
34651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  const CheckerBase *Checker;
35e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  BugReporter &BR;
36e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  AnalysisDeclContext* AC;
37e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
38e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  /// Check if two expressions refer to the same declaration.
39e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  inline bool sameDecl(const Expr *A1, const Expr *A2) {
40e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    if (const DeclRefExpr *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts()))
41e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      if (const DeclRefExpr *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts()))
42e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        return D1->getDecl() == D2->getDecl();
43e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    return false;
44e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
45e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
46e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  /// Check if the expression E is a sizeof(WithArg).
47e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  inline bool isSizeof(const Expr *E, const Expr *WithArg) {
48e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    if (const UnaryExprOrTypeTraitExpr *UE =
49e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    dyn_cast<UnaryExprOrTypeTraitExpr>(E))
50e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      if (UE->getKind() == UETT_SizeOf)
51e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        return sameDecl(UE->getArgumentExpr(), WithArg);
52e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    return false;
53e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
54e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
55e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  /// Check if the expression E is a strlen(WithArg).
56e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  inline bool isStrlen(const Expr *E, const Expr *WithArg) {
57e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
58e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      const FunctionDecl *FD = CE->getDirectCallee();
59e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      if (!FD)
60e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        return false;
61d624607d4196e4b37d235daa14699bcb3c1012a6Jordan Rose      return (CheckerContext::isCLibraryFunction(FD, "strlen") &&
62d624607d4196e4b37d235daa14699bcb3c1012a6Jordan Rose              sameDecl(CE->getArg(0), WithArg));
63e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    }
64e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    return false;
65e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
66e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
67e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  /// Check if the expression is an integer literal with value 1.
68e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  inline bool isOne(const Expr *E) {
69e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E))
70e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      return (IL->getValue().isIntN(1));
71e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    return false;
72e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
73e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
74e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  inline StringRef getPrintableName(const Expr *E) {
75e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    if (const DeclRefExpr *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))
76e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      return D->getDecl()->getName();
77e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    return StringRef();
78e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
79e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
80e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  /// Identify erroneous patterns in the last argument to strncat - the number
81e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  /// of bytes to copy.
82e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  bool containsBadStrncatPattern(const CallExpr *CE);
83e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
84e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zakspublic:
85651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  WalkAST(const CheckerBase *checker, BugReporter &br, AnalysisDeclContext *ac)
86651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      : Checker(checker), BR(br), AC(ac) {}
87e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
88e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  // Statement visitor methods.
89e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  void VisitChildren(Stmt *S);
90e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  void VisitStmt(Stmt *S) {
91e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    VisitChildren(S);
92e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
93e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  void VisitCallExpr(CallExpr *CE);
94e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks};
95e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks} // end anonymous namespace
96e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
97e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks// The correct size argument should look like following:
98e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks//   strncat(dst, src, sizeof(dst) - strlen(dest) - 1);
99e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks// We look for the following anti-patterns:
100e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks//   - strncat(dst, src, sizeof(dst) - strlen(dst));
101e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks//   - strncat(dst, src, sizeof(dst) - 1);
102e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks//   - strncat(dst, src, sizeof(dst));
103e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksbool WalkAST::containsBadStrncatPattern(const CallExpr *CE) {
104fececcbc3890955fd46f92036e9cb6ee7d0a60f4Anna Zaks  if (CE->getNumArgs() != 3)
105fececcbc3890955fd46f92036e9cb6ee7d0a60f4Anna Zaks    return false;
106e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  const Expr *DstArg = CE->getArg(0);
107e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  const Expr *SrcArg = CE->getArg(1);
108e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  const Expr *LenArg = CE->getArg(2);
109e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
110e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  // Identify wrong size expressions, which are commonly used instead.
111e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  if (const BinaryOperator *BE =
112e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks              dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) {
113e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    // - sizeof(dst) - strlen(dst)
114e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    if (BE->getOpcode() == BO_Sub) {
115e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      const Expr *L = BE->getLHS();
116e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      const Expr *R = BE->getRHS();
117e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      if (isSizeof(L, DstArg) && isStrlen(R, DstArg))
118e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        return true;
119e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
120e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      // - sizeof(dst) - 1
121e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts()))
122e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        return true;
123e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    }
124e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
125e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  // - sizeof(dst)
126e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  if (isSizeof(LenArg, DstArg))
127e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    return true;
128e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
129e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  // - sizeof(src)
130e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  if (isSizeof(LenArg, SrcArg))
131e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    return true;
132e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  return false;
133e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks}
134e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
135e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksvoid WalkAST::VisitCallExpr(CallExpr *CE) {
136e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  const FunctionDecl *FD = CE->getDirectCallee();
137e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  if (!FD)
138e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    return;
139e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
140d624607d4196e4b37d235daa14699bcb3c1012a6Jordan Rose  if (CheckerContext::isCLibraryFunction(FD, "strncat")) {
141e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    if (containsBadStrncatPattern(CE)) {
142e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      const Expr *DstArg = CE->getArg(0);
143e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      const Expr *LenArg = CE->getArg(2);
144e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      PathDiagnosticLocation Loc =
145e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC);
146e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
147e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      StringRef DstName = getPrintableName(DstArg);
148e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
149f7ccbad5d9949e7ddd1cbef43d482553b811e026Dylan Noblesmith      SmallString<256> S;
150e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      llvm::raw_svector_ostream os(S);
151e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      os << "Potential buffer overflow. ";
152e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      if (!DstName.empty()) {
153e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        os << "Replace with 'sizeof(" << DstName << ") "
154e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks              "- strlen(" << DstName <<") - 1'";
155e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        os << " or u";
156e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      } else
157e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks        os << "U";
158e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      os << "se a safer 'strlcat' API";
159e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
160651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines      BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument",
161651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                         "C String API", os.str(), Loc,
162651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                         LenArg->getSourceRange());
163e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    }
164e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
165e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
166e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  // Recurse and check children.
167e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  VisitChildren(CE);
168e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks}
169e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
170e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksvoid WalkAST::VisitChildren(Stmt *S) {
171e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I != E;
172e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      ++I)
173e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    if (Stmt *child = *I)
174e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      Visit(child);
175e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks}
176e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
177e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksnamespace {
178e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksclass CStringSyntaxChecker: public Checker<check::ASTCodeBody> {
179e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zakspublic:
180e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
181e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr,
182e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks      BugReporter &BR) const {
183651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines    WalkAST walker(this, BR, Mgr.getAnalysisDeclContext(D));
184e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks    walker.Visit(D->getBody());
185e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  }
186e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks};
187e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks}
188e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
189e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaksvoid ento::registerCStringSyntaxChecker(CheckerManager &mgr) {
190e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks  mgr.registerChecker<CStringSyntaxChecker>();
191e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks}
192e00575f12cf280621ef0ed4d69e909bdfc9fef62Anna Zaks
193