clang  3.7.0
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
12 // The taint information produced by it might be useful to other checkers. For
13 // example, checkers should report errors which involve tainted data more
14 // aggressively, even if the involved symbols are under constrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
25 #include <climits>
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32  check::PreStmt<CallExpr> > {
33 public:
34  static void *getTag() { static int Tag; return &Tag; }
35 
36  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37 
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39 
40 private:
41  static const unsigned InvalidArgIndex = UINT_MAX;
42  /// Denotes the return vale.
43  static const unsigned ReturnValueIndex = UINT_MAX - 1;
44 
45  mutable std::unique_ptr<BugType> BT;
46  inline void initBugType() const {
47  if (!BT)
48  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49  }
50 
51  /// \brief Catch taint related bugs. Check if tainted data is passed to a
52  /// system call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54 
55  /// \brief Add taint sources on a pre-visit.
56  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57 
58  /// \brief Propagate taint generated at pre-visit.
59  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60 
61  /// \brief Add taint sources on a post visit.
62  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63 
64  /// Check if the region the expression evaluates to is the standard input,
65  /// and thus, is tainted.
66  static bool isStdin(const Expr *E, CheckerContext &C);
67 
68  /// \brief Given a pointer argument, get the symbol of the value it contains
69  /// (points to).
70  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71 
72  /// Functions defining the attack surface.
73  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74  CheckerContext &C) const;
75  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78 
79  /// Taint the scanned input if the file is tainted.
80  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81 
82  /// Check for CWE-134: Uncontrolled Format String.
83  static const char MsgUncontrolledFormatString[];
84  bool checkUncontrolledFormatString(const CallExpr *CE,
85  CheckerContext &C) const;
86 
87  /// Check for:
88  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89  /// CWE-78, "Failure to Sanitize Data into an OS Command"
90  static const char MsgSanitizeSystemArgs[];
91  bool checkSystemCall(const CallExpr *CE, StringRef Name,
92  CheckerContext &C) const;
93 
94  /// Check if tainted data is used as a buffer size ins strn.. functions,
95  /// and allocators.
96  static const char MsgTaintedBufferSize[];
97  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98  CheckerContext &C) const;
99 
100  /// Generate a report if the expression is tainted or points to tainted data.
101  bool generateReportIfTainted(const Expr *E, const char Msg[],
102  CheckerContext &C) const;
103 
104 
105  typedef SmallVector<unsigned, 2> ArgVector;
106 
107  /// \brief A struct used to specify taint propagation rules for a function.
108  ///
109  /// If any of the possible taint source arguments is tainted, all of the
110  /// destination arguments should also be tainted. Use InvalidArgIndex in the
111  /// src list to specify that all of the arguments can introduce taint. Use
112  /// InvalidArgIndex in the dst arguments to signify that all the non-const
113  /// pointer and reference arguments might be tainted on return. If
114  /// ReturnValueIndex is added to the dst list, the return value will be
115  /// tainted.
116  struct TaintPropagationRule {
117  /// List of arguments which can be taint sources and should be checked.
118  ArgVector SrcArgs;
119  /// List of arguments which should be tainted on function return.
120  ArgVector DstArgs;
121  // TODO: Check if using other data structures would be more optimal.
122 
123  TaintPropagationRule() {}
124 
125  TaintPropagationRule(unsigned SArg,
126  unsigned DArg, bool TaintRet = false) {
127  SrcArgs.push_back(SArg);
128  DstArgs.push_back(DArg);
129  if (TaintRet)
130  DstArgs.push_back(ReturnValueIndex);
131  }
132 
133  TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134  unsigned DArg, bool TaintRet = false) {
135  SrcArgs.push_back(SArg1);
136  SrcArgs.push_back(SArg2);
137  DstArgs.push_back(DArg);
138  if (TaintRet)
139  DstArgs.push_back(ReturnValueIndex);
140  }
141 
142  /// Get the propagation rule for a given function.
143  static TaintPropagationRule
144  getTaintPropagationRule(const FunctionDecl *FDecl,
145  StringRef Name,
146  CheckerContext &C);
147 
148  inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
149  inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
150 
151  inline bool isNull() const { return SrcArgs.empty(); }
152 
153  inline bool isDestinationArgument(unsigned ArgNum) const {
154  return (std::find(DstArgs.begin(),
155  DstArgs.end(), ArgNum) != DstArgs.end());
156  }
157 
158  static inline bool isTaintedOrPointsToTainted(const Expr *E,
160  CheckerContext &C) {
161  return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162  (E->getType().getTypePtr()->isPointerType() &&
163  State->isTainted(getPointedToSymbol(C, E))));
164  }
165 
166  /// \brief Pre-process a function which propagates taint according to the
167  /// taint rule.
168  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169 
170  };
171 };
172 
173 const unsigned GenericTaintChecker::ReturnValueIndex;
174 const unsigned GenericTaintChecker::InvalidArgIndex;
175 
176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177  "Untrusted data is used as a format string "
178  "(CWE-134: Uncontrolled Format String)";
179 
180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
181  "Untrusted data is passed to a system call "
182  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
183 
184 const char GenericTaintChecker::MsgTaintedBufferSize[] =
185  "Untrusted data is used to specify the buffer size "
186  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187  "character data and the null terminator)";
188 
189 } // end of anonymous namespace
190 
191 /// A set used to hand information from a call's pre-visit to its post-visit.
192 /// Each element is either ReturnValueIndex or the index of a pointer/reference
193 /// argument whose pointed-to data should be tainted on return. It is filled by
194 /// TaintPropagationRule::process() and preFscanf(), and cleared by propagateFromPre().
195 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
196 
197 GenericTaintChecker::TaintPropagationRule
198 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
199  const FunctionDecl *FDecl,
200  StringRef Name,
201  CheckerContext &C) {
202  // TODO: Currently, we might lose precision here: we always mark a return
203  // value as tainted even if it's just a pointer, pointing to tainted data.
204 
205  // Check for exact name match for functions without builtin substitutes.
206  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
207  .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
208  .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
209  .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
210  .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
211  .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
212  .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
213  .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
214  .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
215  .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
216  .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
217  .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
218  .Case("read", TaintPropagationRule(0, 2, 1, true))
219  .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
220  .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
221  .Case("fgets", TaintPropagationRule(2, 0, true))
222  .Case("getline", TaintPropagationRule(2, 0))
223  .Case("getdelim", TaintPropagationRule(3, 0))
224  .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
225  .Default(TaintPropagationRule());
226 
227  if (!Rule.isNull())
228  return Rule;
229 
230  // Check if it's one of the memory setting/copying functions.
231  // This check is specialized but faster then calling isCLibraryFunction.
232  unsigned BId = 0;
233  if ( (BId = FDecl->getMemoryFunctionKind()) )
234  switch(BId) {
235  case Builtin::BImemcpy:
236  case Builtin::BImemmove:
237  case Builtin::BIstrncpy:
238  case Builtin::BIstrncat:
239  return TaintPropagationRule(1, 2, 0, true);
240  case Builtin::BIstrlcpy:
241  case Builtin::BIstrlcat:
242  return TaintPropagationRule(1, 2, 0, false);
243  case Builtin::BIstrndup:
244  return TaintPropagationRule(0, 1, ReturnValueIndex);
245 
246  default:
247  break;
248  };
249 
250  // Process all other functions which could be defined as builtins.
251  if (Rule.isNull()) {
252  if (C.isCLibraryFunction(FDecl, "snprintf") ||
253  C.isCLibraryFunction(FDecl, "sprintf"))
254  return TaintPropagationRule(InvalidArgIndex, 0, true);
255  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
256  C.isCLibraryFunction(FDecl, "stpcpy") ||
257  C.isCLibraryFunction(FDecl, "strcat"))
258  return TaintPropagationRule(1, 0, true);
259  else if (C.isCLibraryFunction(FDecl, "bcopy"))
260  return TaintPropagationRule(0, 2, 1, false);
261  else if (C.isCLibraryFunction(FDecl, "strdup") ||
262  C.isCLibraryFunction(FDecl, "strdupa"))
263  return TaintPropagationRule(0, ReturnValueIndex);
264  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
265  return TaintPropagationRule(0, ReturnValueIndex);
266  }
267 
268  // Skipping the following functions, since they might be used for cleansing
269  // or smart memory copy:
270  // - memccpy - copying until hitting a special character.
271 
272  return TaintPropagationRule();
273 }
274 
275 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
276  CheckerContext &C) const {
277  // Check for errors first.
278  if (checkPre(CE, C))
279  return;
280 
281  // Add taint second.
282  addSourcesPre(CE, C);
283 }
284 
285 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
286  CheckerContext &C) const {
287  if (propagateFromPre(CE, C))
288  return;
289  addSourcesPost(CE, C);
290 }
291 
292 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
293  CheckerContext &C) const {
294  ProgramStateRef State = nullptr;
295  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
296  if (!FDecl || FDecl->getKind() != Decl::Function)
297  return;
298 
299  StringRef Name = C.getCalleeName(FDecl);
300  if (Name.empty())
301  return;
302 
303  // First, try generating a propagation rule for this function.
304  TaintPropagationRule Rule =
305  TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
306  if (!Rule.isNull()) {
307  State = Rule.process(CE, C);
308  if (!State)
309  return;
310  C.addTransition(State);
311  return;
312  }
313 
314  // Otherwise, check if we have custom pre-processing implemented.
315  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
316  .Case("fscanf", &GenericTaintChecker::preFscanf)
317  .Default(nullptr);
318  // Check and evaluate the call.
319  if (evalFunction)
320  State = (this->*evalFunction)(CE, C);
321  if (!State)
322  return;
323  C.addTransition(State);
324 
325 }
326 
327 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
328  CheckerContext &C) const {
329  ProgramStateRef State = C.getState();
330 
331  // Depending on what was tainted at pre-visit, we determined a set of
332  // arguments which should be tainted after the function returns. These are
333  // stored in the state as TaintArgsOnPostVisit set.
334  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
335  if (TaintArgs.isEmpty())
336  return false;
337 
339  I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
340  unsigned ArgNum = *I;
341 
342  // Special handling for the tainted return value.
343  if (ArgNum == ReturnValueIndex) {
344  State = State->addTaint(CE, C.getLocationContext());
345  continue;
346  }
347 
348  // The arguments are pointer arguments. The data they are pointing at is
349  // tainted after the call.
350  if (CE->getNumArgs() < (ArgNum + 1))
351  return false;
352  const Expr* Arg = CE->getArg(ArgNum);
353  SymbolRef Sym = getPointedToSymbol(C, Arg);
354  if (Sym)
355  State = State->addTaint(Sym);
356  }
357 
358  // Clear up the taint info from the state.
359  State = State->remove<TaintArgsOnPostVisit>();
360 
361  if (State != C.getState()) {
362  C.addTransition(State);
363  return true;
364  }
365  return false;
366 }
367 
368 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369  CheckerContext &C) const {
370  // Define the attack surface.
371  // Set the evaluation function by switching on the callee name.
372  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
373  if (!FDecl || FDecl->getKind() != Decl::Function)
374  return;
375 
376  StringRef Name = C.getCalleeName(FDecl);
377  if (Name.empty())
378  return;
379  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
380  .Case("scanf", &GenericTaintChecker::postScanf)
381  // TODO: Add support for vfscanf & family.
382  .Case("getchar", &GenericTaintChecker::postRetTaint)
383  .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
384  .Case("getenv", &GenericTaintChecker::postRetTaint)
385  .Case("fopen", &GenericTaintChecker::postRetTaint)
386  .Case("fdopen", &GenericTaintChecker::postRetTaint)
387  .Case("freopen", &GenericTaintChecker::postRetTaint)
388  .Case("getch", &GenericTaintChecker::postRetTaint)
389  .Case("wgetch", &GenericTaintChecker::postRetTaint)
390  .Case("socket", &GenericTaintChecker::postSocket)
391  .Default(nullptr);
392 
393  // If the callee isn't defined, it is not of security concern.
394  // Check and evaluate the call.
395  ProgramStateRef State = nullptr;
396  if (evalFunction)
397  State = (this->*evalFunction)(CE, C);
398  if (!State)
399  return;
400 
401  C.addTransition(State);
402 }
403 
404 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
405 
406  if (checkUncontrolledFormatString(CE, C))
407  return true;
408 
409  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
410  if (!FDecl || FDecl->getKind() != Decl::Function)
411  return false;
412 
413  StringRef Name = C.getCalleeName(FDecl);
414  if (Name.empty())
415  return false;
416 
417  if (checkSystemCall(CE, Name, C))
418  return true;
419 
420  if (checkTaintedBufferSize(CE, FDecl, C))
421  return true;
422 
423  return false;
424 }
425 
426 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
427  const Expr* Arg) {
428  ProgramStateRef State = C.getState();
429  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
430  if (AddrVal.isUnknownOrUndef())
431  return nullptr;
432 
433  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
434  if (!AddrLoc)
435  return nullptr;
436 
437  const PointerType *ArgTy =
438  dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
439  SVal Val = State->getSVal(*AddrLoc,
440  ArgTy ? ArgTy->getPointeeType(): QualType());
441  return Val.getAsSymbol();
442 }
443 
445 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
446  CheckerContext &C) const {
447  ProgramStateRef State = C.getState();
448 
449  // Check for taint in arguments.
450  bool IsTainted = false;
451  for (ArgVector::const_iterator I = SrcArgs.begin(),
452  E = SrcArgs.end(); I != E; ++I) {
453  unsigned ArgNum = *I;
454 
455  if (ArgNum == InvalidArgIndex) {
456  // Check if any of the arguments is tainted, but skip the
457  // destination arguments.
458  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
459  if (isDestinationArgument(i))
460  continue;
461  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
462  break;
463  }
464  break;
465  }
466 
467  if (CE->getNumArgs() < (ArgNum + 1))
468  return State;
469  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
470  break;
471  }
472  if (!IsTainted)
473  return State;
474 
475  // Mark the arguments which should be tainted after the function returns.
476  for (ArgVector::const_iterator I = DstArgs.begin(),
477  E = DstArgs.end(); I != E; ++I) {
478  unsigned ArgNum = *I;
479 
480  // Should we mark all arguments as tainted?
481  if (ArgNum == InvalidArgIndex) {
482  // For all pointer and references that were passed in:
483  // If they are not pointing to const data, mark data as tainted.
484  // TODO: So far we are just going one level down; ideally we'd need to
485  // recurse here.
486  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
487  const Expr *Arg = CE->getArg(i);
488  // Process pointer argument.
489  const Type *ArgTy = Arg->getType().getTypePtr();
490  QualType PType = ArgTy->getPointeeType();
491  if ((!PType.isNull() && !PType.isConstQualified())
492  || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
493  State = State->add<TaintArgsOnPostVisit>(i);
494  }
495  continue;
496  }
497 
498  // Should mark the return value?
499  if (ArgNum == ReturnValueIndex) {
500  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
501  continue;
502  }
503 
504  // Mark the given argument.
505  assert(ArgNum < CE->getNumArgs());
506  State = State->add<TaintArgsOnPostVisit>(ArgNum);
507  }
508 
509  return State;
510 }
511 
512 
513 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
514 // and arg 1 should get taint.
515 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
516  CheckerContext &C) const {
517  assert(CE->getNumArgs() >= 2);
518  ProgramStateRef State = C.getState();
519 
520  // Check is the file descriptor is tainted.
521  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
522  isStdin(CE->getArg(0), C)) {
523  // All arguments except for the first two should get taint.
524  for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
525  State = State->add<TaintArgsOnPostVisit>(i);
526  return State;
527  }
528 
529  return nullptr;
530 }
531 
532 
533 // If argument 0(protocol domain) is network, the return value should get taint.
534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
535  CheckerContext &C) const {
536  ProgramStateRef State = C.getState();
537  if (CE->getNumArgs() < 3)
538  return State;
539 
540  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542  // White list the internal communication protocols.
543  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545  return State;
546  State = State->addTaint(CE, C.getLocationContext());
547  return State;
548 }
549 
550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
551  CheckerContext &C) const {
552  ProgramStateRef State = C.getState();
553  if (CE->getNumArgs() < 2)
554  return State;
555 
556  // All arguments except for the very first one should get taint.
557  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558  // The arguments are pointer arguments. The data they are pointing at is
559  // tainted after the call.
560  const Expr* Arg = CE->getArg(i);
561  SymbolRef Sym = getPointedToSymbol(C, Arg);
562  if (Sym)
563  State = State->addTaint(Sym);
564  }
565  return State;
566 }
567 
568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
569  CheckerContext &C) const {
570  return C.getState()->addTaint(CE, C.getLocationContext());
571 }
572 
573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
574  ProgramStateRef State = C.getState();
575  SVal Val = State->getSVal(E, C.getLocationContext());
576 
577  // stdin is a pointer, so it would be a region.
578  const MemRegion *MemReg = Val.getAsRegion();
579 
580  // The region should be symbolic, we do not know it's value.
581  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582  if (!SymReg)
583  return false;
584 
585  // Get it's symbol and find the declaration region it's pointing to.
586  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
587  if (!Sm)
588  return false;
589  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
590  if (!DeclReg)
591  return false;
592 
593  // This region corresponds to a declaration, find out if it's a global/extern
594  // variable named stdin with the proper type.
595  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
596  D = D->getCanonicalDecl();
597  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
598  if (const PointerType * PtrTy =
599  dyn_cast<PointerType>(D->getType().getTypePtr()))
600  if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
601  return true;
602  }
603  return false;
604 }
605 
606 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
607  const CheckerContext &C,
608  unsigned int &ArgNum) {
609  // Find if the function contains a format string argument.
610  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
611  // vsnprintf, syslog, custom annotated functions.
612  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
613  if (!FDecl)
614  return false;
615  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
616  ArgNum = Format->getFormatIdx() - 1;
617  if ((Format->getType()->getName() == "printf") &&
618  CE->getNumArgs() > ArgNum)
619  return true;
620  }
621 
622  // Or if a function is named setproctitle (this is a heuristic).
623  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
624  ArgNum = 0;
625  return true;
626  }
627 
628  return false;
629 }
630 
631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
632  const char Msg[],
633  CheckerContext &C) const {
634  assert(E);
635 
636  // Check for taint.
637  ProgramStateRef State = C.getState();
638  if (!State->isTainted(getPointedToSymbol(C, E)) &&
639  !State->isTainted(E, C.getLocationContext()))
640  return false;
641 
642  // Generate diagnostic.
643  if (ExplodedNode *N = C.addTransition()) {
644  initBugType();
645  auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
646  report->addRange(E->getSourceRange());
647  C.emitReport(std::move(report));
648  return true;
649  }
650  return false;
651 }
652 
653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
654  CheckerContext &C) const{
655  // Check if the function contains a format string argument.
656  unsigned int ArgNum = 0;
657  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
658  return false;
659 
660  // If either the format string content or the pointer itself are tainted, warn.
661  if (generateReportIfTainted(CE->getArg(ArgNum),
662  MsgUncontrolledFormatString, C))
663  return true;
664  return false;
665 }
666 
667 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
668  StringRef Name,
669  CheckerContext &C) const {
670  // TODO: It might make sense to run this check on demand. In some cases,
671  // we should check if the environment has been cleansed here. We also might
672  // need to know if the user was reset before these calls(seteuid).
673  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
674  .Case("system", 0)
675  .Case("popen", 0)
676  .Case("execl", 0)
677  .Case("execle", 0)
678  .Case("execlp", 0)
679  .Case("execv", 0)
680  .Case("execvp", 0)
681  .Case("execvP", 0)
682  .Case("execve", 0)
683  .Case("dlopen", 0)
684  .Default(UINT_MAX);
685 
686  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
687  return false;
688 
689  if (generateReportIfTainted(CE->getArg(ArgNum),
690  MsgSanitizeSystemArgs, C))
691  return true;
692 
693  return false;
694 }
695 
696 // TODO: Should this check be a part of the CString checker?
697 // If yes, should taint be a global setting?
698 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
699  const FunctionDecl *FDecl,
700  CheckerContext &C) const {
701  // If the function has a buffer size argument, set ArgNum.
702  unsigned ArgNum = InvalidArgIndex;
703  unsigned BId = 0;
704  if ( (BId = FDecl->getMemoryFunctionKind()) )
705  switch(BId) {
706  case Builtin::BImemcpy:
707  case Builtin::BImemmove:
708  case Builtin::BIstrncpy:
709  ArgNum = 2;
710  break;
711  case Builtin::BIstrndup:
712  ArgNum = 1;
713  break;
714  default:
715  break;
716  };
717 
718  if (ArgNum == InvalidArgIndex) {
719  if (C.isCLibraryFunction(FDecl, "malloc") ||
720  C.isCLibraryFunction(FDecl, "calloc") ||
721  C.isCLibraryFunction(FDecl, "alloca"))
722  ArgNum = 0;
723  else if (C.isCLibraryFunction(FDecl, "memccpy"))
724  ArgNum = 3;
725  else if (C.isCLibraryFunction(FDecl, "realloc"))
726  ArgNum = 1;
727  else if (C.isCLibraryFunction(FDecl, "bcopy"))
728  ArgNum = 2;
729  }
730 
731  if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
732  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
733  return true;
734 
735  return false;
736 }
737 
/// Registers GenericTaintChecker with the checker manager \p mgr.
738 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
739  mgr.registerChecker<GenericTaintChecker>();
740 }
StringRef getCalleeName(const FunctionDecl *FunDecl) const
Get the name of the called function (path-sensitive).
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:77
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2216
const Decl * getDecl() const
Definition: MemRegion.h:853
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph). Uses the default CheckerContex...
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Symbolic value. These values used to capture symbolic execution of the program.
Definition: SymbolManager.h:42
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
LineState State
bool isReferenceType() const
Definition: Type.h:5241
SymbolRef getSymbol() const
Definition: MemRegion.h:719
bool isUnknownOrUndef() const
Definition: SVals.h:125
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
#define UINT_MAX
Definition: limits.h:72
QualType getPointeeType() const
Definition: Type.cpp:414
const ProgramStateRef & getState() const
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
Definition: SVals.h:86
Kind getKind() const
Definition: DeclBase.h:375
QualType getFILEType() const
Retrieve the C FILE type.
Definition: ASTContext.h:1398
const TypedValueRegion * getRegion() const
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
CHECKER * registerChecker()
Used to register checkers.
Encodes a location in the source. The SourceManager can decode this to get at the full include stack...
const Type * getTypePtr() const
Definition: Type.h:5016
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function. If the given function is a memory copy or setting func...
Definition: Decl.cpp:3226
SourceLocation getExprLoc() const LLVM_READONLY
Definition: Expr.cpp:193
QualType getPointeeType() const
Definition: Type.h:2139
QualType getType() const
Definition: Expr.h:125
A symbol representing the value stored at a MemRegion.
StringRef getMacroNameOrSpelling(SourceLocation &Loc)
Depending on whether the location corresponds to a macro, return either the macro name or the token sp...
const MemRegion * getAsRegion() const
Definition: SVals.cpp:135
unsigned getNumArgs() const
Definition: Expr.h:2205
QualType getCanonicalType() const
Definition: Type.h:5055
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:470
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:5075
bool isNull() const
isNull - Return true if this QualType doesn't point to a type yet.
Definition: Type.h:633
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
Definition: Expr.cpp:2408
const LocationContext * getLocationContext() const
bool isPointerType() const
Definition: Type.h:5232