clang  3.7.0
CStringChecker.cpp
Go to the documentation of this file.
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/Basic/CharInfo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
/// Checker for calls to C string / memory functions (see the file header:
/// "an assortment of checks on calls to functions in <string.h>").
///
/// It registers for the eval::Call, PreStmt<DeclStmt>, LiveSymbols,
/// DeadSymbols and RegionChanges callbacks listed in the Checker<> base.
/// All callbacks and helpers are const, so state that is filled in lazily
/// while reporting (the bug types and the current function description) is
/// declared mutable.
32 class CStringChecker : public Checker< eval::Call,
33  check::PreStmt<DeclStmt>,
34  check::LiveSymbols,
35  check::DeadSymbols,
36  check::RegionChanges
37  > {
  // Bug types, one per diagnostic family. Each is created on first use by the
  // reporting code (the "if (!BT_X) BT_X.reset(...)" pattern in the
  // definitions below).
38  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
39  BT_NotCString, BT_AdditionOverflow;
40 
  // Description of the function currently being evaluated (for example
  // evalCopyCommon sets it to "memory copy function"). It is spliced into
  // diagnostic text, and the reporting code asserts that it is non-null.
41  mutable const char *CurrentFunctionDescription;
42 
43 public:
44  /// The filter is used to filter out the diagnostics which are not enabled by
45  /// the user.
46  struct CStringChecksFilter {
  // One enable flag per check...
47  DefaultBool CheckCStringNullArg;
48  DefaultBool CheckCStringOutOfBounds;
49  DefaultBool CheckCStringBufferOverlap;
50  DefaultBool CheckCStringNotNullTerm;
51 
  // ...and the matching check name handed to the bug type when it is created.
52  CheckName CheckNameCStringNullArg;
53  CheckName CheckNameCStringOutOfBounds;
54  CheckName CheckNameCStringBufferOverlap;
55  CheckName CheckNameCStringNotNullTerm;
56  };
57 
58  CStringChecksFilter Filter;
59 
  // Address of a function-local static, used as the tag argument when
  // getCStringLengthForRegion requests a metadata symbol.
60  static void *getTag() { static int tag; return &tag; }
61 
  // Checker callbacks.
62  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
63  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
64  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
65  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
66  bool wantsRegionChangeUpdate(ProgramStateRef state) const;
67 
  // NOTE(review): the embedded numbering jumps 67 -> 69 and 71 -> 73 here, so
  // the return type line and one parameter line of checkRegionChanges appear
  // to be missing from this copy -- confirm against the real source.
69  checkRegionChanges(ProgramStateRef state,
70  const InvalidatedSymbols *,
71  ArrayRef<const MemRegion *> ExplicitRegions,
73  const CallEvent *Call) const;
74 
  // Signature shared by the per-function evaluators declared below.
75  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
76  const CallExpr *) const;
77 
  // Memory copy family.
78  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
79  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
80  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
81  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
  // NOTE(review): this declaration names the two buffer parameters
  // (Source, Dest) in that order, while the out-of-line definition names the
  // same positions (Dest, Source). Arguments bind by position, so the
  // definition's names are the ones that describe the behaviour.
82  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
83  ProgramStateRef state,
84  const Expr *Size,
85  const Expr *Source,
86  const Expr *Dest,
87  bool Restricted = false,
88  bool IsMempcpy = false) const;
89 
90  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
91 
  // String length family.
92  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
93  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
94  void evalstrLengthCommon(CheckerContext &C,
95  const CallExpr *CE,
96  bool IsStrnlen = false) const;
97 
  // String copy / concatenation family.
98  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
99  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
100  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
101  void evalStrcpyCommon(CheckerContext &C,
102  const CallExpr *CE,
103  bool returnEnd,
104  bool isBounded,
105  bool isAppending) const;
106 
107  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
108  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
109 
  // String comparison family.
110  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
111  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
112  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
113  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
114  void evalStrcmpCommon(CheckerContext &C,
115  const CallExpr *CE,
116  bool isBounded = false,
117  bool ignoreCase = false) const;
118 
119  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
120 
121  // Utility methods
  // Splits a state on "V == 0 of type Ty"; returns (zero state, non-zero
  // state). Both are the input state when V is not a defined value.
122  std::pair<ProgramStateRef , ProgramStateRef >
123  static assumeZero(CheckerContext &C,
124  ProgramStateRef state, SVal V, QualType Ty);
125 
  // Record / look up the string length associated with a region in the
  // CStringLength state map.
126  static ProgramStateRef setCStringLength(ProgramStateRef state,
127  const MemRegion *MR,
128  SVal strLength);
129  static SVal getCStringLengthForRegion(CheckerContext &C,
130  ProgramStateRef &state,
131  const Expr *Ex,
132  const MemRegion *MR,
133  bool hypothetical);
134  SVal getCStringLength(CheckerContext &C,
135  ProgramStateRef &state,
136  const Expr *Ex,
137  SVal Buf,
138  bool hypothetical = false) const;
139 
  // Returns the string literal behind val, or null if val is not a string
  // region.
140  const StringLiteral *getCStringLiteral(CheckerContext &C,
141  ProgramStateRef &state,
142  const Expr *expr,
143  SVal val) const;
144 
145  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
146  ProgramStateRef state,
147  const Expr *Ex, SVal V,
148  bool IsSourceBuffer);
149 
  // Writes a short description of MR to os; returns false when it has nothing
  // to say about that kind of region.
150  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
151  const MemRegion *MR);
152 
153  // Re-usable checks
  // Each of these takes the current state and returns the state to continue
  // with, or null when the check failed (or when the incoming state was
  // already null), so they can be chained.
154  ProgramStateRef checkNonNull(CheckerContext &C,
155  ProgramStateRef state,
156  const Expr *S,
157  SVal l) const;
158  ProgramStateRef CheckLocation(CheckerContext &C,
159  ProgramStateRef state,
160  const Expr *S,
161  SVal l,
162  const char *message = nullptr) const;
163  ProgramStateRef CheckBufferAccess(CheckerContext &C,
164  ProgramStateRef state,
165  const Expr *Size,
166  const Expr *FirstBuf,
167  const Expr *SecondBuf,
168  const char *firstMessage = nullptr,
169  const char *secondMessage = nullptr,
170  bool WarnAboutSize = false) const;
171 
172  ProgramStateRef CheckBufferAccess(CheckerContext &C,
173  ProgramStateRef state,
174  const Expr *Size,
175  const Expr *Buf,
176  const char *message = nullptr,
177  bool WarnAboutSize = false) const {
178  // This is a convenience override.
  // Forwards to the two-buffer form with no second buffer and no second
  // message.
179  return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
180  WarnAboutSize);
181  }
182  ProgramStateRef CheckOverlap(CheckerContext &C,
183  ProgramStateRef state,
184  const Expr *Size,
185  const Expr *First,
186  const Expr *Second) const;
187  void emitOverlapBug(CheckerContext &C,
188  ProgramStateRef state,
189  const Stmt *First,
190  const Stmt *Second) const;
191 
192  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
193  ProgramStateRef state,
194  NonLoc left,
195  NonLoc right) const;
196 };
197 
198 } //end anonymous namespace
199 
// Program-state map named CStringLength, keyed by memory region and holding
// the SVal recorded as that region's C string length. It is written by
// setCStringLength / getCStringLengthForRegion and read by the latter.
200 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
201 
202 //===----------------------------------------------------------------------===//
203 // Individual checks and utility methods.
204 //===----------------------------------------------------------------------===//
205 
206 std::pair<ProgramStateRef , ProgramStateRef >
207 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
208  QualType Ty) {
209  Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
210  if (!val)
211  return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
212 
213  SValBuilder &svalBuilder = C.getSValBuilder();
214  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
215  return state->assume(svalBuilder.evalEQ(state, *val, zero));
216 }
217 
218 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
219  ProgramStateRef state,
220  const Expr *S, SVal l) const {
221  // If a previous check has failed, propagate the failure.
222  if (!state)
223  return nullptr;
224 
225  ProgramStateRef stateNull, stateNonNull;
226  std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
227 
228  if (stateNull && !stateNonNull) {
229  if (!Filter.CheckCStringNullArg)
230  return nullptr;
231 
232  ExplodedNode *N = C.generateSink(stateNull);
233  if (!N)
234  return nullptr;
235 
236  if (!BT_Null)
237  BT_Null.reset(new BuiltinBug(
238  Filter.CheckNameCStringNullArg, categories::UnixAPI,
239  "Null pointer argument in call to byte string function"));
240 
241  SmallString<80> buf;
242  llvm::raw_svector_ostream os(buf);
243  assert(CurrentFunctionDescription);
244  os << "Null pointer argument in call to " << CurrentFunctionDescription;
245 
246  // Generate a report for this bug.
247  BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
248  auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
249 
250  report->addRange(S->getSourceRange());
251  bugreporter::trackNullOrUndefValue(N, S, *report);
252  C.emitReport(std::move(report));
253  return nullptr;
254  }
255 
256  // From here on, assume that the value is non-null.
257  assert(stateNonNull);
258  return stateNonNull;
259 }
260 
261 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
/// Checks that location \p l, when it is an element of an array-like region,
/// lies within the bounds of its super-region.
///
/// \param S          expression whose source range is attached to the report.
/// \param l          location about to be accessed.
/// \param warningMsg if non-null, used verbatim as the report text; otherwise
///                   the text is built from CurrentFunctionDescription with
///                   its first letter upper-cased.
/// \returns null if \p state is null or the access is provably out of bounds
///          (a sink is generated and a report emitted); \p state unchanged if
///          \p l is not a region or not an ElementRegion; otherwise the state
///          in which the index is assumed to be in bounds.
262 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
263  ProgramStateRef state,
264  const Expr *S, SVal l,
265  const char *warningMsg) const {
266  // If a previous check has failed, propagate the failure.
267  if (!state)
268  return nullptr;
269 
270  // Check for out of bound array element access.
271  const MemRegion *R = l.getAsRegion();
272  if (!R)
273  return state;
274 
275  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
276  if (!ER)
277  return state;
278 
279  assert(ER->getValueType() == C.getASTContext().CharTy &&
280  "CheckLocation should only be called with char* ElementRegions");
281 
282  // Get the size of the array.
283  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
284  SValBuilder &svalBuilder = C.getSValBuilder();
285  SVal Extent =
286  svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
  // NOTE(review): the embedded numbering skips 287 and 290. `Size` and `Idx`
  // used below are not declared anywhere in the visible text; presumably they
  // are derived from `Extent` and from ER's index on the missing lines --
  // confirm against the real source.
288 
289  // Get the index of the accessed element.
291 
  // Ask the constraint manager for both outcomes; a bug is reported only when
  // the out-of-bound state is feasible and the in-bound one is not.
292  ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
293  ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
294  if (StOutBound && !StInBound) {
295  ExplodedNode *N = C.generateSink(StOutBound);
296  if (!N)
297  return nullptr;
298 
299  if (!BT_Bounds) {
300  BT_Bounds.reset(new BuiltinBug(
301  Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
302  "Byte string function accesses out-of-bound array element"));
303  }
304  BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
305 
306  // Generate a report for this bug.
307  std::unique_ptr<BugReport> report;
308  if (warningMsg) {
309  report = llvm::make_unique<BugReport>(*BT, warningMsg, N);
310  } else {
311  assert(CurrentFunctionDescription);
312  assert(CurrentFunctionDescription[0] != '\0');
313 
  // Capitalise the first letter of the description so it can start the
  // sentence.
314  SmallString<80> buf;
315  llvm::raw_svector_ostream os(buf);
316  os << toUppercase(CurrentFunctionDescription[0])
317  << &CurrentFunctionDescription[1]
318  << " accesses out-of-bound array element";
319  report = llvm::make_unique<BugReport>(*BT, os.str(), N);
320  }
321 
322  // FIXME: It would be nice to eventually make this diagnostic more clear,
323  // e.g., by referencing the original declaration or by saying *why* this
324  // reference is outside the range.
325 
326  report->addRange(S->getSourceRange());
327  C.emitReport(std::move(report));
328  return nullptr;
329  }
330 
331  // Array bound check succeeded. From this point forward the array bound
332  // should always succeed.
333  return StInBound;
334 }
335 
336 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
337  ProgramStateRef state,
338  const Expr *Size,
339  const Expr *FirstBuf,
340  const Expr *SecondBuf,
341  const char *firstMessage,
342  const char *secondMessage,
343  bool WarnAboutSize) const {
344  // If a previous check has failed, propagate the failure.
345  if (!state)
346  return nullptr;
347 
348  SValBuilder &svalBuilder = C.getSValBuilder();
349  ASTContext &Ctx = svalBuilder.getContext();
350  const LocationContext *LCtx = C.getLocationContext();
351 
352  QualType sizeTy = Size->getType();
353  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
354 
355  // Check that the first buffer is non-null.
356  SVal BufVal = state->getSVal(FirstBuf, LCtx);
357  state = checkNonNull(C, state, FirstBuf, BufVal);
358  if (!state)
359  return nullptr;
360 
361  // If out-of-bounds checking is turned off, skip the rest.
362  if (!Filter.CheckCStringOutOfBounds)
363  return state;
364 
365  // Get the access length and make sure it is known.
366  // FIXME: This assumes the caller has already checked that the access length
367  // is positive. And that it's unsigned.
368  SVal LengthVal = state->getSVal(Size, LCtx);
369  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
370  if (!Length)
371  return state;
372 
373  // Compute the offset of the last element to be accessed: size-1.
374  NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
375  NonLoc LastOffset = svalBuilder
376  .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
377 
378  // Check that the first buffer is sufficiently long.
379  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
380  if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
381  const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
382 
383  SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
384  LastOffset, PtrTy);
385  state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
386 
387  // If the buffer isn't large enough, abort.
388  if (!state)
389  return nullptr;
390  }
391 
392  // If there's a second buffer, check it as well.
393  if (SecondBuf) {
394  BufVal = state->getSVal(SecondBuf, LCtx);
395  state = checkNonNull(C, state, SecondBuf, BufVal);
396  if (!state)
397  return nullptr;
398 
399  BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
400  if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
401  const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
402 
403  SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
404  LastOffset, PtrTy);
405  state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
406  }
407  }
408 
409  // Large enough or not, return this state!
410  return state;
411 }
412 
413 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
414  ProgramStateRef state,
415  const Expr *Size,
416  const Expr *First,
417  const Expr *Second) const {
418  if (!Filter.CheckCStringBufferOverlap)
419  return state;
420 
421  // Do a simple check for overlap: if the two arguments are from the same
422  // buffer, see if the end of the first is greater than the start of the second
423  // or vice versa.
424 
425  // If a previous check has failed, propagate the failure.
426  if (!state)
427  return nullptr;
428 
429  ProgramStateRef stateTrue, stateFalse;
430 
431  // Get the buffer values and make sure they're known locations.
432  const LocationContext *LCtx = C.getLocationContext();
433  SVal firstVal = state->getSVal(First, LCtx);
434  SVal secondVal = state->getSVal(Second, LCtx);
435 
436  Optional<Loc> firstLoc = firstVal.getAs<Loc>();
437  if (!firstLoc)
438  return state;
439 
440  Optional<Loc> secondLoc = secondVal.getAs<Loc>();
441  if (!secondLoc)
442  return state;
443 
444  // Are the two values the same?
445  SValBuilder &svalBuilder = C.getSValBuilder();
446  std::tie(stateTrue, stateFalse) =
447  state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
448 
449  if (stateTrue && !stateFalse) {
450  // If the values are known to be equal, that's automatically an overlap.
451  emitOverlapBug(C, stateTrue, First, Second);
452  return nullptr;
453  }
454 
455  // assume the two expressions are not equal.
456  assert(stateFalse);
457  state = stateFalse;
458 
459  // Which value comes first?
460  QualType cmpTy = svalBuilder.getConditionType();
461  SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
462  *firstLoc, *secondLoc, cmpTy);
463  Optional<DefinedOrUnknownSVal> reverseTest =
464  reverse.getAs<DefinedOrUnknownSVal>();
465  if (!reverseTest)
466  return state;
467 
468  std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
469  if (stateTrue) {
470  if (stateFalse) {
471  // If we don't know which one comes first, we can't perform this test.
472  return state;
473  } else {
474  // Switch the values so that firstVal is before secondVal.
475  std::swap(firstLoc, secondLoc);
476 
477  // Switch the Exprs as well, so that they still correspond.
478  std::swap(First, Second);
479  }
480  }
481 
482  // Get the length, and make sure it too is known.
483  SVal LengthVal = state->getSVal(Size, LCtx);
484  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
485  if (!Length)
486  return state;
487 
488  // Convert the first buffer's start address to char*.
489  // Bail out if the cast fails.
490  ASTContext &Ctx = svalBuilder.getContext();
491  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
492  SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
493  First->getType());
494  Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
495  if (!FirstStartLoc)
496  return state;
497 
498  // Compute the end of the first buffer. Bail out if THAT fails.
499  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
500  *FirstStartLoc, *Length, CharPtrTy);
501  Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
502  if (!FirstEndLoc)
503  return state;
504 
505  // Is the end of the first buffer past the start of the second buffer?
506  SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
507  *FirstEndLoc, *secondLoc, cmpTy);
508  Optional<DefinedOrUnknownSVal> OverlapTest =
509  Overlap.getAs<DefinedOrUnknownSVal>();
510  if (!OverlapTest)
511  return state;
512 
513  std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
514 
515  if (stateTrue && !stateFalse) {
516  // Overlap!
517  emitOverlapBug(C, stateTrue, First, Second);
518  return nullptr;
519  }
520 
521  // assume the two expressions don't overlap.
522  assert(stateFalse);
523  return stateFalse;
524 }
525 
526 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
527  const Stmt *First, const Stmt *Second) const {
528  ExplodedNode *N = C.generateSink(state);
529  if (!N)
530  return;
531 
532  if (!BT_Overlap)
533  BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
534  categories::UnixAPI, "Improper arguments"));
535 
536  // Generate a report for this bug.
537  auto report = llvm::make_unique<BugReport>(
538  *BT_Overlap, "Arguments must not be overlapping buffers", N);
539  report->addRange(First->getSourceRange());
540  report->addRange(Second->getSourceRange());
541 
542  C.emitReport(std::move(report));
543 }
544 
/// Checks whether the sum of \p left and \p right can be shown to exceed the
/// maximum value of size_t, using the rearranged test
/// "one operand > max - other operand".
///
/// \returns \p state unchanged when out-of-bounds checking is disabled or the
///          subtraction does not yield a NonLoc; null if \p state was already
///          null or an overflow is certain (a sink is generated and a report
///          emitted); otherwise the state in which no overflow occurs.
545 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
546  ProgramStateRef state,
547  NonLoc left,
548  NonLoc right) const {
549  // If out-of-bounds checking is turned off, skip the rest.
550  if (!Filter.CheckCStringOutOfBounds)
551  return state;
552 
553  // If a previous check has failed, propagate the failure.
554  if (!state)
555  return nullptr;
556 
557  SValBuilder &svalBuilder = C.getSValBuilder();
558  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
559 
  // Largest value representable in size_t, as a symbolic-value constant.
560  QualType sizeTy = svalBuilder.getContext().getSizeType();
561  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
562  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
563 
  // Subtract a concrete operand from max when there is one: `right` if it is
  // a ConcreteInt, otherwise `left` (with the roles of the two swapped).
564  SVal maxMinusRight;
565  if (right.getAs<nonloc::ConcreteInt>()) {
566  maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
567  sizeTy);
568  } else {
569  // Try switching the operands. (The order of these two assignments is
570  // important!)
  // `left` has to be consumed by the subtraction before it is overwritten
  // with `right`.
571  maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
572  sizeTy);
573  left = right;
574  }
575 
576  if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
577  QualType cmpTy = svalBuilder.getConditionType();
578  // If left > max - right, we have an overflow.
579  SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
580  *maxMinusRightNL, cmpTy);
581 
582  ProgramStateRef stateOverflow, stateOkay;
583  std::tie(stateOverflow, stateOkay) =
584  state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
585 
  // Only report when overflow is the sole feasible outcome.
586  if (stateOverflow && !stateOkay) {
587  // We have an overflow. Emit a bug report.
588  ExplodedNode *N = C.generateSink(stateOverflow);
589  if (!N)
590  return nullptr;
591 
592  if (!BT_AdditionOverflow)
593  BT_AdditionOverflow.reset(
594  new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
595  "Sum of expressions causes overflow"));
596 
597  // This isn't a great error message, but this should never occur in real
598  // code anyway -- you'd have to create a buffer longer than a size_t can
599  // represent, which is sort of a contradiction.
600  const char *warning =
601  "This expression will create a string whose length is too big to "
602  "be represented as a size_t";
603 
604  // Generate a report for this bug.
605  C.emitReport(
606  llvm::make_unique<BugReport>(*BT_AdditionOverflow, warning, N));
607 
608  return nullptr;
609  }
610 
611  // From now on, assume an overflow didn't occur.
612  assert(stateOkay);
613  state = stateOkay;
614  }
615 
616  return state;
617 }
618 
/// Records \p strLength as the C string length of region \p MR (after
/// stripping casts) in the CStringLength state map.
///
/// \returns \p state unchanged for region kinds whose length is not tracked;
///          the state with the entry removed if \p strLength is unknown;
///          otherwise the state with the entry set. \p strLength must not be
///          undefined (asserted).
619 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
620  const MemRegion *MR,
621  SVal strLength) {
622  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
623 
624  MR = MR->StripCasts();
625 
  // NOTE(review): the embedded numbering skips 627, 632-636 and 640, so the
  // `case` labels of this switch appear to be missing from this copy. The
  // comments on each arm say which kind of region it handles -- confirm the
  // exact labels against the real source.
626  switch (MR->getKind()) {
628  // FIXME: This can happen if we strcpy() into a string region. This is
629  // undefined [C99 6.4.5p6], but we should still warn about it.
630  return state;
631 
637  // These are the types we can currently track string lengths for.
638  break;
639 
641  // FIXME: Handle element regions by upper-bounding the parent region's
642  // string length.
643  return state;
644 
645  default:
646  // Other regions (mostly non-data) can't have a reliable C string length.
647  // For now, just ignore the change.
648  // FIXME: These are rare but not impossible. We should output some kind of
649  // warning for things like strcpy((char[]){'a', 0}, "b");
650  return state;
651  }
652 
  // An unknown length is represented by the absence of an entry.
653  if (strLength.isUnknown())
654  return state->remove<CStringLength>(MR);
655 
656  return state->set<CStringLength>(MR, strLength);
657 }
658 
659 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
660  ProgramStateRef &state,
661  const Expr *Ex,
662  const MemRegion *MR,
663  bool hypothetical) {
664  if (!hypothetical) {
665  // If there's a recorded length, go ahead and return it.
666  const SVal *Recorded = state->get<CStringLength>(MR);
667  if (Recorded)
668  return *Recorded;
669  }
670 
671  // Otherwise, get a new symbol and update the state.
672  SValBuilder &svalBuilder = C.getSValBuilder();
673  QualType sizeTy = svalBuilder.getContext().getSizeType();
674  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
675  MR, Ex, sizeTy,
676  C.blockCount());
677 
678  if (!hypothetical) {
679  if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
680  // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
681  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
682  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
683  llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
684  const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
685  fourInt);
686  NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
687  SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
688  maxLength, sizeTy);
689  state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
690  }
691  state = state->set<CStringLength>(MR, strLength);
692  }
693 
694  return strLength;
695 }
696 
/// Returns the C string length of the buffer value \p Buf.
///
/// \param Ex           expression whose source range is attached to reports.
/// \param hypothetical forwarded to getCStringLengthForRegion.
/// \returns UnknownVal when the length cannot be determined; UndefinedVal when
///          \p Buf is known not to be a C string (label addresses and the
///          region kinds handled by the default arm) -- in that case a report
///          is emitted if the not-null-terminated check is enabled; otherwise
///          the length value.
697 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
698  const Expr *Ex, SVal Buf,
699  bool hypothetical) const {
700  const MemRegion *MR = Buf.getAsRegion();
701  if (!MR) {
702  // If we can't get a region, see if it's something we /know/ isn't a
703  // C string. In the context of locations, the only time we can issue such
704  // a warning is for labels.
705  if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
706  if (!Filter.CheckCStringNotNullTerm)
707  return UndefinedVal();
708 
  // This report is attached to a node created with addTransition rather than
  // generateSink, unlike the other reports in this file.
709  if (ExplodedNode *N = C.addTransition(state)) {
710  if (!BT_NotCString)
711  BT_NotCString.reset(new BuiltinBug(
712  Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
713  "Argument is not a null-terminated string."));
714 
715  SmallString<120> buf;
716  llvm::raw_svector_ostream os(buf);
717  assert(CurrentFunctionDescription);
718  os << "Argument to " << CurrentFunctionDescription
719  << " is the address of the label '" << Label->getLabel()->getName()
720  << "', which is not a null-terminated string";
721 
722  // Generate a report for this bug.
723  auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
724 
725  report->addRange(Ex->getSourceRange());
726  C.emitReport(std::move(report));
727  }
728  return UndefinedVal();
729 
730  }
731 
732  // If it's not a region and not a label, give up.
733  return UnknownVal();
734  }
735 
736  // If we have a region, strip casts from it and see if we can figure out
737  // its length. For anything we can't figure out, just return UnknownVal.
738  MR = MR->StripCasts();
739 
  // NOTE(review): the embedded numbering skips 741, 749-753, 755 and 758, so
  // the `case` labels of this switch appear to be missing from this copy. The
  // first arm casts MR to StringRegion; the labels of the remaining arms
  // cannot be read from here -- confirm against the real source.
740  switch (MR->getKind()) {
742  // Modifying the contents of string regions is undefined [C99 6.4.5p6],
743  // so we can assume that the byte length is the correct C string length.
744  SValBuilder &svalBuilder = C.getSValBuilder();
745  QualType sizeTy = svalBuilder.getContext().getSizeType();
746  const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
747  return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
748  }
754  return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
756  // FIXME: Can we track this? Is it necessary?
757  return UnknownVal();
759  // FIXME: How can we handle this? It's not good enough to subtract the
760  // offset from the base string length; consider "123\x00567" and &a[5].
761  return UnknownVal();
762  default:
763  // Other regions (mostly non-data) can't have a reliable C string length.
764  // In this case, an error is emitted and UndefinedVal is returned.
765  // The caller should always be prepared to handle this case.
766  if (!Filter.CheckCStringNotNullTerm)
767  return UndefinedVal();
768 
769  if (ExplodedNode *N = C.addTransition(state)) {
770  if (!BT_NotCString)
771  BT_NotCString.reset(new BuiltinBug(
772  Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
773  "Argument is not a null-terminated string."));
774 
775  SmallString<120> buf;
776  llvm::raw_svector_ostream os(buf);
777 
778  assert(CurrentFunctionDescription);
779  os << "Argument to " << CurrentFunctionDescription << " is ";
780 
  // Use the region summary when SummarizeRegion can produce one, otherwise
  // fall back to a generic wording.
781  if (SummarizeRegion(os, C.getASTContext(), MR))
782  os << ", which is not a null-terminated string";
783  else
784  os << "not a null-terminated string";
785 
786  // Generate a report for this bug.
787  auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
788 
789  report->addRange(Ex->getSourceRange());
790  C.emitReport(std::move(report));
791  }
792 
793  return UndefinedVal();
794  }
795 }
796 
797 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
798  ProgramStateRef &state, const Expr *expr, SVal val) const {
799 
800  // Get the memory region pointed to by the val.
801  const MemRegion *bufRegion = val.getAsRegion();
802  if (!bufRegion)
803  return nullptr;
804 
805  // Strip casts off the memory region.
806  bufRegion = bufRegion->StripCasts();
807 
808  // Cast the memory region to a string region.
809  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
810  if (!strRegion)
811  return nullptr;
812 
813  // Return the actual string in the string region.
814  return strRegion->getStringLiteral();
815 }
816 
/// Invalidates what the analyzer knows about the buffer denoted by \p V.
///
/// \param E              expression passed to invalidateRegions.
/// \param IsSourceBuffer when true, a trait is set on the region and the
///                       invalidation is flagged as causing pointer escape.
/// \returns \p state unchanged if \p V is not a location; the state after
///          region invalidation if \p V is a region value; otherwise the state
///          with the binding of the location removed.
817 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
818  ProgramStateRef state,
819  const Expr *E, SVal V,
820  bool IsSourceBuffer) {
821  Optional<Loc> L = V.getAs<Loc>();
822  if (!L)
823  return state;
824 
825  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
826  // some assumptions about the value that CFRefCount can't. Even so, it should
827  // probably be refactored.
828  if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
829  const MemRegion *R = MR->getRegion()->StripCasts();
830 
831  // Are we dealing with an ElementRegion? If so, we should be invalidating
832  // the super-region.
833  if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
834  R = ER->getSuperRegion();
835  // FIXME: What about layers of ElementRegions?
836  }
837 
838  // Invalidate this region.
  // NOTE(review): the embedded numbering skips 839, 842 and 847-848. `LCtx`
  // and `ITraits` used below are not declared in the visible text and the
  // setTrait call is cut off after its first argument -- those lines appear
  // to be missing from this copy; confirm against the real source.
840 
841  bool CausesPointerEscape = false;
843  // Invalidate and escape only indirect regions accessible through the source
844  // buffer.
845  if (IsSourceBuffer) {
846  ITraits.setTrait(R,
849  CausesPointerEscape = true;
850  }
851 
852  return state->invalidateRegions(R, E, C.blockCount(), LCtx,
853  CausesPointerEscape, nullptr, nullptr,
854  &ITraits);
855  }
856 
857  // If we have a non-region value by chance, just remove the binding.
858  // FIXME: is this necessary or correct? This handles the non-Region
859  // cases. Is it ever valid to store to these?
860  return state->killBinding(*L);
861 }
862 
/// Writes a short human-readable description of region \p MR to \p os, for
/// use inside a diagnostic ("Argument to ... is <summary>, which is not a
/// null-terminated string").
///
/// \returns true if a description was written, false for region kinds this
///          function has nothing to say about (nothing is written then).
///
/// Fix: the variable message now ends in a space ("a variable of type "), like
/// the neighbouring temp-object / field / instance-variable messages, so the
/// type name is no longer glued to the word "type".
863 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
864  const MemRegion *MR) {
  // Null when MR is not a typed value region; the arms that print a type
  // dereference it.
865  const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
866 
  // NOTE(review): the embedded numbering skips 868, 876, 879, 882-883, 886,
  // 889 and 892, so the `case` labels of this switch appear to be missing
  // from this copy -- confirm against the real source.
867  switch (MR->getKind()) {
869  const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
870  if (FD)
871  os << "the address of the function '" << *FD << '\'';
872  else
873  os << "the address of a function";
874  return true;
875  }
877  os << "block text";
878  return true;
880  os << "a block";
881  return true;
884  os << "a C++ temp object of type " << TVR->getValueType().getAsString();
885  return true;
887  os << "a variable of type " << TVR->getValueType().getAsString();
888  return true;
890  os << "a field of type " << TVR->getValueType().getAsString();
891  return true;
893  os << "an instance variable of type " << TVR->getValueType().getAsString();
894  return true;
895  default:
896  return false;
897  }
898 }
899 
900 //===----------------------------------------------------------------------===//
901 // evaluation of individual function calls.
902 //===----------------------------------------------------------------------===//
903 
904 void CStringChecker::evalCopyCommon(CheckerContext &C,
905  const CallExpr *CE,
906  ProgramStateRef state,
907  const Expr *Size, const Expr *Dest,
908  const Expr *Source, bool Restricted,
909  bool IsMempcpy) const {
910  CurrentFunctionDescription = "memory copy function";
911 
912  // See if the size argument is zero.
913  const LocationContext *LCtx = C.getLocationContext();
914  SVal sizeVal = state->getSVal(Size, LCtx);
915  QualType sizeTy = Size->getType();
916 
917  ProgramStateRef stateZeroSize, stateNonZeroSize;
918  std::tie(stateZeroSize, stateNonZeroSize) =
919  assumeZero(C, state, sizeVal, sizeTy);
920 
921  // Get the value of the Dest.
922  SVal destVal = state->getSVal(Dest, LCtx);
923 
924  // If the size is zero, there won't be any actual memory access, so
925  // just bind the return value to the destination buffer and return.
926  if (stateZeroSize && !stateNonZeroSize) {
927  stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
928  C.addTransition(stateZeroSize);
929  return;
930  }
931 
932  // If the size can be nonzero, we have to check the other arguments.
933  if (stateNonZeroSize) {
934  state = stateNonZeroSize;
935 
936  // Ensure the destination is not null. If it is NULL there will be a
937  // NULL pointer dereference.
938  state = checkNonNull(C, state, Dest, destVal);
939  if (!state)
940  return;
941 
942  // Get the value of the Src.
943  SVal srcVal = state->getSVal(Source, LCtx);
944 
945  // Ensure the source is not null. If it is NULL there will be a
946  // NULL pointer dereference.
947  state = checkNonNull(C, state, Source, srcVal);
948  if (!state)
949  return;
950 
951  // Ensure the accesses are valid and that the buffers do not overlap.
952  const char * const writeWarning =
953  "Memory copy function overflows destination buffer";
954  state = CheckBufferAccess(C, state, Size, Dest, Source,
955  writeWarning, /* sourceWarning = */ nullptr);
956  if (Restricted)
957  state = CheckOverlap(C, state, Size, Dest, Source);
958 
959  if (!state)
960  return;
961 
962  // If this is mempcpy, get the byte after the last byte copied and
963  // bind the expr.
964  if (IsMempcpy) {
965  loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
966 
967  // Get the length to copy.
968  if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
969  // Get the byte after the last byte copied.
970  SValBuilder &SvalBuilder = C.getSValBuilder();
971  ASTContext &Ctx = SvalBuilder.getContext();
972  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
973  loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal,
974  CharPtrTy, Dest->getType()).castAs<loc::MemRegionVal>();
975  SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
976  DestRegCharVal,
977  *lenValNonLoc,
978  Dest->getType());
979 
980  // The byte after the last byte copied is the return value.
981  state = state->BindExpr(CE, LCtx, lastElement);
982  } else {
983  // If we don't know how much we copied, we can at least
984  // conjure a return value for later.
985  SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
986  C.blockCount());
987  state = state->BindExpr(CE, LCtx, result);
988  }
989 
990  } else {
991  // All other copies return the destination buffer.
992  // (Well, bcopy() has a void return type, but this won't hurt.)
993  state = state->BindExpr(CE, LCtx, destVal);
994  }
995 
996  // Invalidate the destination (regular invalidation without pointer-escaping
997  // the address of the top-level region).
998  // FIXME: Even if we can't perfectly model the copy, we should see if we
999  // can use LazyCompoundVals to copy the source values into the destination.
1000  // This would probably remove any existing bindings past the end of the
1001  // copied region, but that's still an improvement over blank invalidation.
1002  state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
1003  /*IsSourceBuffer*/false);
1004 
1005  // Invalidate the source (const-invalidation without const-pointer-escaping
1006  // the address of the top-level region).
1007  state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1008  /*IsSourceBuffer*/true);
1009 
1010  C.addTransition(state);
1011  }
1012 }
1013 
1014 
1015 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1016  if (CE->getNumArgs() < 3)
1017  return;
1018 
1019  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1020  // The return value is the address of the destination buffer.
1021  const Expr *Dest = CE->getArg(0);
1022  ProgramStateRef state = C.getState();
1023 
1024  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1025 }
1026 
1027 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1028  if (CE->getNumArgs() < 3)
1029  return;
1030 
1031  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1032  // The return value is a pointer to the byte following the last written byte.
1033  const Expr *Dest = CE->getArg(0);
1034  ProgramStateRef state = C.getState();
1035 
1036  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1037 }
1038 
1039 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1040  if (CE->getNumArgs() < 3)
1041  return;
1042 
1043  // void *memmove(void *dst, const void *src, size_t n);
1044  // The return value is the address of the destination buffer.
1045  const Expr *Dest = CE->getArg(0);
1046  ProgramStateRef state = C.getState();
1047 
1048  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1049 }
1050 
1051 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1052  if (CE->getNumArgs() < 3)
1053  return;
1054 
1055  // void bcopy(const void *src, void *dst, size_t n);
1056  evalCopyCommon(C, CE, C.getState(),
1057  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1058 }
1059 
1060 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1061  if (CE->getNumArgs() < 3)
1062  return;
1063 
1064  // int memcmp(const void *s1, const void *s2, size_t n);
1065  CurrentFunctionDescription = "memory comparison function";
1066 
1067  const Expr *Left = CE->getArg(0);
1068  const Expr *Right = CE->getArg(1);
1069  const Expr *Size = CE->getArg(2);
1070 
1071  ProgramStateRef state = C.getState();
1072  SValBuilder &svalBuilder = C.getSValBuilder();
1073 
1074  // See if the size argument is zero.
1075  const LocationContext *LCtx = C.getLocationContext();
1076  SVal sizeVal = state->getSVal(Size, LCtx);
1077  QualType sizeTy = Size->getType();
1078 
1079  ProgramStateRef stateZeroSize, stateNonZeroSize;
1080  std::tie(stateZeroSize, stateNonZeroSize) =
1081  assumeZero(C, state, sizeVal, sizeTy);
1082 
1083  // If the size can be zero, the result will be 0 in that case, and we don't
1084  // have to check either of the buffers.
1085  if (stateZeroSize) {
1086  state = stateZeroSize;
1087  state = state->BindExpr(CE, LCtx,
1088  svalBuilder.makeZeroVal(CE->getType()));
1089  C.addTransition(state);
1090  }
1091 
1092  // If the size can be nonzero, we have to check the other arguments.
1093  if (stateNonZeroSize) {
1094  state = stateNonZeroSize;
1095  // If we know the two buffers are the same, we know the result is 0.
1096  // First, get the two buffers' addresses. Another checker will have already
1097  // made sure they're not undefined.
1099  state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1101  state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1102 
1103  // See if they are the same.
1104  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1105  ProgramStateRef StSameBuf, StNotSameBuf;
1106  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1107 
1108  // If the two arguments might be the same buffer, we know the result is 0,
1109  // and we only need to check one size.
1110  if (StSameBuf) {
1111  state = StSameBuf;
1112  state = CheckBufferAccess(C, state, Size, Left);
1113  if (state) {
1114  state = StSameBuf->BindExpr(CE, LCtx,
1115  svalBuilder.makeZeroVal(CE->getType()));
1116  C.addTransition(state);
1117  }
1118  }
1119 
1120  // If the two arguments might be different buffers, we have to check the
1121  // size of both of them.
1122  if (StNotSameBuf) {
1123  state = StNotSameBuf;
1124  state = CheckBufferAccess(C, state, Size, Left, Right);
1125  if (state) {
1126  // The return value is the comparison result, which we don't know.
1127  SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1128  C.blockCount());
1129  state = state->BindExpr(CE, LCtx, CmpV);
1130  C.addTransition(state);
1131  }
1132  }
1133  }
1134 }
1135 
1136 void CStringChecker::evalstrLength(CheckerContext &C,
1137  const CallExpr *CE) const {
1138  if (CE->getNumArgs() < 1)
1139  return;
1140 
1141  // size_t strlen(const char *s);
1142  evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1143 }
1144 
1145 void CStringChecker::evalstrnLength(CheckerContext &C,
1146  const CallExpr *CE) const {
1147  if (CE->getNumArgs() < 2)
1148  return;
1149 
1150  // size_t strnlen(const char *s, size_t maxlen);
1151  evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1152 }
1153 
1154 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1155  bool IsStrnlen) const {
1156  CurrentFunctionDescription = "string length function";
1157  ProgramStateRef state = C.getState();
1158  const LocationContext *LCtx = C.getLocationContext();
1159 
1160  if (IsStrnlen) {
1161  const Expr *maxlenExpr = CE->getArg(1);
1162  SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1163 
1164  ProgramStateRef stateZeroSize, stateNonZeroSize;
1165  std::tie(stateZeroSize, stateNonZeroSize) =
1166  assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1167 
1168  // If the size can be zero, the result will be 0 in that case, and we don't
1169  // have to check the string itself.
1170  if (stateZeroSize) {
1171  SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1172  stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1173  C.addTransition(stateZeroSize);
1174  }
1175 
1176  // If the size is GUARANTEED to be zero, we're done!
1177  if (!stateNonZeroSize)
1178  return;
1179 
1180  // Otherwise, record the assumption that the size is nonzero.
1181  state = stateNonZeroSize;
1182  }
1183 
1184  // Check that the string argument is non-null.
1185  const Expr *Arg = CE->getArg(0);
1186  SVal ArgVal = state->getSVal(Arg, LCtx);
1187 
1188  state = checkNonNull(C, state, Arg, ArgVal);
1189 
1190  if (!state)
1191  return;
1192 
1193  SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1194 
1195  // If the argument isn't a valid C string, there's no valid state to
1196  // transition to.
1197  if (strLength.isUndef())
1198  return;
1199 
1200  DefinedOrUnknownSVal result = UnknownVal();
1201 
1202  // If the check is for strnlen() then bind the return value to no more than
1203  // the maxlen value.
1204  if (IsStrnlen) {
1206 
1207  // It's a little unfortunate to be getting this again,
1208  // but it's not that expensive...
1209  const Expr *maxlenExpr = CE->getArg(1);
1210  SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1211 
1212  Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1213  Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1214 
1215  if (strLengthNL && maxlenValNL) {
1216  ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1217 
1218  // Check if the strLength is greater than the maxlen.
1219  std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1220  C.getSValBuilder()
1221  .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1223 
1224  if (stateStringTooLong && !stateStringNotTooLong) {
1225  // If the string is longer than maxlen, return maxlen.
1226  result = *maxlenValNL;
1227  } else if (stateStringNotTooLong && !stateStringTooLong) {
1228  // If the string is shorter than maxlen, return its length.
1229  result = *strLengthNL;
1230  }
1231  }
1232 
1233  if (result.isUnknown()) {
1234  // If we don't have enough information for a comparison, there's
1235  // no guarantee the full string length will actually be returned.
1236  // All we know is the return value is the min of the string length
1237  // and the limit. This is better than nothing.
1238  result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1239  C.blockCount());
1240  NonLoc resultNL = result.castAs<NonLoc>();
1241 
1242  if (strLengthNL) {
1243  state = state->assume(C.getSValBuilder().evalBinOpNN(
1244  state, BO_LE, resultNL, *strLengthNL, cmpTy)
1245  .castAs<DefinedOrUnknownSVal>(), true);
1246  }
1247 
1248  if (maxlenValNL) {
1249  state = state->assume(C.getSValBuilder().evalBinOpNN(
1250  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1251  .castAs<DefinedOrUnknownSVal>(), true);
1252  }
1253  }
1254 
1255  } else {
1256  // This is a plain strlen(), not strnlen().
1257  result = strLength.castAs<DefinedOrUnknownSVal>();
1258 
1259  // If we don't know the length of the string, conjure a return
1260  // value, so it can be used in constraints, at least.
1261  if (result.isUnknown()) {
1262  result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1263  C.blockCount());
1264  }
1265  }
1266 
1267  // Bind the return value.
1268  assert(!result.isUnknown() && "Should have conjured a value by now");
1269  state = state->BindExpr(CE, LCtx, result);
1270  C.addTransition(state);
1271 }
1272 
1273 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1274  if (CE->getNumArgs() < 2)
1275  return;
1276 
1277  // char *strcpy(char *restrict dst, const char *restrict src);
1278  evalStrcpyCommon(C, CE,
1279  /* returnEnd = */ false,
1280  /* isBounded = */ false,
1281  /* isAppending = */ false);
1282 }
1283 
1284 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1285  if (CE->getNumArgs() < 3)
1286  return;
1287 
1288  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1289  evalStrcpyCommon(C, CE,
1290  /* returnEnd = */ false,
1291  /* isBounded = */ true,
1292  /* isAppending = */ false);
1293 }
1294 
1295 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1296  if (CE->getNumArgs() < 2)
1297  return;
1298 
1299  // char *stpcpy(char *restrict dst, const char *restrict src);
1300  evalStrcpyCommon(C, CE,
1301  /* returnEnd = */ true,
1302  /* isBounded = */ false,
1303  /* isAppending = */ false);
1304 }
1305 
1306 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1307  if (CE->getNumArgs() < 2)
1308  return;
1309 
1310  //char *strcat(char *restrict s1, const char *restrict s2);
1311  evalStrcpyCommon(C, CE,
1312  /* returnEnd = */ false,
1313  /* isBounded = */ false,
1314  /* isAppending = */ true);
1315 }
1316 
1317 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1318  if (CE->getNumArgs() < 3)
1319  return;
1320 
1321  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1322  evalStrcpyCommon(C, CE,
1323  /* returnEnd = */ false,
1324  /* isBounded = */ true,
1325  /* isAppending = */ true);
1326 }
1327 
1328 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1329  bool returnEnd, bool isBounded,
1330  bool isAppending) const {
1331  CurrentFunctionDescription = "string copy function";
1332  ProgramStateRef state = C.getState();
1333  const LocationContext *LCtx = C.getLocationContext();
1334 
1335  // Check that the destination is non-null.
1336  const Expr *Dst = CE->getArg(0);
1337  SVal DstVal = state->getSVal(Dst, LCtx);
1338 
1339  state = checkNonNull(C, state, Dst, DstVal);
1340  if (!state)
1341  return;
1342 
1343  // Check that the source is non-null.
1344  const Expr *srcExpr = CE->getArg(1);
1345  SVal srcVal = state->getSVal(srcExpr, LCtx);
1346  state = checkNonNull(C, state, srcExpr, srcVal);
1347  if (!state)
1348  return;
1349 
1350  // Get the string length of the source.
1351  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1352 
1353  // If the source isn't a valid C string, give up.
1354  if (strLength.isUndef())
1355  return;
1356 
1357  SValBuilder &svalBuilder = C.getSValBuilder();
1358  QualType cmpTy = svalBuilder.getConditionType();
1359  QualType sizeTy = svalBuilder.getContext().getSizeType();
1360 
1361  // These two values allow checking two kinds of errors:
1362  // - actual overflows caused by a source that doesn't fit in the destination
1363  // - potential overflows caused by a bound that could exceed the destination
1364  SVal amountCopied = UnknownVal();
1365  SVal maxLastElementIndex = UnknownVal();
1366  const char *boundWarning = nullptr;
1367 
1368  // If the function is strncpy, strncat, etc... it is bounded.
1369  if (isBounded) {
1370  // Get the max number of characters to copy.
1371  const Expr *lenExpr = CE->getArg(2);
1372  SVal lenVal = state->getSVal(lenExpr, LCtx);
1373 
1374  // Protect against misdeclared strncpy().
1375  lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1376 
1377  Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1378  Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1379 
1380  // If we know both values, we might be able to figure out how much
1381  // we're copying.
1382  if (strLengthNL && lenValNL) {
1383  ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1384 
1385  // Check if the max number to copy is less than the length of the src.
1386  // If the bound is equal to the source length, strncpy won't null-
1387  // terminate the result!
1388  std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1389  svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1391 
1392  if (stateSourceTooLong && !stateSourceNotTooLong) {
1393  // Max number to copy is less than the length of the src, so the actual
1394  // strLength copied is the max number arg.
1395  state = stateSourceTooLong;
1396  amountCopied = lenVal;
1397 
1398  } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1399  // The source buffer entirely fits in the bound.
1400  state = stateSourceNotTooLong;
1401  amountCopied = strLength;
1402  }
1403  }
1404 
1405  // We still want to know if the bound is known to be too large.
1406  if (lenValNL) {
1407  if (isAppending) {
1408  // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1409 
1410  // Get the string length of the destination. If the destination is
1411  // memory that can't have a string length, we shouldn't be copying
1412  // into it anyway.
1413  SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1414  if (dstStrLength.isUndef())
1415  return;
1416 
1417  if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1418  maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1419  *lenValNL,
1420  *dstStrLengthNL,
1421  sizeTy);
1422  boundWarning = "Size argument is greater than the free space in the "
1423  "destination buffer";
1424  }
1425 
1426  } else {
1427  // For strncpy, this is just checking that lenVal <= sizeof(dst)
1428  // (Yes, strncpy and strncat differ in how they treat termination.
1429  // strncat ALWAYS terminates, but strncpy doesn't.)
1430 
1431  // We need a special case for when the copy size is zero, in which
1432  // case strncpy will do no work at all. Our bounds check uses n-1
1433  // as the last element accessed, so n == 0 is problematic.
1434  ProgramStateRef StateZeroSize, StateNonZeroSize;
1435  std::tie(StateZeroSize, StateNonZeroSize) =
1436  assumeZero(C, state, *lenValNL, sizeTy);
1437 
1438  // If the size is known to be zero, we're done.
1439  if (StateZeroSize && !StateNonZeroSize) {
1440  StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1441  C.addTransition(StateZeroSize);
1442  return;
1443  }
1444 
1445  // Otherwise, go ahead and figure out the last element we'll touch.
1446  // We don't record the non-zero assumption here because we can't
1447  // be sure. We won't warn on a possible zero.
1448  NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1449  maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1450  one, sizeTy);
1451  boundWarning = "Size argument is greater than the length of the "
1452  "destination buffer";
1453  }
1454  }
1455 
1456  // If we couldn't pin down the copy length, at least bound it.
1457  // FIXME: We should actually run this code path for append as well, but
1458  // right now it creates problems with constraints (since we can end up
1459  // trying to pass constraints from symbol to symbol).
1460  if (amountCopied.isUnknown() && !isAppending) {
1461  // Try to get a "hypothetical" string length symbol, which we can later
1462  // set as a real value if that turns out to be the case.
1463  amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1464  assert(!amountCopied.isUndef());
1465 
1466  if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1467  if (lenValNL) {
1468  // amountCopied <= lenVal
1469  SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1470  *amountCopiedNL,
1471  *lenValNL,
1472  cmpTy);
1473  state = state->assume(
1474  copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1475  if (!state)
1476  return;
1477  }
1478 
1479  if (strLengthNL) {
1480  // amountCopied <= strlen(source)
1481  SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1482  *amountCopiedNL,
1483  *strLengthNL,
1484  cmpTy);
1485  state = state->assume(
1486  copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1487  if (!state)
1488  return;
1489  }
1490  }
1491  }
1492 
1493  } else {
1494  // The function isn't bounded. The amount copied should match the length
1495  // of the source buffer.
1496  amountCopied = strLength;
1497  }
1498 
1499  assert(state);
1500 
1501  // This represents the number of characters copied into the destination
1502  // buffer. (It may not actually be the strlen if the destination buffer
1503  // is not terminated.)
1504  SVal finalStrLength = UnknownVal();
1505 
1506  // If this is an appending function (strcat, strncat...) then set the
1507  // string length to strlen(src) + strlen(dst) since the buffer will
1508  // ultimately contain both.
1509  if (isAppending) {
1510  // Get the string length of the destination. If the destination is memory
1511  // that can't have a string length, we shouldn't be copying into it anyway.
1512  SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1513  if (dstStrLength.isUndef())
1514  return;
1515 
1516  Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1517  Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1518 
1519  // If we know both string lengths, we might know the final string length.
1520  if (srcStrLengthNL && dstStrLengthNL) {
1521  // Make sure the two lengths together don't overflow a size_t.
1522  state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1523  if (!state)
1524  return;
1525 
1526  finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1527  *dstStrLengthNL, sizeTy);
1528  }
1529 
1530  // If we couldn't get a single value for the final string length,
1531  // we can at least bound it by the individual lengths.
1532  if (finalStrLength.isUnknown()) {
1533  // Try to get a "hypothetical" string length symbol, which we can later
1534  // set as a real value if that turns out to be the case.
1535  finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1536  assert(!finalStrLength.isUndef());
1537 
1538  if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1539  if (srcStrLengthNL) {
1540  // finalStrLength >= srcStrLength
1541  SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1542  *finalStrLengthNL,
1543  *srcStrLengthNL,
1544  cmpTy);
1545  state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1546  true);
1547  if (!state)
1548  return;
1549  }
1550 
1551  if (dstStrLengthNL) {
1552  // finalStrLength >= dstStrLength
1553  SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1554  *finalStrLengthNL,
1555  *dstStrLengthNL,
1556  cmpTy);
1557  state =
1558  state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1559  if (!state)
1560  return;
1561  }
1562  }
1563  }
1564 
1565  } else {
1566  // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1567  // the final string length will match the input string length.
1568  finalStrLength = amountCopied;
1569  }
1570 
1571  // The final result of the function will either be a pointer past the last
1572  // copied element, or a pointer to the start of the destination buffer.
1573  SVal Result = (returnEnd ? UnknownVal() : DstVal);
1574 
1575  assert(state);
1576 
1577  // If the destination is a MemRegion, try to check for a buffer overflow and
1578  // record the new string length.
1579  if (Optional<loc::MemRegionVal> dstRegVal =
1580  DstVal.getAs<loc::MemRegionVal>()) {
1581  QualType ptrTy = Dst->getType();
1582 
1583  // If we have an exact value on a bounded copy, use that to check for
1584  // overflows, rather than our estimate about how much is actually copied.
1585  if (boundWarning) {
1586  if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1587  SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1588  *maxLastNL, ptrTy);
1589  state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1590  boundWarning);
1591  if (!state)
1592  return;
1593  }
1594  }
1595 
1596  // Then, if the final length is known...
1597  if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1598  SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1599  *knownStrLength, ptrTy);
1600 
1601  // ...and we haven't checked the bound, we'll check the actual copy.
1602  if (!boundWarning) {
1603  const char * const warningMsg =
1604  "String copy function overflows destination buffer";
1605  state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1606  if (!state)
1607  return;
1608  }
1609 
1610  // If this is a stpcpy-style copy, the last element is the return value.
1611  if (returnEnd)
1612  Result = lastElement;
1613  }
1614 
1615  // Invalidate the destination (regular invalidation without pointer-escaping
1616  // the address of the top-level region). This must happen before we set the
1617  // C string length because invalidation will clear the length.
1618  // FIXME: Even if we can't perfectly model the copy, we should see if we
1619  // can use LazyCompoundVals to copy the source values into the destination.
1620  // This would probably remove any existing bindings past the end of the
1621  // string, but that's still an improvement over blank invalidation.
1622  state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1623  /*IsSourceBuffer*/false);
1624 
1625  // Invalidate the source (const-invalidation without const-pointer-escaping
1626  // the address of the top-level region).
1627  state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true);
1628 
1629  // Set the C string length of the destination, if we know it.
1630  if (isBounded && !isAppending) {
1631  // strncpy is annoying in that it doesn't guarantee to null-terminate
1632  // the result string. If the original string didn't fit entirely inside
1633  // the bound (including the null-terminator), we don't know how long the
1634  // result is.
1635  if (amountCopied != strLength)
1636  finalStrLength = UnknownVal();
1637  }
1638  state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1639  }
1640 
1641  assert(state);
1642 
1643  // If this is a stpcpy-style copy, but we were unable to check for a buffer
1644  // overflow, we still need a result. Conjure a return value.
1645  if (returnEnd && Result.isUnknown()) {
1646  Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1647  }
1648 
1649  // Set the return value.
1650  state = state->BindExpr(CE, LCtx, Result);
1651  C.addTransition(state);
1652 }
1653 
1654 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1655  if (CE->getNumArgs() < 2)
1656  return;
1657 
1658  //int strcmp(const char *s1, const char *s2);
1659  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1660 }
1661 
1662 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1663  if (CE->getNumArgs() < 3)
1664  return;
1665 
1666  //int strncmp(const char *s1, const char *s2, size_t n);
1667  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1668 }
1669 
1670 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1671  const CallExpr *CE) const {
1672  if (CE->getNumArgs() < 2)
1673  return;
1674 
1675  //int strcasecmp(const char *s1, const char *s2);
1676  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1677 }
1678 
1679 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1680  const CallExpr *CE) const {
1681  if (CE->getNumArgs() < 3)
1682  return;
1683 
1684  //int strncasecmp(const char *s1, const char *s2, size_t n);
1685  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1686 }
1687 
1688 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1689  bool isBounded, bool ignoreCase) const {
1690  CurrentFunctionDescription = "string comparison function";
1691  ProgramStateRef state = C.getState();
1692  const LocationContext *LCtx = C.getLocationContext();
1693 
1694  // Check that the first string is non-null
1695  const Expr *s1 = CE->getArg(0);
1696  SVal s1Val = state->getSVal(s1, LCtx);
1697  state = checkNonNull(C, state, s1, s1Val);
1698  if (!state)
1699  return;
1700 
1701  // Check that the second string is non-null.
1702  const Expr *s2 = CE->getArg(1);
1703  SVal s2Val = state->getSVal(s2, LCtx);
1704  state = checkNonNull(C, state, s2, s2Val);
1705  if (!state)
1706  return;
1707 
1708  // Get the string length of the first string or give up.
1709  SVal s1Length = getCStringLength(C, state, s1, s1Val);
1710  if (s1Length.isUndef())
1711  return;
1712 
1713  // Get the string length of the second string or give up.
1714  SVal s2Length = getCStringLength(C, state, s2, s2Val);
1715  if (s2Length.isUndef())
1716  return;
1717 
1718  // If we know the two buffers are the same, we know the result is 0.
1719  // First, get the two buffers' addresses. Another checker will have already
1720  // made sure they're not undefined.
1723 
1724  // See if they are the same.
1725  SValBuilder &svalBuilder = C.getSValBuilder();
1726  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1727  ProgramStateRef StSameBuf, StNotSameBuf;
1728  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1729 
1730  // If the two arguments might be the same buffer, we know the result is 0,
1731  // and we only need to check one size.
1732  if (StSameBuf) {
1733  StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1734  svalBuilder.makeZeroVal(CE->getType()));
1735  C.addTransition(StSameBuf);
1736 
1737  // If the two arguments are GUARANTEED to be the same, we're done!
1738  if (!StNotSameBuf)
1739  return;
1740  }
1741 
1742  assert(StNotSameBuf);
1743  state = StNotSameBuf;
1744 
1745  // At this point we can go about comparing the two buffers.
1746  // For now, we only do this if they're both known string literals.
1747 
1748  // Attempt to extract string literals from both expressions.
1749  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1750  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1751  bool canComputeResult = false;
1752 
1753  if (s1StrLiteral && s2StrLiteral) {
1754  StringRef s1StrRef = s1StrLiteral->getString();
1755  StringRef s2StrRef = s2StrLiteral->getString();
1756 
1757  if (isBounded) {
1758  // Get the max number of characters to compare.
1759  const Expr *lenExpr = CE->getArg(2);
1760  SVal lenVal = state->getSVal(lenExpr, LCtx);
1761 
1762  // If the length is known, we can get the right substrings.
1763  if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1764  // Create substrings of each to compare the prefix.
1765  s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1766  s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1767  canComputeResult = true;
1768  }
1769  } else {
1770  // This is a normal, unbounded strcmp.
1771  canComputeResult = true;
1772  }
1773 
1774  if (canComputeResult) {
1775  // Real strcmp stops at null characters.
1776  size_t s1Term = s1StrRef.find('\0');
1777  if (s1Term != StringRef::npos)
1778  s1StrRef = s1StrRef.substr(0, s1Term);
1779 
1780  size_t s2Term = s2StrRef.find('\0');
1781  if (s2Term != StringRef::npos)
1782  s2StrRef = s2StrRef.substr(0, s2Term);
1783 
1784  // Use StringRef's comparison methods to compute the actual result.
1785  int result;
1786 
1787  if (ignoreCase) {
1788  // Compare string 1 to string 2 the same way strcasecmp() does.
1789  result = s1StrRef.compare_lower(s2StrRef);
1790  } else {
1791  // Compare string 1 to string 2 the same way strcmp() does.
1792  result = s1StrRef.compare(s2StrRef);
1793  }
1794 
1795  // Build the SVal of the comparison and bind the return value.
1796  SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1797  state = state->BindExpr(CE, LCtx, resultVal);
1798  }
1799  }
1800 
1801  if (!canComputeResult) {
1802  // Conjure a symbolic value. It's the best we can do.
1803  SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1804  C.blockCount());
1805  state = state->BindExpr(CE, LCtx, resultVal);
1806  }
1807 
1808  // Record this as a possible path.
1809  C.addTransition(state);
1810 }
1811 
1812 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1813  //char *strsep(char **stringp, const char *delim);
1814  if (CE->getNumArgs() < 2)
1815  return;
1816 
1817  // Sanity: does the search string parameter match the return type?
1818  const Expr *SearchStrPtr = CE->getArg(0);
1819  QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1820  if (CharPtrTy.isNull() ||
1821  CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1822  return;
1823 
1824  CurrentFunctionDescription = "strsep()";
1825  ProgramStateRef State = C.getState();
1826  const LocationContext *LCtx = C.getLocationContext();
1827 
1828  // Check that the search string pointer is non-null (though it may point to
1829  // a null string).
1830  SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1831  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1832  if (!State)
1833  return;
1834 
1835  // Check that the delimiter string is non-null.
1836  const Expr *DelimStr = CE->getArg(1);
1837  SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1838  State = checkNonNull(C, State, DelimStr, DelimStrVal);
1839  if (!State)
1840  return;
1841 
1842  SValBuilder &SVB = C.getSValBuilder();
1843  SVal Result;
1844  if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1845  // Get the current value of the search string pointer, as a char*.
1846  Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1847 
1848  // Invalidate the search string, representing the change of one delimiter
1849  // character to NUL.
1850  State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1851  /*IsSourceBuffer*/false);
1852 
1853  // Overwrite the search string pointer. The new value is either an address
1854  // further along in the same string, or NULL if there are no more tokens.
1855  State = State->bindLoc(*SearchStrLoc,
1856  SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
1857  C.blockCount()));
1858  } else {
1859  assert(SearchStrVal.isUnknown());
1860  // Conjure a symbolic value. It's the best we can do.
1861  Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1862  }
1863 
1864  // Set the return value, and finish.
1865  State = State->BindExpr(CE, LCtx, Result);
1866  C.addTransition(State);
1867 }
1868 
1869 
1870 //===----------------------------------------------------------------------===//
1871 // The driver method, and other Checker callbacks.
1872 //===----------------------------------------------------------------------===//
1873 
1874 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1875  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1876 
1877  if (!FDecl)
1878  return false;
1879 
1880  // FIXME: Poorly-factored string switches are slow.
1881  FnCheck evalFunction = nullptr;
1882  if (C.isCLibraryFunction(FDecl, "memcpy"))
1883  evalFunction = &CStringChecker::evalMemcpy;
1884  else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1885  evalFunction = &CStringChecker::evalMempcpy;
1886  else if (C.isCLibraryFunction(FDecl, "memcmp"))
1887  evalFunction = &CStringChecker::evalMemcmp;
1888  else if (C.isCLibraryFunction(FDecl, "memmove"))
1889  evalFunction = &CStringChecker::evalMemmove;
1890  else if (C.isCLibraryFunction(FDecl, "strcpy"))
1891  evalFunction = &CStringChecker::evalStrcpy;
1892  else if (C.isCLibraryFunction(FDecl, "strncpy"))
1893  evalFunction = &CStringChecker::evalStrncpy;
1894  else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1895  evalFunction = &CStringChecker::evalStpcpy;
1896  else if (C.isCLibraryFunction(FDecl, "strcat"))
1897  evalFunction = &CStringChecker::evalStrcat;
1898  else if (C.isCLibraryFunction(FDecl, "strncat"))
1899  evalFunction = &CStringChecker::evalStrncat;
1900  else if (C.isCLibraryFunction(FDecl, "strlen"))
1901  evalFunction = &CStringChecker::evalstrLength;
1902  else if (C.isCLibraryFunction(FDecl, "strnlen"))
1903  evalFunction = &CStringChecker::evalstrnLength;
1904  else if (C.isCLibraryFunction(FDecl, "strcmp"))
1905  evalFunction = &CStringChecker::evalStrcmp;
1906  else if (C.isCLibraryFunction(FDecl, "strncmp"))
1907  evalFunction = &CStringChecker::evalStrncmp;
1908  else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1909  evalFunction = &CStringChecker::evalStrcasecmp;
1910  else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1911  evalFunction = &CStringChecker::evalStrncasecmp;
1912  else if (C.isCLibraryFunction(FDecl, "strsep"))
1913  evalFunction = &CStringChecker::evalStrsep;
1914  else if (C.isCLibraryFunction(FDecl, "bcopy"))
1915  evalFunction = &CStringChecker::evalBcopy;
1916  else if (C.isCLibraryFunction(FDecl, "bcmp"))
1917  evalFunction = &CStringChecker::evalMemcmp;
1918 
1919  // If the callee isn't a string function, let another checker handle it.
1920  if (!evalFunction)
1921  return false;
1922 
1923  // Check and evaluate the call.
1924  (this->*evalFunction)(C, CE);
1925 
1926  // If the evaluate call resulted in no change, chain to the next eval call
1927  // handler.
1928  // Note, the custom CString evaluation calls assume that basic safety
1929  // properties are held. However, if the user chooses to turn off some of these
1930  // checks, we ignore the issues and leave the call evaluation to a generic
1931  // handler.
1932  if (!C.isDifferent())
1933  return false;
1934 
1935  return true;
1936 }
1937 
1938 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1939  // Record string length for char a[] = "abc";
1940  ProgramStateRef state = C.getState();
1941 
1942  for (const auto *I : DS->decls()) {
1943  const VarDecl *D = dyn_cast<VarDecl>(I);
1944  if (!D)
1945  continue;
1946 
1947  // FIXME: Handle array fields of structs.
1948  if (!D->getType()->isArrayType())
1949  continue;
1950 
1951  const Expr *Init = D->getInit();
1952  if (!Init)
1953  continue;
1954  if (!isa<StringLiteral>(Init))
1955  continue;
1956 
1957  Loc VarLoc = state->getLValue(D, C.getLocationContext());
1958  const MemRegion *MR = VarLoc.getAsRegion();
1959  if (!MR)
1960  continue;
1961 
1962  SVal StrVal = state->getSVal(Init, C.getLocationContext());
1963  assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1964  DefinedOrUnknownSVal strLength =
1965  getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
1966 
1967  state = state->set<CStringLength>(MR, strLength);
1968  }
1969 
1970  C.addTransition(state);
1971 }
1972 
1973 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1974  CStringLengthTy Entries = state->get<CStringLength>();
1975  return !Entries.isEmpty();
1976 }
1977 
1978 ProgramStateRef
1979 CStringChecker::checkRegionChanges(ProgramStateRef state,
1980  const InvalidatedSymbols *,
1981  ArrayRef<const MemRegion *> ExplicitRegions,
1983  const CallEvent *Call) const {
1984  CStringLengthTy Entries = state->get<CStringLength>();
1985  if (Entries.isEmpty())
1986  return state;
1987 
1988  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1989  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1990 
1991  // First build sets for the changed regions and their super-regions.
1993  I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1994  const MemRegion *MR = *I;
1995  Invalidated.insert(MR);
1996 
1997  SuperRegions.insert(MR);
1998  while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1999  MR = SR->getSuperRegion();
2000  SuperRegions.insert(MR);
2001  }
2002  }
2003 
2004  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2005 
2006  // Then loop over the entries in the current state.
2007  for (CStringLengthTy::iterator I = Entries.begin(),
2008  E = Entries.end(); I != E; ++I) {
2009  const MemRegion *MR = I.getKey();
2010 
2011  // Is this entry for a super-region of a changed region?
2012  if (SuperRegions.count(MR)) {
2013  Entries = F.remove(Entries, MR);
2014  continue;
2015  }
2016 
2017  // Is this entry for a sub-region of a changed region?
2018  const MemRegion *Super = MR;
2019  while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2020  Super = SR->getSuperRegion();
2021  if (Invalidated.count(Super)) {
2022  Entries = F.remove(Entries, MR);
2023  break;
2024  }
2025  }
2026  }
2027 
2028  return state->set<CStringLength>(Entries);
2029 }
2030 
2031 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2032  SymbolReaper &SR) const {
2033  // Mark all symbols in our string length map as valid.
2034  CStringLengthTy Entries = state->get<CStringLength>();
2035 
2036  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2037  I != E; ++I) {
2038  SVal Len = I.getData();
2039 
2040  for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2041  se = Len.symbol_end(); si != se; ++si)
2042  SR.markInUse(*si);
2043  }
2044 }
2045 
2046 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2047  CheckerContext &C) const {
2048  if (!SR.hasDeadSymbols())
2049  return;
2050 
2051  ProgramStateRef state = C.getState();
2052  CStringLengthTy Entries = state->get<CStringLength>();
2053  if (Entries.isEmpty())
2054  return;
2055 
2056  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2057  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2058  I != E; ++I) {
2059  SVal Len = I.getData();
2060  if (SymbolRef Sym = Len.getAsSymbol()) {
2061  if (SR.isDead(Sym))
2062  Entries = F.remove(Entries, I.getKey());
2063  }
2064  }
2065 
2066  state = state->set<CStringLength>(Entries);
2067  C.addTransition(state);
2068 }
2069 
2070 #define REGISTER_CHECKER(name) \
2071  void ento::register##name(CheckerManager &mgr) { \
2072  CStringChecker *checker = mgr.registerChecker<CStringChecker>(); \
2073  checker->Filter.Check##name = true; \
2074  checker->Filter.CheckName##name = mgr.getCurrentCheckName(); \
2075  }
2076 
2077 REGISTER_CHECKER(CStringNullArg)
2078 REGISTER_CHECKER(CStringOutOfBounds)
2079 REGISTER_CHECKER(CStringBufferOverlap)
2080 REGISTER_CHECKER(CStringNotNullTerm)
2081 
2083  registerCStringNullArg(Mgr);
2084 }
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
Definition: ASTMatchers.h:1110
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:498
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
Definition: SValBuilder.h:232
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:77
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2216
bool hasDeadSymbols() const
Information about invalidation for a particular region/symbol.
Definition: MemRegion.h:1320
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
A helper class which wraps a boolean value set to false by default.
Definition: Checker.h:523
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph). Uses the default CheckerContex...
virtual QualType getValueType() const =0
std::string getAsString() const
Definition: Type.h:897
const Expr * getInit() const
Definition: Decl.h:1068
SVal evalCast(SVal val, QualType castTy, QualType originalType)
Value representing integer constant.
Definition: SVals.h:339
void setTrait(SymbolRef Sym, InvalidationKinds IK)
Definition: MemRegion.cpp:1459
ExplodedNode * getPredecessor()
Returns the previous node in the exploded graph, which includes the state of the program before the c...
Symbolic value. These values used to capture symbolic execution of the program.
Definition: SymbolManager.h:42
void markInUse(SymbolRef sym)
Marks a symbol as important to a checker.
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:89
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
LineState State
Kind getKind() const
Definition: MemRegion.h:184
unsigned blockCount() const
Returns the number of times the current block has been visited along the analyzed path...
void registerCStringCheckerBasic(CheckerManager &Mgr)
Register the checker which evaluates CString API calls.
const StringLiteral * getStringLiteral() const
Definition: MemRegion.h:755
A record of the "type" of an APSInt, used for conversions.
Definition: APSIntType.h:20
SymExpr::symbol_iterator symbol_begin() const
Definition: SVals.h:177
bool isValid() const
Definition: SVals.h:129
QualType getType() const
Definition: Decl.h:538
const MemRegion * getSuperRegion() const
Definition: MemRegion.h:421
const LocationContext * getLocationContext() const
ExplodedNode * generateSink(ProgramStateRef State=nullptr, ExplodedNode *Pred=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a sink node. Generating a sink stops exploration of the given path.
#define REGISTER_CHECKER(name)
const MemRegion * StripCasts(bool StripBaseCasts=true) const
Definition: MemRegion.cpp:1089
QualType getPointeeType() const
Definition: Type.cpp:414
bool isDead(SymbolRef sym) const
Returns whether or not a symbol has been confirmed dead.
DefinedOrUnknownSVal makeZeroVal(QualType type)
Construct an SVal representing '0' for the specified type.
Definition: SValBuilder.cpp:32
const ProgramStateRef & getState() const
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
Definition: SVals.h:86
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
The result type of a method or function.
QualType getConditionType() const
Definition: SValBuilder.h:126
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, const Expr *expr, const LocationContext *LCtx, unsigned count)
Create a new symbol with a unique 'name'.
ASTContext & getContext()
Definition: SValBuilder.h:121
SymExpr::symbol_iterator symbol_end() const
Definition: SVals.h:185
A class responsible for cleaning up unused symbols.
bool isUndef() const
Definition: SVals.h:121
const llvm::APSInt * evalAPSInt(BinaryOperator::Opcode Op, const llvm::APSInt &V1, const llvm::APSInt &V2)
REGISTER_MAP_WITH_PROGRAMSTATE(AllocatedData, SymbolRef, MacOSKeychainAPIChecker::AllocationState) static bool isEnclosingFunctionParam(const Expr *E)
Tells that a region's contents is not changed.
Definition: MemRegion.h:1334
NonLoc getIndex() const
Definition: MemRegion.h:1027
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
QualType getType() const
Definition: Expr.h:125
CanQualType CharTy
Definition: ASTContext.h:819
llvm::APSInt getValue(uint64_t RawValue) const LLVM_READONLY
Definition: APSIntType.h:70
unsigned getByteLength() const
Definition: Expr.h:1553
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
StringRef getString() const
Definition: Expr.h:1521
DefinedSVal getMetadataSymbolVal(const void *symbolTag, const MemRegion *region, const Expr *expr, QualType type, unsigned count)
const MemRegion * getAsRegion() const
Definition: SVals.cpp:135
unsigned getNumArgs() const
Definition: Expr.h:2205
Represents an abstract call to a function or method along a particular path.
Definition: CallEvent.h:113
BasicValueFactory & getBasicValueFactory()
Definition: SValBuilder.h:134
bool isUnknown() const
Definition: SVals.h:117
decl_range decls()
Definition: Stmt.h:497
QualType getUnqualifiedType() const
Retrieve the unqualified variant of the given type, removing as little sugar as possible.
Definition: Type.h:5096
DefinedOrUnknownSVal evalEQ(ProgramStateRef state, DefinedOrUnknownSVal lhs, DefinedOrUnknownSVal rhs)
bool trackNullOrUndefValue(const ExplodedNode *N, const Stmt *S, BugReport &R, bool IsArg=false, bool EnableNullFPSuppression=true)
QualType getValueType() const override
Definition: MemRegion.h:1029
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef. Otherwise, return 0.
Definition: SVals.cpp:111
bool isArrayType() const
Definition: Type.h:5271
SValBuilder & getSValBuilder()
const llvm::APSInt & getMaxValue(const llvm::APSInt &v)
StringRegion - Region associated with a StringLiteral.
Definition: MemRegion.h:741
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1008
static LLVM_READONLY char toUppercase(char c)
Definition: CharInfo.h:174
bool isNull() const
isNull - Return true if this QualType doesn't point to a type yet.
Definition: Type.h:633
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:75
bool isDifferent()
Check if the checker changed the state of the execution; ex: added a new transition or a bug report...
const LocationContext * getLocationContext() const
SVal getSVal(const Stmt *S) const
Get the value of arbitrary expressions at this point in the path.
Iterator over symbols that the current symbol depends on.
Definition: SymbolManager.h:76