clang  3.8.0
CStringChecker.cpp
Go to the documentation of this file.
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/Basic/CharInfo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
/// Static-analyzer checker for calls to functions in <string.h>.
///
/// The class subscribes to call evaluation, DeclStmt pre-visits, live/dead
/// symbol tracking and region-change notifications (see the Checker<...>
/// argument list), and groups its members into per-function evaluators
/// (eval*), shared utility helpers, and re-usable argument checks.
32 class CStringChecker : public Checker< eval::Call,
33  check::PreStmt<DeclStmt>,
34  check::LiveSymbols,
35  check::DeadSymbols,
36  check::RegionChanges
37  > {
// Bug types used by the diagnostics below. Each one is allocated on first use
// by the function that reports it (the `if (!BT_...) BT_....reset(...)` sites).
38  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
39  BT_NotCString, BT_AdditionOverflow;
40 
// Text describing the call currently being checked; it is streamed into
// diagnostic messages (e.g. after "Null pointer argument in call to ").
41  mutable const char *CurrentFunctionDescription;
42 
43 public:
44  /// The filter is used to filter out the diagnostics which are not enabled by
45  /// the user.
46  struct CStringChecksFilter {
47  DefaultBool CheckCStringNullArg;
48  DefaultBool CheckCStringOutOfBounds;
49  DefaultBool CheckCStringBufferOverlap;
50  DefaultBool CheckCStringNotNullTerm;
51 
52  CheckName CheckNameCStringNullArg;
53  CheckName CheckNameCStringOutOfBounds;
54  CheckName CheckNameCStringBufferOverlap;
55  CheckName CheckNameCStringNotNullTerm;
56  };
57 
58  CStringChecksFilter Filter;
59 
// Address of a function-local static; used as the tag argument when
// requesting metadata symbols (see getCStringLengthForRegion).
60  static void *getTag() { static int tag; return &tag; }
61 
// Checker callbacks corresponding to the Checker<...> base list above.
62  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
63  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
64  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
65  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
66  bool wantsRegionChangeUpdate(ProgramStateRef state) const;
67 
// NOTE(review): this listing drops two lines of the declaration below (the
// embedded numbering skips 68 and 72): the return type and one parameter.
// Restore them from the upstream file before compiling.
69  checkRegionChanges(ProgramStateRef state,
70  const InvalidatedSymbols *,
71  ArrayRef<const MemRegion *> ExplicitRegions,
73  const CallEvent *Call) const;
74 
// Signature shared by all per-function evaluators declared below.
75  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
76  const CallExpr *) const;
77 
// Memory-copy family.
78  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
79  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
80  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
81  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
// NOTE(review): the parameter names here read (Size, Source, Dest) whereas the
// out-of-line definition names the same positions (Size, Dest, Source).
// Names in a declaration do not affect calls, but one of the two is
// misleading -- confirm against the callers and make them agree.
82  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
83  ProgramStateRef state,
84  const Expr *Size,
85  const Expr *Source,
86  const Expr *Dest,
87  bool Restricted = false,
88  bool IsMempcpy = false) const;
89 
90  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
91 
// String-length family.
92  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
93  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
94  void evalstrLengthCommon(CheckerContext &C,
95  const CallExpr *CE,
96  bool IsStrnlen = false) const;
97 
// String-copy and concatenation family; all funnel into evalStrcpyCommon.
98  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
99  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
100  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
101  void evalStrcpyCommon(CheckerContext &C,
102  const CallExpr *CE,
103  bool returnEnd,
104  bool isBounded,
105  bool isAppending) const;
106 
107  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
108  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
109 
// String-comparison family.
110  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
111  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
112  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
113  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
114  void evalStrcmpCommon(CheckerContext &C,
115  const CallExpr *CE,
116  bool isBounded = false,
117  bool ignoreCase = false) const;
118 
119  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
120 
121  // Utility methods
// Splits `state` on V == zero-of-Ty; the first member of the pair is the state
// in which V is zero, the second the state in which it is not.
122  std::pair<ProgramStateRef , ProgramStateRef >
123  static assumeZero(CheckerContext &C,
124  ProgramStateRef state, SVal V, QualType Ty);
125 
// Records (or, for an unknown length, removes) the C string length tracked
// for MR in the CStringLength state map.
126  static ProgramStateRef setCStringLength(ProgramStateRef state,
127  const MemRegion *MR,
128  SVal strLength);
// Returns the recorded length for MR or a fresh metadata symbol. `state` is
// taken by reference and updated unless `hypothetical` is true.
129  static SVal getCStringLengthForRegion(CheckerContext &C,
130  ProgramStateRef &state,
131  const Expr *Ex,
132  const MemRegion *MR,
133  bool hypothetical);
// Computes the C string length of the buffer value Buf. May emit a
// "not a null-terminated string" report and return UndefinedVal.
134  SVal getCStringLength(CheckerContext &C,
135  ProgramStateRef &state,
136  const Expr *Ex,
137  SVal Buf,
138  bool hypothetical = false) const;
139 
// Returns the string literal behind `val` when it points at a StringRegion,
// otherwise nullptr.
140  const StringLiteral *getCStringLiteral(CheckerContext &C,
141  ProgramStateRef &state,
142  const Expr *expr,
143  SVal val) const;
144 
// Invalidates the memory denoted by V (or kills its binding when V is a
// non-region location) and returns the resulting state.
145  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
146  ProgramStateRef state,
147  const Expr *Ex, SVal V,
148  bool IsSourceBuffer,
149  const Expr *Size);
150 
// Writes a short description of MR to `os`; returns false when it has no
// description for that kind of region.
151  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
152  const MemRegion *MR);
153 
154  // Re-usable checks
// Each check returns the state to continue with, or nullptr when the check
// failed (a null incoming state is propagated as failure).
155  ProgramStateRef checkNonNull(CheckerContext &C,
156  ProgramStateRef state,
157  const Expr *S,
158  SVal l) const;
159  ProgramStateRef CheckLocation(CheckerContext &C,
160  ProgramStateRef state,
161  const Expr *S,
162  SVal l,
163  const char *message = nullptr) const;
164  ProgramStateRef CheckBufferAccess(CheckerContext &C,
165  ProgramStateRef state,
166  const Expr *Size,
167  const Expr *FirstBuf,
168  const Expr *SecondBuf,
169  const char *firstMessage = nullptr,
170  const char *secondMessage = nullptr,
171  bool WarnAboutSize = false) const;
172 
173  ProgramStateRef CheckBufferAccess(CheckerContext &C,
174  ProgramStateRef state,
175  const Expr *Size,
176  const Expr *Buf,
177  const char *message = nullptr,
178  bool WarnAboutSize = false) const {
179  // This is a convenience overload: check a single buffer by forwarding to
// the two-buffer form with no second buffer and no second message.
180  return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
181  WarnAboutSize);
182  }
183  ProgramStateRef CheckOverlap(CheckerContext &C,
184  ProgramStateRef state,
185  const Expr *Size,
186  const Expr *First,
187  const Expr *Second) const;
// Reports "Arguments must not be overlapping buffers", highlighting both
// argument expressions.
188  void emitOverlapBug(CheckerContext &C,
189  ProgramStateRef state,
190  const Stmt *First,
191  const Stmt *Second) const;
192 
// Reports when left + right is known to exceed the maximum size_t value.
193  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
194  ProgramStateRef state,
195  NonLoc left,
196  NonLoc right) const;
197 
198  // Return true if the destination buffer of the copy function may be in bound.
199  // Expects SVal of Size to be positive and unsigned.
200  // Expects SVal of FirstBuf to be a FieldRegion.
201  static bool IsFirstBufInBound(CheckerContext &C,
202  ProgramStateRef state,
203  const Expr *FirstBuf,
204  const Expr *Size);
205 };
206 
207 } //end anonymous namespace
208 
// Program-state map from a memory region to the SVal recorded as that
// region's C string length. It is written by setCStringLength() and
// getCStringLengthForRegion() and read back via state->get<CStringLength>().
209 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
210 
211 //===----------------------------------------------------------------------===//
212 // Individual checks and utility methods.
213 //===----------------------------------------------------------------------===//
214 
215 std::pair<ProgramStateRef , ProgramStateRef >
216 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
217  QualType Ty) {
218  Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
219  if (!val)
220  return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
221 
222  SValBuilder &svalBuilder = C.getSValBuilder();
223  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
224  return state->assume(svalBuilder.evalEQ(state, *val, zero));
225 }
226 
227 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
228  ProgramStateRef state,
229  const Expr *S, SVal l) const {
230  // If a previous check has failed, propagate the failure.
231  if (!state)
232  return nullptr;
233 
234  ProgramStateRef stateNull, stateNonNull;
235  std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
236 
237  if (stateNull && !stateNonNull) {
238  if (!Filter.CheckCStringNullArg)
239  return nullptr;
240 
241  ExplodedNode *N = C.generateErrorNode(stateNull);
242  if (!N)
243  return nullptr;
244 
245  if (!BT_Null)
246  BT_Null.reset(new BuiltinBug(
247  Filter.CheckNameCStringNullArg, categories::UnixAPI,
248  "Null pointer argument in call to byte string function"));
249 
250  SmallString<80> buf;
251  llvm::raw_svector_ostream os(buf);
252  assert(CurrentFunctionDescription);
253  os << "Null pointer argument in call to " << CurrentFunctionDescription;
254 
255  // Generate a report for this bug.
256  BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
257  auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
258 
259  report->addRange(S->getSourceRange());
260  bugreporter::trackNullOrUndefValue(N, S, *report);
261  C.emitReport(std::move(report));
262  return nullptr;
263  }
264 
265  // From here on, assume that the value is non-null.
266  assert(stateNonNull);
267  return stateNonNull;
268 }
269 
/// Check that the location \p l does not lie outside the bounds of the array
/// it indexes into.
///
/// Only ElementRegion locations are checked; any other value (no region, or a
/// region of a different kind) is accepted and \p state is returned as-is.
///
/// \param S          expression whose source range is attached to the report.
/// \param warningMsg optional report text; when null the text is built from
///                   CurrentFunctionDescription with its first letter
///                   upper-cased.
/// \returns the in-bound state, or nullptr when \p state is null or the
/// access is definitely out of bounds (a report is emitted in that case).
///
/// NOTE(review): this listing is missing two lines of the body (the embedded
/// numbering skips 296 and 299), which is where `Size` and `Idx` used by the
/// assumeInBound() calls are declared. Restore them from the upstream file.
270 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
271 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
272  ProgramStateRef state,
273  const Expr *S, SVal l,
274  const char *warningMsg) const {
275  // If a previous check has failed, propagate the failure.
276  if (!state)
277  return nullptr;
278 
279  // Check for out of bound array element access.
280  const MemRegion *R = l.getAsRegion();
281  if (!R)
282  return state;
283 
284  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
285  if (!ER)
286  return state;
287 
288  assert(ER->getValueType() == C.getASTContext().CharTy &&
289  "CheckLocation should only be called with char* ElementRegions");
290 
291  // Get the size of the array.
292  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
293  SValBuilder &svalBuilder = C.getSValBuilder();
294  SVal Extent =
295  svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
297 
298  // Get the index of the accessed element.
300 
// Ask the constraint manager for both outcomes; only a definite out-of-bound
// access (out-of-bound feasible, in-bound infeasible) is reported.
301  ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
302  ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
303  if (StOutBound && !StInBound) {
304  ExplodedNode *N = C.generateErrorNode(StOutBound);
305  if (!N)
306  return nullptr;
307 
// The bug type is created the first time it is needed.
308  if (!BT_Bounds) {
309  BT_Bounds.reset(new BuiltinBug(
310  Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
311  "Byte string function accesses out-of-bound array element"));
312  }
313  BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
314 
315  // Generate a report for this bug.
316  std::unique_ptr<BugReport> report;
317  if (warningMsg) {
318  report = llvm::make_unique<BugReport>(*BT, warningMsg, N);
319  } else {
320  assert(CurrentFunctionDescription);
321  assert(CurrentFunctionDescription[0] != '\0');
322 
// Capitalise the first character of the description so that it can start
// the sentence.
323  SmallString<80> buf;
324  llvm::raw_svector_ostream os(buf);
325  os << toUppercase(CurrentFunctionDescription[0])
326  << &CurrentFunctionDescription[1]
327  << " accesses out-of-bound array element";
328  report = llvm::make_unique<BugReport>(*BT, os.str(), N);
329  }
330 
331  // FIXME: It would be nice to eventually make this diagnostic more clear,
332  // e.g., by referencing the original declaration or by saying *why* this
333  // reference is outside the range.
334 
335  report->addRange(S->getSourceRange());
336  C.emitReport(std::move(report));
337  return nullptr;
338  }
339 
340  // Array bound check succeeded. From this point forward the array bound
341  // should always succeed.
342  return StInBound;
343 }
344 
345 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
346  ProgramStateRef state,
347  const Expr *Size,
348  const Expr *FirstBuf,
349  const Expr *SecondBuf,
350  const char *firstMessage,
351  const char *secondMessage,
352  bool WarnAboutSize) const {
353  // If a previous check has failed, propagate the failure.
354  if (!state)
355  return nullptr;
356 
357  SValBuilder &svalBuilder = C.getSValBuilder();
358  ASTContext &Ctx = svalBuilder.getContext();
359  const LocationContext *LCtx = C.getLocationContext();
360 
361  QualType sizeTy = Size->getType();
362  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
363 
364  // Check that the first buffer is non-null.
365  SVal BufVal = state->getSVal(FirstBuf, LCtx);
366  state = checkNonNull(C, state, FirstBuf, BufVal);
367  if (!state)
368  return nullptr;
369 
370  // If out-of-bounds checking is turned off, skip the rest.
371  if (!Filter.CheckCStringOutOfBounds)
372  return state;
373 
374  // Get the access length and make sure it is known.
375  // FIXME: This assumes the caller has already checked that the access length
376  // is positive. And that it's unsigned.
377  SVal LengthVal = state->getSVal(Size, LCtx);
378  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
379  if (!Length)
380  return state;
381 
382  // Compute the offset of the last element to be accessed: size-1.
383  NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
384  NonLoc LastOffset = svalBuilder
385  .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
386 
387  // Check that the first buffer is sufficiently long.
388  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
389  if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
390  const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
391 
392  SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
393  LastOffset, PtrTy);
394  state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
395 
396  // If the buffer isn't large enough, abort.
397  if (!state)
398  return nullptr;
399  }
400 
401  // If there's a second buffer, check it as well.
402  if (SecondBuf) {
403  BufVal = state->getSVal(SecondBuf, LCtx);
404  state = checkNonNull(C, state, SecondBuf, BufVal);
405  if (!state)
406  return nullptr;
407 
408  BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
409  if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
410  const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
411 
412  SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
413  LastOffset, PtrTy);
414  state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
415  }
416  }
417 
418  // Large enough or not, return this state!
419  return state;
420 }
421 
422 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
423  ProgramStateRef state,
424  const Expr *Size,
425  const Expr *First,
426  const Expr *Second) const {
427  if (!Filter.CheckCStringBufferOverlap)
428  return state;
429 
430  // Do a simple check for overlap: if the two arguments are from the same
431  // buffer, see if the end of the first is greater than the start of the second
432  // or vice versa.
433 
434  // If a previous check has failed, propagate the failure.
435  if (!state)
436  return nullptr;
437 
438  ProgramStateRef stateTrue, stateFalse;
439 
440  // Get the buffer values and make sure they're known locations.
441  const LocationContext *LCtx = C.getLocationContext();
442  SVal firstVal = state->getSVal(First, LCtx);
443  SVal secondVal = state->getSVal(Second, LCtx);
444 
445  Optional<Loc> firstLoc = firstVal.getAs<Loc>();
446  if (!firstLoc)
447  return state;
448 
449  Optional<Loc> secondLoc = secondVal.getAs<Loc>();
450  if (!secondLoc)
451  return state;
452 
453  // Are the two values the same?
454  SValBuilder &svalBuilder = C.getSValBuilder();
455  std::tie(stateTrue, stateFalse) =
456  state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
457 
458  if (stateTrue && !stateFalse) {
459  // If the values are known to be equal, that's automatically an overlap.
460  emitOverlapBug(C, stateTrue, First, Second);
461  return nullptr;
462  }
463 
464  // assume the two expressions are not equal.
465  assert(stateFalse);
466  state = stateFalse;
467 
468  // Which value comes first?
469  QualType cmpTy = svalBuilder.getConditionType();
470  SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
471  *firstLoc, *secondLoc, cmpTy);
472  Optional<DefinedOrUnknownSVal> reverseTest =
473  reverse.getAs<DefinedOrUnknownSVal>();
474  if (!reverseTest)
475  return state;
476 
477  std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
478  if (stateTrue) {
479  if (stateFalse) {
480  // If we don't know which one comes first, we can't perform this test.
481  return state;
482  } else {
483  // Switch the values so that firstVal is before secondVal.
484  std::swap(firstLoc, secondLoc);
485 
486  // Switch the Exprs as well, so that they still correspond.
487  std::swap(First, Second);
488  }
489  }
490 
491  // Get the length, and make sure it too is known.
492  SVal LengthVal = state->getSVal(Size, LCtx);
493  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
494  if (!Length)
495  return state;
496 
497  // Convert the first buffer's start address to char*.
498  // Bail out if the cast fails.
499  ASTContext &Ctx = svalBuilder.getContext();
500  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
501  SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
502  First->getType());
503  Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
504  if (!FirstStartLoc)
505  return state;
506 
507  // Compute the end of the first buffer. Bail out if THAT fails.
508  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
509  *FirstStartLoc, *Length, CharPtrTy);
510  Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
511  if (!FirstEndLoc)
512  return state;
513 
514  // Is the end of the first buffer past the start of the second buffer?
515  SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
516  *FirstEndLoc, *secondLoc, cmpTy);
517  Optional<DefinedOrUnknownSVal> OverlapTest =
518  Overlap.getAs<DefinedOrUnknownSVal>();
519  if (!OverlapTest)
520  return state;
521 
522  std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
523 
524  if (stateTrue && !stateFalse) {
525  // Overlap!
526  emitOverlapBug(C, stateTrue, First, Second);
527  return nullptr;
528  }
529 
530  // assume the two expressions don't overlap.
531  assert(stateFalse);
532  return stateFalse;
533 }
534 
535 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
536  const Stmt *First, const Stmt *Second) const {
537  ExplodedNode *N = C.generateErrorNode(state);
538  if (!N)
539  return;
540 
541  if (!BT_Overlap)
542  BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
543  categories::UnixAPI, "Improper arguments"));
544 
545  // Generate a report for this bug.
546  auto report = llvm::make_unique<BugReport>(
547  *BT_Overlap, "Arguments must not be overlapping buffers", N);
548  report->addRange(First->getSourceRange());
549  report->addRange(Second->getSourceRange());
550 
551  C.emitReport(std::move(report));
552 }
553 
554 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
555  ProgramStateRef state,
556  NonLoc left,
557  NonLoc right) const {
558  // If out-of-bounds checking is turned off, skip the rest.
559  if (!Filter.CheckCStringOutOfBounds)
560  return state;
561 
562  // If a previous check has failed, propagate the failure.
563  if (!state)
564  return nullptr;
565 
566  SValBuilder &svalBuilder = C.getSValBuilder();
567  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
568 
569  QualType sizeTy = svalBuilder.getContext().getSizeType();
570  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
571  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
572 
573  SVal maxMinusRight;
574  if (right.getAs<nonloc::ConcreteInt>()) {
575  maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
576  sizeTy);
577  } else {
578  // Try switching the operands. (The order of these two assignments is
579  // important!)
580  maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
581  sizeTy);
582  left = right;
583  }
584 
585  if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
586  QualType cmpTy = svalBuilder.getConditionType();
587  // If left > max - right, we have an overflow.
588  SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
589  *maxMinusRightNL, cmpTy);
590 
591  ProgramStateRef stateOverflow, stateOkay;
592  std::tie(stateOverflow, stateOkay) =
593  state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
594 
595  if (stateOverflow && !stateOkay) {
596  // We have an overflow. Emit a bug report.
597  ExplodedNode *N = C.generateErrorNode(stateOverflow);
598  if (!N)
599  return nullptr;
600 
601  if (!BT_AdditionOverflow)
602  BT_AdditionOverflow.reset(
603  new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
604  "Sum of expressions causes overflow"));
605 
606  // This isn't a great error message, but this should never occur in real
607  // code anyway -- you'd have to create a buffer longer than a size_t can
608  // represent, which is sort of a contradiction.
609  const char *warning =
610  "This expression will create a string whose length is too big to "
611  "be represented as a size_t";
612 
613  // Generate a report for this bug.
614  C.emitReport(
615  llvm::make_unique<BugReport>(*BT_AdditionOverflow, warning, N));
616 
617  return nullptr;
618  }
619 
620  // From now on, assume an overflow didn't occur.
621  assert(stateOkay);
622  state = stateOkay;
623  }
624 
625  return state;
626 }
627 
/// Record \p strLength as the C string length of region \p MR.
///
/// Casts are stripped from \p MR first. Depending on the kind of the
/// resulting region the request is either ignored (the state is returned
/// unchanged) or applied: an unknown length removes any existing entry from
/// the CStringLength map, any other length is stored.
///
/// \pre \p strLength is not undefined (asserted).
/// \returns the updated, or unchanged, program state.
///
/// NOTE(review): the `case` labels of the switch below are missing from this
/// listing (the embedded numbering skips 636, 641-645 and 649), so it is not
/// visible here which region kinds take which branch. Restore them from the
/// upstream file.
628 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
629  const MemRegion *MR,
630  SVal strLength) {
631  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
632 
633  MR = MR->StripCasts();
634 
635  switch (MR->getKind()) {
637  // FIXME: This can happen if we strcpy() into a string region. This is
638  // undefined [C99 6.4.5p6], but we should still warn about it.
639  return state;
640 
646  // These are the types we can currently track string lengths for.
647  break;
648 
650  // FIXME: Handle element regions by upper-bounding the parent region's
651  // string length.
652  return state;
653 
654  default:
655  // Other regions (mostly non-data) can't have a reliable C string length.
656  // For now, just ignore the change.
657  // FIXME: These are rare but not impossible. We should output some kind of
658  // warning for things like strcpy((char[]){'a', 0}, "b");
659  return state;
660  }
661 
// Only trackable regions reach this point. An unknown length carries no
// information, so drop the entry instead of storing it.
662  if (strLength.isUnknown())
663  return state->remove<CStringLength>(MR);
664 
665  return state->set<CStringLength>(MR, strLength);
666 }
667 
668 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
669  ProgramStateRef &state,
670  const Expr *Ex,
671  const MemRegion *MR,
672  bool hypothetical) {
673  if (!hypothetical) {
674  // If there's a recorded length, go ahead and return it.
675  const SVal *Recorded = state->get<CStringLength>(MR);
676  if (Recorded)
677  return *Recorded;
678  }
679 
680  // Otherwise, get a new symbol and update the state.
681  SValBuilder &svalBuilder = C.getSValBuilder();
682  QualType sizeTy = svalBuilder.getContext().getSizeType();
683  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
684  MR, Ex, sizeTy,
685  C.blockCount());
686 
687  if (!hypothetical) {
688  if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
689  // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
690  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
691  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
692  llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
693  const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
694  fourInt);
695  NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
696  SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
697  maxLength, sizeTy);
698  state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
699  }
700  state = state->set<CStringLength>(MR, strLength);
701  }
702 
703  return strLength;
704 }
705 
/// Determine the C string length of the buffer value \p Buf (the value of
/// expression \p Ex).
///
/// \param state in/out; forwarded to getCStringLengthForRegion(), which may
///        update it.
/// \param hypothetical forwarded to getCStringLengthForRegion().
/// \returns
///   - UndefinedVal when \p Buf is known not to be a C string (a label
///     address, or a region kind that falls into the default branch); a
///     non-fatal report is emitted first if the not-null-terminated check is
///     enabled,
///   - UnknownVal when nothing can be said about the length,
///   - otherwise the byte length of a string literal or the value tracked
///     for the region.
///
/// NOTE(review): the `case` labels of the switch below are missing from this
/// listing (the embedded numbering skips 750, 758-762, 764 and 767), so it is
/// not visible here which region kinds select which branch. Restore them
/// from the upstream file.
706 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
707  const Expr *Ex, SVal Buf,
708  bool hypothetical) const {
709  const MemRegion *MR = Buf.getAsRegion();
710  if (!MR) {
711  // If we can't get a region, see if it's something we /know/ isn't a
712  // C string. In the context of locations, the only time we can issue such
713  // a warning is for labels.
714  if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
715  if (!Filter.CheckCStringNotNullTerm)
716  return UndefinedVal();
717 
// Non-fatal: the report is emitted but the path itself is not terminated.
718  if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
719  if (!BT_NotCString)
720  BT_NotCString.reset(new BuiltinBug(
721  Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
722  "Argument is not a null-terminated string."));
723 
724  SmallString<120> buf;
725  llvm::raw_svector_ostream os(buf);
726  assert(CurrentFunctionDescription);
727  os << "Argument to " << CurrentFunctionDescription
728  << " is the address of the label '" << Label->getLabel()->getName()
729  << "', which is not a null-terminated string";
730 
731  // Generate a report for this bug.
732  auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
733 
734  report->addRange(Ex->getSourceRange());
735  C.emitReport(std::move(report));
736  }
737  return UndefinedVal();
738 
739  }
740 
741  // If it's not a region and not a label, give up.
742  return UnknownVal();
743  }
744 
745  // If we have a region, strip casts from it and see if we can figure out
746  // its length. For anything we can't figure out, just return UnknownVal.
747  MR = MR->StripCasts();
748 
749  switch (MR->getKind()) {
751  // Modifying the contents of string regions is undefined [C99 6.4.5p6],
752  // so we can assume that the byte length is the correct C string length.
753  SValBuilder &svalBuilder = C.getSValBuilder();
754  QualType sizeTy = svalBuilder.getContext().getSizeType();
755  const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
756  return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
757  }
763  return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
765  // FIXME: Can we track this? Is it necessary?
766  return UnknownVal();
768  // FIXME: How can we handle this? It's not good enough to subtract the
769  // offset from the base string length; consider "123\x00567" and &a[5].
770  return UnknownVal();
771  default:
772  // Other regions (mostly non-data) can't have a reliable C string length.
773  // In this case, an error is emitted and UndefinedVal is returned.
774  // The caller should always be prepared to handle this case.
775  if (!Filter.CheckCStringNotNullTerm)
776  return UndefinedVal();
777 
778  if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
779  if (!BT_NotCString)
780  BT_NotCString.reset(new BuiltinBug(
781  Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
782  "Argument is not a null-terminated string."));
783 
784  SmallString<120> buf;
785  llvm::raw_svector_ostream os(buf);
786 
787  assert(CurrentFunctionDescription);
788  os << "Argument to " << CurrentFunctionDescription << " is ";
789 
// When the region can be described, the description is followed by a
// relative clause; otherwise the sentence is completed directly.
790  if (SummarizeRegion(os, C.getASTContext(), MR))
791  os << ", which is not a null-terminated string";
792  else
793  os << "not a null-terminated string";
794 
795  // Generate a report for this bug.
796  auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
797 
798  report->addRange(Ex->getSourceRange());
799  C.emitReport(std::move(report));
800  }
801 
802  return UndefinedVal();
803  }
804 }
805 
806 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
807  ProgramStateRef &state, const Expr *expr, SVal val) const {
808 
809  // Get the memory region pointed to by the val.
810  const MemRegion *bufRegion = val.getAsRegion();
811  if (!bufRegion)
812  return nullptr;
813 
814  // Strip casts off the memory region.
815  bufRegion = bufRegion->StripCasts();
816 
817  // Cast the memory region to a string region.
818  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
819  if (!strRegion)
820  return nullptr;
821 
822  // Return the actual string in the string region.
823  return strRegion->getStringLiteral();
824 }
825 
/// Decide whether an access of \p Size bytes starting at \p FirstBuf may stay
/// within the bounds of the buffer.
///
/// The last byte accessed (start + size - 1) is computed as in
/// CheckBufferAccess(); when it is an ElementRegion its index is tested
/// against the extent of the super-region.
///
/// \returns true when the in-bound assumption is feasible, and also whenever
/// any intermediate value is not known (see the comment at the top of the
/// body for why "unknown" is answered with true); false only when the access
/// cannot be in bound.
///
/// NOTE(review): this listing is missing one line of the body (the embedded
/// numbering skips 882), which is where `Idx` used by assumeInBound() is
/// declared. Restore it from the upstream file.
826 bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
827  ProgramStateRef state,
828  const Expr *FirstBuf,
829  const Expr *Size) {
830  // If we do not know that the buffer is long enough we return 'true'.
831  // Otherwise the parent region of this field region would also get
832  // invalidated, which would lead to warnings based on an unknown state.
833 
834  // Originally copied from CheckBufferAccess and CheckLocation.
835  SValBuilder &svalBuilder = C.getSValBuilder();
836  ASTContext &Ctx = svalBuilder.getContext();
837  const LocationContext *LCtx = C.getLocationContext();
838 
839  QualType sizeTy = Size->getType();
840  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
841  SVal BufVal = state->getSVal(FirstBuf, LCtx);
842 
843  SVal LengthVal = state->getSVal(Size, LCtx);
844  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
845  if (!Length)
846  return true; // cf top comment.
847 
848  // Compute the offset of the last element to be accessed: size-1.
849  NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
850  NonLoc LastOffset =
851  svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy)
852  .castAs<NonLoc>();
853 
854  // Check that the first buffer is sufficiently long.
855  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
856  Optional<Loc> BufLoc = BufStart.getAs<Loc>();
857  if (!BufLoc)
858  return true; // cf top comment.
859 
860  SVal BufEnd =
861  svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy);
862 
863  // Check for out of bound array element access.
864  const MemRegion *R = BufEnd.getAsRegion();
865  if (!R)
866  return true; // cf top comment.
867 
868  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
869  if (!ER)
870  return true; // cf top comment.
871 
872  assert(ER->getValueType() == C.getASTContext().CharTy &&
873  "IsFirstBufInBound should only be called with char* ElementRegions");
874 
875  // Get the size of the array.
876  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
877  SVal Extent =
878  svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
879  DefinedOrUnknownSVal ExtentSize = Extent.castAs<DefinedOrUnknownSVal>();
880 
881  // Get the index of the accessed element.
883 
// A non-null state means the index may be in bound.
884  ProgramStateRef StInBound = state->assumeInBound(Idx, ExtentSize, true);
885 
886  return static_cast<bool>(StInBound);
887 }
888 
/// Invalidate the memory that the buffer value \p V (the value of expression
/// \p E) refers to.
///
/// - If \p V is not a Loc, \p state is returned unchanged.
/// - If \p V is a region value, casts are stripped, an ElementRegion is
///   replaced by its super-region, and the region is invalidated with traits
///   chosen according to \p IsSourceBuffer.
/// - Any other Loc merely has its binding removed.
///
/// \param IsSourceBuffer true for the source of a copy; the invalidation is
///        then flagged as causing a pointer escape.
/// \param Size for a destination buffer, used together with
///        IsFirstBufInBound() to decide whether the super-region can be
///        preserved; may be null.
/// \returns the resulting program state.
///
/// NOTE(review): this listing is missing several lines of the body (the
/// embedded numbering skips 912, 915, 920-921, 925 and 931): the declarations
/// of `LCtx` and `ITraits`, the trait arguments of both setTrait() calls and
/// the condition guarding the field-region case. Restore them from the
/// upstream file before relying on the details below.
889 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
890  ProgramStateRef state,
891  const Expr *E, SVal V,
892  bool IsSourceBuffer,
893  const Expr *Size) {
894  Optional<Loc> L = V.getAs<Loc>();
895  if (!L)
896  return state;
897 
898  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
899  // some assumptions about the value that CFRefCount can't. Even so, it should
900  // probably be refactored.
901  if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
902  const MemRegion *R = MR->getRegion()->StripCasts();
903 
904  // Are we dealing with an ElementRegion? If so, we should be invalidating
905  // the super-region.
906  if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
907  R = ER->getSuperRegion();
908  // FIXME: What about layers of ElementRegions?
909  }
910 
911  // Invalidate this region.
913 
914  bool CausesPointerEscape = false;
916  // Invalidate and escape only indirect regions accessible through the source
917  // buffer.
918  if (IsSourceBuffer) {
919  ITraits.setTrait(R,
922  CausesPointerEscape = true;
923  } else {
924  const MemRegion::Kind& K = R->getKind();
926  if (Size && IsFirstBufInBound(C, state, E, Size)) {
927  // If destination buffer is a field region and access is in bound,
928  // do not invalidate its super region.
929  ITraits.setTrait(
930  R,
932  }
933  }
934 
935  return state->invalidateRegions(R, E, C.blockCount(), LCtx,
936  CausesPointerEscape, nullptr, nullptr,
937  &ITraits);
938  }
939 
940  // If we have a non-region value by chance, just remove the binding.
941  // FIXME: is this necessary or correct? This handles the non-Region
942  // cases. Is it ever valid to store to these?
943  return state->killBinding(*L);
944 }
945 
/// Writes a short English description of \p MR to \p os, chosen by the
/// region's kind.
///
/// \returns true if a description was written, false for region kinds that
///          fall through to the default case (nothing is written then).
//
// NOTE(review): the `case` labels of this switch are missing from this
// listing; only the bodies survive. Restore the labels from the original file.
//
// NOTE(review): "a variable of type" has no trailing space, unlike the other
// "... of type " messages, so the type name is printed directly after "type".
// This looks like a typo in the message; confirm and fix in the code.
946 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
947  const MemRegion *MR) {
// TVR is null when MR is not a TypedValueRegion; the cases below that
// dereference it presumably only apply to typed region kinds -- TODO confirm.
948  const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
949 
950  switch (MR->getKind()) {
// Function code region: name the function when its declaration is available.
952  const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl();
953  if (FD)
954  os << "the address of the function '" << *FD << '\'';
955  else
956  os << "the address of a function";
957  return true;
958  }
960  os << "block text";
961  return true;
963  os << "a block";
964  return true;
967  os << "a C++ temp object of type " << TVR->getValueType().getAsString();
968  return true;
970  os << "a variable of type" << TVR->getValueType().getAsString();
971  return true;
973  os << "a field of type " << TVR->getValueType().getAsString();
974  return true;
976  os << "an instance variable of type " << TVR->getValueType().getAsString();
977  return true;
// Any other region kind: no description is produced.
978  default:
979  return false;
980  }
981 }
982 
983 //===----------------------------------------------------------------------===//
984 // evaluation of individual function calls.
985 //===----------------------------------------------------------------------===//
986 
987 void CStringChecker::evalCopyCommon(CheckerContext &C,
988  const CallExpr *CE,
989  ProgramStateRef state,
990  const Expr *Size, const Expr *Dest,
991  const Expr *Source, bool Restricted,
992  bool IsMempcpy) const {
993  CurrentFunctionDescription = "memory copy function";
994 
995  // See if the size argument is zero.
996  const LocationContext *LCtx = C.getLocationContext();
997  SVal sizeVal = state->getSVal(Size, LCtx);
998  QualType sizeTy = Size->getType();
999 
1000  ProgramStateRef stateZeroSize, stateNonZeroSize;
1001  std::tie(stateZeroSize, stateNonZeroSize) =
1002  assumeZero(C, state, sizeVal, sizeTy);
1003 
1004  // Get the value of the Dest.
1005  SVal destVal = state->getSVal(Dest, LCtx);
1006 
1007  // If the size is zero, there won't be any actual memory access, so
1008  // just bind the return value to the destination buffer and return.
1009  if (stateZeroSize && !stateNonZeroSize) {
1010  stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
1011  C.addTransition(stateZeroSize);
1012  return;
1013  }
1014 
1015  // If the size can be nonzero, we have to check the other arguments.
1016  if (stateNonZeroSize) {
1017  state = stateNonZeroSize;
1018 
1019  // Ensure the destination is not null. If it is NULL there will be a
1020  // NULL pointer dereference.
1021  state = checkNonNull(C, state, Dest, destVal);
1022  if (!state)
1023  return;
1024 
1025  // Get the value of the Src.
1026  SVal srcVal = state->getSVal(Source, LCtx);
1027 
1028  // Ensure the source is not null. If it is NULL there will be a
1029  // NULL pointer dereference.
1030  state = checkNonNull(C, state, Source, srcVal);
1031  if (!state)
1032  return;
1033 
1034  // Ensure the accesses are valid and that the buffers do not overlap.
1035  const char * const writeWarning =
1036  "Memory copy function overflows destination buffer";
1037  state = CheckBufferAccess(C, state, Size, Dest, Source,
1038  writeWarning, /* sourceWarning = */ nullptr);
1039  if (Restricted)
1040  state = CheckOverlap(C, state, Size, Dest, Source);
1041 
1042  if (!state)
1043  return;
1044 
1045  // If this is mempcpy, get the byte after the last byte copied and
1046  // bind the expr.
1047  if (IsMempcpy) {
1048  loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
1049 
1050  // Get the length to copy.
1051  if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
1052  // Get the byte after the last byte copied.
1053  SValBuilder &SvalBuilder = C.getSValBuilder();
1054  ASTContext &Ctx = SvalBuilder.getContext();
1055  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
1056  loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal,
1057  CharPtrTy, Dest->getType()).castAs<loc::MemRegionVal>();
1058  SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
1059  DestRegCharVal,
1060  *lenValNonLoc,
1061  Dest->getType());
1062 
1063  // The byte after the last byte copied is the return value.
1064  state = state->BindExpr(CE, LCtx, lastElement);
1065  } else {
1066  // If we don't know how much we copied, we can at least
1067  // conjure a return value for later.
1068  SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1069  C.blockCount());
1070  state = state->BindExpr(CE, LCtx, result);
1071  }
1072 
1073  } else {
1074  // All other copies return the destination buffer.
1075  // (Well, bcopy() has a void return type, but this won't hurt.)
1076  state = state->BindExpr(CE, LCtx, destVal);
1077  }
1078 
1079  // Invalidate the destination (regular invalidation without pointer-escaping
1080  // the address of the top-level region).
1081  // FIXME: Even if we can't perfectly model the copy, we should see if we
1082  // can use LazyCompoundVals to copy the source values into the destination.
1083  // This would probably remove any existing bindings past the end of the
1084  // copied region, but that's still an improvement over blank invalidation.
1085  state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
1086  /*IsSourceBuffer*/false, Size);
1087 
1088  // Invalidate the source (const-invalidation without const-pointer-escaping
1089  // the address of the top-level region).
1090  state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1091  /*IsSourceBuffer*/true, nullptr);
1092 
1093  C.addTransition(state);
1094  }
1095 }
1096 
1097 
1098 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1099  if (CE->getNumArgs() < 3)
1100  return;
1101 
1102  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1103  // The return value is the address of the destination buffer.
1104  const Expr *Dest = CE->getArg(0);
1105  ProgramStateRef state = C.getState();
1106 
1107  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1108 }
1109 
1110 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1111  if (CE->getNumArgs() < 3)
1112  return;
1113 
1114  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1115  // The return value is a pointer to the byte following the last written byte.
1116  const Expr *Dest = CE->getArg(0);
1117  ProgramStateRef state = C.getState();
1118 
1119  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1120 }
1121 
1122 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1123  if (CE->getNumArgs() < 3)
1124  return;
1125 
1126  // void *memmove(void *dst, const void *src, size_t n);
1127  // The return value is the address of the destination buffer.
1128  const Expr *Dest = CE->getArg(0);
1129  ProgramStateRef state = C.getState();
1130 
1131  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1132 }
1133 
1134 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1135  if (CE->getNumArgs() < 3)
1136  return;
1137 
1138  // void bcopy(const void *src, void *dst, size_t n);
1139  evalCopyCommon(C, CE, C.getState(),
1140  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1141 }
1142 
1143 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1144  if (CE->getNumArgs() < 3)
1145  return;
1146 
1147  // int memcmp(const void *s1, const void *s2, size_t n);
1148  CurrentFunctionDescription = "memory comparison function";
1149 
1150  const Expr *Left = CE->getArg(0);
1151  const Expr *Right = CE->getArg(1);
1152  const Expr *Size = CE->getArg(2);
1153 
1154  ProgramStateRef state = C.getState();
1155  SValBuilder &svalBuilder = C.getSValBuilder();
1156 
1157  // See if the size argument is zero.
1158  const LocationContext *LCtx = C.getLocationContext();
1159  SVal sizeVal = state->getSVal(Size, LCtx);
1160  QualType sizeTy = Size->getType();
1161 
1162  ProgramStateRef stateZeroSize, stateNonZeroSize;
1163  std::tie(stateZeroSize, stateNonZeroSize) =
1164  assumeZero(C, state, sizeVal, sizeTy);
1165 
1166  // If the size can be zero, the result will be 0 in that case, and we don't
1167  // have to check either of the buffers.
1168  if (stateZeroSize) {
1169  state = stateZeroSize;
1170  state = state->BindExpr(CE, LCtx,
1171  svalBuilder.makeZeroVal(CE->getType()));
1172  C.addTransition(state);
1173  }
1174 
1175  // If the size can be nonzero, we have to check the other arguments.
1176  if (stateNonZeroSize) {
1177  state = stateNonZeroSize;
1178  // If we know the two buffers are the same, we know the result is 0.
1179  // First, get the two buffers' addresses. Another checker will have already
1180  // made sure they're not undefined.
1182  state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1184  state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1185 
1186  // See if they are the same.
1187  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1188  ProgramStateRef StSameBuf, StNotSameBuf;
1189  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1190 
1191  // If the two arguments might be the same buffer, we know the result is 0,
1192  // and we only need to check one size.
1193  if (StSameBuf) {
1194  state = StSameBuf;
1195  state = CheckBufferAccess(C, state, Size, Left);
1196  if (state) {
1197  state = StSameBuf->BindExpr(CE, LCtx,
1198  svalBuilder.makeZeroVal(CE->getType()));
1199  C.addTransition(state);
1200  }
1201  }
1202 
1203  // If the two arguments might be different buffers, we have to check the
1204  // size of both of them.
1205  if (StNotSameBuf) {
1206  state = StNotSameBuf;
1207  state = CheckBufferAccess(C, state, Size, Left, Right);
1208  if (state) {
1209  // The return value is the comparison result, which we don't know.
1210  SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1211  C.blockCount());
1212  state = state->BindExpr(CE, LCtx, CmpV);
1213  C.addTransition(state);
1214  }
1215  }
1216  }
1217 }
1218 
1219 void CStringChecker::evalstrLength(CheckerContext &C,
1220  const CallExpr *CE) const {
1221  if (CE->getNumArgs() < 1)
1222  return;
1223 
1224  // size_t strlen(const char *s);
1225  evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1226 }
1227 
1228 void CStringChecker::evalstrnLength(CheckerContext &C,
1229  const CallExpr *CE) const {
1230  if (CE->getNumArgs() < 2)
1231  return;
1232 
1233  // size_t strnlen(const char *s, size_t maxlen);
1234  evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1235 }
1236 
1237 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1238  bool IsStrnlen) const {
1239  CurrentFunctionDescription = "string length function";
1240  ProgramStateRef state = C.getState();
1241  const LocationContext *LCtx = C.getLocationContext();
1242 
1243  if (IsStrnlen) {
1244  const Expr *maxlenExpr = CE->getArg(1);
1245  SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1246 
1247  ProgramStateRef stateZeroSize, stateNonZeroSize;
1248  std::tie(stateZeroSize, stateNonZeroSize) =
1249  assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1250 
1251  // If the size can be zero, the result will be 0 in that case, and we don't
1252  // have to check the string itself.
1253  if (stateZeroSize) {
1254  SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1255  stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1256  C.addTransition(stateZeroSize);
1257  }
1258 
1259  // If the size is GUARANTEED to be zero, we're done!
1260  if (!stateNonZeroSize)
1261  return;
1262 
1263  // Otherwise, record the assumption that the size is nonzero.
1264  state = stateNonZeroSize;
1265  }
1266 
1267  // Check that the string argument is non-null.
1268  const Expr *Arg = CE->getArg(0);
1269  SVal ArgVal = state->getSVal(Arg, LCtx);
1270 
1271  state = checkNonNull(C, state, Arg, ArgVal);
1272 
1273  if (!state)
1274  return;
1275 
1276  SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1277 
1278  // If the argument isn't a valid C string, there's no valid state to
1279  // transition to.
1280  if (strLength.isUndef())
1281  return;
1282 
1283  DefinedOrUnknownSVal result = UnknownVal();
1284 
1285  // If the check is for strnlen() then bind the return value to no more than
1286  // the maxlen value.
1287  if (IsStrnlen) {
1289 
1290  // It's a little unfortunate to be getting this again,
1291  // but it's not that expensive...
1292  const Expr *maxlenExpr = CE->getArg(1);
1293  SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1294 
1295  Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1296  Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1297 
1298  if (strLengthNL && maxlenValNL) {
1299  ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1300 
1301  // Check if the strLength is greater than the maxlen.
1302  std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1303  C.getSValBuilder()
1304  .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1306 
1307  if (stateStringTooLong && !stateStringNotTooLong) {
1308  // If the string is longer than maxlen, return maxlen.
1309  result = *maxlenValNL;
1310  } else if (stateStringNotTooLong && !stateStringTooLong) {
1311  // If the string is shorter than maxlen, return its length.
1312  result = *strLengthNL;
1313  }
1314  }
1315 
1316  if (result.isUnknown()) {
1317  // If we don't have enough information for a comparison, there's
1318  // no guarantee the full string length will actually be returned.
1319  // All we know is the return value is the min of the string length
1320  // and the limit. This is better than nothing.
1321  result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1322  C.blockCount());
1323  NonLoc resultNL = result.castAs<NonLoc>();
1324 
1325  if (strLengthNL) {
1326  state = state->assume(C.getSValBuilder().evalBinOpNN(
1327  state, BO_LE, resultNL, *strLengthNL, cmpTy)
1328  .castAs<DefinedOrUnknownSVal>(), true);
1329  }
1330 
1331  if (maxlenValNL) {
1332  state = state->assume(C.getSValBuilder().evalBinOpNN(
1333  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1334  .castAs<DefinedOrUnknownSVal>(), true);
1335  }
1336  }
1337 
1338  } else {
1339  // This is a plain strlen(), not strnlen().
1340  result = strLength.castAs<DefinedOrUnknownSVal>();
1341 
1342  // If we don't know the length of the string, conjure a return
1343  // value, so it can be used in constraints, at least.
1344  if (result.isUnknown()) {
1345  result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1346  C.blockCount());
1347  }
1348  }
1349 
1350  // Bind the return value.
1351  assert(!result.isUnknown() && "Should have conjured a value by now");
1352  state = state->BindExpr(CE, LCtx, result);
1353  C.addTransition(state);
1354 }
1355 
1356 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1357  if (CE->getNumArgs() < 2)
1358  return;
1359 
1360  // char *strcpy(char *restrict dst, const char *restrict src);
1361  evalStrcpyCommon(C, CE,
1362  /* returnEnd = */ false,
1363  /* isBounded = */ false,
1364  /* isAppending = */ false);
1365 }
1366 
1367 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1368  if (CE->getNumArgs() < 3)
1369  return;
1370 
1371  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1372  evalStrcpyCommon(C, CE,
1373  /* returnEnd = */ false,
1374  /* isBounded = */ true,
1375  /* isAppending = */ false);
1376 }
1377 
1378 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1379  if (CE->getNumArgs() < 2)
1380  return;
1381 
1382  // char *stpcpy(char *restrict dst, const char *restrict src);
1383  evalStrcpyCommon(C, CE,
1384  /* returnEnd = */ true,
1385  /* isBounded = */ false,
1386  /* isAppending = */ false);
1387 }
1388 
1389 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1390  if (CE->getNumArgs() < 2)
1391  return;
1392 
1393  //char *strcat(char *restrict s1, const char *restrict s2);
1394  evalStrcpyCommon(C, CE,
1395  /* returnEnd = */ false,
1396  /* isBounded = */ false,
1397  /* isAppending = */ true);
1398 }
1399 
1400 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1401  if (CE->getNumArgs() < 3)
1402  return;
1403 
1404  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1405  evalStrcpyCommon(C, CE,
1406  /* returnEnd = */ false,
1407  /* isBounded = */ true,
1408  /* isAppending = */ true);
1409 }
1410 
1411 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1412  bool returnEnd, bool isBounded,
1413  bool isAppending) const {
1414  CurrentFunctionDescription = "string copy function";
1415  ProgramStateRef state = C.getState();
1416  const LocationContext *LCtx = C.getLocationContext();
1417 
1418  // Check that the destination is non-null.
1419  const Expr *Dst = CE->getArg(0);
1420  SVal DstVal = state->getSVal(Dst, LCtx);
1421 
1422  state = checkNonNull(C, state, Dst, DstVal);
1423  if (!state)
1424  return;
1425 
1426  // Check that the source is non-null.
1427  const Expr *srcExpr = CE->getArg(1);
1428  SVal srcVal = state->getSVal(srcExpr, LCtx);
1429  state = checkNonNull(C, state, srcExpr, srcVal);
1430  if (!state)
1431  return;
1432 
1433  // Get the string length of the source.
1434  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1435 
1436  // If the source isn't a valid C string, give up.
1437  if (strLength.isUndef())
1438  return;
1439 
1440  SValBuilder &svalBuilder = C.getSValBuilder();
1441  QualType cmpTy = svalBuilder.getConditionType();
1442  QualType sizeTy = svalBuilder.getContext().getSizeType();
1443 
1444  // These two values allow checking two kinds of errors:
1445  // - actual overflows caused by a source that doesn't fit in the destination
1446  // - potential overflows caused by a bound that could exceed the destination
1447  SVal amountCopied = UnknownVal();
1448  SVal maxLastElementIndex = UnknownVal();
1449  const char *boundWarning = nullptr;
1450 
1451  // If the function is strncpy, strncat, etc... it is bounded.
1452  if (isBounded) {
1453  // Get the max number of characters to copy.
1454  const Expr *lenExpr = CE->getArg(2);
1455  SVal lenVal = state->getSVal(lenExpr, LCtx);
1456 
1457  // Protect against misdeclared strncpy().
1458  lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1459 
1460  Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1461  Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1462 
1463  // If we know both values, we might be able to figure out how much
1464  // we're copying.
1465  if (strLengthNL && lenValNL) {
1466  ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1467 
1468  // Check if the max number to copy is less than the length of the src.
1469  // If the bound is equal to the source length, strncpy won't null-
1470  // terminate the result!
1471  std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1472  svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1474 
1475  if (stateSourceTooLong && !stateSourceNotTooLong) {
1476  // Max number to copy is less than the length of the src, so the actual
1477  // strLength copied is the max number arg.
1478  state = stateSourceTooLong;
1479  amountCopied = lenVal;
1480 
1481  } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1482  // The source buffer entirely fits in the bound.
1483  state = stateSourceNotTooLong;
1484  amountCopied = strLength;
1485  }
1486  }
1487 
1488  // We still want to know if the bound is known to be too large.
1489  if (lenValNL) {
1490  if (isAppending) {
1491  // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1492 
1493  // Get the string length of the destination. If the destination is
1494  // memory that can't have a string length, we shouldn't be copying
1495  // into it anyway.
1496  SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1497  if (dstStrLength.isUndef())
1498  return;
1499 
1500  if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1501  maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1502  *lenValNL,
1503  *dstStrLengthNL,
1504  sizeTy);
1505  boundWarning = "Size argument is greater than the free space in the "
1506  "destination buffer";
1507  }
1508 
1509  } else {
1510  // For strncpy, this is just checking that lenVal <= sizeof(dst)
1511  // (Yes, strncpy and strncat differ in how they treat termination.
1512  // strncat ALWAYS terminates, but strncpy doesn't.)
1513 
1514  // We need a special case for when the copy size is zero, in which
1515  // case strncpy will do no work at all. Our bounds check uses n-1
1516  // as the last element accessed, so n == 0 is problematic.
1517  ProgramStateRef StateZeroSize, StateNonZeroSize;
1518  std::tie(StateZeroSize, StateNonZeroSize) =
1519  assumeZero(C, state, *lenValNL, sizeTy);
1520 
1521  // If the size is known to be zero, we're done.
1522  if (StateZeroSize && !StateNonZeroSize) {
1523  StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1524  C.addTransition(StateZeroSize);
1525  return;
1526  }
1527 
1528  // Otherwise, go ahead and figure out the last element we'll touch.
1529  // We don't record the non-zero assumption here because we can't
1530  // be sure. We won't warn on a possible zero.
1531  NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1532  maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1533  one, sizeTy);
1534  boundWarning = "Size argument is greater than the length of the "
1535  "destination buffer";
1536  }
1537  }
1538 
1539  // If we couldn't pin down the copy length, at least bound it.
1540  // FIXME: We should actually run this code path for append as well, but
1541  // right now it creates problems with constraints (since we can end up
1542  // trying to pass constraints from symbol to symbol).
1543  if (amountCopied.isUnknown() && !isAppending) {
1544  // Try to get a "hypothetical" string length symbol, which we can later
1545  // set as a real value if that turns out to be the case.
1546  amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1547  assert(!amountCopied.isUndef());
1548 
1549  if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1550  if (lenValNL) {
1551  // amountCopied <= lenVal
1552  SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1553  *amountCopiedNL,
1554  *lenValNL,
1555  cmpTy);
1556  state = state->assume(
1557  copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1558  if (!state)
1559  return;
1560  }
1561 
1562  if (strLengthNL) {
1563  // amountCopied <= strlen(source)
1564  SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1565  *amountCopiedNL,
1566  *strLengthNL,
1567  cmpTy);
1568  state = state->assume(
1569  copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1570  if (!state)
1571  return;
1572  }
1573  }
1574  }
1575 
1576  } else {
1577  // The function isn't bounded. The amount copied should match the length
1578  // of the source buffer.
1579  amountCopied = strLength;
1580  }
1581 
1582  assert(state);
1583 
1584  // This represents the number of characters copied into the destination
1585  // buffer. (It may not actually be the strlen if the destination buffer
1586  // is not terminated.)
1587  SVal finalStrLength = UnknownVal();
1588 
1589  // If this is an appending function (strcat, strncat...) then set the
1590  // string length to strlen(src) + strlen(dst) since the buffer will
1591  // ultimately contain both.
1592  if (isAppending) {
1593  // Get the string length of the destination. If the destination is memory
1594  // that can't have a string length, we shouldn't be copying into it anyway.
1595  SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1596  if (dstStrLength.isUndef())
1597  return;
1598 
1599  Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1600  Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1601 
1602  // If we know both string lengths, we might know the final string length.
1603  if (srcStrLengthNL && dstStrLengthNL) {
1604  // Make sure the two lengths together don't overflow a size_t.
1605  state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1606  if (!state)
1607  return;
1608 
1609  finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1610  *dstStrLengthNL, sizeTy);
1611  }
1612 
1613  // If we couldn't get a single value for the final string length,
1614  // we can at least bound it by the individual lengths.
1615  if (finalStrLength.isUnknown()) {
1616  // Try to get a "hypothetical" string length symbol, which we can later
1617  // set as a real value if that turns out to be the case.
1618  finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1619  assert(!finalStrLength.isUndef());
1620 
1621  if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1622  if (srcStrLengthNL) {
1623  // finalStrLength >= srcStrLength
1624  SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1625  *finalStrLengthNL,
1626  *srcStrLengthNL,
1627  cmpTy);
1628  state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1629  true);
1630  if (!state)
1631  return;
1632  }
1633 
1634  if (dstStrLengthNL) {
1635  // finalStrLength >= dstStrLength
1636  SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1637  *finalStrLengthNL,
1638  *dstStrLengthNL,
1639  cmpTy);
1640  state =
1641  state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1642  if (!state)
1643  return;
1644  }
1645  }
1646  }
1647 
1648  } else {
1649  // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1650  // the final string length will match the input string length.
1651  finalStrLength = amountCopied;
1652  }
1653 
1654  // The final result of the function will either be a pointer past the last
1655  // copied element, or a pointer to the start of the destination buffer.
1656  SVal Result = (returnEnd ? UnknownVal() : DstVal);
1657 
1658  assert(state);
1659 
1660  // If the destination is a MemRegion, try to check for a buffer overflow and
1661  // record the new string length.
1662  if (Optional<loc::MemRegionVal> dstRegVal =
1663  DstVal.getAs<loc::MemRegionVal>()) {
1664  QualType ptrTy = Dst->getType();
1665 
1666  // If we have an exact value on a bounded copy, use that to check for
1667  // overflows, rather than our estimate about how much is actually copied.
1668  if (boundWarning) {
1669  if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1670  SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1671  *maxLastNL, ptrTy);
1672  state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1673  boundWarning);
1674  if (!state)
1675  return;
1676  }
1677  }
1678 
1679  // Then, if the final length is known...
1680  if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1681  SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1682  *knownStrLength, ptrTy);
1683 
1684  // ...and we haven't checked the bound, we'll check the actual copy.
1685  if (!boundWarning) {
1686  const char * const warningMsg =
1687  "String copy function overflows destination buffer";
1688  state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1689  if (!state)
1690  return;
1691  }
1692 
1693  // If this is a stpcpy-style copy, the last element is the return value.
1694  if (returnEnd)
1695  Result = lastElement;
1696  }
1697 
1698  // Invalidate the destination (regular invalidation without pointer-escaping
1699  // the address of the top-level region). This must happen before we set the
1700  // C string length because invalidation will clear the length.
1701  // FIXME: Even if we can't perfectly model the copy, we should see if we
1702  // can use LazyCompoundVals to copy the source values into the destination.
1703  // This would probably remove any existing bindings past the end of the
1704  // string, but that's still an improvement over blank invalidation.
1705  state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1706  /*IsSourceBuffer*/false, nullptr);
1707 
1708  // Invalidate the source (const-invalidation without const-pointer-escaping
1709  // the address of the top-level region).
1710  state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true,
1711  nullptr);
1712 
1713  // Set the C string length of the destination, if we know it.
1714  if (isBounded && !isAppending) {
1715  // strncpy is annoying in that it doesn't guarantee to null-terminate
1716  // the result string. If the original string didn't fit entirely inside
1717  // the bound (including the null-terminator), we don't know how long the
1718  // result is.
1719  if (amountCopied != strLength)
1720  finalStrLength = UnknownVal();
1721  }
1722  state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1723  }
1724 
1725  assert(state);
1726 
1727  // If this is a stpcpy-style copy, but we were unable to check for a buffer
1728  // overflow, we still need a result. Conjure a return value.
1729  if (returnEnd && Result.isUnknown()) {
1730  Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1731  }
1732 
1733  // Set the return value.
1734  state = state->BindExpr(CE, LCtx, Result);
1735  C.addTransition(state);
1736 }
1737 
1738 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1739  if (CE->getNumArgs() < 2)
1740  return;
1741 
1742  //int strcmp(const char *s1, const char *s2);
1743  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1744 }
1745 
1746 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1747  if (CE->getNumArgs() < 3)
1748  return;
1749 
1750  //int strncmp(const char *s1, const char *s2, size_t n);
1751  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1752 }
1753 
1754 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1755  const CallExpr *CE) const {
1756  if (CE->getNumArgs() < 2)
1757  return;
1758 
1759  //int strcasecmp(const char *s1, const char *s2);
1760  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1761 }
1762 
1763 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1764  const CallExpr *CE) const {
1765  if (CE->getNumArgs() < 3)
1766  return;
1767 
1768  //int strncasecmp(const char *s1, const char *s2, size_t n);
1769  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1770 }
1771 
1772 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1773  bool isBounded, bool ignoreCase) const {
1774  CurrentFunctionDescription = "string comparison function";
1775  ProgramStateRef state = C.getState();
1776  const LocationContext *LCtx = C.getLocationContext();
1777 
1778  // Check that the first string is non-null
1779  const Expr *s1 = CE->getArg(0);
1780  SVal s1Val = state->getSVal(s1, LCtx);
1781  state = checkNonNull(C, state, s1, s1Val);
1782  if (!state)
1783  return;
1784 
1785  // Check that the second string is non-null.
1786  const Expr *s2 = CE->getArg(1);
1787  SVal s2Val = state->getSVal(s2, LCtx);
1788  state = checkNonNull(C, state, s2, s2Val);
1789  if (!state)
1790  return;
1791 
1792  // Get the string length of the first string or give up.
1793  SVal s1Length = getCStringLength(C, state, s1, s1Val);
1794  if (s1Length.isUndef())
1795  return;
1796 
1797  // Get the string length of the second string or give up.
1798  SVal s2Length = getCStringLength(C, state, s2, s2Val);
1799  if (s2Length.isUndef())
1800  return;
1801 
1802  // If we know the two buffers are the same, we know the result is 0.
1803  // First, get the two buffers' addresses. Another checker will have already
1804  // made sure they're not undefined.
1807 
1808  // See if they are the same.
1809  SValBuilder &svalBuilder = C.getSValBuilder();
1810  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1811  ProgramStateRef StSameBuf, StNotSameBuf;
1812  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1813 
1814  // If the two arguments might be the same buffer, we know the result is 0,
1815  // and we only need to check one size.
1816  if (StSameBuf) {
1817  StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1818  svalBuilder.makeZeroVal(CE->getType()));
1819  C.addTransition(StSameBuf);
1820 
1821  // If the two arguments are GUARANTEED to be the same, we're done!
1822  if (!StNotSameBuf)
1823  return;
1824  }
1825 
1826  assert(StNotSameBuf);
1827  state = StNotSameBuf;
1828 
1829  // At this point we can go about comparing the two buffers.
1830  // For now, we only do this if they're both known string literals.
1831 
1832  // Attempt to extract string literals from both expressions.
1833  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1834  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1835  bool canComputeResult = false;
1836 
1837  if (s1StrLiteral && s2StrLiteral) {
1838  StringRef s1StrRef = s1StrLiteral->getString();
1839  StringRef s2StrRef = s2StrLiteral->getString();
1840 
1841  if (isBounded) {
1842  // Get the max number of characters to compare.
1843  const Expr *lenExpr = CE->getArg(2);
1844  SVal lenVal = state->getSVal(lenExpr, LCtx);
1845 
1846  // If the length is known, we can get the right substrings.
1847  if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1848  // Create substrings of each to compare the prefix.
1849  s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1850  s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1851  canComputeResult = true;
1852  }
1853  } else {
1854  // This is a normal, unbounded strcmp.
1855  canComputeResult = true;
1856  }
1857 
1858  if (canComputeResult) {
1859  // Real strcmp stops at null characters.
1860  size_t s1Term = s1StrRef.find('\0');
1861  if (s1Term != StringRef::npos)
1862  s1StrRef = s1StrRef.substr(0, s1Term);
1863 
1864  size_t s2Term = s2StrRef.find('\0');
1865  if (s2Term != StringRef::npos)
1866  s2StrRef = s2StrRef.substr(0, s2Term);
1867 
1868  // Use StringRef's comparison methods to compute the actual result.
1869  int result;
1870 
1871  if (ignoreCase) {
1872  // Compare string 1 to string 2 the same way strcasecmp() does.
1873  result = s1StrRef.compare_lower(s2StrRef);
1874  } else {
1875  // Compare string 1 to string 2 the same way strcmp() does.
1876  result = s1StrRef.compare(s2StrRef);
1877  }
1878 
1879  // Build the SVal of the comparison and bind the return value.
1880  SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1881  state = state->BindExpr(CE, LCtx, resultVal);
1882  }
1883  }
1884 
1885  if (!canComputeResult) {
1886  // Conjure a symbolic value. It's the best we can do.
1887  SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1888  C.blockCount());
1889  state = state->BindExpr(CE, LCtx, resultVal);
1890  }
1891 
1892  // Record this as a possible path.
1893  C.addTransition(state);
1894 }
1895 
1896 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1897  //char *strsep(char **stringp, const char *delim);
1898  if (CE->getNumArgs() < 2)
1899  return;
1900 
1901  // Sanity: does the search string parameter match the return type?
1902  const Expr *SearchStrPtr = CE->getArg(0);
1903  QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1904  if (CharPtrTy.isNull() ||
1905  CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1906  return;
1907 
1908  CurrentFunctionDescription = "strsep()";
1909  ProgramStateRef State = C.getState();
1910  const LocationContext *LCtx = C.getLocationContext();
1911 
1912  // Check that the search string pointer is non-null (though it may point to
1913  // a null string).
1914  SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1915  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1916  if (!State)
1917  return;
1918 
1919  // Check that the delimiter string is non-null.
1920  const Expr *DelimStr = CE->getArg(1);
1921  SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1922  State = checkNonNull(C, State, DelimStr, DelimStrVal);
1923  if (!State)
1924  return;
1925 
1926  SValBuilder &SVB = C.getSValBuilder();
1927  SVal Result;
1928  if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1929  // Get the current value of the search string pointer, as a char*.
1930  Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1931 
1932  // Invalidate the search string, representing the change of one delimiter
1933  // character to NUL.
1934  State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1935  /*IsSourceBuffer*/false, nullptr);
1936 
1937  // Overwrite the search string pointer. The new value is either an address
1938  // further along in the same string, or NULL if there are no more tokens.
1939  State = State->bindLoc(*SearchStrLoc,
1940  SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
1941  C.blockCount()));
1942  } else {
1943  assert(SearchStrVal.isUnknown());
1944  // Conjure a symbolic value. It's the best we can do.
1945  Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1946  }
1947 
1948  // Set the return value, and finish.
1949  State = State->BindExpr(CE, LCtx, Result);
1950  C.addTransition(State);
1951 }
1952 
1953 
1954 //===----------------------------------------------------------------------===//
1955 // The driver method, and other Checker callbacks.
1956 //===----------------------------------------------------------------------===//
1957 
1958 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1959  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1960 
1961  if (!FDecl)
1962  return false;
1963 
1964  // FIXME: Poorly-factored string switches are slow.
1965  FnCheck evalFunction = nullptr;
1966  if (C.isCLibraryFunction(FDecl, "memcpy"))
1967  evalFunction = &CStringChecker::evalMemcpy;
1968  else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1969  evalFunction = &CStringChecker::evalMempcpy;
1970  else if (C.isCLibraryFunction(FDecl, "memcmp"))
1971  evalFunction = &CStringChecker::evalMemcmp;
1972  else if (C.isCLibraryFunction(FDecl, "memmove"))
1973  evalFunction = &CStringChecker::evalMemmove;
1974  else if (C.isCLibraryFunction(FDecl, "strcpy"))
1975  evalFunction = &CStringChecker::evalStrcpy;
1976  else if (C.isCLibraryFunction(FDecl, "strncpy"))
1977  evalFunction = &CStringChecker::evalStrncpy;
1978  else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1979  evalFunction = &CStringChecker::evalStpcpy;
1980  else if (C.isCLibraryFunction(FDecl, "strcat"))
1981  evalFunction = &CStringChecker::evalStrcat;
1982  else if (C.isCLibraryFunction(FDecl, "strncat"))
1983  evalFunction = &CStringChecker::evalStrncat;
1984  else if (C.isCLibraryFunction(FDecl, "strlen"))
1985  evalFunction = &CStringChecker::evalstrLength;
1986  else if (C.isCLibraryFunction(FDecl, "strnlen"))
1987  evalFunction = &CStringChecker::evalstrnLength;
1988  else if (C.isCLibraryFunction(FDecl, "strcmp"))
1989  evalFunction = &CStringChecker::evalStrcmp;
1990  else if (C.isCLibraryFunction(FDecl, "strncmp"))
1991  evalFunction = &CStringChecker::evalStrncmp;
1992  else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1993  evalFunction = &CStringChecker::evalStrcasecmp;
1994  else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1995  evalFunction = &CStringChecker::evalStrncasecmp;
1996  else if (C.isCLibraryFunction(FDecl, "strsep"))
1997  evalFunction = &CStringChecker::evalStrsep;
1998  else if (C.isCLibraryFunction(FDecl, "bcopy"))
1999  evalFunction = &CStringChecker::evalBcopy;
2000  else if (C.isCLibraryFunction(FDecl, "bcmp"))
2001  evalFunction = &CStringChecker::evalMemcmp;
2002 
2003  // If the callee isn't a string function, let another checker handle it.
2004  if (!evalFunction)
2005  return false;
2006 
2007  // Check and evaluate the call.
2008  (this->*evalFunction)(C, CE);
2009 
2010  // If the evaluate call resulted in no change, chain to the next eval call
2011  // handler.
2012  // Note, the custom CString evaluation calls assume that basic safety
2013  // properties are held. However, if the user chooses to turn off some of these
2014  // checks, we ignore the issues and leave the call evaluation to a generic
2015  // handler.
2016  return C.isDifferent();
2017 }
2018 
2019 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2020  // Record string length for char a[] = "abc";
2021  ProgramStateRef state = C.getState();
2022 
2023  for (const auto *I : DS->decls()) {
2024  const VarDecl *D = dyn_cast<VarDecl>(I);
2025  if (!D)
2026  continue;
2027 
2028  // FIXME: Handle array fields of structs.
2029  if (!D->getType()->isArrayType())
2030  continue;
2031 
2032  const Expr *Init = D->getInit();
2033  if (!Init)
2034  continue;
2035  if (!isa<StringLiteral>(Init))
2036  continue;
2037 
2038  Loc VarLoc = state->getLValue(D, C.getLocationContext());
2039  const MemRegion *MR = VarLoc.getAsRegion();
2040  if (!MR)
2041  continue;
2042 
2043  SVal StrVal = state->getSVal(Init, C.getLocationContext());
2044  assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2045  DefinedOrUnknownSVal strLength =
2046  getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2047 
2048  state = state->set<CStringLength>(MR, strLength);
2049  }
2050 
2051  C.addTransition(state);
2052 }
2053 
2054 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
2055  CStringLengthTy Entries = state->get<CStringLength>();
2056  return !Entries.isEmpty();
2057 }
2058 
2059 ProgramStateRef
2060 CStringChecker::checkRegionChanges(ProgramStateRef state,
2061  const InvalidatedSymbols *,
2062  ArrayRef<const MemRegion *> ExplicitRegions,
2064  const CallEvent *Call) const {
2065  CStringLengthTy Entries = state->get<CStringLength>();
2066  if (Entries.isEmpty())
2067  return state;
2068 
2069  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2070  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2071 
2072  // First build sets for the changed regions and their super-regions.
2074  I = Regions.begin(), E = Regions.end(); I != E; ++I) {
2075  const MemRegion *MR = *I;
2076  Invalidated.insert(MR);
2077 
2078  SuperRegions.insert(MR);
2079  while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2080  MR = SR->getSuperRegion();
2081  SuperRegions.insert(MR);
2082  }
2083  }
2084 
2085  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2086 
2087  // Then loop over the entries in the current state.
2088  for (CStringLengthTy::iterator I = Entries.begin(),
2089  E = Entries.end(); I != E; ++I) {
2090  const MemRegion *MR = I.getKey();
2091 
2092  // Is this entry for a super-region of a changed region?
2093  if (SuperRegions.count(MR)) {
2094  Entries = F.remove(Entries, MR);
2095  continue;
2096  }
2097 
2098  // Is this entry for a sub-region of a changed region?
2099  const MemRegion *Super = MR;
2100  while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2101  Super = SR->getSuperRegion();
2102  if (Invalidated.count(Super)) {
2103  Entries = F.remove(Entries, MR);
2104  break;
2105  }
2106  }
2107  }
2108 
2109  return state->set<CStringLength>(Entries);
2110 }
2111 
2112 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2113  SymbolReaper &SR) const {
2114  // Mark all symbols in our string length map as valid.
2115  CStringLengthTy Entries = state->get<CStringLength>();
2116 
2117  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2118  I != E; ++I) {
2119  SVal Len = I.getData();
2120 
2121  for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2122  se = Len.symbol_end(); si != se; ++si)
2123  SR.markInUse(*si);
2124  }
2125 }
2126 
2127 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2128  CheckerContext &C) const {
2129  if (!SR.hasDeadSymbols())
2130  return;
2131 
2132  ProgramStateRef state = C.getState();
2133  CStringLengthTy Entries = state->get<CStringLength>();
2134  if (Entries.isEmpty())
2135  return;
2136 
2137  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2138  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2139  I != E; ++I) {
2140  SVal Len = I.getData();
2141  if (SymbolRef Sym = Len.getAsSymbol()) {
2142  if (SR.isDead(Sym))
2143  Entries = F.remove(Entries, I.getKey());
2144  }
2145  }
2146 
2147  state = state->set<CStringLength>(Entries);
2148  C.addTransition(state);
2149 }
2150 
2151 #define REGISTER_CHECKER(name) \
2152  void ento::register##name(CheckerManager &mgr) { \
2153  CStringChecker *checker = mgr.registerChecker<CStringChecker>(); \
2154  checker->Filter.Check##name = true; \
2155  checker->Filter.CheckName##name = mgr.getCurrentCheckName(); \
2156  }
2157 
2158 REGISTER_CHECKER(CStringNullArg)
2159 REGISTER_CHECKER(CStringOutOfBounds)
2160 REGISTER_CHECKER(CStringBufferOverlap)
2161 REGISTER_CHECKER(CStringNotNullTerm)
2162 
2164  registerCStringNullArg(Mgr);
2165 }
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
Definition: Decl.h:1483
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
Definition: ASTMatchers.h:1192
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:510
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
Definition: SValBuilder.h:236
unsigned Length
A (possibly-)qualified type.
Definition: Type.h:575
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:78
ExplodedNode * generateErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2199
bool hasDeadSymbols() const
Information about invalidation for a particular region/symbol.
Definition: MemRegion.h:1332
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
A helper class which wraps a boolean value set to false by default.
Definition: Checker.h:542
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph).
virtual QualType getValueType() const =0
std::string getAsString() const
Definition: Type.h:901
const Expr * getInit() const
Definition: Decl.h:1070
SVal evalCast(SVal val, QualType castTy, QualType originalType)
Value representing integer constant.
Definition: SVals.h:339
VarDecl - An instance of this class is created to represent a variable declaration or definition...
Definition: Decl.h:699
void setTrait(SymbolRef Sym, InvalidationKinds IK)
Definition: MemRegion.cpp:1476
ExplodedNode * getPredecessor()
Returns the previous node in the exploded graph, which includes the state of the program before the c...
Symbolic value.
Definition: SymbolManager.h:42
void markInUse(SymbolRef sym)
Marks a symbol as important to a checker.
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:91
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
LineState State
Kind getKind() const
Definition: MemRegion.h:185
unsigned blockCount() const
Returns the number of times the current block has been visited along the analyzed path...
void registerCStringCheckerBasic(CheckerManager &Mgr)
Register the checker which evaluates CString API calls.
const StringLiteral * getStringLiteral() const
Definition: MemRegion.h:767
A record of the "type" of an APSInt, used for conversions.
Definition: APSIntType.h:20
SymExpr::symbol_iterator symbol_begin() const
Definition: SVals.h:177
bool isValid() const
Definition: SVals.h:129
detail::InMemoryDirectory::const_iterator I
QualType getType() const
Definition: Decl.h:530
const MemRegion * getSuperRegion() const
Definition: MemRegion.h:433
const LocationContext * getLocationContext() const
#define REGISTER_CHECKER(name)
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
const MemRegion * StripCasts(bool StripBaseCasts=true) const
Definition: MemRegion.cpp:1105
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:415
bool isDead(SymbolRef sym) const
Returns whether or not a symbol has been confirmed dead.
DefinedOrUnknownSVal makeZeroVal(QualType type)
Construct an SVal representing '0' for the specified type.
Definition: SValBuilder.cpp:32
Expr - This represents one expression.
Definition: Expr.h:104
const ProgramStateRef & getState() const
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
Definition: SVals.h:86
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
ExplodedNode * generateNonFatalErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
The result type of a method or function.
QualType getConditionType() const
Definition: SValBuilder.h:130
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, const Expr *expr, const LocationContext *LCtx, unsigned count)
Create a new symbol with a unique 'name'.
const TemplateArgument * iterator
Definition: Type.h:4070
DeclStmt - Adaptor class for mixing declarations with statements and expressions. ...
Definition: Stmt.h:431
ASTContext & getContext()
Definition: SValBuilder.h:125
SymExpr::symbol_iterator symbol_end() const
Definition: SVals.h:185
SVal - This represents a symbolic expression, which can be either an L-value or an R-value...
Definition: SVals.h:44
A class responsible for cleaning up unused symbols.
bool isUndef() const
Definition: SVals.h:121
const llvm::APSInt * evalAPSInt(BinaryOperator::Opcode Op, const llvm::APSInt &V1, const llvm::APSInt &V2)
Tells that a region's contents is not changed.
Definition: MemRegion.h:1346
NonLoc getIndex() const
Definition: MemRegion.h:1039
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
QualType getType() const
Definition: Expr.h:125
CanQualType CharTy
Definition: ASTContext.h:883
llvm::APSInt getValue(uint64_t RawValue) const LLVM_READONLY
Definition: APSIntType.h:70
unsigned getByteLength() const
Definition: Expr.h:1532
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
StringRef getString() const
Definition: Expr.h:1500
DefinedSVal getMetadataSymbolVal(const void *symbolTag, const MemRegion *region, const Expr *expr, QualType type, unsigned count)
detail::InMemoryDirectory::const_iterator E
const MemRegion * getAsRegion() const
Definition: SVals.cpp:135
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2187
Represents an abstract call to a function or method along a particular path.
Definition: CallEvent.h:113
SVal convertToArrayIndex(SVal val)
Definition: SValBuilder.cpp:76
BasicValueFactory & getBasicValueFactory()
Definition: SValBuilder.h:138
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:426
bool isUnknown() const
Definition: SVals.h:117
decl_range decls()
Definition: Stmt.h:479
QualType getUnqualifiedType() const
Retrieve the unqualified variant of the given type, removing as little sugar as possible.
Definition: Type.h:5169
DefinedOrUnknownSVal evalEQ(ProgramStateRef state, DefinedOrUnknownSVal lhs, DefinedOrUnknownSVal rhs)
bool trackNullOrUndefValue(const ExplodedNode *N, const Stmt *S, BugReport &R, bool IsArg=false, bool EnableNullFPSuppression=true)
Attempts to add visitors to trace a null or undefined value back to its point of origin, whether it is a symbol constrained to null or an explicit assignment.
QualType getValueType() const override
Definition: MemRegion.h:1041
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
Definition: SVals.cpp:111
bool isArrayType() const
Definition: Type.h:5344
SValBuilder & getSValBuilder()
StringLiteral - This represents a string literal expression, e.g.
Definition: Expr.h:1452
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2134
const llvm::APSInt & getMaxValue(const llvm::APSInt &v)
StringRegion - Region associated with a StringLiteral.
Definition: MemRegion.h:753
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1020
static LLVM_READONLY char toUppercase(char c)
Converts the given ASCII character to its uppercase equivalent.
Definition: CharInfo.h:174
NamedDecl - This represents a decl with a name.
Definition: Decl.h:145
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:642
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:75
bool isDifferent()
Check if the checker changed the state of the execution; ex: added a new transition or a bug report...
const LocationContext * getLocationContext() const
SVal getSVal(const Stmt *S) const
Get the value of arbitrary expressions at this point in the path.
Iterator over symbols that the current symbol depends on.
Definition: SymbolManager.h:84