27 using namespace clang;
31 class GenericTaintChecker :
public Checker< check::PostStmt<CallExpr>,
32 check::PreStmt<CallExpr> > {
/// Returns the address of a function-local static int. The same address is
/// returned on every call, so it can act as a stable, unique tag value.
/// NOTE(review): the consumer of this tag is not visible in this excerpt.
34 static void *getTag() {
static int Tag;
return &Tag; }
/// Sentinel argument index (UINT_MAX). In TaintPropagationRule::process an
/// entry equal to this value is followed by a loop over every call argument,
/// so in a rule it appears to mean "all arguments" rather than one position
/// (TODO confirm against the full body). checkTaintedBufferSize also uses it
/// as the initial "no argument selected" value for ArgNum.
41 static const unsigned InvalidArgIndex =
UINT_MAX;
/// Sentinel index (UINT_MAX - 1) that stands for the call's return value
/// instead of an argument position. Rules push it into DstArgs, and process()
/// records it in TaintArgsOnPostVisit when it is encountered.
43 static const unsigned ReturnValueIndex =
UINT_MAX - 1;
45 mutable std::unique_ptr<BugType> BT;
46 inline void initBugType()
const {
48 BT.reset(
new BugType(
this,
"Use of Untrusted Data",
"Untrusted Data"));
83 static const char MsgUncontrolledFormatString[];
84 bool checkUncontrolledFormatString(
const CallExpr *CE,
90 static const char MsgSanitizeSystemArgs[];
91 bool checkSystemCall(
const CallExpr *CE, StringRef Name,
96 static const char MsgTaintedBufferSize[];
101 bool generateReportIfTainted(
const Expr *E,
const char Msg[],
116 struct TaintPropagationRule {
/// Builds a rule without adding any source or destination arguments. This is
/// the value getTaintPropagationRule falls back to when no function matches.
123 TaintPropagationRule() {}
125 TaintPropagationRule(
unsigned SArg,
126 unsigned DArg,
bool TaintRet =
false) {
127 SrcArgs.push_back(SArg);
128 DstArgs.push_back(DArg);
130 DstArgs.push_back(ReturnValueIndex);
133 TaintPropagationRule(
unsigned SArg1,
unsigned SArg2,
134 unsigned DArg,
bool TaintRet =
false) {
135 SrcArgs.push_back(SArg1);
136 SrcArgs.push_back(SArg2);
137 DstArgs.push_back(DArg);
139 DstArgs.push_back(ReturnValueIndex);
143 static TaintPropagationRule
/// Appends argument index \p A to the rule's list of source arguments.
148 inline void addSrcArg(
unsigned A) { SrcArgs.push_back(A); }
/// Appends argument index \p A to the rule's list of destination arguments.
149 inline void addDstArg(
unsigned A) { DstArgs.push_back(A); }
/// Returns true when the rule has no source arguments (SrcArgs is empty).
/// addSourcesPre only calls process() on a rule for which this is false.
151 inline bool isNull()
const {
return SrcArgs.empty(); }
153 inline bool isDestinationArgument(
unsigned ArgNum)
const {
154 return (std::find(DstArgs.begin(),
155 DstArgs.end(), ArgNum) != DstArgs.end());
158 static inline bool isTaintedOrPointsToTainted(
const Expr *E,
163 State->isTainted(getPointedToSymbol(C, E))));
// Out-of-class definitions for the static const members whose values are
// given by the in-class initializers. Before C++17 these are required when
// the constants are odr-used, e.g. bound to a reference parameter.
173 const unsigned GenericTaintChecker::ReturnValueIndex;
174 const unsigned GenericTaintChecker::InvalidArgIndex;
// Diagnostic text that checkUncontrolledFormatString passes to
// generateReportIfTainted for the format-string argument.
176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177 "Untrusted data is used as a format string "
178 "(CWE-134: Uncontrolled Format String)";
// Diagnostic text that checkSystemCall passes to generateReportIfTainted for
// the selected call argument.
180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
181 "Untrusted data is passed to a system call "
182 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
// Diagnostic text that checkTaintedBufferSize passes to
// generateReportIfTainted for the selected size argument.
184 const char GenericTaintChecker::MsgTaintedBufferSize[] =
185 "Untrusted data is used to specify the buffer size "
186 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187 "character data and the null terminator)";
197 GenericTaintChecker::TaintPropagationRule
198 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
206 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
207 .Case(
"atoi", TaintPropagationRule(0, ReturnValueIndex))
208 .Case(
"atol", TaintPropagationRule(0, ReturnValueIndex))
209 .Case(
"atoll", TaintPropagationRule(0, ReturnValueIndex))
210 .Case(
"getc", TaintPropagationRule(0, ReturnValueIndex))
211 .Case(
"fgetc", TaintPropagationRule(0, ReturnValueIndex))
212 .Case(
"getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
213 .Case(
"getw", TaintPropagationRule(0, ReturnValueIndex))
214 .Case(
"toupper", TaintPropagationRule(0, ReturnValueIndex))
215 .Case(
"tolower", TaintPropagationRule(0, ReturnValueIndex))
216 .Case(
"strchr", TaintPropagationRule(0, ReturnValueIndex))
217 .Case(
"strrchr", TaintPropagationRule(0, ReturnValueIndex))
218 .Case(
"read", TaintPropagationRule(0, 2, 1,
true))
219 .Case(
"pread", TaintPropagationRule(InvalidArgIndex, 1,
true))
220 .Case(
"gets", TaintPropagationRule(InvalidArgIndex, 0,
true))
221 .Case(
"fgets", TaintPropagationRule(2, 0,
true))
222 .Case(
"getline", TaintPropagationRule(2, 0))
223 .Case(
"getdelim", TaintPropagationRule(3, 0))
224 .Case(
"fgetln", TaintPropagationRule(0, ReturnValueIndex))
225 .Default(TaintPropagationRule());
233 if ( (BId = FDecl->getMemoryFunctionKind()) )
235 case Builtin::BImemcpy:
236 case Builtin::BImemmove:
237 case Builtin::BIstrncpy:
238 case Builtin::BIstrncat:
239 return TaintPropagationRule(1, 2, 0,
true);
240 case Builtin::BIstrlcpy:
241 case Builtin::BIstrlcat:
242 return TaintPropagationRule(1, 2, 0,
false);
243 case Builtin::BIstrndup:
244 return TaintPropagationRule(0, 1, ReturnValueIndex);
252 if (C.isCLibraryFunction(FDecl,
"snprintf") ||
253 C.isCLibraryFunction(FDecl,
"sprintf"))
254 return TaintPropagationRule(InvalidArgIndex, 0,
true);
255 else if (C.isCLibraryFunction(FDecl,
"strcpy") ||
256 C.isCLibraryFunction(FDecl,
"stpcpy") ||
257 C.isCLibraryFunction(FDecl,
"strcat"))
258 return TaintPropagationRule(1, 0,
true);
259 else if (C.isCLibraryFunction(FDecl,
"bcopy"))
260 return TaintPropagationRule(0, 2, 1,
false);
261 else if (C.isCLibraryFunction(FDecl,
"strdup") ||
262 C.isCLibraryFunction(FDecl,
"strdupa"))
263 return TaintPropagationRule(0, ReturnValueIndex);
264 else if (C.isCLibraryFunction(FDecl,
"wcsdup"))
265 return TaintPropagationRule(0, ReturnValueIndex);
272 return TaintPropagationRule();
275 void GenericTaintChecker::checkPreStmt(
const CallExpr *CE,
282 addSourcesPre(CE, C);
285 void GenericTaintChecker::checkPostStmt(
const CallExpr *CE,
287 if (propagateFromPre(CE, C))
289 addSourcesPost(CE, C);
292 void GenericTaintChecker::addSourcesPre(
const CallExpr *CE,
296 if (!FDecl || FDecl->
getKind() != Decl::Function)
304 TaintPropagationRule Rule =
305 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
306 if (!Rule.isNull()) {
307 State = Rule.process(CE, C);
315 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
316 .Case(
"fscanf", &GenericTaintChecker::preFscanf)
320 State = (this->*evalFunction)(CE, C);
327 bool GenericTaintChecker::propagateFromPre(
const CallExpr *CE,
334 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
335 if (TaintArgs.isEmpty())
339 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
340 unsigned ArgNum = *I;
343 if (ArgNum == ReturnValueIndex) {
353 SymbolRef Sym = getPointedToSymbol(C, Arg);
355 State = State->addTaint(Sym);
359 State = State->remove<TaintArgsOnPostVisit>();
368 void GenericTaintChecker::addSourcesPost(
const CallExpr *CE,
373 if (!FDecl || FDecl->
getKind() != Decl::Function)
379 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
380 .Case(
"scanf", &GenericTaintChecker::postScanf)
382 .Case(
"getchar", &GenericTaintChecker::postRetTaint)
383 .Case(
"getchar_unlocked", &GenericTaintChecker::postRetTaint)
384 .Case(
"getenv", &GenericTaintChecker::postRetTaint)
385 .Case(
"fopen", &GenericTaintChecker::postRetTaint)
386 .Case(
"fdopen", &GenericTaintChecker::postRetTaint)
387 .Case(
"freopen", &GenericTaintChecker::postRetTaint)
388 .Case(
"getch", &GenericTaintChecker::postRetTaint)
389 .Case(
"wgetch", &GenericTaintChecker::postRetTaint)
390 .Case(
"socket", &GenericTaintChecker::postSocket)
397 State = (this->*evalFunction)(CE, C);
406 if (checkUncontrolledFormatString(CE, C))
410 if (!FDecl || FDecl->
getKind() != Decl::Function)
417 if (checkSystemCall(CE, Name, C))
420 if (checkTaintedBufferSize(CE, FDecl, C))
439 SVal Val = State->getSVal(*AddrLoc,
441 return Val.getAsSymbol();
445 GenericTaintChecker::TaintPropagationRule::process(
const CallExpr *CE,
450 bool IsTainted =
false;
451 for (ArgVector::const_iterator I = SrcArgs.begin(),
452 E = SrcArgs.end(); I != E; ++I) {
453 unsigned ArgNum = *I;
455 if (ArgNum == InvalidArgIndex) {
458 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
459 if (isDestinationArgument(i))
461 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(i),
State,
C)))
469 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(ArgNum),
State,
C)))
476 for (ArgVector::const_iterator I = DstArgs.begin(),
477 E = DstArgs.end(); I != E; ++I) {
478 unsigned ArgNum = *I;
481 if (ArgNum == InvalidArgIndex) {
486 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
493 State = State->add<TaintArgsOnPostVisit>(i);
499 if (ArgNum == ReturnValueIndex) {
500 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
505 assert(ArgNum < CE->getNumArgs());
506 State = State->add<TaintArgsOnPostVisit>(ArgNum);
524 for (
unsigned int i = 2; i < CE->
getNumArgs(); ++i)
525 State = State->add<TaintArgsOnPostVisit>(i);
543 if (DomName.equals(
"AF_SYSTEM") || DomName.equals(
"AF_LOCAL") ||
544 DomName.equals(
"AF_UNIX") || DomName.equals(
"AF_RESERVED_36"))
557 for (
unsigned int i = 1; i < CE->
getNumArgs(); ++i) {
561 SymbolRef Sym = getPointedToSymbol(C, Arg);
563 State = State->addTaint(Sym);
581 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
595 if (
const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->
getDecl())) {
596 D = D->getCanonicalDecl();
597 if ((D->getName().find(
"stdin") != StringRef::npos) && D->isExternC())
599 dyn_cast<PointerType>(D->getType().getTypePtr()))
608 unsigned int &ArgNum) {
616 ArgNum = Format->getFormatIdx() - 1;
617 if ((Format->getType()->getName() ==
"printf") &&
623 if (C.
getCalleeName(CE).find(
"setproctitle") != StringRef::npos) {
631 bool GenericTaintChecker::generateReportIfTainted(
const Expr *E,
638 if (!State->isTainted(getPointedToSymbol(C, E)) &&
645 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
646 report->addRange(E->getSourceRange());
653 bool GenericTaintChecker::checkUncontrolledFormatString(
const CallExpr *CE,
656 unsigned int ArgNum = 0;
661 if (generateReportIfTainted(CE->
getArg(ArgNum),
662 MsgUncontrolledFormatString,
C))
667 bool GenericTaintChecker::checkSystemCall(
const CallExpr *CE,
673 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
689 if (generateReportIfTainted(CE->
getArg(ArgNum),
690 MsgSanitizeSystemArgs,
C))
698 bool GenericTaintChecker::checkTaintedBufferSize(
const CallExpr *CE,
702 unsigned ArgNum = InvalidArgIndex;
706 case Builtin::BImemcpy:
707 case Builtin::BImemmove:
708 case Builtin::BIstrncpy:
711 case Builtin::BIstrndup:
718 if (ArgNum == InvalidArgIndex) {
731 if (ArgNum != InvalidArgIndex && CE->
getNumArgs() > ArgNum &&
732 generateReportIfTainted(CE->
getArg(ArgNum), MsgTaintedBufferSize,
C))
StringRef getCalleeName(const FunctionDecl *FunDecl) const
Get the name of the called function (path-sensitive).
MemRegion - The root abstract class for all memory regions.
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
const Decl * getDecl() const
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph). Uses the default CheckerContex...
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Symbolic value. These values used to capture symbolic execution of the program.
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
bool isReferenceType() const
SymbolRef getSymbol() const
bool isUnknownOrUndef() const
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
QualType getPointeeType() const
const ProgramStateRef & getState() const
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
QualType getFILEType() const
Retrieve the C FILE type.
const TypedValueRegion * getRegion() const
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
CHECKER * registerChecker()
Used to register checkers.
Encodes a location in the source. The SourceManager can decode this to get at the full include stack...
const Type * getTypePtr() const
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function. If the given function is a memory copy or setting func...
SourceLocation getExprLoc() const LLVM_READONLY
QualType getPointeeType() const
A symbol representing the value stored at a MemRegion.
ASTContext & getASTContext()
StringRef getMacroNameOrSpelling(SourceLocation &Loc)
Depending on whether the location corresponds to a macro, return either the macro name or the token sp...
const MemRegion * getAsRegion() const
unsigned getNumArgs() const
QualType getCanonicalType() const
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
bool isConstQualified() const
Determine whether this type is const-qualified.
bool isNull() const
isNull - Return true if this QualType doesn't point to a type yet.
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
const LocationContext * getLocationContext() const
bool isPointerType() const