clang  3.7.0
ScanfFormatString.cpp
Go to the documentation of this file.
1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Handling of format strings in scanf and friends. The structure of format
11 // strings for fscanf() is described in C99 7.19.6.2.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
18 
27 using namespace clang;
28 
31 
34  const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38 
39  // No more characters?
40  if (I == E) {
41  H.HandleIncompleteScanList(start, I);
42  return true;
43  }
44 
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47  if (++I == E) {
48  H.HandleIncompleteScanList(start, I - 1);
49  return true;
50  }
51  }
52 
53  // Special case: "^]" are the first characters.
54  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55  I += 2;
56  if (I == E) {
57  H.HandleIncompleteScanList(start, I - 1);
58  return true;
59  }
60  }
61 
62  // Look for a ']' character which denotes the end of the scan list.
63  while (*I != ']') {
64  if (++I == E) {
65  H.HandleIncompleteScanList(start, I - 1);
66  return true;
67  }
68  }
69 
70  CS.setEndScanList(I);
71  return false;
72 }
73 
74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75 // We can possibly refactor.
77  const char *&Beg,
78  const char *E,
79  unsigned &argIndex,
80  const LangOptions &LO,
81  const TargetInfo &Target) {
82 
83  using namespace clang::analyze_scanf;
84  const char *I = Beg;
85  const char *Start = nullptr;
86  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87 
88  // Look for a '%' character that indicates the start of a format specifier.
89  for ( ; I != E ; ++I) {
90  char c = *I;
91  if (c == '\0') {
92  // Detect spurious null characters, which are likely errors.
93  H.HandleNullChar(I);
94  return true;
95  }
96  if (c == '%') {
97  Start = I++; // Record the start of the format specifier.
98  break;
99  }
100  }
101 
102  // No format specifier found?
103  if (!Start)
104  return false;
105 
106  if (I == E) {
107  // No more characters left?
108  H.HandleIncompleteSpecifier(Start, E - Start);
109  return true;
110  }
111 
112  ScanfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114  return true;
115 
116  if (I == E) {
117  // No more characters left?
118  H.HandleIncompleteSpecifier(Start, E - Start);
119  return true;
120  }
121 
122  // Look for '*' flag if it is present.
123  if (*I == '*') {
124  FS.setSuppressAssignment(I);
125  if (++I == E) {
126  H.HandleIncompleteSpecifier(Start, E - Start);
127  return true;
128  }
129  }
130 
131  // Look for the field width (if any). Unlike printf, this is either
132  // a fixed integer or isn't present.
134  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135  assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136  FS.setFieldWidth(Amt);
137 
138  if (I == E) {
139  // No more characters left?
140  H.HandleIncompleteSpecifier(Start, E - Start);
141  return true;
142  }
143  }
144 
145  // Look for the length modifier.
146  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147  // No more characters left?
148  H.HandleIncompleteSpecifier(Start, E - Start);
149  return true;
150  }
151 
152  // Detect spurious null characters, which are likely errors.
153  if (*I == '\0') {
154  H.HandleNullChar(I);
155  return true;
156  }
157 
158  // Finally, look for the conversion specifier.
159  const char *conversionPosition = I++;
161  switch (*conversionPosition) {
162  default:
163  break;
164  case '%': k = ConversionSpecifier::PercentArg; break;
165  case 'A': k = ConversionSpecifier::AArg; break;
166  case 'E': k = ConversionSpecifier::EArg; break;
167  case 'F': k = ConversionSpecifier::FArg; break;
168  case 'G': k = ConversionSpecifier::GArg; break;
169  case 'X': k = ConversionSpecifier::XArg; break;
170  case 'a': k = ConversionSpecifier::aArg; break;
171  case 'd': k = ConversionSpecifier::dArg; break;
172  case 'e': k = ConversionSpecifier::eArg; break;
173  case 'f': k = ConversionSpecifier::fArg; break;
174  case 'g': k = ConversionSpecifier::gArg; break;
175  case 'i': k = ConversionSpecifier::iArg; break;
176  case 'n': k = ConversionSpecifier::nArg; break;
177  case 'c': k = ConversionSpecifier::cArg; break;
178  case 'C': k = ConversionSpecifier::CArg; break;
179  case 'S': k = ConversionSpecifier::SArg; break;
180  case '[': k = ConversionSpecifier::ScanListArg; break;
181  case 'u': k = ConversionSpecifier::uArg; break;
182  case 'x': k = ConversionSpecifier::xArg; break;
183  case 'o': k = ConversionSpecifier::oArg; break;
184  case 's': k = ConversionSpecifier::sArg; break;
185  case 'p': k = ConversionSpecifier::pArg; break;
186  // Apple extensions
187  // Apple-specific
188  case 'D':
189  if (Target.getTriple().isOSDarwin())
190  k = ConversionSpecifier::DArg;
191  break;
192  case 'O':
193  if (Target.getTriple().isOSDarwin())
194  k = ConversionSpecifier::OArg;
195  break;
196  case 'U':
197  if (Target.getTriple().isOSDarwin())
198  k = ConversionSpecifier::UArg;
199  break;
200  }
201  ScanfConversionSpecifier CS(conversionPosition, k);
203  if (ParseScanList(H, CS, I, E))
204  return true;
205  }
206  FS.setConversionSpecifier(CS);
207  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208  && !FS.usesPositionalArg())
209  FS.setArgIndex(argIndex++);
210 
211  // FIXME: '%' and '*' doesn't make sense. Issue a warning.
212  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213 
215  // Assume the conversion takes one argument.
216  return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
217  }
218  return ScanfSpecifierResult(Start, FS);
219 }
220 
221 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
222  const ScanfConversionSpecifier &CS = getConversionSpecifier();
223 
224  if (!CS.consumesDataArgument())
225  return ArgType::Invalid();
226 
227  switch(CS.getKind()) {
228  // Signed int.
229  case ConversionSpecifier::dArg:
230  case ConversionSpecifier::DArg:
231  case ConversionSpecifier::iArg:
232  switch (LM.getKind()) {
234  return ArgType::PtrTo(Ctx.IntTy);
235  case LengthModifier::AsChar:
236  return ArgType::PtrTo(ArgType::AnyCharTy);
237  case LengthModifier::AsShort:
238  return ArgType::PtrTo(Ctx.ShortTy);
239  case LengthModifier::AsLong:
240  return ArgType::PtrTo(Ctx.LongTy);
241  case LengthModifier::AsLongLong:
242  case LengthModifier::AsQuad:
243  return ArgType::PtrTo(Ctx.LongLongTy);
244  case LengthModifier::AsInt64:
245  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
246  case LengthModifier::AsIntMax:
247  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
248  case LengthModifier::AsSizeT:
249  // FIXME: ssize_t.
250  return ArgType();
251  case LengthModifier::AsPtrDiff:
252  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
253  case LengthModifier::AsLongDouble:
254  // GNU extension.
255  return ArgType::PtrTo(Ctx.LongLongTy);
256  case LengthModifier::AsAllocate:
257  case LengthModifier::AsMAllocate:
258  case LengthModifier::AsInt32:
259  case LengthModifier::AsInt3264:
260  case LengthModifier::AsWide:
261  return ArgType::Invalid();
262  }
263 
264  // Unsigned int.
265  case ConversionSpecifier::oArg:
266  case ConversionSpecifier::OArg:
267  case ConversionSpecifier::uArg:
268  case ConversionSpecifier::UArg:
269  case ConversionSpecifier::xArg:
270  case ConversionSpecifier::XArg:
271  switch (LM.getKind()) {
273  return ArgType::PtrTo(Ctx.UnsignedIntTy);
274  case LengthModifier::AsChar:
275  return ArgType::PtrTo(Ctx.UnsignedCharTy);
276  case LengthModifier::AsShort:
277  return ArgType::PtrTo(Ctx.UnsignedShortTy);
278  case LengthModifier::AsLong:
279  return ArgType::PtrTo(Ctx.UnsignedLongTy);
280  case LengthModifier::AsLongLong:
281  case LengthModifier::AsQuad:
282  return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
283  case LengthModifier::AsInt64:
284  return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
285  case LengthModifier::AsIntMax:
286  return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
287  case LengthModifier::AsSizeT:
288  return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
289  case LengthModifier::AsPtrDiff:
290  // FIXME: Unsigned version of ptrdiff_t?
291  return ArgType();
292  case LengthModifier::AsLongDouble:
293  // GNU extension.
294  return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
295  case LengthModifier::AsAllocate:
296  case LengthModifier::AsMAllocate:
297  case LengthModifier::AsInt32:
298  case LengthModifier::AsInt3264:
299  case LengthModifier::AsWide:
300  return ArgType::Invalid();
301  }
302 
303  // Float.
304  case ConversionSpecifier::aArg:
305  case ConversionSpecifier::AArg:
306  case ConversionSpecifier::eArg:
307  case ConversionSpecifier::EArg:
308  case ConversionSpecifier::fArg:
309  case ConversionSpecifier::FArg:
310  case ConversionSpecifier::gArg:
311  case ConversionSpecifier::GArg:
312  switch (LM.getKind()) {
314  return ArgType::PtrTo(Ctx.FloatTy);
315  case LengthModifier::AsLong:
316  return ArgType::PtrTo(Ctx.DoubleTy);
317  case LengthModifier::AsLongDouble:
318  return ArgType::PtrTo(Ctx.LongDoubleTy);
319  default:
320  return ArgType::Invalid();
321  }
322 
323  // Char, string and scanlist.
324  case ConversionSpecifier::cArg:
325  case ConversionSpecifier::sArg:
326  case ConversionSpecifier::ScanListArg:
327  switch (LM.getKind()) {
329  return ArgType::PtrTo(ArgType::AnyCharTy);
330  case LengthModifier::AsLong:
331  case LengthModifier::AsWide:
332  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
333  case LengthModifier::AsAllocate:
334  case LengthModifier::AsMAllocate:
335  return ArgType::PtrTo(ArgType::CStrTy);
336  case LengthModifier::AsShort:
337  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
338  return ArgType::PtrTo(ArgType::AnyCharTy);
339  default:
340  return ArgType::Invalid();
341  }
342  case ConversionSpecifier::CArg:
343  case ConversionSpecifier::SArg:
344  // FIXME: Mac OS X specific?
345  switch (LM.getKind()) {
347  case LengthModifier::AsWide:
348  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
349  case LengthModifier::AsAllocate:
350  case LengthModifier::AsMAllocate:
351  return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
352  case LengthModifier::AsShort:
353  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
354  return ArgType::PtrTo(ArgType::AnyCharTy);
355  default:
356  return ArgType::Invalid();
357  }
358 
359  // Pointer.
360  case ConversionSpecifier::pArg:
361  return ArgType::PtrTo(ArgType::CPointerTy);
362 
363  // Write-back.
364  case ConversionSpecifier::nArg:
365  switch (LM.getKind()) {
367  return ArgType::PtrTo(Ctx.IntTy);
368  case LengthModifier::AsChar:
369  return ArgType::PtrTo(Ctx.SignedCharTy);
370  case LengthModifier::AsShort:
371  return ArgType::PtrTo(Ctx.ShortTy);
372  case LengthModifier::AsLong:
373  return ArgType::PtrTo(Ctx.LongTy);
374  case LengthModifier::AsLongLong:
375  case LengthModifier::AsQuad:
376  return ArgType::PtrTo(Ctx.LongLongTy);
377  case LengthModifier::AsInt64:
378  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
379  case LengthModifier::AsIntMax:
380  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
381  case LengthModifier::AsSizeT:
382  return ArgType(); // FIXME: ssize_t
383  case LengthModifier::AsPtrDiff:
384  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
385  case LengthModifier::AsLongDouble:
386  return ArgType(); // FIXME: Is this a known extension?
387  case LengthModifier::AsAllocate:
388  case LengthModifier::AsMAllocate:
389  case LengthModifier::AsInt32:
390  case LengthModifier::AsInt3264:
391  case LengthModifier::AsWide:
392  return ArgType::Invalid();
393  }
394 
395  default:
396  break;
397  }
398 
399  return ArgType();
400 }
401 
402 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
403  const LangOptions &LangOpt,
404  ASTContext &Ctx) {
405 
406  // %n is different from other conversion specifiers; don't try to fix it.
407  if (CS.getKind() == ConversionSpecifier::nArg)
408  return false;
409 
410  if (!QT->isPointerType())
411  return false;
412 
413  QualType PT = QT->getPointeeType();
414 
415  // If it's an enum, get its underlying type.
416  if (const EnumType *ETy = PT->getAs<EnumType>())
417  PT = ETy->getDecl()->getIntegerType();
418 
419  const BuiltinType *BT = PT->getAs<BuiltinType>();
420  if (!BT)
421  return false;
422 
423  // Pointer to a character.
424  if (PT->isAnyCharacterType()) {
425  CS.setKind(ConversionSpecifier::sArg);
426  if (PT->isWideCharType())
427  LM.setKind(LengthModifier::AsWideChar);
428  else
429  LM.setKind(LengthModifier::None);
430 
431  // If we know the target array length, we can use it as a field width.
432  if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
433  if (CAT->getSizeModifier() == ArrayType::Normal)
434  FieldWidth = OptionalAmount(OptionalAmount::Constant,
435  CAT->getSize().getZExtValue() - 1,
436  "", 0, false);
437 
438  }
439  return true;
440  }
441 
442  // Figure out the length modifier.
443  switch (BT->getKind()) {
444  // no modifier
445  case BuiltinType::UInt:
446  case BuiltinType::Int:
447  case BuiltinType::Float:
448  LM.setKind(LengthModifier::None);
449  break;
450 
451  // hh
452  case BuiltinType::Char_U:
453  case BuiltinType::UChar:
454  case BuiltinType::Char_S:
455  case BuiltinType::SChar:
456  LM.setKind(LengthModifier::AsChar);
457  break;
458 
459  // h
460  case BuiltinType::Short:
461  case BuiltinType::UShort:
462  LM.setKind(LengthModifier::AsShort);
463  break;
464 
465  // l
466  case BuiltinType::Long:
467  case BuiltinType::ULong:
468  case BuiltinType::Double:
469  LM.setKind(LengthModifier::AsLong);
470  break;
471 
472  // ll
473  case BuiltinType::LongLong:
474  case BuiltinType::ULongLong:
475  LM.setKind(LengthModifier::AsLongLong);
476  break;
477 
478  // L
479  case BuiltinType::LongDouble:
480  LM.setKind(LengthModifier::AsLongDouble);
481  break;
482 
483  // Don't know.
484  default:
485  return false;
486  }
487 
488  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
489  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
490  namedTypeToLengthModifier(PT, LM);
491 
492  // If fixing the length modifier was enough, we are done.
493  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
494  const analyze_scanf::ArgType &AT = getArgType(Ctx);
495  if (AT.isValid() && AT.matchesType(Ctx, QT))
496  return true;
497  }
498 
499  // Figure out the conversion specifier.
500  if (PT->isRealFloatingType())
501  CS.setKind(ConversionSpecifier::fArg);
502  else if (PT->isSignedIntegerType())
503  CS.setKind(ConversionSpecifier::dArg);
504  else if (PT->isUnsignedIntegerType())
505  CS.setKind(ConversionSpecifier::uArg);
506  else
507  llvm_unreachable("Unexpected type");
508 
509  return true;
510 }
511 
512 void ScanfSpecifier::toString(raw_ostream &os) const {
513  os << "%";
514 
515  if (usesPositionalArg())
516  os << getPositionalArgIndex() << "$";
517  if (SuppressAssignment)
518  os << "*";
519 
520  FieldWidth.toString(os);
521  os << LM.toString();
522  os << CS.toString();
523 }
524 
526  const char *I,
527  const char *E,
528  const LangOptions &LO,
529  const TargetInfo &Target) {
530 
531  unsigned argIndex = 0;
532 
533  // Keep looking for a format specifier until we have exhausted the string.
534  while (I != E) {
535  const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
536  LO, Target);
537  // Did a fail-stop error of any kind occur when parsing the specifier?
538  // If so, don't do any more processing.
539  if (FSR.shouldStop())
540  return true;
541  // Did we exhaust the string or encounter an error that
542  // we can recover from?
543  if (!FSR.hasValue())
544  continue;
545  // We have a format specifier. Pass it to the callback.
546  if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
547  I - FSR.getStart())) {
548  return true;
549  }
550  }
551  assert(I == E && "Format string not exhausted");
552  return false;
553 }
Kind getKind() const
Definition: Type.h:2006
clang::analyze_format_string::SpecifierResult< ScanfSpecifier > ScanfSpecifierResult
CanQualType LongLongTy
Definition: ASTContext.h:825
virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:657
bool isAnyCharacterType() const
Determine whether this type is any of the built-in character types.
Definition: Type.cpp:1663
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E)
CanQualType LongTy
Definition: ASTContext.h:825
CanQualType getIntMaxType() const
Return the unique type for "intmax_t" (C99 7.18.1.5), defined in <stdint.h>.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:89
bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:48
static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target)
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:518
Represents the length modifier in a format string in scanf/printf.
Definition: FormatString.h:65
bool isUnsignedIntegerType() const
Definition: Type.cpp:1723
CanQualType LongDoubleTy
Definition: ASTContext.h:828
CanQualType UnsignedCharTy
Definition: ASTContext.h:826
QualType getPointeeType() const
Definition: Type.cpp:414
bool isRealFloatingType() const
Floating point categories.
Definition: Type.cpp:1776
Exposes information about the current target.
CanQualType getUIntMaxType() const
Return the unique type for "uintmax_t" (C99 7.18.1.5), defined in <stdint.h>.
CanQualType ShortTy
Definition: ASTContext.h:825
virtual void HandleNullChar(const char *nullCharacter)
Definition: FormatString.h:612
QualType getWideCharType() const
Return the type of wide characters. In C++, this returns the unique wchar_t type. In C99...
Definition: ASTContext.h:1264
CanQualType SignedCharTy
Definition: ASTContext.h:825
CanQualType FloatTy
Definition: ASTContext.h:828
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2003
CanQualType UnsignedShortTy
Definition: ASTContext.h:826
CanQualType UnsignedLongLongTy
Definition: ASTContext.h:827
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS, const char *&Beg, const char *E)
bool isWideCharType() const
Definition: Type.cpp:1642
std::string toString(const til::SExpr *E)
QualType getPointerDiffType() const
Return the unique type for "ptrdiff_t" (C99 7.17) defined in <stddef.h>. Pointer - pointer requires t...
const T * getAs() const
Definition: Type.h:5555
CanQualType UnsignedLongTy
Definition: ASTContext.h:826
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf=false)
virtual void HandleIncompleteSpecifier(const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:621
Defines the clang::TargetInfo interface.
OptionalAmount ParseAmount(const char *&Beg, const char *E)
CanQualType IntTy
Definition: ASTContext.h:825
bool isSignedIntegerType() const
Definition: Type.cpp:1683
virtual void HandleIncompleteScanList(const char *start, const char *end)
Definition: FormatString.h:663
CanQualType DoubleTy
Definition: ASTContext.h:828
virtual bool HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:650
CanQualType UnsignedIntTy
Definition: ASTContext.h:826
bool isPointerType() const
Definition: Type.h:5232