From: Hans Wennborg Date: Sat, 10 Dec 2011 13:20:11 +0000 (+0000) Subject: Check that arguments to a scanf call match the format specifier, X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6fcd932dfd6835f70cc00d6f7c6789793f6d7b66;p=clang Check that arguments to a scanf call match the format specifier, and offer fixits when there is a mismatch. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@146326 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Analysis/Analyses/FormatString.h b/include/clang/Analysis/Analyses/FormatString.h index 2edbc8c296..f84ca988ec 100644 --- a/include/clang/Analysis/Analyses/FormatString.h +++ b/include/clang/Analysis/Analyses/FormatString.h @@ -185,6 +185,7 @@ public: return EndScanList ? EndScanList - Position : 1; } + bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } const char *toString() const; bool isPrintfKind() const { return IsPrintf; } @@ -364,7 +365,6 @@ public: bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } - bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } bool isDoubleArg() const { return kind >= DoubleArgBeg && kind <= DoubleArgBeg; } unsigned getLength() const { @@ -506,10 +506,35 @@ public: } }; +using analyze_format_string::ArgTypeResult; using analyze_format_string::LengthModifier; using analyze_format_string::OptionalAmount; using analyze_format_string::OptionalFlag; +class ScanfArgTypeResult : public ArgTypeResult { +public: + enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy }; +private: + Kind K; + ArgTypeResult A; + const char *Name; + QualType getRepresentativeType(ASTContext &C) const; +public: + ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} + ScanfArgTypeResult(ArgTypeResult a, const char *n = 0) + : K(PtrToArgTypeResultTy), A(a), Name(n) { + assert(A.isValid()); + } + + static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); } + + bool isValid() const { return K != InvalidTy; } + + bool matchesType(ASTContext& C, QualType argTy) const; + + std::string getRepresentativeTypeName(ASTContext& C) const; +}; + class ScanfSpecifier : public analyze_format_string::FormatSpecifier { OptionalFlag SuppressAssignment; // '*' public: @@ -538,6 +563,12 @@ public: return CS.consumesDataArgument() && !SuppressAssignment; } + ScanfArgTypeResult getArgType(ASTContext &Ctx) const; + + bool fixType(QualType QT, const LangOptions &LangOpt); + + void toString(raw_ostream &os) const; + static ScanfSpecifier Parse(const char *beg, const char *end); }; diff --git a/lib/Analysis/ScanfFormatString.cpp b/lib/Analysis/ScanfFormatString.cpp index 6a8673ab55..77d9c9658d 100644 --- a/lib/Analysis/ScanfFormatString.cpp +++ b/lib/Analysis/ScanfFormatString.cpp @@ -20,9 +20,11 @@ using clang::analyze_format_string::FormatStringHandler; using clang::analyze_format_string::LengthModifier; using clang::analyze_format_string::OptionalAmount; using clang::analyze_format_string::ConversionSpecifier; +using clang::analyze_scanf::ScanfArgTypeResult; using clang::analyze_scanf::ScanfConversionSpecifier; using clang::analyze_scanf::ScanfSpecifier; using clang::UpdateOnReturn; +using namespace clang; typedef clang::analyze_format_string::SpecifierResult ScanfSpecifierResult; @@ -190,7 +192,213 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, } return ScanfSpecifierResult(Start, FS); } - + +ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { + const ScanfConversionSpecifier &CS = getConversionSpecifier(); + + if (!CS.consumesDataArgument()) + return ScanfArgTypeResult::Invalid(); + + switch(CS.getKind()) { + // Signed int. + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + switch (LM.getKind()) { + case LengthModifier::None: return ArgTypeResult(Ctx.IntTy); + case LengthModifier::AsChar: + return ArgTypeResult(ArgTypeResult::AnyCharTy); + case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy); + case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy); + case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy); + case LengthModifier::AsIntMax: + return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *"); + case LengthModifier::AsSizeT: + // FIXME: ssize_t. + return ScanfArgTypeResult(); + case LengthModifier::AsPtrDiff: + return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *"); + case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid(); + } + + // Unsigned int. + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case ConversionSpecifier::XArg: + switch (LM.getKind()) { + case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy); + case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy); + case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy); + case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy); + case LengthModifier::AsLongLong: + return ArgTypeResult(Ctx.UnsignedLongLongTy); + case LengthModifier::AsIntMax: + return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *"); + case LengthModifier::AsSizeT: + return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *"); + case LengthModifier::AsPtrDiff: + // FIXME: Unsigned version of ptrdiff_t? + return ScanfArgTypeResult(); + case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid(); + } + + // Float. + case ConversionSpecifier::aArg: + case ConversionSpecifier::AArg: + case ConversionSpecifier::eArg: + case ConversionSpecifier::EArg: + case ConversionSpecifier::fArg: + case ConversionSpecifier::FArg: + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: + switch (LM.getKind()) { + case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy); + case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy); + case LengthModifier::AsLongDouble: + return ArgTypeResult(Ctx.LongDoubleTy); + default: + return ScanfArgTypeResult::Invalid(); + } + + // Char, string and scanlist. + case ConversionSpecifier::cArg: + case ConversionSpecifier::sArg: + case ConversionSpecifier::ScanListArg: + switch (LM.getKind()) { + case LengthModifier::None: return ScanfArgTypeResult::CStrTy; + case LengthModifier::AsLong: + return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); + default: + return ScanfArgTypeResult::Invalid(); + } + case ConversionSpecifier::CArg: + case ConversionSpecifier::SArg: + // FIXME: Mac OS X specific? + return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); + + // Pointer. + case ConversionSpecifier::pArg: + return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy)); + + default: + break; + } + + return ScanfArgTypeResult(); +} + +bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt) +{ + if (!QT->isPointerType()) + return false; + + QualType PT = QT->getPointeeType(); + const BuiltinType *BT = PT->getAs(); + if (!BT) + return false; + + // Pointer to a character. + if (PT->isAnyCharacterType()) { + CS.setKind(ConversionSpecifier::sArg); + if (PT->isWideCharType()) + LM.setKind(LengthModifier::AsWideChar); + else + LM.setKind(LengthModifier::None); + return true; + } + + // Figure out the length modifier. + switch (BT->getKind()) { + // no modifier + case BuiltinType::UInt: + case BuiltinType::Int: + case BuiltinType::Float: + LM.setKind(LengthModifier::None); + break; + + // hh + case BuiltinType::Char_U: + case BuiltinType::UChar: + case BuiltinType::Char_S: + case BuiltinType::SChar: + LM.setKind(LengthModifier::AsChar); + break; + + // h + case BuiltinType::Short: + case BuiltinType::UShort: + LM.setKind(LengthModifier::AsShort); + break; + + // l + case BuiltinType::Long: + case BuiltinType::ULong: + case BuiltinType::Double: + LM.setKind(LengthModifier::AsLong); + break; + + // ll + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + LM.setKind(LengthModifier::AsLongLong); + break; + + // L + case BuiltinType::LongDouble: + LM.setKind(LengthModifier::AsLongDouble); + break; + + // Don't know. + default: + return false; + } + + // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. + if (isa(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { + const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); + if (Identifier->getName() == "size_t") { + LM.setKind(LengthModifier::AsSizeT); + } else if (Identifier->getName() == "ssize_t") { + // Not C99, but common in Unix. + LM.setKind(LengthModifier::AsSizeT); + } else if (Identifier->getName() == "intmax_t") { + LM.setKind(LengthModifier::AsIntMax); + } else if (Identifier->getName() == "uintmax_t") { + LM.setKind(LengthModifier::AsIntMax); + } else if (Identifier->getName() == "ptrdiff_t") { + LM.setKind(LengthModifier::AsPtrDiff); + } + } + + // Figure out the conversion specifier. + if (PT->isRealFloatingType()) + CS.setKind(ConversionSpecifier::fArg); + else if (PT->isSignedIntegerType()) + CS.setKind(ConversionSpecifier::dArg); + else if (PT->isUnsignedIntegerType()) { + // Preserve the original formatting, e.g. 'X', 'o'. + if (!CS.isUIntArg()) { + CS.setKind(ConversionSpecifier::uArg); + } + } else + llvm_unreachable("Unexpected type"); + + return true; +} + +void ScanfSpecifier::toString(raw_ostream &os) const { + os << "%"; + + if (usesPositionalArg()) + os << getPositionalArgIndex() << "$"; + if (SuppressAssignment) + os << "*"; + + FieldWidth.toString(os); + os << LM.toString(); + os << CS.toString(); +} + bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, const char *I, const char *E) { @@ -218,4 +426,47 @@ bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, return false; } +bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const { + switch (K) { + case InvalidTy: + llvm_unreachable("ArgTypeResult must be valid"); + case UnknownTy: + return true; + case CStrTy: + return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy); + case WCStrTy: + return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy); + case PtrToArgTypeResultTy: { + const PointerType *PT = argTy->getAs(); + if (!PT) + return false; + return A.matchesType(C, PT->getPointeeType()); + } + } + + return false; // Unreachable, but we still get a warning. +} + +QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const { + switch (K) { + case InvalidTy: + llvm_unreachable("No representative type for Invalid ArgTypeResult"); + case UnknownTy: + return QualType(); + case CStrTy: + return C.getPointerType(C.CharTy); + case WCStrTy: + return C.getPointerType(C.getWCharType()); + case PtrToArgTypeResultTy: + return C.getPointerType(A.getRepresentativeType(C)); + } + return QualType(); // Not reachable. +} + +std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const { + std::string S = getRepresentativeType(C).getAsString(); + if (!Name) + return std::string("'") + S + "'"; + return std::string("'") + Name + "' (aka '" + S + "')"; +} diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index db60f2388a..1c93931b5c 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -2371,8 +2371,38 @@ bool CheckScanfHandler::HandleScanfSpecifier( if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex)) return false; - // FIXME: Check that the argument type matches the format specifier. - + // Check that the argument type matches the format specifier. + const Expr *Ex = getDataArg(argIndex); + const analyze_scanf::ScanfArgTypeResult &ATR = FS.getArgType(S.Context); + if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) { + ScanfSpecifier fixedFS = FS; + bool success = fixedFS.fixType(Ex->getType(), S.getLangOptions()); + + if (success) { + // Get the fix string from the fixed format specifier. + llvm::SmallString<128> buf; + llvm::raw_svector_ostream os(buf); + fixedFS.toString(os); + + EmitFormatDiagnostic( + S.PDiag(diag::warn_printf_conversion_argument_type_mismatch) + << ATR.getRepresentativeTypeName(S.Context) << Ex->getType() + << Ex->getSourceRange(), + getLocationOfByte(CS.getStart()), + /*IsStringLocation*/true, + getSpecifierRange(startSpecifier, specifierLen), + FixItHint::CreateReplacement( + getSpecifierRange(startSpecifier, specifierLen), + os.str())); + } else { + S.Diag(getLocationOfByte(CS.getStart()), + diag::warn_printf_conversion_argument_type_mismatch) + << ATR.getRepresentativeTypeName(S.Context) << Ex->getType() + << getSpecifierRange(startSpecifier, specifierLen) + << Ex->getSourceRange(); + } + } + return true; } diff --git a/test/Analysis/taint-generic.c b/test/Analysis/taint-generic.c index eeec8869b0..9c99f908a8 100644 --- a/test/Analysis/taint-generic.c +++ b/test/Analysis/taint-generic.c @@ -39,7 +39,8 @@ void bufferScanfAssignment(int x) { void scanfArg() { int t; - scanf("%d", t); // expected-warning {{Pointer argument is expected}} + scanf("%d", t); // expected-warning {{Pointer argument is expected}} \ + // expected-warning {{conversion specifies type 'int *' but the argument has type 'int'}} } void bufferGetchar(int x) { diff --git a/test/Analysis/taint-tester.c b/test/Analysis/taint-tester.c index 8a82cd397a..da1ff024d1 100644 --- a/test/Analysis/taint-tester.c +++ b/test/Analysis/taint-tester.c @@ -49,8 +49,8 @@ void taintTracking(int x) { // Taint on fields of a struct. struct XYStruct xy = {2, 3, 11}; - scanf("%f", &xy.y); - scanf("%f", &xy.x); + scanf("%d", &xy.y); + scanf("%d", &xy.x); int tx = xy.x; // expected-warning {{tainted}} int ty = xy.y; // FIXME: This should be tainted as well. char ntz = xy.z;// no warning diff --git a/test/Sema/format-strings-fixit.c b/test/Sema/format-strings-fixit.c index bfb54327b7..4fb6d75d76 100644 --- a/test/Sema/format-strings-fixit.c +++ b/test/Sema/format-strings-fixit.c @@ -10,6 +10,11 @@ int printf(char const *, ...); +typedef __SIZE_TYPE__ size_t; +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; +typedef __PTRDIFF_TYPE__ ptrdiff_t; + void test() { // Basic types printf("%s", (int) 123); @@ -47,11 +52,6 @@ void test() { unsigned long val = 42; printf("%X", val); - typedef __SIZE_TYPE__ size_t; - typedef __INTMAX_TYPE__ intmax_t; - typedef __UINTMAX_TYPE__ uintmax_t; - typedef __PTRDIFF_TYPE__ ptrdiff_t; - // size_t, etc. printf("%f", (size_t) 42); printf("%f", (intmax_t) 42); @@ -62,6 +62,51 @@ void test() { printf("%ld", "foo"); } +int scanf(char const *, ...); + +void test2() { + char str[100]; + short shortVar; + unsigned short uShortVar; + int intVar; + unsigned uIntVar; + float floatVar; + double doubleVar; + long double longDoubleVar; + long longVar; + unsigned long uLongVar; + long long longLongVar; + unsigned long long uLongLongVar; + size_t sizeVar; + intmax_t intmaxVar; + uintmax_t uIntmaxVar; + ptrdiff_t ptrdiffVar; + + scanf("%lf", str); + scanf("%f", &shortVar); + scanf("%f", &uShortVar); + scanf("%p", &intVar); + scanf("%Lf", &uIntVar); + scanf("%ld", &floatVar); + scanf("%f", &doubleVar); + scanf("%d", &longDoubleVar); + scanf("%f", &longVar); + scanf("%f", &uLongVar); + scanf("%f", &longLongVar); + scanf("%f", &uLongLongVar); + + // Some named ints. + scanf("%f", &sizeVar); + scanf("%f", &intmaxVar); + scanf("%f", &uIntmaxVar); + scanf("%f", &ptrdiffVar); + + // Perserve the original formatting for unsigned integers. + scanf("%o", &uLongVar); + scanf("%x", &uLongVar); + scanf("%X", &uLongVar); +} + // Validate the fixes... // CHECK: printf("%d", (int) 123); // CHECK: printf("abc%s", "testing testing 123"); @@ -87,3 +132,23 @@ void test() { // CHECK: printf("%ju", (uintmax_t) 42); // CHECK: printf("%td", (ptrdiff_t) 42); // CHECK: printf("%s", "foo"); + +// CHECK: scanf("%s", str); +// CHECK: scanf("%hd", &shortVar); +// CHECK: scanf("%hu", &uShortVar); +// CHECK: scanf("%d", &intVar); +// CHECK: scanf("%u", &uIntVar); +// CHECK: scanf("%f", &floatVar); +// CHECK: scanf("%lf", &doubleVar); +// CHECK: scanf("%Lf", &longDoubleVar); +// CHECK: scanf("%ld", &longVar); +// CHECK: scanf("%lu", &uLongVar); +// CHECK: scanf("%lld", &longLongVar); +// CHECK: scanf("%llu", &uLongLongVar); +// CHECK: scanf("%zu", &sizeVar); +// CHECK: scanf("%jd", &intmaxVar); +// CHECK: scanf("%ju", &uIntmaxVar); +// CHECK: scanf("%td", &ptrdiffVar); +// CHECK: scanf("%lo", &uLongVar); +// CHECK: scanf("%lx", &uLongVar); +// CHECK: scanf("%lX", &uLongVar); diff --git a/test/Sema/format-strings-int-typedefs.c b/test/Sema/format-strings-int-typedefs.c index 2568b8d69f..8d5dff1523 100644 --- a/test/Sema/format-strings-int-typedefs.c +++ b/test/Sema/format-strings-int-typedefs.c @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -triple i386-apple-darwin9 -fsyntax-only -verify %s int printf(char const *, ...); +int scanf(char const *, ...); void test(void) { printf("%jd", 42.0); // expected-warning {{conversion specifies type 'intmax_t' (aka 'long long')}} @@ -12,6 +13,15 @@ void test(void) { printf("%S", 42.0); // expected-warning {{conversion specifies type 'wchar_t *' (aka 'int *')}} printf("%C", 42.0); // expected-warning {{conversion specifies type 'wchar_t' (aka 'int')}} + scanf("%jd", 0); // expected-warning {{conversion specifies type 'intmax_t *' (aka 'long long *')}} + scanf("%ju", 0); // expected-warning {{conversion specifies type 'uintmax_t *' (aka 'unsigned long long *')}} + scanf("%zu", 0); // expected-warning {{conversion specifies type 'size_t *' (aka 'unsigned long *')}} + scanf("%td", 0); // expected-warning {{conversion specifies type 'ptrdiff_t *' (aka 'int *')}} + scanf("%lc", 0); // expected-warning {{conversion specifies type 'wchar_t *' (aka 'int *')}} + scanf("%ls", 0); // expected-warning {{conversion specifies type 'wchar_t *' (aka 'int *')}} + scanf("%S", 0); // expected-warning {{conversion specifies type 'wchar_t *' (aka 'int *')}} + scanf("%C", 0); // expected-warning {{conversion specifies type 'wchar_t *' (aka 'int *')}} + // typedef size_t et al. to something crazy. typedef void *size_t; diff --git a/test/Sema/format-strings-scanf.c b/test/Sema/format-strings-scanf.c index ee559daef6..467586215b 100644 --- a/test/Sema/format-strings-scanf.c +++ b/test/Sema/format-strings-scanf.c @@ -28,7 +28,7 @@ void test(const char *s, int *i) { void bad_length_modifiers(char *s, void *p, wchar_t *ws, long double *ld) { scanf("%hhs", "foo"); // expected-warning{{length modifier 'hh' results in undefined behavior or no effect with 's' conversion specifier}} - scanf("%1$zp", p); // expected-warning{{length modifier 'z' results in undefined behavior or no effect with 'p' conversion specifier}} + scanf("%1$zp", &p); // expected-warning{{length modifier 'z' results in undefined behavior or no effect with 'p' conversion specifier}} scanf("%ls", ws); // no-warning scanf("%#.2Lf", ld); // expected-warning{{invalid conversion specifier '#'}} } @@ -37,10 +37,11 @@ void bad_length_modifiers(char *s, void *p, wchar_t *ws, long double *ld) { // format string is somewhere else, point to it in a note. void pr9751() { int *i; + char str[100]; const char kFormat1[] = "%00d"; // expected-note{{format string is defined here}}} scanf(kFormat1, i); // expected-warning{{zero field width in scanf format string is unused}} scanf("%00d", i); // expected-warning{{zero field width in scanf format string is unused}} const char kFormat2[] = "%["; // expected-note{{format string is defined here}}} - scanf(kFormat2, &i); // expected-warning{{no closing ']' for '%[' in scanf format string}} - scanf("%[", &i); // expected-warning{{no closing ']' for '%[' in scanf format string}} + scanf(kFormat2, str); // expected-warning{{no closing ']' for '%[' in scanf format string}} + scanf("%[", str); // expected-warning{{no closing ']' for '%[' in scanf format string}} }