--- /dev/null
+//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines APIs for analyzing the format strings of printf, fscanf,
+// and friends.
+//
+// The structure of format strings for fprintf is described in C99 7.19.6.1.
+//
+// The structure of format strings for fscanf is described in C99 7.19.6.2.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_H
+#define LLVM_CLANG_FORMAT_H
+
+#include "clang/AST/CanonicalType.h"
+
+namespace clang {
+
+//===----------------------------------------------------------------------===//
+/// Common components of both fprintf and fscanf format strings.
+namespace analyze_format_string {
+
+/// An optional, boolean-like flag of a format string, together with its
+/// textual representation and, once recorded, the location in the format
+/// string where it was seen.
+class OptionalFlag {
+public:
+  /// \param Representation the spelling of the flag; toString() returns it
+  /// unchanged.  A new flag is cleared and has no position recorded.
+  OptionalFlag(const char *Representation)
+    : representation(Representation), position(0), flag(false) {}
+
+  bool isSet() const { return flag; }
+  void set() { flag = true; }
+  void clear() { flag = false; }
+
+  /// Records where the flag appears.  \p position must be non-null.
+  void setPosition(const char *position) {
+    assert(position);
+    this->position = position;
+  }
+
+  /// Returns the location recorded by setPosition().  The pointer is
+  /// null-initialized by the constructor, so asking for the position of a
+  /// flag that never had one set fails the assertion instead of reading an
+  /// indeterminate pointer.
+  const char *getPosition() const {
+    assert(position);
+    return position;
+  }
+
+  const char *toString() const { return representation; }
+
+  // Overloaded operators for bool like qualities
+  operator bool() const { return flag; }
+  OptionalFlag& operator=(const bool &rhs) {
+    flag = rhs;
+    return *this;  // Return a reference to myself.
+  }
+
+private:
+  const char *representation;
+  const char *position;
+  bool flag;
+};
+
+/// The length modifier of a conversion in a printf/scanf format string,
+/// paired with the location at which it starts.
+class LengthModifier {
+public:
+  enum Kind {
+    None,
+    AsChar,       // 'hh'
+    AsShort,      // 'h'
+    AsLong,       // 'l'
+    AsLongLong,   // 'll', 'q' (BSD, deprecated)
+    AsIntMax,     // 'j'
+    AsSizeT,      // 'z'
+    AsPtrDiff,    // 't'
+    AsLongDouble, // 'L'
+    AsWideChar = AsLong // for '%ls', only makes sense for printf
+  };
+
+  /// An absent modifier: kind None, no start location.
+  LengthModifier() : Position(0), kind(None) {}
+
+  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}
+
+  const char *getStart() const { return Position; }
+
+  /// Number of characters attributed to the modifier: 0 for None, 2 for
+  /// AsChar and AsLongLong, and 1 for every other kind.
+  // NOTE(review): 'q' is a single character but is listed under AsLongLong,
+  // for which 2 is reported -- confirm how callers handle that spelling.
+  unsigned getLength() const {
+    if (kind == None)
+      return 0;
+    if (kind == AsChar || kind == AsLongLong)
+      return 2;
+    return 1;
+  }
+
+  Kind getKind() const { return kind; }
+  void setKind(Kind k) { kind = k; }
+
+  const char *toString() const;
+
+private:
+  const char *Position;
+  Kind kind;
+};
+
+/// The result of asking a format-string component which argument type it
+/// expects: either one specific QualType or one of the broader categories
+/// listed in Kind.
+class ArgTypeResult {
+public:
+  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
+              CStrTy, WCStrTy };
+
+private:
+  const Kind K;
+  QualType T;
+
+  /// Private tag constructor used only by Invalid(); the argument value is
+  /// ignored.
+  ArgTypeResult(bool) : K(InvalidTy) {}
+
+public:
+  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
+  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
+  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
+
+  /// Builds a result whose kind is InvalidTy.
+  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
+
+  bool isValid() const { return K != InvalidTy; }
+
+  /// Returns a pointer to the stored type when the kind is SpecificTy, and
+  /// null for every other kind.
+  const QualType *getSpecificType() const {
+    if (K != SpecificTy)
+      return 0;
+    return &T;
+  }
+
+  bool matchesType(ASTContext &C, QualType argTy) const;
+
+  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
+
+  QualType getRepresentativeType(ASTContext &C) const;
+};
+
+/// An optional numeric part of a conversion (used in this header for the
+/// field width and the precision), recording how it was written and where.
+class OptionalAmount {
+public:
+  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
+
+  OptionalAmount(HowSpecified howSpecified,
+                 unsigned amount,
+                 const char *amountStart,
+                 unsigned amountLength,
+                 bool usesPositionalArg)
+    : start(amountStart), length(amountLength), hs(howSpecified),
+      amt(amount), UsesPositionalArg(usesPositionalArg),
+      UsesDotPrefix(false) {}
+
+  /// Builds an amount with no location: NotSpecified when \p valid is true,
+  /// Invalid otherwise.
+  OptionalAmount(bool valid = true)
+    : start(0), length(0), hs(valid ? NotSpecified : Invalid), amt(0),
+      UsesPositionalArg(false), UsesDotPrefix(false) {}
+
+  bool isInvalid() const { return hs == Invalid; }
+
+  HowSpecified getHowSpecified() const { return hs; }
+  void setHowSpecified(HowSpecified h) { hs = h; }
+
+  bool hasDataArgument() const { return hs == Arg; }
+
+  /// The stored amount, interpreted as an argument index; only valid when
+  /// the amount is taken from a data argument.
+  unsigned getArgIndex() const {
+    assert(hasDataArgument());
+    return amt;
+  }
+
+  /// The stored amount, interpreted as a literal value; only valid for
+  /// Constant amounts.
+  unsigned getConstantAmount() const {
+    assert(hs == Constant);
+    return amt;
+  }
+
+  /// Start of the amount's text, moved back by one character when a '.'
+  /// prefix was recorded so that the '.' is included.
+  const char *getStart() const {
+    return UsesDotPrefix ? start - 1 : start;
+  }
+
+  /// Length of a Constant amount's text, counting the '.' prefix if any.
+  unsigned getConstantLength() const {
+    assert(hs == Constant);
+    return UsesDotPrefix ? length + 1 : length;
+  }
+
+  ArgTypeResult getArgType(ASTContext &Ctx) const;
+
+  void toString(llvm::raw_ostream &os) const;
+
+  bool usesPositionalArg() const { return UsesPositionalArg; }
+
+  /// One more than getArgIndex(); only valid when the amount is taken from
+  /// a data argument.
+  unsigned getPositionalArgIndex() const {
+    assert(hasDataArgument());
+    return amt + 1;
+  }
+
+  bool usesDotPrefix() const { return UsesDotPrefix; }
+  void setUsesDotPrefix() { UsesDotPrefix = true; }
+
+private:
+  const char *start;
+  unsigned length;
+  HowSpecified hs;
+  unsigned amt;
+  bool UsesPositionalArg : 1;
+  bool UsesDotPrefix;
+};
+
+
+/// State common to the printf and scanf specifier classes in this header:
+/// length modifier, field width, and argument-index bookkeeping.
+class FormatSpecifier {
+protected:
+  LengthModifier LM;
+  OptionalAmount FieldWidth;
+  /// Positional arguments, an IEEE extension:
+  ///  IEEE Std 1003.1, 2004 Edition
+  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
+  bool UsesPositionalArg;
+  unsigned argIndex;
+
+public:
+  FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {}
+
+  // Queries.
+
+  const LengthModifier &getLengthModifier() const { return LM; }
+
+  const OptionalAmount &getFieldWidth() const { return FieldWidth; }
+
+  bool usesPositionalArg() const { return UsesPositionalArg; }
+
+  // The argument-index accessors below do not assert that the conversion
+  // consumes a data argument: the conversion specifier is a member of the
+  // derived classes and is not available here.
+
+  unsigned getArgIndex() const { return argIndex; }
+
+  /// The argument index plus one.
+  unsigned getPositionalArgIndex() const { return argIndex + 1; }
+
+  // Incremental construction.
+
+  void setLengthModifier(LengthModifier lm) { LM = lm; }
+
+  void setFieldWidth(const OptionalAmount &Amt) { FieldWidth = Amt; }
+
+  void setUsesPositionalArg() { UsesPositionalArg = true; }
+
+  void setArgIndex(unsigned i) { argIndex = i; }
+};
+
+} // end analyze_format_string namespace
+
+//===----------------------------------------------------------------------===//
+/// Pieces specific to fprintf format strings.
+
+namespace analyze_printf {
+
+/// The conversion specifier of a printf conversion, with the location at
+/// which it appears in the format string.
+class ConversionSpecifier {
+public:
+  enum Kind {
+    InvalidSpecifier = 0,
+    // C99 conversion specifiers.
+    dArg,          // 'd'
+    IntAsCharArg,  // 'c'
+    iArg,          // 'i'
+    oArg,          // 'o'
+    uArg,          // 'u'
+    xArg,          // 'x'
+    XArg,          // 'X'
+    fArg,          // 'f'
+    FArg,          // 'F'
+    eArg,          // 'e'
+    EArg,          // 'E'
+    gArg,          // 'g'
+    GArg,          // 'G'
+    aArg,          // 'a'
+    AArg,          // 'A'
+    CStrArg,       // 's'
+    VoidPtrArg,    // 'p'
+    OutIntPtrArg,  // 'n'
+    PercentArg,    // '%'
+    // MacOS X unicode extensions.
+    CArg,          // 'C'
+    UnicodeStrArg, // 'S'
+    // Objective-C specific specifiers.
+    ObjCObjArg,    // '@'
+    // GlibC specific specifiers.
+    PrintErrno,    // 'm'
+    // Specifier ranges.
+    IntArgBeg = dArg,
+    IntArgEnd = iArg,
+    UIntArgBeg = oArg,
+    UIntArgEnd = XArg,
+    DoubleArgBeg = fArg,
+    DoubleArgEnd = AArg,
+    C99Beg = IntArgBeg,
+    C99End = DoubleArgEnd,
+    ObjCBeg = ObjCObjArg,
+    ObjCEnd = ObjCObjArg
+  };
+
+  ConversionSpecifier() : Position(0), kind(InvalidSpecifier) {}
+
+  ConversionSpecifier(const char *pos, Kind k) : Position(pos), kind(k) {}
+
+  const char *getStart() const { return Position; }
+
+  /// The text of the specifier: getLength() characters from getStart().
+  llvm::StringRef getCharacters() const {
+    return llvm::StringRef(getStart(), getLength());
+  }
+
+  /// False for PercentArg and PrintErrno, true for every other kind.
+  bool consumesDataArgument() const {
+    return kind != PercentArg && kind != PrintErrno;
+  }
+
+  // Range predicates; each tests the inclusive [Beg, End] enumerator range.
+  bool isObjCArg() const { return ObjCBeg <= kind && kind <= ObjCEnd; }
+  bool isIntArg() const { return IntArgBeg <= kind && kind <= IntArgEnd; }
+  bool isUIntArg() const { return UIntArgBeg <= kind && kind <= UIntArgEnd; }
+  bool isDoubleArg() const {
+    return DoubleArgBeg <= kind && kind <= DoubleArgEnd;
+  }
+
+  Kind getKind() const { return kind; }
+  void setKind(Kind k) { kind = k; }
+
+  /// Always 1: conversion specifiers are currently single characters.
+  unsigned getLength() const { return 1; }
+
+  const char *toString() const;
+
+private:
+  const char *Position;
+  Kind kind;
+};
+
+using analyze_format_string::ArgTypeResult;
+using analyze_format_string::LengthModifier;
+using analyze_format_string::OptionalAmount;
+using analyze_format_string::OptionalFlag;
+
+/// A parsed printf conversion: the state inherited from FormatSpecifier
+/// plus the printf-only flags, the conversion specifier and the precision.
+/// setUsesPositionalArg() and usesPositionalArg() are inherited from
+/// FormatSpecifier and are not redeclared here.
+class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
+  OptionalFlag IsLeftJustified;    // '-'
+  OptionalFlag HasPlusPrefix;      // '+'
+  OptionalFlag HasSpacePrefix;     // ' '
+  OptionalFlag HasAlternativeForm; // '#'
+  OptionalFlag HasLeadingZeroes;   // '0'
+  ConversionSpecifier CS;
+  OptionalAmount Precision;
+public:
+  PrintfSpecifier() :
+    IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
+    HasAlternativeForm("#"), HasLeadingZeroes("0") {}
+
+  static PrintfSpecifier Parse(const char *beg, const char *end);
+
+  // Methods for incrementally constructing the PrintfSpecifier.
+  // Each flag setter marks the flag as set and records where it was seen;
+  // \p position must be non-null (OptionalFlag::setPosition asserts it).
+  void setConversionSpecifier(const ConversionSpecifier &cs) {
+    CS = cs;
+  }
+  void setIsLeftJustified(const char *position) {
+    IsLeftJustified = true;
+    IsLeftJustified.setPosition(position);
+  }
+  void setHasPlusPrefix(const char *position) {
+    HasPlusPrefix = true;
+    HasPlusPrefix.setPosition(position);
+  }
+  void setHasSpacePrefix(const char *position) {
+    HasSpacePrefix = true;
+    HasSpacePrefix.setPosition(position);
+  }
+  void setHasAlternativeForm(const char *position) {
+    HasAlternativeForm = true;
+    HasAlternativeForm.setPosition(position);
+  }
+  void setHasLeadingZeros(const char *position) {
+    HasLeadingZeroes = true;
+    HasLeadingZeroes.setPosition(position);
+  }
+
+  // Methods for querying the format specifier.
+
+  const ConversionSpecifier &getConversionSpecifier() const {
+    return CS;
+  }
+
+  /// Stores \p Amt as the precision and marks it as written with a leading
+  /// '.', so its reported start and constant length include that character.
+  void setPrecision(const OptionalAmount &Amt) {
+    Precision = Amt;
+    Precision.setUsesDotPrefix();
+  }
+
+  const OptionalAmount &getPrecision() const {
+    return Precision;
+  }
+
+  /// \brief Returns the type that a data argument paired with this format
+  /// specifier should have, as an ArgTypeResult.
+  // NOTE(review): the previous wording said this "will return null" when
+  // there is no matching data argument or more than one type matches; the
+  // return type is a value, so confirm in the definition which
+  // ArgTypeResult kind is produced in those cases.
+  ArgTypeResult getArgType(ASTContext &Ctx) const;
+
+  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
+  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
+  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
+  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
+  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
+
+  /// Changes the specifier and length according to a QualType, retaining any
+  /// flags or options. Returns true on success, or false when a conversion
+  /// was not successful.
+  bool fixType(QualType QT);
+
+  void toString(llvm::raw_ostream &os) const;
+
+  // Validation methods - to check if any element results in undefined behavior
+  bool hasValidPlusPrefix() const;
+  bool hasValidAlternativeForm() const;
+  bool hasValidLeadingZeros() const;
+  bool hasValidSpacePrefix() const;
+  bool hasValidLeftJustified() const;
+
+  bool hasValidLengthModifier() const;
+  bool hasValidPrecision() const;
+  bool hasValidFieldWidth() const;
+};
+} // end analyze_printf namespace
+
+//===----------------------------------------------------------------------===//
+/// Pieces specific to fscanf format strings.
+
+namespace analyze_scanf {
+
+/// The conversion specifier of a scanf conversion, with the location at
+/// which it appears and, when recorded, the end of its scan list.
+class ConversionSpecifier {
+public:
+  enum Kind {
+    InvalidSpecifier = 0,
+    // C99 conversion specifiers.
+    dArg,             // 'd'
+    iArg,             // 'i'
+    oArg,             // 'o'
+    uArg,             // 'u'
+    xArg,             // 'x'
+    XArg,             // 'X'
+    fArg,             // 'f'
+    FArg,             // 'F'
+    eArg,             // 'e'
+    EArg,             // 'E'
+    gArg,             // 'g'
+    GArg,             // 'G'
+    aArg,             // 'a'
+    AArg,             // 'A'
+    sArg,             // 's', match sequence of non-white-space characters
+    VoidPtrArg,       // 'p'
+    cArg,             // 'c', differs from printf, writes array of characters
+    ConsumedSoFarArg, // 'n', differs from printf, writes back args consumed
+    PercentArg,       // '%'
+    ScanListArg,      // '[' followed by scan list
+    // IEEE Std 1003.1 extensions.
+    CArg,             // 'C', same as writing 'lc'
+    SArg,             // 'S', same as writing 'ls'
+    // Specifier ranges.
+    IntArgBeg = dArg,
+    IntArgEnd = iArg,
+    UIntArgBeg = oArg,
+    UIntArgEnd = XArg,
+    DoubleArgBeg = fArg,
+    DoubleArgEnd = AArg
+  };
+
+  ConversionSpecifier()
+    : Position(0), EndScanList(0), kind(InvalidSpecifier) {}
+
+  ConversionSpecifier(const char *pos, Kind k)
+    : Position(pos), EndScanList(0), kind(k) {}
+
+  const char *getStart() const { return Position; }
+
+  /// Records the end of the scan list; getLength() then measures from the
+  /// start of the specifier to \p pos.
+  void setEndScanList(const char *pos) { EndScanList = pos; }
+
+  /// The text of the specifier: getLength() characters from getStart().
+  llvm::StringRef getCharacters() const {
+    return llvm::StringRef(getStart(), getLength());
+  }
+
+  /// True for every kind except PercentArg.
+  bool consumesDataArgument() const { return !(kind == PercentArg); }
+
+  // Range predicates; each tests the inclusive [Beg, End] enumerator range.
+  bool isIntArg() const { return IntArgBeg <= kind && kind <= IntArgEnd; }
+  bool isUIntArg() const { return UIntArgBeg <= kind && kind <= UIntArgEnd; }
+  bool isDoubleArg() const {
+    return DoubleArgBeg <= kind && kind <= DoubleArgEnd;
+  }
+
+  Kind getKind() const { return kind; }
+  void setKind(Kind k) { kind = k; }
+
+  /// 1 unless a scan-list end has been recorded, in which case the distance
+  /// from the start of the specifier to that end.
+  unsigned getLength() const {
+    if (EndScanList)
+      return EndScanList - Position;
+    return 1;
+  }
+
+  const char *toString() const;
+
+private:
+  const char *Position;
+  const char *EndScanList;
+  Kind kind;
+};
+
+using analyze_format_string::LengthModifier;
+using analyze_format_string::OptionalAmount;
+using analyze_format_string::OptionalFlag;
+
+/// A parsed scanf conversion: the state inherited from FormatSpecifier plus
+/// the assignment-suppression flag and the conversion specifier.
+class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
+  OptionalFlag SuppressAssignment; // '*'
+  ConversionSpecifier CS;
+public:
+  ScanfSpecifier() : SuppressAssignment("*") {}
+
+  static ScanfSpecifier Parse(const char *beg, const char *end);
+
+  /// Marks assignment as suppressed and records where the '*' was seen;
+  /// \p position must be non-null (OptionalFlag::setPosition asserts it).
+  void setSuppressAssignment(const char *position) {
+    SuppressAssignment = true;
+    SuppressAssignment.setPosition(position);
+  }
+
+  const OptionalFlag &getSuppressAssignment() const {
+    return SuppressAssignment;
+  }
+
+  void setConversionSpecifier(const ConversionSpecifier &cs) { CS = cs; }
+
+  const ConversionSpecifier &getConversionSpecifier() const { return CS; }
+
+  /// True only when assignment is not suppressed and the conversion
+  /// specifier itself consumes a data argument.
+  bool consumesDataArgument() const {
+    if (SuppressAssignment)
+      return false;
+    return CS.consumesDataArgument();
+  }
+};
+
+} // end analyze_scanf namespace
+
+//===----------------------------------------------------------------------===//
+// Parsing and processing of format strings (both fprintf and fscanf).
+
+namespace analyze_format_string {
+
+/// Identifies which part of a conversion an invalid position was found in;
+/// passed to FormatStringHandler::HandleInvalidPosition.
+enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
+
+/// Callback interface taken by ParsePrintfString and ParseScanfString.
+///
+/// Every hook has a default implementation: the void hooks do nothing and
+/// the bool hooks return true, so a subclass overrides only the events it
+/// cares about.
+// NOTE(review): what the parsers do with a false return value is decided in
+// their definitions, which are outside this header -- confirm before relying
+// on it.
+class FormatStringHandler {
+public:
+ FormatStringHandler() {}
+ // Declared here without a body; the definition is out of line.
+ virtual ~FormatStringHandler();
+
+ /// Default: no-op.
+ virtual void HandleNullChar(const char *nullCharacter) {}
+
+ /// Default: no-op.  \p p says whether the position belongs to the field
+ /// width or to the precision.
+ virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
+ PositionContext p) {}
+
+ /// Default: no-op.
+ virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
+
+ /// Default: no-op.
+ virtual void HandleIncompleteSpecifier(const char *startSpecifier,
+ unsigned specifierLen) {}
+
+ // Printf-specific handlers.
+
+ /// Default: returns true.
+ virtual bool HandleInvalidPrintfConversionSpecifier(
+ const analyze_printf::PrintfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen) {
+ return true;
+ }
+
+ /// Default: returns true.
+ virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen) {
+ return true;
+ }
+
+ // Scanf-specific handlers.
+
+ /// Default: returns true.
+ virtual bool HandleInvalidScanfConversionSpecifier(
+ const analyze_scanf::ScanfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen) {
+ return true;
+ }
+
+ /// Default: returns true.
+ virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen) {
+ return true;
+ }
+
+ /// Default: no-op.
+ virtual void HandleIncompleteScanList(const char *start, const char *end) {}
+};
+
+/// Entry point for processing a printf format string delimited by \p beg
+/// and \p end, with \p H as the handler.
+// NOTE(review): presumably [beg, end) is a half-open range and the result
+// reports whether processing was cut short -- confirm against the
+// definition, which is not part of this header.
+bool ParsePrintfString(FormatStringHandler &H,
+ const char *beg, const char *end);
+
+/// Scanf counterpart of ParsePrintfString, with the same parameters.
+bool ParseScanfString(FormatStringHandler &H,
+ const char *beg, const char *end);
+
+} // end analyze_format_string namespace
+} // end clang namespace
+#endif
+++ /dev/null
-//==- PrintfFormatStrings.h - Analysis of printf format strings --*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Handling of format string in printf and friends. The structure of format
-// strings for fprintf() are described in C99 7.19.6.1.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_FPRINTF_FORMAT_H
-#define LLVM_CLANG_FPRINTF_FORMAT_H
-
-#include "clang/AST/CanonicalType.h"
-
-namespace clang {
-
-class ASTContext;
-
-namespace analyze_printf {
-
-class ArgTypeResult {
-public:
- enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
- CStrTy, WCStrTy };
-private:
- const Kind K;
- QualType T;
- ArgTypeResult(bool) : K(InvalidTy) {}
-public:
- ArgTypeResult(Kind k = UnknownTy) : K(k) {}
- ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
- ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
-
- static ArgTypeResult Invalid() { return ArgTypeResult(true); }
-
- bool isValid() const { return K != InvalidTy; }
-
- const QualType *getSpecificType() const {
- return K == SpecificTy ? &T : 0;
- }
-
- bool matchesType(ASTContext &C, QualType argTy) const;
-
- bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
-
- QualType getRepresentativeType(ASTContext &C) const;
-};
-
-class ConversionSpecifier {
-public:
- enum Kind {
- InvalidSpecifier = 0,
- // C99 conversion specifiers.
- dArg, // 'd'
- IntAsCharArg, // 'c'
- iArg, // 'i',
- oArg, // 'o',
- uArg, // 'u',
- xArg, // 'x',
- XArg, // 'X',
- fArg, // 'f',
- FArg, // 'F',
- eArg, // 'e',
- EArg, // 'E',
- gArg, // 'g',
- GArg, // 'G',
- aArg, // 'a',
- AArg, // 'A',
- CStrArg, // 's'
- VoidPtrArg, // 'p'
- OutIntPtrArg, // 'n'
- PercentArg, // '%'
- // MacOS X unicode extensions.
- CArg, // 'C'
- UnicodeStrArg, // 'S'
- // Objective-C specific specifiers.
- ObjCObjArg, // '@'
- // GlibC specific specifiers.
- PrintErrno, // 'm'
- // Specifier ranges.
- IntArgBeg = dArg,
- IntArgEnd = iArg,
- UIntArgBeg = oArg,
- UIntArgEnd = XArg,
- DoubleArgBeg = fArg,
- DoubleArgEnd = AArg,
- C99Beg = IntArgBeg,
- C99End = DoubleArgEnd,
- ObjCBeg = ObjCObjArg,
- ObjCEnd = ObjCObjArg
- };
-
- ConversionSpecifier()
- : Position(0), kind(InvalidSpecifier) {}
-
- ConversionSpecifier(const char *pos, Kind k)
- : Position(pos), kind(k) {}
-
- const char *getStart() const {
- return Position;
- }
-
- llvm::StringRef getCharacters() const {
- return llvm::StringRef(getStart(), getLength());
- }
-
- bool consumesDataArgument() const {
- switch (kind) {
- case PercentArg:
- case PrintErrno:
- return false;
- default:
- return true;
- }
- }
-
- bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
- bool isIntArg() const { return kind >= dArg && kind <= iArg; }
- bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
- bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
- Kind getKind() const { return kind; }
- void setKind(Kind k) { kind = k; }
- unsigned getLength() const {
- // Conversion specifiers currently only are represented by
- // single characters, but we be flexible.
- return 1;
- }
- const char *toString() const;
-
-private:
- const char *Position;
- Kind kind;
-};
-
-class LengthModifier {
-public:
- enum Kind {
- None,
- AsChar, // 'hh'
- AsShort, // 'h'
- AsLong, // 'l'
- AsLongLong, // 'll', 'q' (BSD, deprecated)
- AsIntMax, // 'j'
- AsSizeT, // 'z'
- AsPtrDiff, // 't'
- AsLongDouble, // 'L'
- AsWideChar = AsLong // for '%ls'
- };
-
- LengthModifier()
- : Position(0), kind(None) {}
- LengthModifier(const char *pos, Kind k)
- : Position(pos), kind(k) {}
-
- const char *getStart() const {
- return Position;
- }
-
- unsigned getLength() const {
- switch (kind) {
- default:
- return 1;
- case AsLongLong:
- case AsChar:
- return 2;
- case None:
- return 0;
- }
- }
-
- Kind getKind() const { return kind; }
- void setKind(Kind k) { kind = k; }
-
- const char *toString() const;
-
-private:
- const char *Position;
- Kind kind;
-};
-
-class OptionalAmount {
-public:
- enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
-
- OptionalAmount(HowSpecified howSpecified,
- unsigned amount,
- const char *amountStart,
- unsigned amountLength,
- bool usesPositionalArg)
- : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
- UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
-
- OptionalAmount(bool valid = true)
- : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
- UsesPositionalArg(0), UsesDotPrefix(0) {}
-
- bool isInvalid() const {
- return hs == Invalid;
- }
-
- HowSpecified getHowSpecified() const { return hs; }
- void setHowSpecified(HowSpecified h) { hs = h; }
-
- bool hasDataArgument() const { return hs == Arg; }
-
- unsigned getArgIndex() const {
- assert(hasDataArgument());
- return amt;
- }
-
- unsigned getConstantAmount() const {
- assert(hs == Constant);
- return amt;
- }
-
- const char *getStart() const {
- // We include the . character if it is given.
- return start - UsesDotPrefix;
- }
-
- unsigned getConstantLength() const {
- assert(hs == Constant);
- return length + UsesDotPrefix;
- }
-
- ArgTypeResult getArgType(ASTContext &Ctx) const;
-
- void toString(llvm::raw_ostream &os) const;
-
- bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
- unsigned getPositionalArgIndex() const {
- assert(hasDataArgument());
- return amt + 1;
- }
-
- bool usesDotPrefix() const { return UsesDotPrefix; }
- void setUsesDotPrefix() { UsesDotPrefix = true; }
-
-private:
- const char *start;
- unsigned length;
- HowSpecified hs;
- unsigned amt;
- bool UsesPositionalArg : 1;
- bool UsesDotPrefix;
-};
-
-// Class representing optional flags with location and representation
-// information.
-class OptionalFlag {
-public:
- OptionalFlag(const char *Representation)
- : representation(Representation), flag(false) {}
- bool isSet() { return flag; }
- void set() { flag = true; }
- void clear() { flag = false; }
- void setPosition(const char *position) {
- assert(position);
- this->position = position;
- }
- const char *getPosition() const {
- assert(position);
- return position;
- }
- const char *toString() const { return representation; }
-
- // Overloaded operators for bool like qualities
- operator bool() const { return flag; }
- OptionalFlag& operator=(const bool &rhs) {
- flag = rhs;
- return *this; // Return a reference to myself.
- }
-private:
- const char *representation;
- const char *position;
- bool flag;
-};
-
-class FormatSpecifier {
- LengthModifier LM;
- OptionalFlag IsLeftJustified; // '-'
- OptionalFlag HasPlusPrefix; // '+'
- OptionalFlag HasSpacePrefix; // ' '
- OptionalFlag HasAlternativeForm; // '#'
- OptionalFlag HasLeadingZeroes; // '0'
- /// Positional arguments, an IEEE extension:
- /// IEEE Std 1003.1, 2004 Edition
- /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
- bool UsesPositionalArg;
- unsigned argIndex;
- ConversionSpecifier CS;
- OptionalAmount FieldWidth;
- OptionalAmount Precision;
-public:
- FormatSpecifier() :
- IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
- HasAlternativeForm("#"), HasLeadingZeroes("0"), UsesPositionalArg(false),
- argIndex(0) {}
-
- static FormatSpecifier Parse(const char *beg, const char *end);
-
- // Methods for incrementally constructing the FormatSpecifier.
- void setConversionSpecifier(const ConversionSpecifier &cs) {
- CS = cs;
- }
- void setLengthModifier(LengthModifier lm) {
- LM = lm;
- }
- void setIsLeftJustified(const char *position) {
- IsLeftJustified = true;
- IsLeftJustified.setPosition(position);
- }
- void setHasPlusPrefix(const char *position) {
- HasPlusPrefix = true;
- HasPlusPrefix.setPosition(position);
- }
- void setHasSpacePrefix(const char *position) {
- HasSpacePrefix = true;
- HasSpacePrefix.setPosition(position);
- }
- void setHasAlternativeForm(const char *position) {
- HasAlternativeForm = true;
- HasAlternativeForm.setPosition(position);
- }
- void setHasLeadingZeros(const char *position) {
- HasLeadingZeroes = true;
- HasLeadingZeroes.setPosition(position);
- }
- void setUsesPositionalArg() { UsesPositionalArg = true; }
-
- void setArgIndex(unsigned i) {
- assert(CS.consumesDataArgument());
- argIndex = i;
- }
-
- unsigned getArgIndex() const {
- assert(CS.consumesDataArgument());
- return argIndex;
- }
-
- unsigned getPositionalArgIndex() const {
- assert(CS.consumesDataArgument());
- return argIndex + 1;
- }
-
- // Methods for querying the format specifier.
-
- const ConversionSpecifier &getConversionSpecifier() const {
- return CS;
- }
-
- const LengthModifier &getLengthModifier() const {
- return LM;
- }
-
- const OptionalAmount &getFieldWidth() const {
- return FieldWidth;
- }
-
- void setFieldWidth(const OptionalAmount &Amt) {
- FieldWidth = Amt;
- }
-
- void setPrecision(const OptionalAmount &Amt) {
- Precision = Amt;
- Precision.setUsesDotPrefix();
- }
-
- const OptionalAmount &getPrecision() const {
- return Precision;
- }
-
- /// \brief Returns the builtin type that a data argument
- /// paired with this format specifier should have. This method
- /// will return null if the format specifier does not have
- /// a matching data argument or the matching argument matches
- /// more than one type.
- ArgTypeResult getArgType(ASTContext &Ctx) const;
-
- const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
- const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
- const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
- const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
- const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
- bool usesPositionalArg() const { return UsesPositionalArg; }
-
- /// Changes the specifier and length according to a QualType, retaining any
- /// flags or options. Returns true on success, or false when a conversion
- /// was not successful.
- bool fixType(QualType QT);
-
- void toString(llvm::raw_ostream &os) const;
-
- // Validation methods - to check if any element results in undefined behavior
- bool hasValidPlusPrefix() const;
- bool hasValidAlternativeForm() const;
- bool hasValidLeadingZeros() const;
- bool hasValidSpacePrefix() const;
- bool hasValidLeftJustified() const;
-
- bool hasValidLengthModifier() const;
- bool hasValidPrecision() const;
- bool hasValidFieldWidth() const;
-};
-
-enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
-
-class FormatStringHandler {
-public:
- FormatStringHandler() {}
- virtual ~FormatStringHandler();
-
- virtual void HandleIncompleteFormatSpecifier(const char *startSpecifier,
- unsigned specifierLen) {}
-
- virtual void HandleNullChar(const char *nullCharacter) {}
-
- virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
- PositionContext p) {}
-
- virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
-
- virtual bool
- HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) { return true; }
-
- virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) {
- return true;
- }
-};
-
-bool ParseFormatString(FormatStringHandler &H,
- const char *beg, const char *end);
-
-} // end printf namespace
-} // end clang namespace
-#endif
"initializer of a builtin type can only take one argument">;
def err_value_init_for_array_type : Error<
"array types cannot be value-initialized">;
-def warn_printf_nonliteral_noargs : Warning<
+def warn_format_nonliteral_noargs : Warning<
"format string is not a string literal (potentially insecure)">,
InGroup<FormatSecurity>;
-def warn_printf_nonliteral : Warning<
+def warn_format_nonliteral : Warning<
"format string is not a string literal">,
InGroup<FormatNonLiteral>, DefaultIgnore;
"invalid conversion specifier '%0'">, InGroup<Format>;
def warn_printf_incomplete_specifier : Warning<
"incomplete format specifier">, InGroup<Format>;
-def warn_printf_missing_format_string : Warning<
+def warn_missing_format_string : Warning<
"format string missing">, InGroup<Format>;
+def warn_scanf_nonzero_width : Warning<
+ "conversion specifies 0 input characters for field width">,
+ InGroup<Format>;
def warn_printf_conversion_argument_type_mismatch : Warning<
"conversion specifies type %0 but the argument has type %1">,
InGroup<Format>;
def warn_printf_positional_arg_exceeds_data_args : Warning <
"data argument position '%0' exceeds the number of data arguments (%1)">,
InGroup<Format>;
-def warn_printf_zero_positional_specifier : Warning<
+def warn_format_zero_positional_specifier : Warning<
"position arguments in format strings start counting at 1 (not 0)">,
InGroup<Format>;
-def warn_printf_invalid_positional_specifier : Warning<
+def warn_format_invalid_positional_specifier : Warning<
"invalid position specified for %select{field width|field precision}0">,
InGroup<Format>;
-def warn_printf_mix_positional_nonpositional_args : Warning<
+def warn_format_mix_positional_nonpositional_args : Warning<
"cannot mix positional and non-positional arguments in format string">,
InGroup<Format>;
def warn_null_arg : Warning<
"null passed to a callee which requires a non-null argument">,
InGroup<NonNull>;
-def warn_printf_empty_format_string : Warning<
+def warn_empty_format_string : Warning<
"format string is empty">, InGroup<FormatZeroLength>;
-def warn_printf_format_string_is_wide_literal : Warning<
+def warn_format_string_is_wide_literal : Warning<
"format string should not be a wide string">, InGroup<Format>;
def warn_printf_format_string_contains_null_char : Warning<
"format string contains '\\0' within the string body">, InGroup<Format>;
add_clang_library(clangAnalysis
AnalysisContext.cpp
CFG.cpp
+ FormatString.cpp
LiveVariables.cpp
PrintfFormatString.cpp
ReachableCode.cpp
+ ScanfFormatString.cpp
UninitializedValues.cpp
)
--- /dev/null
+// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shared details for processing format strings of printf and scanf
+// (and friends).
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatStringParsing.h"
+
+using clang::analyze_format_string::ArgTypeResult;
+using clang::analyze_format_string::FormatStringHandler;
+using clang::analyze_format_string::FormatSpecifier;
+using clang::analyze_format_string::LengthModifier;
+using clang::analyze_format_string::OptionalAmount;
+using clang::analyze_format_string::PositionContext;
+using namespace clang;
+
+// Key function to FormatStringHandler: the destructor is deliberately
+// defined out of line in this file.
+FormatStringHandler::~FormatStringHandler() {
+}
+
+//===----------------------------------------------------------------------===//
+// Functions for parsing format strings components in both printf and
+// scanf format strings.
+//===----------------------------------------------------------------------===//
+
+/// Parse an unsigned decimal constant from the character range [Beg, E).
+///
+/// On return \p Beg has been advanced past every digit that was scanned; it
+/// is left unchanged when the first character is not a digit.
+///
+/// \returns a Constant OptionalAmount when one or more digits are followed,
+/// before \p E, by a non-digit character.  In every other case (no digits, or
+/// digits running all the way to \p E) a default-constructed OptionalAmount
+/// is returned.  A digit sequence too large for 'unsigned' yields the largest
+/// unsigned value instead of silently wrapping around.
+OptionalAmount
+clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
+  const char *I = Beg;
+  // Copies the final value of I into Beg when this function returns.  Until
+  // then Beg still names the first character, which is what the Constant
+  // result below records as its start.
+  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
+
+  const unsigned MaxAmount = ~0U;
+  unsigned accumulator = 0;
+  bool hasDigits = false;
+
+  for ( ; I != E; ++I) {
+    char c = *I;
+    if (c >= '0' && c <= '9') {
+      const unsigned digit = c - '0';
+      hasDigits = true;
+      // accumulator * 10 + digit would exceed MaxAmount exactly when
+      // accumulator > (MaxAmount - digit) / 10; saturate in that case so a
+      // huge amount can never wrap to a small (possibly zero) value.
+      if (accumulator > (MaxAmount - digit) / 10)
+        accumulator = MaxAmount;
+      else
+        accumulator = (accumulator * 10) + digit;
+      continue;
+    }
+
+    // First non-digit: report the constant if any digits preceded it.
+    if (hasDigits)
+      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
+                            false);
+
+    break;
+  }
+
+  return OptionalAmount();
+}
+
+/// Parse a field width or precision that is not written in positional form.
+///
+/// A leading '*' is consumed and produces an Arg amount that records the
+/// current value of \p argIndex, which is then incremented.  Any other input
+/// is handed to ParseAmount.  The caller must ensure \p Beg can be
+/// dereferenced.
+OptionalAmount
+clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
+                                                     const char *E,
+                                                     unsigned &argIndex) {
+  if (*Beg != '*')
+    return ParseAmount(Beg, E);
+
+  // Step over the '*' and claim the next argument index.
+  ++Beg;
+  const unsigned claimedIndex = argIndex++;
+  return OptionalAmount(OptionalAmount::Arg, claimedIndex, Beg, 0, false);
+}
+
+/// Parse a field width or precision in a specifier that uses positional
+/// arguments.
+///
+/// Input that does not start with '*' is handed to ParseAmount.  Otherwise
+/// the text must have the form '*<digits>$' with a position of at least 1;
+/// on success \p Beg is moved past the '$' and an Arg amount holding the
+/// zero-based position is returned.  Every malformed form is reported through
+/// \p H and produces OptionalAmount(false), leaving \p Beg unchanged.
+OptionalAmount
+clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
+                                                  const char *Start,
+                                                  const char *&Beg,
+                                                  const char *E,
+                                                  PositionContext p) {
+  // Anything that does not begin with '*' is an ordinary amount.
+  if (*Beg != '*')
+    return ParseAmount(Beg, E);
+
+  const char *Cur = Beg + 1;
+  const OptionalAmount &Position = ParseAmount(Cur, E);
+
+  // A '*' that is not followed by a usable number.
+  if (Position.getHowSpecified() == OptionalAmount::NotSpecified) {
+    H.HandleInvalidPosition(Beg, Cur - Beg, p);
+    return OptionalAmount(false);
+  }
+
+  // No more characters left?
+  if (Cur == E) {
+    H.HandleIncompleteSpecifier(Start, E - Start);
+    return OptionalAmount(false);
+  }
+
+  assert(Position.getHowSpecified() == OptionalAmount::Constant);
+
+  // The number must be terminated by '$' to name a position.
+  if (*Cur != '$') {
+    H.HandleInvalidPosition(Beg, Cur - Beg, p);
+    return OptionalAmount(false);
+  }
+
+  // Special case: '*0$', since this is an easy mistake.
+  if (Position.getConstantAmount() == 0) {
+    H.HandleZeroPosition(Beg, Cur - Beg + 1);
+    return OptionalAmount(false);
+  }
+
+  // Consume everything up to and including the '$'; the amount itself is
+  // anchored at the '*'.
+  const char *StarLoc = Beg;
+  Beg = Cur + 1;
+  return OptionalAmount(OptionalAmount::Arg, Position.getConstantAmount() - 1,
+                        StarLoc, 0, true);
+}
+
+
+/// Parse the field width of a conversion and store it in \p CS.
+///
+/// A non-null \p argIndex selects non-positional parsing and is updated when
+/// a '*' width claims an argument.  A null \p argIndex selects positional
+/// parsing, with problems reported through \p H.
+///
+/// \returns true if positional parsing produced an invalid amount (the field
+/// width is then left unset), false otherwise.
+bool
+clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
+                                              FormatSpecifier &CS,
+                                              const char *Start,
+                                              const char *&Beg, const char *E,
+                                              unsigned *argIndex) {
+  // FIXME: Support negative field widths.
+  if (!argIndex) {
+    const OptionalAmount Width =
+      ParsePositionAmount(H, Start, Beg, E,
+                          analyze_format_string::FieldWidthPos);
+
+    if (Width.isInvalid())
+      return true;
+
+    CS.setFieldWidth(Width);
+    return false;
+  }
+
+  CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
+  return false;
+}
+
+/// Look for a leading '<digits>$' argument position at \p Beg.
+///
+/// When one is found, its zero-based index is stored in \p FS, \p FS is
+/// marked as using positional arguments, and \p Beg is advanced past the '$'.
+/// When the text is not of that form nothing is consumed.
+///
+/// \returns true if an error was reported through \p H (the input ended
+/// after the scanned digits, or the position was 0), false otherwise.
+bool
+clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
+                                               FormatSpecifier &FS,
+                                               const char *Start,
+                                               const char *&Beg,
+                                               const char *E) {
+  const char *Cur = Beg;
+  const OptionalAmount &Position = ParseAmount(Cur, E);
+
+  // No more characters left?
+  if (Cur == E) {
+    H.HandleIncompleteSpecifier(Start, E - Start);
+    return true;
+  }
+
+  // Not '<digits>$': leave the caller's pointer where it was.
+  if (Position.getHowSpecified() != OptionalAmount::Constant || *Cur != '$')
+    return false;
+
+  const char *AfterDollar = Cur + 1;
+
+  // Special case: '%0$', since this is an easy mistake.
+  if (Position.getConstantAmount() == 0) {
+    H.HandleZeroPosition(Start, AfterDollar - Start);
+    return true;
+  }
+
+  FS.setArgIndex(Position.getConstantAmount() - 1);
+  FS.setUsesPositionalArg();
+  // The characters were accepted, so update the caller's pointer.
+  Beg = AfterDollar;
+  return false;
+}
+
+/// Parse an optional length modifier at \p I and record it in \p FS.
+///
+/// Recognizes 'hh', 'h', 'l', 'll', 'j', 'z', 't', 'L' and 'q' ('q' is
+/// recorded as AsLongLong).  \p I is advanced past the modifier.  The caller
+/// must ensure \p I can be dereferenced on entry.
+///
+/// \returns true if a modifier was consumed; false, with \p FS and \p I
+/// untouched, if the character at \p I does not start one.
+bool
+clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
+                                                  const char *&I,
+                                                  const char *E) {
+  const char *ModifierStart = I;
+  LengthModifier::Kind Kind = LengthModifier::None;
+
+  switch (*I) {
+    default:
+      return false;
+    case 'h':
+      ++I;
+      // A second 'h' upgrades 'h' to 'hh'.
+      if (I != E && *I == 'h') {
+        ++I;
+        Kind = LengthModifier::AsChar;
+      } else {
+        Kind = LengthModifier::AsShort;
+      }
+      break;
+    case 'l':
+      ++I;
+      // A second 'l' upgrades 'l' to 'll'.
+      if (I != E && *I == 'l') {
+        ++I;
+        Kind = LengthModifier::AsLongLong;
+      } else {
+        Kind = LengthModifier::AsLong;
+      }
+      break;
+    case 'j':
+      ++I;
+      Kind = LengthModifier::AsIntMax;
+      break;
+    case 'z':
+      ++I;
+      Kind = LengthModifier::AsSizeT;
+      break;
+    case 't':
+      ++I;
+      Kind = LengthModifier::AsPtrDiff;
+      break;
+    case 'L':
+      ++I;
+      Kind = LengthModifier::AsLongDouble;
+      break;
+    case 'q':
+      ++I;
+      Kind = LengthModifier::AsLongLong;
+      break;
+  }
+
+  LengthModifier Modifier(ModifierStart, Kind);
+  FS.setLengthModifier(Modifier);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Methods on ArgTypeResult.
+//===----------------------------------------------------------------------===//
+
+/// Decide whether an argument of type \p argTy is acceptable for this
+/// ArgTypeResult.
+///
+///  - UnknownTy accepts everything (InvalidTy asserts, then accepts).
+///  - SpecificTy accepts the canonical, unqualified argument type when it
+///    equals T, or when it is a builtin integer type and T is the listed
+///    counterpart of opposite signedness.
+///  - CStrTy accepts pointers to void or to any of the char builtin types.
+///  - WCStrTy accepts pointers whose canonical, unqualified pointee is the
+///    wide character type.
+///  - CPointerTy accepts any pointer or Objective-C object pointer.
+///  - ObjCPointerTy accepts only Objective-C object pointers.
+bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
+  switch (K) {
+    case InvalidTy:
+      assert(false && "ArgTypeResult must be valid");
+      return true;
+
+    case UnknownTy:
+      return true;
+
+    case SpecificTy: {
+      const QualType Canon = C.getCanonicalType(argTy).getUnqualifiedType();
+      if (T == Canon)
+        return true;
+
+      const BuiltinType *Builtin = Canon->getAs<BuiltinType>();
+      if (!Builtin)
+        return false;
+
+      // Tolerate a signedness mismatch between integer types of one rank.
+      switch (Builtin->getKind()) {
+        case BuiltinType::Char_S:
+        case BuiltinType::SChar:
+          return T == C.UnsignedCharTy;
+        case BuiltinType::Char_U:
+        case BuiltinType::UChar:
+          return T == C.SignedCharTy;
+        case BuiltinType::Short:
+          return T == C.UnsignedShortTy;
+        case BuiltinType::UShort:
+          return T == C.ShortTy;
+        case BuiltinType::Int:
+          return T == C.UnsignedIntTy;
+        case BuiltinType::UInt:
+          return T == C.IntTy;
+        case BuiltinType::Long:
+          return T == C.UnsignedLongTy;
+        case BuiltinType::ULong:
+          return T == C.LongTy;
+        case BuiltinType::LongLong:
+          return T == C.UnsignedLongLongTy;
+        case BuiltinType::ULongLong:
+          return T == C.LongLongTy;
+        default:
+          break;
+      }
+      return false;
+    }
+
+    case CStrTy: {
+      const PointerType *Ptr = argTy->getAs<PointerType>();
+      if (!Ptr)
+        return false;
+
+      const BuiltinType *Pointee =
+        Ptr->getPointeeType()->getAs<BuiltinType>();
+      if (!Pointee)
+        return false;
+
+      switch (Pointee->getKind()) {
+        case BuiltinType::Void:
+        case BuiltinType::Char_U:
+        case BuiltinType::UChar:
+        case BuiltinType::Char_S:
+        case BuiltinType::SChar:
+          return true;
+        default:
+          break;
+      }
+      return false;
+    }
+
+    case WCStrTy: {
+      const PointerType *Ptr = argTy->getAs<PointerType>();
+      if (!Ptr)
+        return false;
+
+      QualType Pointee =
+        C.getCanonicalType(Ptr->getPointeeType()).getUnqualifiedType();
+      return Pointee == C.getWCharType();
+    }
+
+    case CPointerTy:
+      if (argTy->getAs<PointerType>() != NULL)
+        return true;
+      return argTy->getAs<ObjCObjectPointerType>() != NULL;
+
+    case ObjCPointerTy:
+      return argTy->getAs<ObjCObjectPointerType>() != NULL;
+  }
+
+  // FIXME: Should be unreachable, but Clang is currently emitting
+  // a warning.
+  return false;
+}
+
+/// Produce one concrete type that stands for this ArgTypeResult.
+///
+/// \returns T for SpecificTy, 'char *' for CStrTy, a pointer to the wide
+/// character type for WCStrTy, 'void *' for CPointerTy, the builtin 'id'
+/// type for ObjCPointerTy, and a null QualType for UnknownTy (and, after
+/// asserting, for InvalidTy).
+QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
+  switch (K) {
+    case SpecificTy:
+      return T;
+    case CStrTy:
+      return C.getPointerType(C.CharTy);
+    case WCStrTy:
+      return C.getPointerType(C.getWCharType());
+    case CPointerTy:
+      return C.VoidPtrTy;
+    case ObjCPointerTy:
+      return C.ObjCBuiltinIdTy;
+    case InvalidTy:
+      assert(false && "No representative type for Invalid ArgTypeResult");
+      return QualType();
+    case UnknownTy:
+      return QualType();
+  }
+
+  // FIXME: Should be unreachable, but Clang is currently emitting
+  // a warning.
+  return QualType();
+}
+
+//===----------------------------------------------------------------------===//
+// Methods on OptionalAmount.
+//===----------------------------------------------------------------------===//
+
+/// The argument type expected for this amount: always the context's 'int'
+/// type, regardless of how the amount was specified.
+ArgTypeResult
+analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
+  return ArgTypeResult(Ctx.IntTy);
+}
+
+//===----------------------------------------------------------------------===//
+// Methods on LengthModifier.
+//===----------------------------------------------------------------------===//
+
+/// Spell this length modifier as it is written in a format string.
+///
+/// \returns the modifier text ("" for None), or NULL if the kind is not one
+/// of the cases handled here.
+const char *
+analyze_format_string::LengthModifier::toString() const {
+  switch (kind) {
+    case None:         return "";
+    case AsChar:       return "hh";
+    case AsShort:      return "h";
+    case AsLong:       return "l";   // or AsWideChar
+    case AsLongLong:   return "ll";
+    case AsIntMax:     return "j";
+    case AsSizeT:      return "z";
+    case AsPtrDiff:    return "t";
+    case AsLongDouble: return "L";
+  }
+  return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+// Methods on OptionalAmount.
+//===----------------------------------------------------------------------===//
+
+/// Write this amount to \p os in format-string syntax.
+///
+/// Nothing is written unless the amount is an Arg or a Constant.  Either one
+/// is preceded by "." when UsesDotPrefix is set.  A Constant prints its
+/// value; an Arg prints "*", followed by "<index>$" when it is positional.
+void
+analyze_format_string::OptionalAmount::toString(llvm::raw_ostream &os) const {
+  // Invalid, NotSpecified (and anything else) print nothing.
+  if (hs != Arg && hs != Constant)
+    return;
+
+  if (UsesDotPrefix)
+    os << ".";
+
+  if (hs == Constant) {
+    os << amt;
+    return;
+  }
+
+  os << "*";
+  if (usesPositionalArg())
+    os << getPositionalArgIndex() << "$";
+}
+
-//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
+//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
//
//===----------------------------------------------------------------------===//
-#include "clang/Analysis/Analyses/PrintfFormatString.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/Type.h"
-#include "llvm/Support/raw_ostream.h"
+#include "clang/Analysis/Analyses/FormatString.h"
+#include "FormatStringParsing.h"
-using clang::analyze_printf::ArgTypeResult;
-using clang::analyze_printf::FormatSpecifier;
-using clang::analyze_printf::FormatStringHandler;
-using clang::analyze_printf::OptionalAmount;
-using clang::analyze_printf::PositionContext;
+using clang::analyze_format_string::ArgTypeResult;
+using clang::analyze_format_string::FormatStringHandler;
+using clang::analyze_format_string::LengthModifier;
+using clang::analyze_format_string::OptionalAmount;
using clang::analyze_printf::ConversionSpecifier;
-using clang::analyze_printf::LengthModifier;
+using clang::analyze_printf::PrintfSpecifier;
using namespace clang;
-namespace {
-class FormatSpecifierResult {
- FormatSpecifier FS;
- const char *Start;
- bool Stop;
-public:
- FormatSpecifierResult(bool stop = false)
- : Start(0), Stop(stop) {}
- FormatSpecifierResult(const char *start,
- const FormatSpecifier &fs)
- : FS(fs), Start(start), Stop(false) {}
-
- const char *getStart() const { return Start; }
- bool shouldStop() const { return Stop; }
- bool hasValue() const { return Start != 0; }
- const FormatSpecifier &getValue() const {
- assert(hasValue());
- return FS;
- }
- const FormatSpecifier &getValue() { return FS; }
-};
-} // end anonymous namespace
-
-template <typename T>
-class UpdateOnReturn {
- T &ValueToUpdate;
- const T &ValueToCopy;
-public:
- UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
- : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {}
-
- ~UpdateOnReturn() {
- ValueToUpdate = ValueToCopy;
- }
-};
+typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
+ PrintfSpecifierResult;
//===----------------------------------------------------------------------===//
// Methods for parsing format strings.
//===----------------------------------------------------------------------===//
-static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
- const char *I = Beg;
- UpdateOnReturn <const char*> UpdateBeg(Beg, I);
-
- unsigned accumulator = 0;
- bool hasDigits = false;
-
- for ( ; I != E; ++I) {
- char c = *I;
- if (c >= '0' && c <= '9') {
- hasDigits = true;
- accumulator = (accumulator * 10) + (c - '0');
- continue;
- }
-
- if (hasDigits)
- return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
- false);
-
- break;
- }
-
- return OptionalAmount();
-}
-
-static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
- unsigned &argIndex) {
- if (*Beg == '*') {
- ++Beg;
- return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
- }
-
- return ParseAmount(Beg, E);
-}
-
-static OptionalAmount ParsePositionAmount(FormatStringHandler &H,
- const char *Start,
- const char *&Beg, const char *E,
- PositionContext p) {
- if (*Beg == '*') {
- const char *I = Beg + 1;
- const OptionalAmount &Amt = ParseAmount(I, E);
-
- if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
- H.HandleInvalidPosition(Beg, I - Beg, p);
- return OptionalAmount(false);
- }
-
- if (I== E) {
- // No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
- return OptionalAmount(false);
- }
-
- assert(Amt.getHowSpecified() == OptionalAmount::Constant);
-
- if (*I == '$') {
- // Handle positional arguments
-
- // Special case: '*0$', since this is an easy mistake.
- if (Amt.getConstantAmount() == 0) {
- H.HandleZeroPosition(Beg, I - Beg + 1);
- return OptionalAmount(false);
- }
-
- const char *Tmp = Beg;
- Beg = ++I;
-
- return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
- Tmp, 0, true);
- }
-
- H.HandleInvalidPosition(Beg, I - Beg, p);
- return OptionalAmount(false);
- }
-
- return ParseAmount(Beg, E);
-}
+using analyze_format_string::ParseNonPositionAmount;
-static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
+static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
const char *Start, const char *&Beg, const char *E,
unsigned *argIndex) {
if (argIndex) {
}
else {
const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
- analyze_printf::PrecisionPos);
+ analyze_format_string::PrecisionPos);
if (Amt.isInvalid())
return true;
FS.setPrecision(Amt);
return false;
}
-static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
- const char *Start, const char *&Beg, const char *E,
- unsigned *argIndex) {
- // FIXME: Support negative field widths.
- if (argIndex) {
- FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
- }
- else {
- const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
- analyze_printf::FieldWidthPos);
- if (Amt.isInvalid())
- return true;
- FS.setFieldWidth(Amt);
- }
- return false;
-}
-
-static bool ParseArgPosition(FormatStringHandler &H,
- FormatSpecifier &FS, const char *Start,
- const char *&Beg, const char *E) {
-
- using namespace clang::analyze_printf;
- const char *I = Beg;
-
- const OptionalAmount &Amt = ParseAmount(I, E);
-
- if (I == E) {
- // No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
- return true;
- }
-
- if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
- // Special case: '%0$', since this is an easy mistake.
- if (Amt.getConstantAmount() == 0) {
- H.HandleZeroPosition(Start, I - Start);
- return true;
- }
-
- FS.setArgIndex(Amt.getConstantAmount() - 1);
- FS.setUsesPositionalArg();
- // Update the caller's pointer if we decided to consume
- // these characters.
- Beg = I;
- return false;
- }
-
- return false;
-}
-
-static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
+static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
const char *&Beg,
const char *E,
unsigned &argIndex) {
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
- FormatSpecifier FS;
+ PrintfSpecifier FS;
if (ParseArgPosition(H, FS, Start, I, E))
return true;
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
if (*I == '.') {
++I;
if (I == E) {
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
}
// Look for the length modifier.
- LengthModifier::Kind lmKind = LengthModifier::None;
- const char *lmPosition = I;
- switch (*I) {
- default:
- break;
- case 'h':
- ++I;
- lmKind = (I != E && *I == 'h') ?
- ++I, LengthModifier::AsChar : LengthModifier::AsShort;
- break;
- case 'l':
- ++I;
- lmKind = (I != E && *I == 'l') ?
- ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
- break;
- case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
- case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
- case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
- case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
- case 'q': lmKind = LengthModifier::AsLongLong; ++I; break;
- }
- LengthModifier lm(lmPosition, lmKind);
- FS.setLengthModifier(lm);
-
- if (I == E) {
+ if (ParseLengthModifier(FS, I, E) && I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
if (k == ConversionSpecifier::InvalidSpecifier) {
// Assume the conversion takes one argument.
- return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
+ return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
}
- return FormatSpecifierResult(Start, FS);
+ return PrintfSpecifierResult(Start, FS);
}
-bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
- const char *I, const char *E) {
+bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
+ const char *I,
+ const char *E) {
unsigned argIndex = 0;
// Keep looking for a format specifier until we have exhausted the string.
while (I != E) {
- const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
+ const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex);
// Did a fail-stop error of any kind occur when parsing the specifier?
// If so, don't do any more processing.
if (FSR.shouldStop())
if (!FSR.hasValue())
continue;
// We have a format specifier. Pass it to the callback.
- if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
+ if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
I - FSR.getStart()))
return true;
}
return false;
}
-FormatStringHandler::~FormatStringHandler() {}
-
-//===----------------------------------------------------------------------===//
-// Methods on ArgTypeResult.
-//===----------------------------------------------------------------------===//
-
-bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
- switch (K) {
- case InvalidTy:
- assert(false && "ArgTypeResult must be valid");
- return true;
-
- case UnknownTy:
- return true;
-
- case SpecificTy: {
- argTy = C.getCanonicalType(argTy).getUnqualifiedType();
- if (T == argTy)
- return true;
- if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
- switch (BT->getKind()) {
- default:
- break;
- case BuiltinType::Char_S:
- case BuiltinType::SChar:
- return T == C.UnsignedCharTy;
- case BuiltinType::Char_U:
- case BuiltinType::UChar:
- return T == C.SignedCharTy;
- case BuiltinType::Short:
- return T == C.UnsignedShortTy;
- case BuiltinType::UShort:
- return T == C.ShortTy;
- case BuiltinType::Int:
- return T == C.UnsignedIntTy;
- case BuiltinType::UInt:
- return T == C.IntTy;
- case BuiltinType::Long:
- return T == C.UnsignedLongTy;
- case BuiltinType::ULong:
- return T == C.LongTy;
- case BuiltinType::LongLong:
- return T == C.UnsignedLongLongTy;
- case BuiltinType::ULongLong:
- return T == C.LongLongTy;
- }
- return false;
- }
-
- case CStrTy: {
- const PointerType *PT = argTy->getAs<PointerType>();
- if (!PT)
- return false;
- QualType pointeeTy = PT->getPointeeType();
- if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
- switch (BT->getKind()) {
- case BuiltinType::Void:
- case BuiltinType::Char_U:
- case BuiltinType::UChar:
- case BuiltinType::Char_S:
- case BuiltinType::SChar:
- return true;
- default:
- break;
- }
-
- return false;
- }
-
- case WCStrTy: {
- const PointerType *PT = argTy->getAs<PointerType>();
- if (!PT)
- return false;
- QualType pointeeTy =
- C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
- return pointeeTy == C.getWCharType();
- }
-
- case CPointerTy:
- return argTy->getAs<PointerType>() != NULL ||
- argTy->getAs<ObjCObjectPointerType>() != NULL;
-
- case ObjCPointerTy:
- return argTy->getAs<ObjCObjectPointerType>() != NULL;
- }
-
- // FIXME: Should be unreachable, but Clang is currently emitting
- // a warning.
- return false;
-}
-
-QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
- switch (K) {
- case InvalidTy:
- assert(false && "No representative type for Invalid ArgTypeResult");
- // Fall-through.
- case UnknownTy:
- return QualType();
- case SpecificTy:
- return T;
- case CStrTy:
- return C.getPointerType(C.CharTy);
- case WCStrTy:
- return C.getPointerType(C.getWCharType());
- case ObjCPointerTy:
- return C.ObjCBuiltinIdTy;
- case CPointerTy:
- return C.VoidPtrTy;
- }
-
- // FIXME: Should be unreachable, but Clang is currently emitting
- // a warning.
- return QualType();
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on OptionalAmount.
-//===----------------------------------------------------------------------===//
-
-ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
- return Ctx.IntTy;
-}
-
//===----------------------------------------------------------------------===//
// Methods on ConversionSpecifier.
//===----------------------------------------------------------------------===//
}
//===----------------------------------------------------------------------===//
-// Methods on LengthModifier.
-//===----------------------------------------------------------------------===//
-
-const char *LengthModifier::toString() const {
- switch (kind) {
- case AsChar:
- return "hh";
- case AsShort:
- return "h";
- case AsLong: // or AsWideChar
- return "l";
- case AsLongLong:
- return "ll";
- case AsIntMax:
- return "j";
- case AsSizeT:
- return "z";
- case AsPtrDiff:
- return "t";
- case AsLongDouble:
- return "L";
- case None:
- return "";
- }
- return NULL;
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on OptionalAmount.
-//===----------------------------------------------------------------------===//
-
-void OptionalAmount::toString(llvm::raw_ostream &os) const {
- switch (hs) {
- case Invalid:
- case NotSpecified:
- return;
- case Arg:
- if (UsesDotPrefix)
- os << ".";
- if (usesPositionalArg())
- os << "*" << getPositionalArgIndex() << "$";
- else
- os << "*";
- break;
- case Constant:
- if (UsesDotPrefix)
- os << ".";
- os << amt;
- break;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on FormatSpecifier.
+// Methods on PrintfSpecifier.
//===----------------------------------------------------------------------===//
-ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
+ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
if (!CS.consumesDataArgument())
return ArgTypeResult::Invalid();
return ArgTypeResult();
}
-bool FormatSpecifier::fixType(QualType QT) {
+bool PrintfSpecifier::fixType(QualType QT) {
// Handle strings first (char *, wchar_t *)
if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
CS.setKind(ConversionSpecifier::CStrArg);
return true;
}
-void FormatSpecifier::toString(llvm::raw_ostream &os) const {
+void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
// Whilst some features have no defined order, we are using the order
- // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1)
+ // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1)
os << "%";
// Positional args
os << CS.toString();
}
-bool FormatSpecifier::hasValidPlusPrefix() const {
+bool PrintfSpecifier::hasValidPlusPrefix() const {
if (!HasPlusPrefix)
return true;
}
}
-bool FormatSpecifier::hasValidAlternativeForm() const {
+bool PrintfSpecifier::hasValidAlternativeForm() const {
if (!HasAlternativeForm)
return true;
}
}
-bool FormatSpecifier::hasValidLeadingZeros() const {
+bool PrintfSpecifier::hasValidLeadingZeros() const {
if (!HasLeadingZeroes)
return true;
}
}
-bool FormatSpecifier::hasValidSpacePrefix() const {
+bool PrintfSpecifier::hasValidSpacePrefix() const {
if (!HasSpacePrefix)
return true;
}
}
-bool FormatSpecifier::hasValidLeftJustified() const {
+bool PrintfSpecifier::hasValidLeftJustified() const {
if (!IsLeftJustified)
return true;
}
}
-bool FormatSpecifier::hasValidLengthModifier() const {
+bool PrintfSpecifier::hasValidLengthModifier() const {
switch (LM.getKind()) {
case LengthModifier::None:
return true;
return false;
}
-bool FormatSpecifier::hasValidPrecision() const {
+bool PrintfSpecifier::hasValidPrecision() const {
if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
return true;
return false;
}
}
-bool FormatSpecifier::hasValidFieldWidth() const {
+bool PrintfSpecifier::hasValidFieldWidth() const {
if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
return true;
OwningExprResult SemaBuiltinAtomicOverloaded(OwningExprResult TheCallResult);
bool SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum,
llvm::APSInt &Result);
+
bool SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
bool HasVAListArg, unsigned format_idx,
- unsigned firstDataArg);
- void CheckPrintfString(const StringLiteral *FExpr, const Expr *OrigFormatExpr,
+ unsigned firstDataArg, bool isPrintf);
+
+ void CheckFormatString(const StringLiteral *FExpr, const Expr *OrigFormatExpr,
const CallExpr *TheCall, bool HasVAListArg,
- unsigned format_idx, unsigned firstDataArg);
+ unsigned format_idx, unsigned firstDataArg,
+ bool isPrintf);
+
void CheckNonNullArguments(const NonNullAttr *NonNull,
const CallExpr *TheCall);
- void CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
- unsigned format_idx, unsigned firstDataArg);
+
+ void CheckPrintfScanfArguments(const CallExpr *TheCall, bool HasVAListArg,
+ unsigned format_idx, unsigned firstDataArg,
+ bool isPrintf);
+
void CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
SourceLocation ReturnLoc);
void CheckFloatComparison(SourceLocation loc, Expr* lex, Expr* rex);
//===----------------------------------------------------------------------===//
#include "Sema.h"
-#include "clang/Analysis/Analyses/PrintfFormatString.h"
+#include "clang/Analysis/Analyses/FormatString.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/DeclObjC.h"
// Printf checking.
if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
- if (CheckablePrintfAttr(Format, TheCall)) {
+ const bool b = Format->getType() == "scanf";
+ if (b || CheckablePrintfAttr(Format, TheCall)) {
bool HasVAListArg = Format->getFirstArg() == 0;
- CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
- HasVAListArg ? 0 : Format->getFirstArg() - 1);
+ CheckPrintfScanfArguments(TheCall, HasVAListArg,
+ Format->getFormatIdx() - 1,
+ HasVAListArg ? 0 : Format->getFirstArg() - 1,
+ !b);
}
}
if (!Ty->isBlockPointerType())
return false;
- if (!CheckablePrintfAttr(Format, TheCall))
+ const bool b = Format->getType() == "scanf";
+ if (!b && !CheckablePrintfAttr(Format, TheCall))
return false;
bool HasVAListArg = Format->getFirstArg() == 0;
- CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
- HasVAListArg ? 0 : Format->getFirstArg() - 1);
+ CheckPrintfScanfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
+ HasVAListArg ? 0 : Format->getFirstArg() - 1, !b);
return false;
}
// Handle i > 1 ? "x" : "y", recursivelly
bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
bool HasVAListArg,
- unsigned format_idx, unsigned firstDataArg) {
+ unsigned format_idx, unsigned firstDataArg,
+ bool isPrintf) {
+
if (E->isTypeDependent() || E->isValueDependent())
return false;
switch (E->getStmtClass()) {
case Stmt::ConditionalOperatorClass: {
const ConditionalOperator *C = cast<ConditionalOperator>(E);
- return SemaCheckStringLiteral(C->getTrueExpr(), TheCall,
- HasVAListArg, format_idx, firstDataArg)
- && SemaCheckStringLiteral(C->getRHS(), TheCall,
- HasVAListArg, format_idx, firstDataArg);
+ return SemaCheckStringLiteral(C->getTrueExpr(), TheCall, HasVAListArg,
+ format_idx, firstDataArg, isPrintf)
+ && SemaCheckStringLiteral(C->getRHS(), TheCall, HasVAListArg,
+ format_idx, firstDataArg, isPrintf);
}
case Stmt::ImplicitCastExprClass: {
const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E);
return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
- format_idx, firstDataArg);
+ format_idx, firstDataArg, isPrintf);
}
case Stmt::ParenExprClass: {
const ParenExpr *Expr = cast<ParenExpr>(E);
return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
- format_idx, firstDataArg);
+ format_idx, firstDataArg, isPrintf);
}
case Stmt::DeclRefExprClass: {
if (isConstant) {
if (const Expr *Init = VD->getAnyInitializer())
return SemaCheckStringLiteral(Init, TheCall,
- HasVAListArg, format_idx, firstDataArg);
+ HasVAListArg, format_idx, firstDataArg,
+ isPrintf);
}
// For vprintf* functions (i.e., HasVAListArg==true), we add a
const Expr *Arg = CE->getArg(ArgIndex - 1);
return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg,
- format_idx, firstDataArg);
+ format_idx, firstDataArg, isPrintf);
}
}
}
StrE = cast<StringLiteral>(E);
if (StrE) {
- CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
- firstDataArg);
+ CheckFormatString(StrE, E, TheCall, HasVAListArg, format_idx,
+ firstDataArg, isPrintf);
return true;
}
}
}
-/// CheckPrintfArguments - Check calls to printf (and similar functions) for
-/// correct use of format strings.
-///
-/// HasVAListArg - A predicate indicating whether the printf-like
-/// function is passed an explicit va_arg argument (e.g., vprintf)
-///
-/// format_idx - The index into Args for the format string.
-///
-/// Improper format strings to functions in the printf family can be
-/// the source of bizarre bugs and very serious security holes. A
-/// good source of information is available in the following paper
-/// (which includes additional references):
-///
-/// FormatGuard: Automatic Protection From printf Format String
-/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
-///
-/// TODO:
-/// Functionality implemented:
-///
-/// We can statically check the following properties for string
-/// literal format strings for non v.*printf functions (where the
-/// arguments are passed directly):
-//
-/// (1) Are the number of format conversions equal to the number of
-/// data arguments?
-///
-/// (2) Does each format conversion correctly match the type of the
-/// corresponding data argument?
-///
-/// Moreover, for all printf functions we can:
-///
-/// (3) Check for a missing format string (when not caught by type checking).
-///
-/// (4) Check for no-operation flags; e.g. using "#" with format
-/// conversion 'c' (TODO)
-///
-/// (5) Check the use of '%n', a major source of security holes.
-///
-/// (6) Check for malformed format conversions that don't specify anything.
-///
-/// (7) Check for empty format strings. e.g: printf("");
-///
-/// (8) Check that the format string is a wide literal.
-///
-/// All of these checks can be done by parsing the format string.
-///
+/// CheckPrintfScanfArguments - Check calls to printf and scanf (and similar
+/// functions) for correct use of format strings.
void
-Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
- unsigned format_idx, unsigned firstDataArg) {
+Sema::CheckPrintfScanfArguments(const CallExpr *TheCall, bool HasVAListArg,
+ unsigned format_idx, unsigned firstDataArg,
+ bool isPrintf) {
+
const Expr *Fn = TheCall->getCallee();
// The way the format attribute works in GCC, the implicit this argument
--firstDataArg;
}
- // CHECK: printf-like function is called with no format string.
+ // CHECK: printf/scanf-like function is called with no format string.
if (format_idx >= TheCall->getNumArgs()) {
- Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
+ Diag(TheCall->getRParenLoc(), diag::warn_missing_format_string)
<< Fn->getSourceRange();
return;
}
// ObjC string uses the same format specifiers as C string, so we can use
// the same format string checking logic for both ObjC and C strings.
if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx,
- firstDataArg))
+ firstDataArg, isPrintf))
return; // Literal format string found, check done!
// If there are no arguments specified, warn with -Wformat-security, otherwise
// warn only with -Wformat-nonliteral.
if (TheCall->getNumArgs() == format_idx+1)
Diag(TheCall->getArg(format_idx)->getLocStart(),
- diag::warn_printf_nonliteral_noargs)
+ diag::warn_format_nonliteral_noargs)
<< OrigFormatExpr->getSourceRange();
else
Diag(TheCall->getArg(format_idx)->getLocStart(),
- diag::warn_printf_nonliteral)
+ diag::warn_format_nonliteral)
<< OrigFormatExpr->getSourceRange();
}
namespace {
-class CheckPrintfHandler : public analyze_printf::FormatStringHandler {
+class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
+protected:
Sema &S;
const StringLiteral *FExpr;
const Expr *OrigFormatExpr;
bool usesPositionalArgs;
bool atFirstArg;
public:
- CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
+ CheckFormatHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
unsigned numDataArgs, bool isObjCLiteral,
const char *beg, bool hasVAListArg,
void DoneProcessing();
- void HandleIncompleteFormatSpecifier(const char *startSpecifier,
- unsigned specifierLen);
-
- bool
- HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen);
-
+ void HandleIncompleteSpecifier(const char *startSpecifier,
+ unsigned specifierLen);
+
virtual void HandleInvalidPosition(const char *startSpecifier,
unsigned specifierLen,
- analyze_printf::PositionContext p);
+ analyze_format_string::PositionContext p);
virtual void HandleZeroPosition(const char *startPos, unsigned posLen);
void HandleNullChar(const char *nullCharacter);
- bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen);
-private:
+protected:
SourceRange getFormatStringRange();
- CharSourceRange getFormatSpecifierRange(const char *startSpecifier,
- unsigned specifierLen);
+ CharSourceRange getSpecifierRange(const char *startSpecifier,
+ unsigned specifierLen);
SourceLocation getLocationOfByte(const char *x);
- bool HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned k,
- const char *startSpecifier, unsigned specifierLen);
- void HandleInvalidAmount(const analyze_printf::FormatSpecifier &FS,
- const analyze_printf::OptionalAmount &Amt,
- unsigned type,
- const char *startSpecifier, unsigned specifierLen);
- void HandleFlag(const analyze_printf::FormatSpecifier &FS,
- const analyze_printf::OptionalFlag &flag,
- const char *startSpecifier, unsigned specifierLen);
- void HandleIgnoredFlag(const analyze_printf::FormatSpecifier &FS,
- const analyze_printf::OptionalFlag &ignoredFlag,
- const analyze_printf::OptionalFlag &flag,
- const char *startSpecifier, unsigned specifierLen);
-
const Expr *getDataArg(unsigned i) const;
};
}
-SourceRange CheckPrintfHandler::getFormatStringRange() {
+SourceRange CheckFormatHandler::getFormatStringRange() {
return OrigFormatExpr->getSourceRange();
}
-CharSourceRange CheckPrintfHandler::
-getFormatSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
+CharSourceRange CheckFormatHandler::
+getSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
SourceLocation Start = getLocationOfByte(startSpecifier);
SourceLocation End = getLocationOfByte(startSpecifier + specifierLen - 1);
return CharSourceRange::getCharRange(Start, End);
}
-SourceLocation CheckPrintfHandler::getLocationOfByte(const char *x) {
+SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) {
return S.getLocationOfStringLiteralByte(FExpr, x - Beg);
}
-void CheckPrintfHandler::
-HandleIncompleteFormatSpecifier(const char *startSpecifier,
- unsigned specifierLen) {
+void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier,
+ unsigned specifierLen){
SourceLocation Loc = getLocationOfByte(startSpecifier);
S.Diag(Loc, diag::warn_printf_incomplete_specifier)
- << getFormatSpecifierRange(startSpecifier, specifierLen);
+ << getSpecifierRange(startSpecifier, specifierLen);
}
void
-CheckPrintfHandler::HandleInvalidPosition(const char *startPos, unsigned posLen,
- analyze_printf::PositionContext p) {
+CheckFormatHandler::HandleInvalidPosition(const char *startPos, unsigned posLen,
+ analyze_format_string::PositionContext p) {
SourceLocation Loc = getLocationOfByte(startPos);
- S.Diag(Loc, diag::warn_printf_invalid_positional_specifier)
- << (unsigned) p << getFormatSpecifierRange(startPos, posLen);
+ S.Diag(Loc, diag::warn_format_invalid_positional_specifier)
+ << (unsigned) p << getSpecifierRange(startPos, posLen);
}
-void CheckPrintfHandler::HandleZeroPosition(const char *startPos,
+void CheckFormatHandler::HandleZeroPosition(const char *startPos,
unsigned posLen) {
SourceLocation Loc = getLocationOfByte(startPos);
- S.Diag(Loc, diag::warn_printf_zero_positional_specifier)
- << getFormatSpecifierRange(startPos, posLen);
+ S.Diag(Loc, diag::warn_format_zero_positional_specifier)
+ << getSpecifierRange(startPos, posLen);
}
-bool CheckPrintfHandler::
-HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) {
+void CheckFormatHandler::HandleNullChar(const char *nullCharacter) {
+ // The presence of a null character is likely an error.
+ S.Diag(getLocationOfByte(nullCharacter),
+ diag::warn_printf_format_string_contains_null_char)
+ << getFormatStringRange();
+}
+
+const Expr *CheckFormatHandler::getDataArg(unsigned i) const {
+ return TheCall->getArg(FirstDataArg + i);
+}
+void CheckFormatHandler::DoneProcessing() {
+ // Warn about the first data argument that no format conversion
+ // covered. Skipped when the call passes a va_list.
+ if (!HasVAListArg) {
+ // After the flip, set entries mark the arguments left uncovered.
+ CoveredArgs.flip();
+ signed notCoveredArg = CoveredArgs.find_first();
+ if (notCoveredArg >= 0) {
+ assert((unsigned)notCoveredArg < NumDataArgs);
+ S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
+ diag::warn_printf_data_arg_not_used)
+ << getFormatStringRange();
+ }
+ }
+}
+
+//===--- CHECK: Printf format string checking ------------------------------===//
+
+namespace {
+class CheckPrintfHandler : public CheckFormatHandler {
+public:
+ CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
+ const Expr *origFormatExpr, unsigned firstDataArg,
+ unsigned numDataArgs, bool isObjCLiteral,
+ const char *beg, bool hasVAListArg,
+ const CallExpr *theCall, unsigned formatIdx)
+ : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
+ numDataArgs, isObjCLiteral, beg, hasVAListArg,
+ theCall, formatIdx) {}
+
+
+ bool HandleInvalidPrintfConversionSpecifier(
+ const analyze_printf::PrintfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen);
+
+ bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen);
+
+ bool HandleAmount(const analyze_format_string::OptionalAmount &Amt, unsigned k,
+ const char *startSpecifier, unsigned specifierLen);
+ void HandleInvalidAmount(const analyze_printf::PrintfSpecifier &FS,
+ const analyze_printf::OptionalAmount &Amt,
+ unsigned type,
+ const char *startSpecifier, unsigned specifierLen);
+ void HandleFlag(const analyze_printf::PrintfSpecifier &FS,
+ const analyze_printf::OptionalFlag &flag,
+ const char *startSpecifier, unsigned specifierLen);
+ void HandleIgnoredFlag(const analyze_printf::PrintfSpecifier &FS,
+ const analyze_printf::OptionalFlag &ignoredFlag,
+ const analyze_printf::OptionalFlag &flag,
+ const char *startSpecifier, unsigned specifierLen);
+};
+}
+
+bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
+ const analyze_printf::PrintfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen) {
+
unsigned argIndex = FS.getArgIndex();
bool keepGoing = true;
if (argIndex < NumDataArgs) {
- // Consider the argument coverered, even though the specifier doesn't
- // make sense.
+ // Consider the argument covered, even though the specifier doesn't
+ // make sense.
CoveredArgs.set(argIndex);
}
else {
// gibberish when trying to match arguments.
keepGoing = false;
}
-
+
const analyze_printf::ConversionSpecifier &CS =
- FS.getConversionSpecifier();
+ FS.getConversionSpecifier();
SourceLocation Loc = getLocationOfByte(CS.getStart());
S.Diag(Loc, diag::warn_printf_invalid_conversion)
- << llvm::StringRef(CS.getStart(), CS.getLength())
- << getFormatSpecifierRange(startSpecifier, specifierLen);
-
+ << llvm::StringRef(CS.getStart(), CS.getLength())
+ << getSpecifierRange(startSpecifier, specifierLen);
+
return keepGoing;
}
-void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
- // The presence of a null character is likely an error.
- S.Diag(getLocationOfByte(nullCharacter),
- diag::warn_printf_format_string_contains_null_char)
- << getFormatStringRange();
-}
-
-const Expr *CheckPrintfHandler::getDataArg(unsigned i) const {
- return TheCall->getArg(FirstDataArg + i);
-}
-
-bool
-CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
- unsigned k, const char *startSpecifier,
- unsigned specifierLen) {
+bool CheckPrintfHandler::HandleAmount(
+ const analyze_format_string::OptionalAmount &Amt,
+ unsigned k, const char *startSpecifier,
+ unsigned specifierLen) {
if (Amt.hasDataArgument()) {
if (!HasVAListArg) {
if (argIndex >= NumDataArgs) {
S.Diag(getLocationOfByte(Amt.getStart()),
diag::warn_printf_asterisk_missing_arg)
- << k << getFormatSpecifierRange(startSpecifier, specifierLen);
+ << k << getSpecifierRange(startSpecifier, specifierLen);
// Don't do any more checking. We will just emit
// spurious errors.
return false;
diag::warn_printf_asterisk_wrong_type)
<< k
<< ATR.getRepresentativeType(S.Context) << T
- << getFormatSpecifierRange(startSpecifier, specifierLen)
+ << getSpecifierRange(startSpecifier, specifierLen)
<< Arg->getSourceRange();
// Don't do any more checking. We will just emit
// spurious errors.
}
void CheckPrintfHandler::HandleInvalidAmount(
- const analyze_printf::FormatSpecifier &FS,
+ const analyze_printf::PrintfSpecifier &FS,
const analyze_printf::OptionalAmount &Amt,
unsigned type,
const char *startSpecifier,
diag::warn_printf_nonsensical_optional_amount)
<< type
<< CS.toString()
- << getFormatSpecifierRange(startSpecifier, specifierLen)
- << FixItHint::CreateRemoval(getFormatSpecifierRange(Amt.getStart(),
+ << getSpecifierRange(startSpecifier, specifierLen)
+ << FixItHint::CreateRemoval(getSpecifierRange(Amt.getStart(),
Amt.getConstantLength()));
break;
diag::warn_printf_nonsensical_optional_amount)
<< type
<< CS.toString()
- << getFormatSpecifierRange(startSpecifier, specifierLen);
+ << getSpecifierRange(startSpecifier, specifierLen);
break;
}
}
-void CheckPrintfHandler::HandleFlag(const analyze_printf::FormatSpecifier &FS,
+void CheckPrintfHandler::HandleFlag(const analyze_printf::PrintfSpecifier &FS,
const analyze_printf::OptionalFlag &flag,
const char *startSpecifier,
unsigned specifierLen) {
S.Diag(getLocationOfByte(flag.getPosition()),
diag::warn_printf_nonsensical_flag)
<< flag.toString() << CS.toString()
- << getFormatSpecifierRange(startSpecifier, specifierLen)
- << FixItHint::CreateRemoval(getFormatSpecifierRange(flag.getPosition(), 1));
+ << getSpecifierRange(startSpecifier, specifierLen)
+ << FixItHint::CreateRemoval(getSpecifierRange(flag.getPosition(), 1));
}
void CheckPrintfHandler::HandleIgnoredFlag(
- const analyze_printf::FormatSpecifier &FS,
+ const analyze_printf::PrintfSpecifier &FS,
const analyze_printf::OptionalFlag &ignoredFlag,
const analyze_printf::OptionalFlag &flag,
const char *startSpecifier,
S.Diag(getLocationOfByte(ignoredFlag.getPosition()),
diag::warn_printf_ignored_flag)
<< ignoredFlag.toString() << flag.toString()
- << getFormatSpecifierRange(startSpecifier, specifierLen)
- << FixItHint::CreateRemoval(getFormatSpecifierRange(
+ << getSpecifierRange(startSpecifier, specifierLen)
+ << FixItHint::CreateRemoval(getSpecifierRange(
ignoredFlag.getPosition(), 1));
}
bool
-CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
+CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
&FS,
const char *startSpecifier,
unsigned specifierLen) {
else if (usesPositionalArgs != FS.usesPositionalArg()) {
// Cannot mix-and-match positional and non-positional arguments.
S.Diag(getLocationOfByte(CS.getStart()),
- diag::warn_printf_mix_positional_nonpositional_args)
- << getFormatSpecifierRange(startSpecifier, specifierLen);
+ diag::warn_format_mix_positional_nonpositional_args)
+ << getSpecifierRange(startSpecifier, specifierLen);
return false;
}
// Check for using an Objective-C specific conversion specifier
// in a non-ObjC literal.
if (!IsObjCLiteral && CS.isObjCArg()) {
- return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
+ return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier,
+ specifierLen);
}
// Check for invalid use of field width
S.Diag(getLocationOfByte(LM.getStart()),
diag::warn_printf_nonsensical_length)
<< LM.toString() << CS.toString()
- << getFormatSpecifierRange(startSpecifier, specifierLen)
- << FixItHint::CreateRemoval(getFormatSpecifierRange(LM.getStart(),
+ << getSpecifierRange(startSpecifier, specifierLen)
+ << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(),
LM.getLength()));
// Are we using '%n'?
if (CS.getKind() == ConversionSpecifier::OutIntPtrArg) {
// Issue a warning about this being a possible security issue.
S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back)
- << getFormatSpecifierRange(startSpecifier, specifierLen);
+ << getSpecifierRange(startSpecifier, specifierLen);
// Continue checking the other format specifiers.
return true;
}
S.Diag(getLocationOfByte(CS.getStart()),
diag::warn_printf_positional_arg_exceeds_data_args)
<< (argIndex+1) << NumDataArgs
- << getFormatSpecifierRange(startSpecifier, specifierLen);
+ << getSpecifierRange(startSpecifier, specifierLen);
}
else {
S.Diag(getLocationOfByte(CS.getStart()),
diag::warn_printf_insufficient_data_args)
- << getFormatSpecifierRange(startSpecifier, specifierLen);
+ << getSpecifierRange(startSpecifier, specifierLen);
}
// Don't do any more checking.
return true;
// We may be able to offer a FixItHint if it is a supported type.
- FormatSpecifier fixedFS = FS;
+ PrintfSpecifier fixedFS = FS;
bool success = fixedFS.fixType(Ex->getType());
if (success) {
S.Diag(getLocationOfByte(CS.getStart()),
diag::warn_printf_conversion_argument_type_mismatch)
<< ATR.getRepresentativeType(S.Context) << Ex->getType()
- << getFormatSpecifierRange(startSpecifier, specifierLen)
+ << getSpecifierRange(startSpecifier, specifierLen)
<< Ex->getSourceRange()
<< FixItHint::CreateReplacement(
- getFormatSpecifierRange(startSpecifier, specifierLen),
+ getSpecifierRange(startSpecifier, specifierLen),
os.str());
}
else {
S.Diag(getLocationOfByte(CS.getStart()),
diag::warn_printf_conversion_argument_type_mismatch)
<< ATR.getRepresentativeType(S.Context) << Ex->getType()
- << getFormatSpecifierRange(startSpecifier, specifierLen)
+ << getSpecifierRange(startSpecifier, specifierLen)
<< Ex->getSourceRange();
}
}
return true;
}
-void CheckPrintfHandler::DoneProcessing() {
- // Does the number of data arguments exceed the number of
- // format conversions in the format string?
- if (!HasVAListArg) {
- // Find any arguments that weren't covered.
- CoveredArgs.flip();
- signed notCoveredArg = CoveredArgs.find_first();
- if (notCoveredArg >= 0) {
- assert((unsigned)notCoveredArg < NumDataArgs);
- S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
- diag::warn_printf_data_arg_not_used)
- << getFormatStringRange();
+//===--- CHECK: Scanf format string checking ------------------------------===//
+
+namespace {
+class CheckScanfHandler : public CheckFormatHandler {
+public:
+ CheckScanfHandler(Sema &s, const StringLiteral *fexpr,
+ const Expr *origFormatExpr, unsigned firstDataArg,
+ unsigned numDataArgs, bool isObjCLiteral,
+ const char *beg, bool hasVAListArg,
+ const CallExpr *theCall, unsigned formatIdx)
+ : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
+ numDataArgs, isObjCLiteral, beg, hasVAListArg,
+ theCall, formatIdx) {}
+
+ bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen);
+};
+}
+
+bool CheckScanfHandler::HandleScanfSpecifier(
+ const analyze_scanf::ScanfSpecifier &FS,
+ const char *startSpecifier,
+ unsigned specifierLen) {
+
+ using namespace analyze_scanf;
+ using namespace analyze_format_string;
+
+ const ConversionSpecifier &CS = FS.getConversionSpecifier();
+
+ // FIXME: Handle case where '%' and '*' don't consume an argument.
+ // This needs to be done for the printf case as well.
+ if (atFirstArg) {
+ atFirstArg = false;
+ usesPositionalArgs = FS.usesPositionalArg();
+ }
+ else if (usesPositionalArgs != FS.usesPositionalArg()) {
+ // Cannot mix-and-match positional and non-positional arguments.
+ S.Diag(getLocationOfByte(CS.getStart()),
+ diag::warn_format_mix_positional_nonpositional_args)
+ << getSpecifierRange(startSpecifier, specifierLen);
+ return false;
+ }
+
+ // Warn if a constant field width of zero was specified.
+ const OptionalAmount &Amt = FS.getFieldWidth();
+ if (Amt.getHowSpecified() == OptionalAmount::Constant) {
+ if (Amt.getConstantAmount() == 0) {
+ const CharSourceRange &R = getSpecifierRange(Amt.getStart(),
+ Amt.getConstantLength());
+ S.Diag(getLocationOfByte(Amt.getStart()),
+ diag::warn_scanf_nonzero_width)
+ << R << FixItHint::CreateRemoval(R);
}
}
+
+ if (!FS.consumesDataArgument()) {
+ // FIXME: Technically specifying a precision or field width here
+ // makes no sense. Worth issuing a warning at some point.
+ return true;
+ }
+
+ // Consume the argument.
+ unsigned argIndex = FS.getArgIndex();
+ if (argIndex < NumDataArgs) {
+ // The check to see if the argIndex is valid will come later.
+ // We set the bit here because we may exit early from this
+ // function if we encounter some other error.
+ CoveredArgs.set(argIndex);
+ }
+
+ // FIXME: Check that the length modifier is valid with the given
+ // conversion specifier.
+
+ // The remaining checks depend on the data arguments.
+ if (HasVAListArg)
+ return true;
+
+ if (argIndex >= NumDataArgs) {
+ if (FS.usesPositionalArg()) {
+ S.Diag(getLocationOfByte(CS.getStart()),
+ diag::warn_printf_positional_arg_exceeds_data_args)
+ << (argIndex+1) << NumDataArgs
+ << getSpecifierRange(startSpecifier, specifierLen);
+ }
+ else {
+ S.Diag(getLocationOfByte(CS.getStart()),
+ diag::warn_printf_insufficient_data_args)
+ << getSpecifierRange(startSpecifier, specifierLen);
+ }
+
+ // Don't do any more checking.
+ return false;
+ }
+
+ // FIXME: Check that the argument type matches the format specifier.
+
+ return true;
}
-void Sema::CheckPrintfString(const StringLiteral *FExpr,
+void Sema::CheckFormatString(const StringLiteral *FExpr,
const Expr *OrigFormatExpr,
const CallExpr *TheCall, bool HasVAListArg,
- unsigned format_idx, unsigned firstDataArg) {
-
+ unsigned format_idx, unsigned firstDataArg,
+ bool isPrintf) {
+
// CHECK: is the format string a wide literal?
if (FExpr->isWide()) {
Diag(FExpr->getLocStart(),
- diag::warn_printf_format_string_is_wide_literal)
+ diag::warn_format_string_is_wide_literal)
<< OrigFormatExpr->getSourceRange();
return;
}
-
+
// Str - The format string. NOTE: this is NOT null-terminated!
const char *Str = FExpr->getStrData();
-
+
// CHECK: empty format string?
unsigned StrLen = FExpr->getByteLength();
-
+
if (StrLen == 0) {
- Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
+ Diag(FExpr->getLocStart(), diag::warn_empty_format_string)
<< OrigFormatExpr->getSourceRange();
return;
}
-
- CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
- TheCall->getNumArgs() - firstDataArg,
- isa<ObjCStringLiteral>(OrigFormatExpr), Str,
- HasVAListArg, TheCall, format_idx);
-
- if (!analyze_printf::ParseFormatString(H, Str, Str + StrLen))
- H.DoneProcessing();
+
+ if (isPrintf) {
+ CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
+ TheCall->getNumArgs() - firstDataArg,
+ isa<ObjCStringLiteral>(OrigFormatExpr), Str,
+ HasVAListArg, TheCall, format_idx);
+
+ if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen))
+ H.DoneProcessing();
+ }
+ else {
+ CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
+ TheCall->getNumArgs() - firstDataArg,
+ isa<ObjCStringLiteral>(OrigFormatExpr), Str,
+ HasVAListArg, TheCall, format_idx);
+
+ if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen))
+ H.DoneProcessing();
+ }
}
//===--- CHECK: Return Address of Stack Variable --------------------------===//