public:
enum HowSpecified { NotSpecified, Constant, Arg };
- OptionalAmount(HowSpecified h, const char *st)
- : start(st), hs(h), amt(0) {}
+ OptionalAmount(HowSpecified h, unsigned i, const char *st)
+ : start(st), hs(h), amt(i) {}
OptionalAmount()
: start(0), hs(NotSpecified), amt(0) {}
- OptionalAmount(unsigned i, const char *st)
- : start(st), hs(Constant), amt(i) {}
-
HowSpecified getHowSpecified() const { return hs; }
+
bool hasDataArgument() const { return hs == Arg; }
+ unsigned getArgIndex() const {
+ assert(hasDataArgument());
+ return amt;
+ }
+
unsigned getConstantAmount() const {
assert(hs == Constant);
return amt;
unsigned HasSpacePrefix : 1;
unsigned HasAlternativeForm : 1;
unsigned HasLeadingZeroes : 1;
- unsigned flags : 5;
+ unsigned argIndex;
ConversionSpecifier CS;
OptionalAmount FieldWidth;
OptionalAmount Precision;
public:
FormatSpecifier() : LM(None),
IsLeftJustified(0), HasPlusPrefix(0), HasSpacePrefix(0),
- HasAlternativeForm(0), HasLeadingZeroes(0) {}
+ HasAlternativeForm(0), HasLeadingZeroes(0), argIndex(0) {}
static FormatSpecifier Parse(const char *beg, const char *end);
void setHasAlternativeForm() { HasAlternativeForm = 1; }
void setHasLeadingZeros() { HasLeadingZeroes = 1; }
+ void setArgIndex(unsigned i) {
+ assert(CS.consumesDataArgument());
+ argIndex = i;
+ }
+
+ unsigned getArgIndex() const {
+ assert(CS.consumesDataArgument());
+ return argIndex;
+ }
+
// Methods for querying the format specifier.
const ConversionSpecifier &getConversionSpecifier() const {
virtual void HandleNullChar(const char *nullCharacter) {}
- virtual void
+ virtual bool
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
- unsigned specifierLen) {}
+ unsigned specifierLen) { return true; }
virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
// Methods for parsing format strings.
//===----------------------------------------------------------------------===//
-static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
+static OptionalAmount ParseAmount(const char *&Beg, const char *E,
+ unsigned &argIndex) {
const char *I = Beg;
UpdateOnReturn <const char*> UpdateBeg(Beg, I);
}
if (foundDigits)
- return OptionalAmount(accumulator, Beg);
+ return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
if (c == '*') {
++I;
- return OptionalAmount(OptionalAmount::Arg, Beg);
+ return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
}
break;
static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
const char *&Beg,
- const char *E) {
+ const char *E,
+ unsigned &argIndex) {
using namespace clang::analyze_printf;
}
// Look for the field width (if any).
- FS.setFieldWidth(ParseAmount(I, E));
+ FS.setFieldWidth(ParseAmount(I, E, argIndex));
if (I == E) {
// No more characters left?
return true;
}
- FS.setPrecision(ParseAmount(I, E));
+ FS.setPrecision(ParseAmount(I, E, argIndex));
if (I == E) {
// No more characters left?
// Glibc specific.
case 'm': k = ConversionSpecifier::PrintErrno; break;
}
- FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
+ ConversionSpecifier CS(conversionPosition, k);
+ FS.setConversionSpecifier(CS);
+ if (CS.consumesDataArgument())
+ FS.setArgIndex(argIndex++);
if (k == ConversionSpecifier::InvalidSpecifier) {
- H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
- return false; // Keep processing format specifiers.
+ // Assume the conversion takes one argument.
+ return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
}
return FormatSpecifierResult(Start, FS);
}
bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
const char *I, const char *E) {
+
+ unsigned argIndex = 0;
+
// Keep looking for a format specifier until we have exhausted the string.
while (I != E) {
- const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
+ const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
// Did a fail-stop error of any kind occur when parsing the specifier?
// If so, don't do any more processing.
if (FSR.shouldStop())
return Ctx.LongDoubleTy;
return Ctx.DoubleTy;
}
-
+
switch (CS.getKind()) {
case ConversionSpecifier::CStrArg:
return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
// FIXME: This appears to be Mac OS X specific.
return ArgTypeResult::WCStrTy;
case ConversionSpecifier::CArg:
- return Ctx.WCharTy;
+ return Ctx.WCharTy;
default:
break;
}
-
+
// FIXME: Handle other cases.
return ArgTypeResult();
}
/// FormatGuard: Automatic Protection From printf Format String
/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
///
+/// TODO:
/// Functionality implemented:
///
/// We can statically check the following properties for string
/// data arguments?
///
/// (2) Does each format conversion correctly match the type of the
-/// corresponding data argument? (TODO)
+/// corresponding data argument?
///
/// Moreover, for all printf functions we can:
///
///
/// All of these checks can be done by parsing the format string.
///
-/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
void
Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
unsigned format_idx, unsigned firstDataArg) {
Sema &S;
const StringLiteral *FExpr;
const Expr *OrigFormatExpr;
- unsigned NumConversions;
const unsigned NumDataArgs;
const bool IsObjCLiteral;
const char *Beg; // Start of format string.
const bool HasVAListArg;
const CallExpr *TheCall;
unsigned FormatIdx;
+ llvm::BitVector CoveredArgs;
public:
CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr,
const char *beg, bool hasVAListArg,
const CallExpr *theCall, unsigned formatIdx)
: S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
- NumConversions(0), NumDataArgs(numDataArgs),
+ NumDataArgs(numDataArgs),
IsObjCLiteral(isObjCLiteral), Beg(beg),
HasVAListArg(hasVAListArg),
- TheCall(theCall), FormatIdx(formatIdx) {}
+ TheCall(theCall), FormatIdx(formatIdx) {
+ CoveredArgs.resize(numDataArgs);
+ CoveredArgs.reset();
+ }
void DoneProcessing();
void HandleIncompleteFormatSpecifier(const char *startSpecifier,
unsigned specifierLen);
- void
+ bool
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen);
<< getFormatSpecifierRange(startSpecifier, specifierLen);
}
-void CheckPrintfHandler::
+bool CheckPrintfHandler::
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen) {
- ++NumConversions;
+ unsigned argIndex = FS.getArgIndex();
+ bool keepGoing = true;
+ if (argIndex < NumDataArgs) {
+ // Consider the argument coverered, even though the specifier doesn't
+ // make sense.
+ CoveredArgs.set(argIndex);
+ }
+ else {
+ // If argIndex exceeds the number of data arguments we
+ // don't issue a warning because that is just a cascade of warnings (and
+ // they may have intended '%%' anyway). We don't want to continue processing
+ // the format string after this point, however, as we will like just get
+ // gibberish when trying to match arguments.
+ keepGoing = false;
+ }
+
const analyze_printf::ConversionSpecifier &CS =
FS.getConversionSpecifier();
SourceLocation Loc = getLocationOfByte(CS.getStart());
S.Diag(Loc, diag::warn_printf_invalid_conversion)
<< llvm::StringRef(CS.getStart(), CS.getLength())
<< getFormatSpecifierRange(startSpecifier, specifierLen);
+
+ return keepGoing;
}
void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
}
const Expr *CheckPrintfHandler::getDataArg(unsigned i) const {
- return TheCall->getArg(FormatIdx + i);
+ return TheCall->getArg(FormatIdx + i + 1);
}
unsigned specifierLen) {
if (Amt.hasDataArgument()) {
- ++NumConversions;
if (!HasVAListArg) {
- if (NumConversions > NumDataArgs) {
+ unsigned argIndex = Amt.getArgIndex();
+ if (argIndex >= NumDataArgs) {
S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag)
<< getFormatSpecifierRange(startSpecifier, specifierLen);
// Don't do any more checking. We will just emit
// Although not in conformance with C99, we also allow the argument to be
// an 'unsigned int' as that is a reasonably safe case. GCC also
// doesn't emit a warning for that case.
- const Expr *Arg = getDataArg(NumConversions);
+ CoveredArgs.set(argIndex);
+ const Expr *Arg = getDataArg(argIndex);
QualType T = Arg->getType();
const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context);
return false;
}
- // Check for using an Objective-C specific conversion specifier
- // in a non-ObjC literal.
- if (!IsObjCLiteral && CS.isObjCArg()) {
- HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
-
- // Continue checking the other format specifiers.
- return true;
- }
-
if (!CS.consumesDataArgument()) {
// FIXME: Technically specifying a precision or field width here
// makes no sense. Worth issuing a warning at some point.
return true;
}
- ++NumConversions;
+ // Consume the argument.
+ unsigned argIndex = FS.getArgIndex();
+ CoveredArgs.set(argIndex);
+
+ // Check for using an Objective-C specific conversion specifier
+ // in a non-ObjC literal.
+ if (!IsObjCLiteral && CS.isObjCArg()) {
+ return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
+ }
// Are we using '%n'? Issue a warning about this being
// a possible security issue.
if (HasVAListArg)
return true;
- if (NumConversions > NumDataArgs) {
+ if (argIndex >= NumDataArgs) {
S.Diag(getLocationOfByte(CS.getStart()),
diag::warn_printf_insufficient_data_args)
<< getFormatSpecifierRange(startSpecifier, specifierLen);
// Now type check the data expression that matches the
// format specifier.
- const Expr *Ex = getDataArg(NumConversions);
+ const Expr *Ex = getDataArg(argIndex);
const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context);
if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) {
// Check if we didn't match because of an implicit cast from a 'char'
void CheckPrintfHandler::DoneProcessing() {
// Does the number of data arguments exceed the number of
// format conversions in the format string?
- if (!HasVAListArg && NumConversions < NumDataArgs)
- S.Diag(getDataArg(NumConversions+1)->getLocStart(),
- diag::warn_printf_too_many_data_args)
- << getFormatStringRange();
+ if (!HasVAListArg) {
+ // Find any arguments that weren't covered.
+ CoveredArgs.flip();
+ signed notCoveredArg = CoveredArgs.find_first();
+ if (notCoveredArg >= 0) {
+ assert((unsigned)notCoveredArg < NumDataArgs);
+ S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
+ diag::warn_printf_data_arg_not_used)
+ << getFormatStringRange();
+ }
+ }
}
void Sema::CheckPrintfString(const StringLiteral *FExpr,