unsigned getLength() const {
return EndScanList ? EndScanList - Position : 1;
}
+ void setEndScanList(const char *pos) { EndScanList = pos; }
bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
kind == FreeBSDrArg || kind == FreeBSDyArg; }
bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
bool isDoubleArg() const { return kind >= DoubleArgBeg &&
kind <= DoubleArgEnd; }
- unsigned getLength() const {
- // Conversion specifiers currently only are represented by
- // single characters, but we be flexible.
- return 1;
- }
static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
return CS->isPrintfKind();
ScanfConversionSpecifier(const char *pos, Kind k)
: ConversionSpecifier(false, pos, k) {}
- void setEndScanList(const char *pos) { EndScanList = pos; }
-
static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
return !CS->isPrintfKind();
}
#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/ConvertUTF.h"
using clang::analyze_format_string::ArgType;
using clang::analyze_format_string::FormatStringHandler;
return true;
}
+bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
+ const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) {
+ if (SpecifierBegin + 1 >= FmtStrEnd)
+ return false;
+
+ const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
+ const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+ const char FirstByte = *SB;
+
+ // If the invalid specifier is a multibyte UTF-8 string, return the
+ // total length accordingly so that the conversion specifier can be
+ // properly updated to reflect a complete UTF-8 specifier.
+ unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+ if (NumBytes == 1)
+ return false;
+ if (SB + NumBytes > SE)
+ return false;
+
+ Len = NumBytes + 1;
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Methods on ArgType.
//===----------------------------------------------------------------------===//
/// FormatSpecifier& argument, and false otherwise.
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E,
const LangOptions &LO, bool IsScanf = false);
-
+
+/// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8
+/// string; check that it won't go further than \p FmtStrEnd and write
+/// up the total size in \p Len.
+bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin,
+ const char *FmtStrEnd, unsigned &Len);
+
template <typename T> class SpecifierResult {
T FS;
const char *Start;
argIndex++;
if (k == ConversionSpecifier::InvalidSpecifier) {
+ unsigned Len = I - Start;
+ if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
+ CS.setEndScanList(Start + Len);
+ FS.setConversionSpecifier(CS);
+ }
// Assume the conversion takes one argument.
- return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
+ return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
}
return PrintfSpecifierResult(Start, FS);
}
unsigned &argIndex,
const LangOptions &LO,
const TargetInfo &Target) {
-
+ using namespace clang::analyze_format_string;
using namespace clang::analyze_scanf;
const char *I = Beg;
const char *Start = nullptr;
// FIXME: '%' and '*' doesn't make sense. Issue a warning.
// FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
-
+
if (k == ScanfConversionSpecifier::InvalidSpecifier) {
+ unsigned Len = I - Beg;
+ if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
+ CS.setEndScanList(Beg + Len);
+ FS.setConversionSpecifier(CS);
+ }
// Assume the conversion takes one argument.
- return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
+ return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
}
return ScanfSpecifierResult(Start, FS);
}
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Locale.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
// gibberish when trying to match arguments.
keepGoing = false;
}
-
- EmitFormatDiagnostic(S.PDiag(diag::warn_format_invalid_conversion)
- << StringRef(csStart, csLen),
- Loc, /*IsStringLocation*/true,
- getSpecifierRange(startSpec, specifierLen));
-
+
+ StringRef Specifier(csStart, csLen);
+
+ // If the specifier in non-printable, it could be the first byte of a UTF-8
+ // sequence. In that case, print the UTF-8 code point. If not, print the byte
+ // hex value.
+ std::string CodePointStr;
+ if (!llvm::sys::locale::isPrint(*csStart)) {
+ UTF32 CodePoint;
+ const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart);
+ const UTF8 *E =
+ reinterpret_cast<const UTF8 *>(csStart + csLen);
+ ConversionResult Result =
+ llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion);
+
+ if (Result != conversionOK) {
+ unsigned char FirstChar = *csStart;
+ CodePoint = (UTF32)FirstChar;
+ }
+
+ llvm::raw_string_ostream OS(CodePointStr);
+ if (CodePoint < 256)
+ OS << "\\x" << llvm::format("%02x", CodePoint);
+ else if (CodePoint <= 0xFFFF)
+ OS << "\\u" << llvm::format("%04x", CodePoint);
+ else
+ OS << "\\U" << llvm::format("%08x", CodePoint);
+ OS.flush();
+ Specifier = CodePointStr;
+ }
+
+ EmitFormatDiagnostic(
+ S.PDiag(diag::warn_format_invalid_conversion) << Specifier, Loc,
+ /*IsStringLocation*/ true, getSpecifierRange(startSpec, specifierLen));
+
return keepGoing;
}
scanf(i ? "%d" : "%d", i, s); // expected-warning{{data argument not used}}
scanf(i ? "%s" : "%d", s); // expected-warning{{format specifies type 'int *'}}
}
+
+void testInvalidNoPrintable(int *a) {
+ scanf("%\u25B9", a); // expected-warning {{invalid conversion specifier '\u25b9'}}
+ scanf("%\xE2\x96\xB9", a); // expected-warning {{invalid conversion specifier '\u25b9'}}
+ scanf("%\U00010348", a); // expected-warning {{invalid conversion specifier '\U00010348'}}
+ scanf("%\xF0\x90\x8D\x88", a); // expected-warning {{invalid conversion specifier '\U00010348'}}
+ scanf("%\xe2", a); // expected-warning {{invalid conversion specifier '\xe2'}}
+}
printf("%n", (cip_t)0); // expected-warning{{format specifies type 'int *' but the argument has type 'cip_t' (aka 'const int *')}}
}
+void testInvalidNoPrintable() {
+ printf("%\u25B9"); // expected-warning {{invalid conversion specifier '\u25b9'}}
+ printf("%\xE2\x96\xB9"); // expected-warning {{invalid conversion specifier '\u25b9'}}
+ printf("%\U00010348"); // expected-warning {{invalid conversion specifier '\U00010348'}}
+ printf("%\xF0\x90\x8D\x88"); // expected-warning {{invalid conversion specifier '\U00010348'}}
+ printf("%\xe2"); // expected-warning {{invalid conversion specifier '\xe2'}}
+}
+
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
#pragma GCC diagnostic warning "-Wformat-security"
// <rdar://problem/14178260>
NSLog(@"%2$[tt]@ %1$[tt]s", @"Foo", @"Bar"); // expected-warning {{object format flags cannot be used with 's' conversion specifier}}
}
+// Test Objective-C invalid no printable specifiers
+void testObjcInvalidNoPrintable(int *a) {
+ NSLog(@"%\u25B9"); // expected-warning {{invalid conversion specifier '\u25b9'}}
+ NSLog(@"%\xE2\x96\xB9"); // expected-warning {{invalid conversion specifier '\u25b9'}}
+ NSLog(@"%\U00010348"); // expected-warning {{invalid conversion specifier '\U00010348'}}
+ NSLog(@"%\xF0\x90\x8D\x88"); // expected-warning {{invalid conversion specifier '\U00010348'}}
+ NSLog(@"%\xe2"); // expected-warning {{input conversion stopped}} expected-warning {{invalid conversion specifier '\xe2'}}
+}