From 60800081361b0ffc114877b8abbc81cb57b4edf6 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 18 Feb 2009 17:49:48 +0000 Subject: [PATCH] Start improving diagnostics that relate to subcharacters of string literals. First step, handle diagnostics in StringLiteral's that are due to token pasting. For example, we now handle: id str2 = @"foo" "bar" @"baz" " b\0larg"; // expected-warning {{literal contains NUL character}} Correctly: test/SemaObjC/exprs.m:17:15: warning: CFString literal contains NUL character " b\0larg"; // expected-warning {{literal contains NUL character}} ~~~^~~~~~~ There are several other related issues still to be done. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@64924 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/AST/Expr.h | 1 - lib/Sema/Sema.h | 2 + lib/Sema/SemaChecking.cpp | 97 ++++++++++++++++++++++++++++++++------- test/SemaObjC/exprs.m | 14 +++++- 4 files changed, 95 insertions(+), 19 deletions(-) diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h index ffdd996ced..f06fe09817 100644 --- a/include/clang/AST/Expr.h +++ b/include/clang/AST/Expr.h @@ -520,7 +520,6 @@ public: typedef const SourceLocation *tokloc_iterator; tokloc_iterator tokloc_begin() const { return TokLocs; } tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; } - virtual SourceRange getSourceRange() const { return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]); diff --git a/lib/Sema/Sema.h b/lib/Sema/Sema.h index a748e5c028..4358051cc5 100644 --- a/lib/Sema/Sema.h +++ b/lib/Sema/Sema.h @@ -1989,6 +1989,8 @@ public: private: Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); + SourceLocation getLocationOfStringLiteralByte(const StringLiteral *SL, + unsigned ByteNo) const; bool CheckObjCString(Expr *Arg); bool SemaBuiltinVAStart(CallExpr *TheCall); bool SemaBuiltinUnorderedCompare(CallExpr *TheCall); diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 
db622f6648..f469684e50 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -20,6 +20,71 @@
 #include "clang/Lex/Preprocessor.h"
 using namespace clang;
 
+/// getLocationOfStringLiteralByte - Return a source location that points to the
+/// specified byte of the specified string literal.
+///
+/// Strings are amazingly complex.  They can be formed from multiple tokens and
+/// can have escape sequences in them in addition to the usual trigraph and
+/// escaped newline business.  This routine handles this complexity.
+///
+SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
+                                                    unsigned ByteNo) const {
+  assert(!SL->isWide() && "This doesn't work for wide strings yet");
+
+  // Loop over all of the tokens in this string until we find the one that
+  // contains the byte we're looking for.
+  unsigned TokNo = 0;
+  while (1) {
+    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
+    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
+
+    // Get the spelling of the string so that we can get the data that makes up
+    // the string literal, not the identifier for the macro it is potentially
+    // expanded through.
+    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
+
+    // Re-lex the token to get its length and original spelling.
+    std::pair<FileID, unsigned> LocInfo =
+      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
+    std::pair<const char *, const char *> Buffer =
+      SourceMgr.getBufferData(LocInfo.first);
+    const char *StrData = Buffer.first+LocInfo.second;
+
+    // Create a langops struct and enable trigraphs.  This is sufficient for
+    // relexing tokens.
+    LangOptions LangOpts;
+    LangOpts.Trigraphs = true;
+
+    // Create a lexer starting at the beginning of this token.
+    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
+                   Buffer.second);
+    Token TheTok;
+    TheLexer.LexFromRawLexer(TheTok);
+
+    // The length of the string is the token length minus the two quotes.
+    unsigned TokNumBytes = TheTok.getLength()-2;
+
+    // Return if the byte is in this token, or is one-past-the-end of the last.
+    // FIXME: This should consider character escapes!
+    if (ByteNo < TokNumBytes ||
+        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated()-1)) {
+      // If the original token came from a macro expansion, just return the
+      // start of the token.  We don't want to magically jump to the spelling
+      // for a diagnostic.  We do the above business in case some tokens come
+      // from a macro expansion but others don't.
+      if (!StrTokLoc.isFileID()) return StrTokLoc;
+
+      // We advance +1 to step over the '"'.
+      return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
+    }
+
+    // Move to the next string token.
+    ++TokNo;
+    ByteNo -= TokNumBytes;
+  }
+}
+
+
 /// CheckFunctionCall - Check a direct function call for various correctness
 /// and safety properties not strictly enforced by the C type system.
 Action::OwningExprResult
@@ -108,14 +173,14 @@ bool Sema::CheckObjCString(Expr *Arg) {
 
   for (unsigned i = 0; i < Length; ++i) {
     if (!isascii(Data[i])) {
-      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
+      Diag(getLocationOfStringLiteralByte(Literal, i),
            diag::warn_cfstring_literal_contains_non_ascii_character)
         << Arg->getSourceRange();
       break;
     }
 
     if (!Data[i]) {
-      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
+      Diag(getLocationOfStringLiteralByte(Literal, i),
            diag::warn_cfstring_literal_contains_nul_character)
         << Arg->getSourceRange();
       break;
@@ -565,7 +630,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
     if (Str[StrIdx] == '\0') {
       // The string returned by getStrData() is not null-terminated,
       // so the presence of a null character is likely an error.
- Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), + Diag(getLocationOfStringLiteralByte(FExpr, StrIdx), diag::warn_printf_format_string_contains_null_char) << OrigFormatExpr->getSourceRange(); return; @@ -587,8 +652,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, ++numConversions; if (!HasVAListArg && numConversions > numDataArgs) { - SourceLocation Loc = FExpr->getLocStart(); - Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); + SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); if (Str[StrIdx-1] == '.') Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) @@ -607,8 +671,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, if (BT->getKind() == BuiltinType::Int) break; - SourceLocation Loc = - PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); + SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); if (Str[StrIdx-1] == '.') Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) @@ -655,8 +718,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, case 'n': { ++numConversions; CurrentState = state_OrdChr; - SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), - LastConversionIdx+1); + SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, + LastConversionIdx); Diag(Loc, diag::warn_printf_write_back)<getSourceRange(); break; @@ -669,8 +732,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, CurrentState = state_OrdChr; else { // Issue a warning: invalid format conversion. 
- SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), - LastConversionIdx+1); + SourceLocation Loc = + getLocationOfStringLiteralByte(FExpr, LastConversionIdx); Diag(Loc, diag::warn_printf_invalid_conversion) << std::string(Str+LastConversionIdx, @@ -690,8 +753,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, CurrentState = state_OrdChr; else { // Issue a warning: invalid format conversion. - SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), - LastConversionIdx+1); + SourceLocation Loc = + getLocationOfStringLiteralByte(FExpr, LastConversionIdx); Diag(Loc, diag::warn_printf_invalid_conversion) << std::string(Str+LastConversionIdx, Str+StrIdx) @@ -713,8 +776,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, if (CurrentState == state_Conversion) { // Issue a warning: invalid format conversion. - SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), - LastConversionIdx+1); + SourceLocation Loc = + getLocationOfStringLiteralByte(FExpr, LastConversionIdx); Diag(Loc, diag::warn_printf_invalid_conversion) << std::string(Str+LastConversionIdx, @@ -727,8 +790,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, // CHECK: Does the number of format conversions exceed the number // of data arguments? if (numConversions > numDataArgs) { - SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), - LastConversionIdx); + SourceLocation Loc = + getLocationOfStringLiteralByte(FExpr, LastConversionIdx); Diag(Loc, diag::warn_printf_insufficient_data_args) << OrigFormatExpr->getSourceRange(); diff --git a/test/SemaObjC/exprs.m b/test/SemaObjC/exprs.m index cb7f723f39..3918923409 100644 --- a/test/SemaObjC/exprs.m +++ b/test/SemaObjC/exprs.m @@ -1,7 +1,19 @@ // RUN: clang %s -fsyntax-only -verify // rdar://6597252 -Class foo(Class X) { +Class test1(Class X) { return 1 ? 
X : X; } + +// rdar://6079877 +void test2() { + id str = @"foo" + "bar\0" // expected-warning {{literal contains NUL character}} + @"baz" " blarg"; + id str2 = @"foo" + "bar" + @"baz" + " b\0larg"; // expected-warning {{literal contains NUL character}} + +} \ No newline at end of file -- 2.40.0