From 0ff5074f37a66bca244a9d5d0da050ff68693ce2 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 15 May 2013 11:03:04 +0000 Subject: [PATCH] Better diagnostics for string initialization. This commit improves Clang's diagnostics for string initialization. Where it would previously say: /tmp/a.c:3:9: error: array initializer must be an initializer list wchar_t s[] = "Hi"; ^ /tmp/a.c:4:6: error: array initializer must be an initializer list or string literal char t[] = L"Hi"; ^ It will now say /tmp/a.c:3:9: error: initializing wide char array with non-wide string literal wchar_t s[] = "Hi"; ^ /tmp/a.c:4:6: error: initializing char array with wide string literal char t[] = L"Hi"; ^ As a bonus, it also fixes the fact that Clang would previously reject this valid C11 code: char16_t s[] = u"hi"; char32_t t[] = U"hi"; because it would only recognize the built-in types for char16_t and char32_t, which do not exist in C. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@181880 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/DiagnosticSemaKinds.td | 8 +- include/clang/Sema/Initialization.h | 10 ++ lib/Sema/SemaInit.cpp | 167 ++++++++++++++++----- test/Lexer/char-literal.cpp | 4 - test/Sema/ms-wchar.c | 5 +- test/Sema/string-init.c | 51 +++++++ test/Sema/wchar.c | 4 +- test/SemaCXX/ms-wchar.cpp | 3 + test/SemaCXX/string-init.cpp | 40 +++++ 9 files changed, 248 insertions(+), 44 deletions(-) create mode 100644 test/Sema/string-init.c create mode 100644 test/SemaCXX/string-init.cpp diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index db4696767a..bac681e871 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -4282,7 +4282,13 @@ def err_typecheck_incomplete_array_needs_initializer : Error< "or an initializer">; def err_array_init_not_init_list : Error< "array initializer must be an initializer " - "list%select{| or string literal}0">; + 
"list%select{| or string literal| or wide string literal}0">; +def err_array_init_narrow_string_into_wchar : Error< + "initializing wide char array with non-wide string literal">; +def err_array_init_wide_string_into_char : Error< + "initializing char array with wide string literal">; +def err_array_init_incompat_wide_string_into_wchar : Error< + "initializing wide char array with incompatible wide string literal">; def err_array_init_different_type : Error< "cannot initialize array %diff{of type $ with array of type $|" "with different type of array}0,1">; diff --git a/include/clang/Sema/Initialization.h b/include/clang/Sema/Initialization.h index 3f70672459..727ceebe8b 100644 --- a/include/clang/Sema/Initialization.h +++ b/include/clang/Sema/Initialization.h @@ -706,6 +706,16 @@ public: /// \brief Array must be initialized with an initializer list or a /// string literal. FK_ArrayNeedsInitListOrStringLiteral, + /// \brief Array must be initialized with an initializer list or a + /// wide string literal. + FK_ArrayNeedsInitListOrWideStringLiteral, + /// \brief Initializing a wide char array with narrow string literal. + FK_NarrowStringIntoWideCharArray, + /// \brief Initializing char array with wide string literal. + FK_WideStringIntoCharArray, + /// \brief Initializing wide char array with incompatible wide string + /// literal. + FK_IncompatWideStringIntoWideChar, /// \brief Array type mismatch. FK_ArrayTypeMismatch, /// \brief Non-constant array initializer diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp index 7016e565da..a3b78787e4 100644 --- a/lib/Sema/SemaInit.cpp +++ b/lib/Sema/SemaInit.cpp @@ -32,54 +32,99 @@ using namespace clang; // Sema Initialization Checking //===----------------------------------------------------------------------===// -static Expr *IsStringInit(Expr *Init, const ArrayType *AT, - ASTContext &Context) { +/// \brief Check whether T is compatible with a wide character type (wchar_t, +/// char16_t or char32_t). 
+static bool IsWideCharCompatible(QualType T, ASTContext &Context) { + if (Context.typesAreCompatible(Context.getWideCharType(), T)) + return true; + if (Context.getLangOpts().CPlusPlus || Context.getLangOpts().C11) { + return Context.typesAreCompatible(Context.Char16Ty, T) || + Context.typesAreCompatible(Context.Char32Ty, T); + } + return false; +} + +enum StringInitFailureKind { + SIF_None, + SIF_NarrowStringIntoWideChar, + SIF_WideStringIntoChar, + SIF_IncompatWideStringIntoWideChar, + SIF_Other +}; + +/// \brief Check whether the array of type AT can be initialized by the Init +/// expression by means of string initialization. Returns SIF_None if so, +/// otherwise returns a StringInitFailureKind that describes why the +/// initialization would not work. +static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, + ASTContext &Context) { if (!isa(AT) && !isa(AT)) - return 0; + return SIF_Other; // See if this is a string literal or @encode. Init = Init->IgnoreParens(); // Handle @encode, which is a narrow string. if (isa(Init) && AT->getElementType()->isCharType()) - return Init; + return SIF_None; // Otherwise we can only handle string literals. StringLiteral *SL = dyn_cast(Init); - if (SL == 0) return 0; + if (SL == 0) + return SIF_Other; - QualType ElemTy = Context.getCanonicalType(AT->getElementType()); + const QualType ElemTy = + Context.getCanonicalType(AT->getElementType()).getUnqualifiedType(); switch (SL->getKind()) { case StringLiteral::Ascii: case StringLiteral::UTF8: // char array can be initialized with a narrow string. // Only allow char x[] = "foo"; not char x[] = L"foo"; - return ElemTy->isCharType() ? 
Init : 0; + if (ElemTy->isCharType()) + return SIF_None; + if (IsWideCharCompatible(ElemTy, Context)) + return SIF_NarrowStringIntoWideChar; + return SIF_Other; + // C99 6.7.8p15 (with correction from DR343), or C11 6.7.9p15: + // "An array with element type compatible with a qualified or unqualified + // version of wchar_t, char16_t, or char32_t may be initialized by a wide + // string literal with the corresponding encoding prefix (L, u, or U, + // respectively), optionally enclosed in braces." case StringLiteral::UTF16: - return ElemTy->isChar16Type() ? Init : 0; + if (Context.typesAreCompatible(Context.Char16Ty, ElemTy)) + return SIF_None; + if (ElemTy->isCharType()) + return SIF_WideStringIntoChar; + if (IsWideCharCompatible(ElemTy, Context)) + return SIF_IncompatWideStringIntoWideChar; + return SIF_Other; case StringLiteral::UTF32: - return ElemTy->isChar32Type() ? Init : 0; + if (Context.typesAreCompatible(Context.Char32Ty, ElemTy)) + return SIF_None; + if (ElemTy->isCharType()) + return SIF_WideStringIntoChar; + if (IsWideCharCompatible(ElemTy, Context)) + return SIF_IncompatWideStringIntoWideChar; + return SIF_Other; case StringLiteral::Wide: - // wchar_t array can be initialized with a wide string: C99 6.7.8p15 (with - // correction from DR343): "An array with element type compatible with a - // qualified or unqualified version of wchar_t may be initialized by a wide - // string literal, optionally enclosed in braces."
- if (Context.typesAreCompatible(Context.getWideCharType(), - ElemTy.getUnqualifiedType())) - return Init; - - return 0; + if (Context.typesAreCompatible(Context.getWideCharType(), ElemTy)) + return SIF_None; + if (ElemTy->isCharType()) + return SIF_WideStringIntoChar; + if (IsWideCharCompatible(ElemTy, Context)) + return SIF_IncompatWideStringIntoWideChar; + return SIF_Other; } llvm_unreachable("missed a StringLiteral kind?"); } -static Expr *IsStringInit(Expr *init, QualType declType, ASTContext &Context) { +static bool IsStringInit(Expr* init, QualType declType, ASTContext& Context) { const ArrayType *arrayType = Context.getAsArrayType(declType); - if (!arrayType) return 0; - - return IsStringInit(init, arrayType, Context); + if (!arrayType) + return false; + return IsStringInit(init, arrayType, Context) == SIF_None; } /// Update the type of a string literal, including any surrounding parentheses, @@ -806,10 +851,10 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity, // array member. There's nothing we can do with the completed // type here, though. - if (Expr *Str = IsStringInit(expr, arrayType, SemaRef.Context)) { + if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) { if (!VerifyOnly) { - CheckStringInit(Str, ElemType, arrayType, SemaRef); - UpdateStructuredListElement(StructuredList, StructuredIndex, Str); + CheckStringInit(expr, ElemType, arrayType, SemaRef); + UpdateStructuredListElement(StructuredList, StructuredIndex, expr); } ++Index; return; @@ -1189,16 +1234,17 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity, // Check for the special-case of initializing an array with a string. if (Index < IList->getNumInits()) { - if (Expr *Str = IsStringInit(IList->getInit(Index), arrayType, - SemaRef.Context)) { + if (IsStringInit(IList->getInit(Index), arrayType, SemaRef.Context) == + SIF_None) { // We place the string literal directly into the resulting // initializer list. 
This is the only place where the structure // of the structured initializer list doesn't match exactly, // because doing so would involve allocating one character // constant for each string. if (!VerifyOnly) { - CheckStringInit(Str, DeclType, arrayType, SemaRef); - UpdateStructuredListElement(StructuredList, StructuredIndex, Str); + CheckStringInit(IList->getInit(Index), DeclType, arrayType, SemaRef); + UpdateStructuredListElement(StructuredList, StructuredIndex, + IList->getInit(Index)); StructuredList->resizeInits(SemaRef.Context, StructuredIndex); } ++Index; @@ -2507,6 +2553,10 @@ bool InitializationSequence::isAmbiguous() const { case FK_TooManyInitsForReference: case FK_ArrayNeedsInitList: case FK_ArrayNeedsInitListOrStringLiteral: + case FK_ArrayNeedsInitListOrWideStringLiteral: + case FK_NarrowStringIntoWideCharArray: + case FK_WideStringIntoCharArray: + case FK_IncompatWideStringIntoWideChar: case FK_AddressOfOverloadFailed: // FIXME: Could do better case FK_NonConstLValueReferenceBindingToTemporary: case FK_NonConstLValueReferenceBindingToUnrelated: @@ -4278,9 +4328,23 @@ InitializationSequence::InitializationSequence(Sema &S, return; } - if (Initializer && IsStringInit(Initializer, DestAT, Context)) { - TryStringLiteralInitialization(S, Entity, Kind, Initializer, *this); - return; + if (Initializer) { + switch (IsStringInit(Initializer, DestAT, Context)) { + case SIF_None: + TryStringLiteralInitialization(S, Entity, Kind, Initializer, *this); + return; + case SIF_NarrowStringIntoWideChar: + SetFailed(FK_NarrowStringIntoWideCharArray); + return; + case SIF_WideStringIntoChar: + SetFailed(FK_WideStringIntoCharArray); + return; + case SIF_IncompatWideStringIntoWideChar: + SetFailed(FK_IncompatWideStringIntoWideChar); + return; + case SIF_Other: + break; + } } // Note: as an GNU C extension, we allow initialization of an @@ -4307,8 +4371,10 @@ InitializationSequence::InitializationSequence(Sema &S, TryListInitialization(S, Entity, Kind, cast(Initializer), 
*this); AddParenthesizedArrayInitStep(DestType); - } else if (DestAT->getElementType()->isAnyCharacterType()) + } else if (DestAT->getElementType()->isCharType()) SetFailed(FK_ArrayNeedsInitListOrStringLiteral); + else if (IsWideCharCompatible(DestAT->getElementType(), Context)) + SetFailed(FK_ArrayNeedsInitListOrWideStringLiteral); else SetFailed(FK_ArrayNeedsInitList); @@ -5816,11 +5882,24 @@ bool InitializationSequence::Diagnose(Sema &S, break; case FK_ArrayNeedsInitList: + S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 0; + break; case FK_ArrayNeedsInitListOrStringLiteral: - S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) - << (Failure == FK_ArrayNeedsInitListOrStringLiteral); + S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 1; + break; + case FK_ArrayNeedsInitListOrWideStringLiteral: + S.Diag(Kind.getLocation(), diag::err_array_init_not_init_list) << 2; + break; + case FK_NarrowStringIntoWideCharArray: + S.Diag(Kind.getLocation(), diag::err_array_init_narrow_string_into_wchar); + break; + case FK_WideStringIntoCharArray: + S.Diag(Kind.getLocation(), diag::err_array_init_wide_string_into_char); + break; + case FK_IncompatWideStringIntoWideChar: + S.Diag(Kind.getLocation(), + diag::err_array_init_incompat_wide_string_into_wchar); break; - case FK_ArrayTypeMismatch: case FK_NonConstantArrayInit: S.Diag(Kind.getLocation(), @@ -6192,6 +6271,22 @@ void InitializationSequence::dump(raw_ostream &OS) const { OS << "array requires initializer list or string literal"; break; + case FK_ArrayNeedsInitListOrWideStringLiteral: + OS << "array requires initializer list or wide string literal"; + break; + + case FK_NarrowStringIntoWideCharArray: + OS << "narrow string into wide char array"; + break; + + case FK_WideStringIntoCharArray: + OS << "wide string into char array"; + break; + + case FK_IncompatWideStringIntoWideChar: + OS << "incompatible wide string into wide char array"; + break; + case FK_ArrayTypeMismatch: OS << 
"array type mismatch"; break; diff --git a/test/Lexer/char-literal.cpp b/test/Lexer/char-literal.cpp index b2fab34e44..1cd14a9b01 100644 --- a/test/Lexer/char-literal.cpp +++ b/test/Lexer/char-literal.cpp @@ -36,8 +36,4 @@ char16_t p[2] = u"\U0000FFFF"; char16_t q[2] = u"\U00010000"; #ifdef __cplusplus // expected-error@-2 {{too long}} -#else -// FIXME: The above should be accepted in C11 mode. -// expected-error@-6 {{must be an initializer list}} -// expected-error@-6 {{must be an initializer list}} #endif diff --git a/test/Sema/ms-wchar.c b/test/Sema/ms-wchar.c index 52a736c640..febaf283b3 100644 --- a/test/Sema/ms-wchar.c +++ b/test/Sema/ms-wchar.c @@ -12,4 +12,7 @@ __wchar_t g = L'a'; // expected-note {{previous}} unsigned short g; // expected-error {{redefinition of 'g' with a different type: 'unsigned short' vs '__wchar_t'}} // The type of a wide string literal is actually not __wchar_t. -__wchar_t s[] = L"Hello world!"; // expected-error {{array initializer must be an initializer list}} +__wchar_t s[] = L"Hello world!"; // expected-error-re {{array initializer must be an initializer list$}} + +// Do not suggest initializing with a string here, because it would not work. +__wchar_t t[] = 1; // expected-error-re {{array initializer must be an initializer list$}} diff --git a/test/Sema/string-init.c b/test/Sema/string-init.c new file mode 100644 index 0000000000..96ee360e44 --- /dev/null +++ b/test/Sema/string-init.c @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -std=c11 -fsyntax-only -triple x86_64-pc-linux -verify %s + +// Note: these match the types specified by the target above. +typedef int wchar_t; +typedef unsigned short char16_t; +typedef unsigned int char32_t; + +void f() { + char a1[] = "a"; // No error. + char a2[] = u8"a"; // No error. 
+ char a3[] = u"a"; // expected-error{{initializing char array with wide string literal}} + char a4[] = U"a"; // expected-error{{initializing char array with wide string literal}} + char a5[] = L"a"; // expected-error{{initializing char array with wide string literal}} + + wchar_t b1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}} + wchar_t b2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}} + wchar_t b3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + wchar_t b4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + wchar_t b5[] = L"a"; // No error. + + char16_t c1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}} + char16_t c2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}} + char16_t c3[] = u"a"; // No error. + char16_t c4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + char16_t c5[] = L"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + + char32_t d1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}} + char32_t d2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}} + char32_t d3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + char32_t d4[] = U"a"; // No error. 
+ char32_t d5[] = L"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + + int e1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}} + int e2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}} + int e3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + int e4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + int e5[] = L"a"; // No error. + + long f1[] = "a"; // expected-error{{array initializer must be an initializer list}} + long f2[] = u8"a"; // expected-error{{array initializer must be an initializer list}} + long f3[] = u"a"; // expected-error{{array initializer must be an initializer list}} + long f4[] = U"a"; // expected-error{{array initializer must be an initializer list}} + long f5[] = L"a"; // expected-error{{array initializer must be an initializer list}} +} + +void g() { + char a[] = 1; // expected-error{{array initializer must be an initializer list or string literal}} + wchar_t b[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}} + char16_t c[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}} + char32_t d[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}} +} diff --git a/test/Sema/wchar.c b/test/Sema/wchar.c index 816245f3c0..13c2f5855d 100644 --- a/test/Sema/wchar.c +++ b/test/Sema/wchar.c @@ -19,6 +19,6 @@ int check_wchar_size[sizeof(*L"") == sizeof(wchar_t) ?
1 : -1]; void foo() { WCHAR_T_TYPE t1[] = L"x"; wchar_t tab[] = L"x"; - WCHAR_T_TYPE t2[] = "x"; // expected-error {{initializer}} - char t3[] = L"x"; // expected-error {{initializer}} + WCHAR_T_TYPE t2[] = "x"; // expected-error {{initializing wide char array with non-wide string literal}} + char t3[] = L"x"; // expected-error {{initializing char array with wide string literal}} } diff --git a/test/SemaCXX/ms-wchar.cpp b/test/SemaCXX/ms-wchar.cpp index 2cbf745d33..878d8cadce 100644 --- a/test/SemaCXX/ms-wchar.cpp +++ b/test/SemaCXX/ms-wchar.cpp @@ -7,3 +7,6 @@ __wchar_t g = L'a'; __wchar_t s[] = L"Hello world!"; unsigned short t[] = L"Hello world!"; // expected-error{{array initializer must be an initializer list}} + +wchar_t u[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}} +__wchar_t v[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}} diff --git a/test/SemaCXX/string-init.cpp b/test/SemaCXX/string-init.cpp new file mode 100644 index 0000000000..7e62d1855a --- /dev/null +++ b/test/SemaCXX/string-init.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s + +void f() { + char a1[] = "a"; // No error. + char a2[] = u8"a"; // No error. 
+ char a3[] = u"a"; // expected-error{{initializing char array with wide string literal}} + char a4[] = U"a"; // expected-error{{initializing char array with wide string literal}} + char a5[] = L"a"; // expected-error{{initializing char array with wide string literal}} + + wchar_t b1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}} + wchar_t b2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}} + wchar_t b3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + wchar_t b4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + wchar_t b5[] = L"a"; // No error. + + char16_t c1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}} + char16_t c2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}} + char16_t c3[] = u"a"; // No error. + char16_t c4[] = U"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + char16_t c5[] = L"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + + char32_t d1[] = "a"; // expected-error{{initializing wide char array with non-wide string literal}} + char32_t d2[] = u8"a"; // expected-error{{initializing wide char array with non-wide string literal}} + char32_t d3[] = u"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + char32_t d4[] = U"a"; // No error. 
+ char32_t d5[] = L"a"; // expected-error{{initializing wide char array with incompatible wide string literal}} + + int e1[] = "a"; // expected-error{{array initializer must be an initializer list}} + int e2[] = u8"a"; // expected-error{{array initializer must be an initializer list}} + int e3[] = u"a"; // expected-error{{array initializer must be an initializer list}} + int e4[] = U"a"; // expected-error{{array initializer must be an initializer list}} + int e5[] = L"a"; // expected-error{{array initializer must be an initializer list}} +} + +void g() { + char a[] = 1; // expected-error{{array initializer must be an initializer list or string literal}} + wchar_t b[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}} + char16_t c[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}} + char32_t d[] = 1; // expected-error{{array initializer must be an initializer list or wide string literal}} +} -- 2.40.0