From: Seth Cantrell Date: Wed, 18 Jan 2012 12:27:10 +0000 (+0000) Subject: Add and update tests for character literals X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7748cbc97ff9c6c3940549d30965a10b47a45ee8;p=clang Add and update tests for character literals git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@148392 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/char-literal.c b/test/CodeGen/char-literal.c index 5963ede392..5452392ea5 100644 --- a/test/CodeGen/char-literal.c +++ b/test/CodeGen/char-literal.c @@ -9,11 +9,26 @@ int main() { // CHECK-CPP0X: store i8 97 char a = 'a'; - // Should pick second character. + // Should truncate value (equal to last character). // CHECK-C: store i8 98 // CHECK-CPP0X: store i8 98 char b = 'ab'; + // Should get concatenated characters + // CHECK-C: store i32 24930 + // CHECK-CPP0X: store i32 24930 + int b1 = 'ab'; + + // Should get concatenated characters + // CHECK-C: store i32 808464432 + // CHECK-CPP0X: store i32 808464432 + int b2 = '0000'; + + // Should get truncated value (last four characters concatenated) + // CHECK-C: store i32 1919512167 + // CHECK-CPP0X: store i32 1919512167 + int b3 = 'somesillylongstring'; + // CHECK-C: store i32 97 // CHECK-CPP0X: store i32 97 wchar_t wa = L'a'; @@ -27,26 +42,11 @@ int main() { // CHECK-CPP0X: store i16 97 char16_t ua = u'a'; - // Should pick second character. - // CHECK-CPP0X: store i16 98 - char16_t ub = u'ab'; - // CHECK-CPP0X: store i32 97 char32_t Ua = U'a'; - // Should pick second character. - // CHECK-CPP0X: store i32 98 - char32_t Ub = U'ab'; #endif - // Should pick last character and store its lowest byte. - // This does not match gcc, which takes the last character, converts it to - // utf8, and then picks the second-lowest byte of that (they probably store - // the utf8 in uint16_ts internally and take the lower byte of that). 
- // CHECK-C: store i8 48 - // CHECK-CPP0X: store i8 48 - char c = '\u1120\u0220\U00102030'; - // CHECK-C: store i32 61451 // CHECK-CPP0X: store i32 61451 wchar_t wc = L'\uF00B'; @@ -65,13 +65,6 @@ int main() { wchar_t wd = L'\U0010F00B'; #if __cplusplus >= 201103L - // Should take lower word of the 4byte UNC sequence. This does not match - // gcc. I don't understand what gcc does (it looks like it converts to utf16, - // then takes the second (!) utf16 word, swaps the lower two nibbles, and - // stores that?). - // CHECK-CPP0X: store i16 -4085 - char16_t ud = u'\U0010F00B'; // has utf16 encoding dbc8 dcb0 - // CHECK-CPP0X: store i32 1110027 char32_t Ud = U'\U0010F00B'; #endif @@ -80,14 +73,4 @@ int main() { // CHECK-C: store i32 1110027 // CHECK-CPP0X: store i32 1110027 wchar_t we = L'\u1234\U0010F00B'; - -#if __cplusplus >= 201103L - // Should pick second character. - // CHECK-CPP0X: store i16 -4085 - char16_t ue = u'\u1234\U0010F00B'; - - // Should pick second character. - // CHECK-CPP0X: store i32 1110027 - char32_t Ue = U'\u1234\U0010F00B'; -#endif } diff --git a/test/CodeGen/string-literal-short-wstring.c b/test/CodeGen/string-literal-short-wstring.c index 309ffd33b2..88e4a1e400 100644 --- a/test/CodeGen/string-literal-short-wstring.c +++ b/test/CodeGen/string-literal-short-wstring.c @@ -29,15 +29,4 @@ int main() { // -4085 == 0xf00b // CHECK: store i16 -4085 wchar_t wc = L'\uF00B'; - - // Should take lower word of the 4byte UNC sequence. This does not match - // gcc. I don't understand what gcc does (it looks like it converts to utf16, - // then takes the second (!) utf16 word, swaps the lower two nibbles, and - // stores that?). - // CHECK: store i16 -4085 - wchar_t wd = L'\U0010F00B'; // has utf16 encoding dbc8 dcb0 - - // Should pick second character. 
(gcc: -9205) - // CHECK: store i16 -4085 - wchar_t we = L'\u1234\U0010F00B'; } diff --git a/test/Lexer/char-literal-encoding-error.c b/test/Lexer/char-literal-encoding-error.c new file mode 100644 index 0000000000..08f9a50a0d --- /dev/null +++ b/test/Lexer/char-literal-encoding-error.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -x c++ %s + +// This file is encoded using ISO-8859-1 + +int main() { + 'é'; // expected-error {{illegal sequence in character literal}} + u'é'; // expected-error {{illegal sequence in character literal}} + U'é'; // expected-error {{illegal sequence in character literal}} + L'é'; // expected-error {{illegal sequence in character literal}} +} diff --git a/test/Lexer/char-literal.cpp b/test/Lexer/char-literal.cpp new file mode 100644 index 0000000000..5dc53608f8 --- /dev/null +++ b/test/Lexer/char-literal.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -Wfour-char-constants -fsyntax-only -verify %s + +int a = 'ab'; // expected-warning {{multi-character character constant}} +int b = '\xFF\xFF'; // expected-warning {{multi-character character constant}} +int c = 'APPS'; // expected-warning {{multi-character character constant}} + +char d = '⌘'; // expected-error {{character too large for enclosing character literal type}} +char e = '\u2318'; // expected-error {{character too large for enclosing character literal type}} + +auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character constant}} + +char16_t g = u'ab'; // expected-error {{Unicode character literals may not contain multiple characters}} +char16_t h = u'\U0010FFFD'; // expected-error {{character too large for enclosing character literal type}} + +wchar_t i = L'ab'; // expected-warning {{extraneous characters in character constant ignored}} +wchar_t j = L'\U0010FFFD'; + +char32_t k = U'\U0010FFFD'; + +char l = 'Ø'; // expected-error {{character too large for enclosing character literal type}} +char m = '👿'; // 
expected-error {{character too large for enclosing character literal type}} + +char32_t n = U'ab'; // expected-error {{Unicode character literals may not contain multiple characters}} +char16_t o = '👽'; // expected-error {{character too large for enclosing character literal type}} diff --git a/test/Lexer/constants.c b/test/Lexer/constants.c index 013103b1f5..290388543c 100644 --- a/test/Lexer/constants.c +++ b/test/Lexer/constants.c @@ -66,4 +66,4 @@ double t1[] = { // PR7888 double g = 1e100000000; // expected-warning {{too large}} -char h = '\u1234'; // expected-warning {{character unicode escape sequence too long for its type}} +char h = '\u1234'; // expected-error {{character too large for enclosing character literal type}} diff --git a/test/Lexer/utf8-char-literal.cpp b/test/Lexer/utf8-char-literal.cpp index c4ea5fc3c3..12b001e4b4 100644 --- a/test/Lexer/utf8-char-literal.cpp +++ b/test/Lexer/utf8-char-literal.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s -int array0[u'ñ' == u'\xf1'? 1 : -1]; -int array1['ñ' != u'\xf1'? 1 : -1]; +int array0[u'ñ' == u'\xf1'? 1 : -1]; +int array1['\xF1' != u'\xf1'? 1 : -1]; +int array1['ñ' != u'\xf1'? 
1 : -1]; // expected-error {{character too large for enclosing character literal type}} diff --git a/test/Lexer/wchar.c b/test/Lexer/wchar.c index 648a38ef3f..de00c02f13 100644 --- a/test/Lexer/wchar.c +++ b/test/Lexer/wchar.c @@ -1,9 +1,9 @@ // RUN: %clang_cc1 -fsyntax-only -fshort-wchar -verify %s void f() { - (void)L"\U00010000"; // expected-warning {{character unicode escape sequence too long for its type}} + (void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no warning - (void)L'\U00010000'; // expected-warning {{character unicode escape sequence too long for its type}} + (void)L'\U00010000'; // expected-error {{character too large for enclosing character literal type}} (void)L'ab'; // expected-warning {{extraneous characters in character constant ignored}}