From 1f13fbd0c5bac045aa7d6ae3e998758e9dfa3ce4 Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Fri, 7 Jun 2013 17:16:01 +0000 Subject: [PATCH] Handle Unicode characters in fix-it replacement strings. Patch by Sukolsak Sakshuwong! git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@183535 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Frontend/TextDiagnostic.cpp | 36 ++++++++++++++++----------------- test/FixIt/fixit-unicode.c | 20 ++++++++++++++++++ 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/lib/Frontend/TextDiagnostic.cpp b/lib/Frontend/TextDiagnostic.cpp index 1572d0f1d0..2da66d3b32 100644 --- a/lib/Frontend/TextDiagnostic.cpp +++ b/lib/Frontend/TextDiagnostic.cpp @@ -336,13 +336,10 @@ static void selectInterestingSourceRegion(std::string &SourceLine, if (MaxColumns <= Columns) return; - // no special characters allowed in CaretLine or FixItInsertionLine + // No special characters are allowed in CaretLine. assert(CaretLine.end() == std::find_if(CaretLine.begin(), CaretLine.end(), char_out_of_range(' ','~'))); - assert(FixItInsertionLine.end() == - std::find_if(FixItInsertionLine.begin(), FixItInsertionLine.end(), - char_out_of_range(' ','~'))); // Find the slice that we need to display the full caret line // correctly. @@ -370,8 +367,15 @@ static void selectInterestingSourceRegion(std::string &SourceLine, if (!isWhitespace(FixItInsertionLine[FixItEnd - 1])) break; - CaretStart = std::min(FixItStart, CaretStart); - CaretEnd = std::max(FixItEnd, CaretEnd); + // We can safely use the byte offset FixItStart as the column offset + // because the characters up until FixItStart are all ASCII whitespace + // characters. + unsigned FixItStartCol = FixItStart; + unsigned FixItEndCol + = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd)); + + CaretStart = std::min(FixItStartCol, CaretStart); + CaretEnd = std::max(FixItEndCol, CaretEnd); } // CaretEnd may have been set at the middle of a character @@ -1023,24 +1027,18 @@ static std::string buildFixItInsertionLine(unsigned LineNo, if (HintCol < PrevHintEndCol) HintCol = PrevHintEndCol + 1; - // FIXME: This function handles multibyte characters in the source, but - // not in the fixits. This assertion is intended to catch unintended - // use of multibyte characters in fixits. If we decide to do this, we'll - // have to track separate byte widths for the source and fixit lines. - assert((size_t)llvm::sys::locale::columnWidth(I->CodeToInsert) == - I->CodeToInsert.size()); - - // This relies on one byte per column in our fixit hints. // This should NOT use HintByteOffset, because the source might have // Unicode characters in earlier columns. - unsigned LastColumnModified = HintCol + I->CodeToInsert.size(); - if (LastColumnModified > FixItInsertionLine.size()) - FixItInsertionLine.resize(LastColumnModified, ' '); + unsigned NewFixItLineSize = FixItInsertionLine.size() + + (HintCol - PrevHintEndCol) + I->CodeToInsert.size(); + if (NewFixItLineSize > FixItInsertionLine.size()) + FixItInsertionLine.resize(NewFixItLineSize, ' '); std::copy(I->CodeToInsert.begin(), I->CodeToInsert.end(), - FixItInsertionLine.begin() + HintCol); + FixItInsertionLine.end() - I->CodeToInsert.size()); - PrevHintEndCol = LastColumnModified; + PrevHintEndCol = + HintCol + llvm::sys::locale::columnWidth(I->CodeToInsert); } else { FixItInsertionLine.clear(); break; diff --git a/test/FixIt/fixit-unicode.c b/test/FixIt/fixit-unicode.c index 9c0242e92e..216b73dbfc 100644 --- a/test/FixIt/fixit-unicode.c +++ b/test/FixIt/fixit-unicode.c @@ -34,3 +34,23 @@ void test2() { // CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{[[@LINE-9]]:16-[[@LINE-9]]:18}:"%ld" } + +void test3() { + int กssss = 42; + int a = กsss; // expected-error{{use of undeclared identifier 'กsss'; did you mean 'กssss'?}} +// CHECK: {{^ \^}} +// CHECK: {{^ [^ ]+ssss}} +// CHECK-MACHINE: fix-it:"{{.*}}":{[[@LINE-3]]:11-[[@LINE-3]]:17}:"\340\270\201ssss" + + int ssกss = 42; + int b = ssกs; // expected-error{{use of undeclared identifier 'ssกs'; did you mean 'ssกss'?}} +// CHECK: {{^ \^}} +// CHECK: {{^ ss.+ss}} +// CHECK-MACHINE: fix-it:"{{.*}}":{[[@LINE-3]]:11-[[@LINE-3]]:17}:"ss\340\270\201ss" + + int sssssssssก = 42; + int c = sssssssss; // expected-error{{use of undeclared identifier 'sssssssss'; did you mean 'sssssssssก'?}} +// CHECK: {{^ \^}} +// CHECK: {{^ sssssssss.+}} +// CHECK-MACHINE: fix-it:"{{.*}}":{[[@LINE-3]]:11-[[@LINE-3]]:20}:"sssssssss\340\270\201" +} -- 2.50.1