From: Logan Chien Date: Thu, 8 Jan 2015 13:19:07 +0000 (+0000) Subject: Frontend: Fix SourceColumnMap assertion failure on non-ascii characters. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3ce12ac54fbf16fe381db0c3515b59a0149a233e;p=clang Frontend: Fix SourceColumnMap assertion failure on non-ascii characters. If there are some non-ascii characters in the input source code, the column index might be smaller than the byte index. This will result in two possible assertion failures. This CL fixes the computation of the column index and byte index. 1. The assertion in startOfNextColumn() and startOfPreviousColumn() should not be raised when the byte index is greater than the column index since the non-ascii characters may use more than one byte to store a character in a column. 2. The length of the caret line should be equal to the number of columns of source line, instead of the length of the source line. Otherwise, the assertion in selectInterestingSourceRegion will be raised because the removed columns plus the kept columns are not greater than the max column, which means that we should not remove any column at all. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@225442 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Frontend/TextDiagnostic.cpp b/lib/Frontend/TextDiagnostic.cpp index 7b25da9b29..bbc99141f0 100644 --- a/lib/Frontend/TextDiagnostic.cpp +++ b/lib/Frontend/TextDiagnostic.cpp @@ -293,14 +293,14 @@ struct SourceColumnMap { /// \brief Map from a byte index to the next byte which starts a column. int startOfNextColumn(int N) const { - assert(0 <= N && N < static_cast<int>(m_columnToByte.size() - 1)); + assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1)); while (byteToColumn(++N) == -1) {} return N; } /// \brief Map from a byte index to the previous byte which starts a column. 
int startOfPreviousColumn(int N) const { - assert(0 < N && N < static_cast<int>(m_columnToByte.size())); + assert(0 < N && N < static_cast<int>(m_byteToColumn.size())); while (byteToColumn(--N) == -1) {} return N; } @@ -323,9 +323,10 @@ static void selectInterestingSourceRegion(std::string &SourceLine, std::string &FixItInsertionLine, unsigned Columns, const SourceColumnMap &map) { - unsigned MaxColumns = std::max<unsigned>(map.columns(), - std::max(CaretLine.size(), - FixItInsertionLine.size())); + unsigned CaretColumns = CaretLine.size(); + unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine); + unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()), + std::max(CaretColumns, FixItColumns)); // if the number of columns is less than the desired number we're done if (MaxColumns <= Columns) return; @@ -1110,12 +1111,13 @@ void TextDiagnostic::emitSnippetAndCaret( // Copy the line of code into an std::string for ease of manipulation. std::string SourceLine(LineStart, LineEnd); - // Create a line for the caret that is filled with spaces that is the same - // length as the line of source code. - std::string CaretLine(LineEnd-LineStart, ' '); - + // Build the byte to column map. const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop); + // Create a line for the caret that is filled with spaces that is the same + // number of columns as the line of source code. + std::string CaretLine(sourceColMap.columns(), ' '); + // Highlight all of the characters covered by Ranges with ~ characters. for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(), E = Ranges.end(); diff --git a/test/Frontend/source-col-map.c b/test/Frontend/source-col-map.c new file mode 100644 index 0000000000..a14023bc82 --- /dev/null +++ b/test/Frontend/source-col-map.c @@ -0,0 +1,37 @@ +// RUN: not %clang_cc1 %s -fsyntax-only -fmessage-length 75 -o /dev/null 2>&1 | FileCheck %s -strict-whitespace + +// Test case for the text diagnostics source column conversion crash. 
+ +// This test case tries to check the error diagnostic message printer, which is +// responsible to create the code snippet shorter than the message-length (in +// number of columns.) +// +// The error diagnostic message printer should be able to handle the non-ascii +// characters without any segmentation fault or assertion failure. If your +// changes to clang frontend crashes this case, it is likely that you are mixing +// column index with byte index which are two totally different concepts. + +// NOTE: This file is encoded in UTF-8 and intentionally contains some +// non-ASCII characters. + +__attribute__((format(printf, 1, 2))) +extern int printf(const char *fmt, ...); + +void test1(Unknown* b); // αααα αααα αααα αααα αααα αααα αααα αααα αααα αααα αααα +// CHECK: unknown type name 'Unknown' +// CHECK-NEXT: void test1(Unknown* b); // αααα αααα αααα αααα αααα αααα αααα ααα... +// CHECK-NEXT: {{^ \^$}} + +void test2(Unknown* b); // αααα αααα αααα αααα αααα αααα αααα αααα αααα + +// CHECK: unknown type name 'Unknown' +// CHECK-NEXT: void test2(Unknown* b); // αααα αααα αααα αααα αααα αααα αααα αααα αααα +// CHECK-NEXT: {{^ \^$}} + +void test3() { + /* αααα αααα αααα αααα αααα αααα αααα αααα αααα αααα */ printf("%d", "s"); +} +// CHECK: format specifies type 'int' but the argument has type 'char *' +// CHECK-NEXT: ...αααα αααα αααα αααα αααα αααα αααα αααα αααα */ printf("%d", "s"); +// CHECK-NEXT: {{^ ~~ \^~~$}} +// CHECK-NEXT: {{^ %s$}}