From 8a8f9973b93925be35c63e078ad5dfe7b7998c98 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 10 Dec 2018 18:10:35 +0000 Subject: [PATCH] ComputeLineNumbers: delete SSE2 vectorization Summary: SSE2 vectorization was added in 2012, but it is 2018 now and I can't observe any performance boost (testing clang -E [all Sema/* CodeGen/* with proper -I options]) with the existing _mm_movemask_epi8+countTrailingZeros or the following SSE4.2 (compiling with -msse4.2): __m128i C = _mm_setr_epi8('\r','\n',0,0,0,0,0,0,0,0,0,0,0,0,0,0); _mm_cmpestri(C, 2, Chunk, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_POSITIVE_POLARITY | _SIDD_LEAST_SIGNIFICANT) Delete the vectorization to simplify the code. Also simplify the code a bit and don't check the line ending sequence \n\r Reviewers: bkramer, #clang Reviewed By: bkramer Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D55484 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@348777 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Basic/SourceManager.cpp | 69 +++++++------------------------------ 1 file changed, 13 insertions(+), 56 deletions(-) diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp index 14c229c2a9..ce8aa5d112 100644 --- a/lib/Basic/SourceManager.cpp +++ b/lib/Basic/SourceManager.cpp @@ -1216,65 +1216,22 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI, const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); - unsigned Offs = 0; + unsigned I = 0; while (true) { // Skip over the contents of the line. - const unsigned char *NextBuf = (const unsigned char *)Buf; - -#ifdef __SSE2__ - // Try to skip to the next newline using SSE instructions. This is very - // performance sensitive for programs with lots of diagnostics and in -E - // mode. - __m128i CRs = _mm_set1_epi8('\r'); - __m128i LFs = _mm_set1_epi8('\n'); - - // First fix up the alignment to 16 bytes. - while (((uintptr_t)NextBuf & 0xF) != 0) { - if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0') - goto FoundSpecialChar; - ++NextBuf; - } - - // Scan 16 byte chunks for '\r' and '\n'. Ignore '\0'. - while (NextBuf+16 <= End) { - const __m128i Chunk = *(const __m128i*)NextBuf; - __m128i Cmp = _mm_or_si128(_mm_cmpeq_epi8(Chunk, CRs), - _mm_cmpeq_epi8(Chunk, LFs)); - unsigned Mask = _mm_movemask_epi8(Cmp); - - // If we found a newline, adjust the pointer and jump to the handling code. - if (Mask != 0) { - NextBuf += llvm::countTrailingZeros(Mask); - goto FoundSpecialChar; - } - NextBuf += 16; - } -#endif - - while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') - ++NextBuf; - -#ifdef __SSE2__ -FoundSpecialChar: -#endif - Offs += NextBuf-Buf; - Buf = NextBuf; - - if (Buf[0] == '\n' || Buf[0] == '\r') { - // If this is \n\r or \r\n, skip both characters. - if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) { - ++Offs; - ++Buf; - } - ++Offs; - ++Buf; - LineOffsets.push_back(Offs); + while (Buf[I] != '\n' && Buf[I] != '\r' && Buf[I] != '\0') + ++I; + + if (Buf[I] == '\n' || Buf[I] == '\r') { + // If this is \r\n, skip both characters. + if (Buf[I] == '\r' && Buf[I+1] == '\n') + ++I; + ++I; + LineOffsets.push_back(I); } else { - // Otherwise, this is a null. If end of file, exit. - if (Buf == End) break; - // Otherwise, skip the null. - ++Offs; - ++Buf; + // Otherwise, this is a NUL. If end of file, exit. + if (Buf+I == End) break; + ++I; } } -- 2.40.0