From d2953ce57a4f1b0a6884b3d6b35266ab4f79fa7f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 6 Apr 2012 20:49:55 +0000 Subject: [PATCH] SourceManager: Vectorize ComputeLineNumbers for SSE2. This method is very hot, it is called when emitting diagnostics, in -E mode and for many #pragma handlers. It scans through the whole source file to count newlines, records and caches them in a vector. The speedup from vectorization isn't very large, as we fall back to bytewise scanning when we hit a newline. There might be a way to avoid leaving the sse loop but everything I tried didn't work out because a call to push_back clobbers xmm registers. About 2% speedup on average on "clang -E > /dev/null" of all .cpp files in clang's lib/Sema. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@154204 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Basic/SourceManager.cpp | 41 +++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp index a2540bc18a..cef091c598 100644 --- a/lib/Basic/SourceManager.cpp +++ b/lib/Basic/SourceManager.cpp @@ -1037,6 +1037,10 @@ unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc, return getPresumedLoc(Loc).getColumn(); } +#ifdef __SSE2__ +#include <emmintrin.h> +#endif + static LLVM_ATTRIBUTE_NOINLINE void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI, llvm::BumpPtrAllocator &Alloc, @@ -1062,11 +1066,44 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI, unsigned Offs = 0; while (1) { // Skip over the contents of the line. - // TODO: Vectorize this? This is very performance sensitive for programs - // with lots of diagnostics and in -E mode. const unsigned char *NextBuf = (const unsigned char *)Buf; + +#ifdef __SSE2__ + // Try to skip to the next newline using SSE instructions. This is very + // performance sensitive for programs with lots of diagnostics and in -E + // mode. 
+ __m128i CRs = _mm_set1_epi8('\r'); + __m128i LFs = _mm_set1_epi8('\n'); + + // First fix up the alignment to 16 bytes. + while (((uintptr_t)NextBuf & 0xF) != 0) { + if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0') + goto FoundSpecialChar; + ++NextBuf; + } + + // Scan 16 byte chunks for '\r' and '\n'. Ignore '\0'. + while (NextBuf+16 <= End) { + __m128i Chunk = *(__m128i*)NextBuf; + __m128i Cmp = _mm_or_si128(_mm_cmpeq_epi8(Chunk, CRs), + _mm_cmpeq_epi8(Chunk, LFs)); + unsigned Mask = _mm_movemask_epi8(Cmp); + + // If we found a newline, adjust the pointer and jump to the handling code. + if (Mask != 0) { + NextBuf += llvm::CountTrailingZeros_32(Mask); + goto FoundSpecialChar; + } + NextBuf += 16; + } +#endif + while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') ++NextBuf; + +#ifdef __SSE2__ +FoundSpecialChar: +#endif Offs += NextBuf-Buf; Buf = NextBuf; -- 2.40.0