From: Fangrui Song
Date: Fri, 26 Apr 2019 10:56:10 +0000 (+0000)
Subject: caseFoldingDjbHash: simplify and make the US-ASCII fast path faster
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f52833fc1139d62f4aea96081edda386210a30c1;p=llvm

caseFoldingDjbHash: simplify and make the US-ASCII fast path faster

The slow path (with at least one non US-ASCII) will be slower but that doesn't matter.

Differential Revision: https://reviews.llvm.org/D61178

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359294 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Support/DJB.cpp b/lib/Support/DJB.cpp
index a20dfc08e35..b97e212aaaa 100644
--- a/lib/Support/DJB.cpp
+++ b/lib/Support/DJB.cpp
@@ -57,29 +57,26 @@ static UTF32 foldCharDwarf(UTF32 C) {
   return sys::unicode::foldCharSimple(C);
 }
 
-static uint32_t caseFoldingDjbHashCharSlow(StringRef &Buffer, uint32_t H) {
-  UTF32 C = chopOneUTF32(Buffer);
-
-  C = foldCharDwarf(C);
-
-  std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
-  StringRef Folded = toUTF8(C, Storage);
-  return djbHash(Folded, H);
+static Optional<uint32_t> fastCaseFoldingDjbHash(StringRef Buffer, uint32_t H) {
+  bool allASCII = true;
+  for (unsigned char C : Buffer) {
+    H = H * 33 + ('A' <= C && C <= 'Z' ? C - 'A' + 'a' : C);
+    allASCII &= C <= 0x7f;
+  }
+  if (allASCII)
+    return H;
+  return None;
 }
 
 uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) {
+  if (Optional<uint32_t> Result = fastCaseFoldingDjbHash(Buffer, H))
+    return *Result;
+
+  std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
   while (!Buffer.empty()) {
-    unsigned char C = Buffer.front();
-    if (LLVM_LIKELY(C <= 0x7f)) {
-      // US-ASCII, encoded as one character in utf-8.
-      // This is by far the most common case, so handle this specially.
-      if (C >= 'A' && C <= 'Z')
-        C = 'a' + (C - 'A'); // fold uppercase into lowercase
-      H = (H << 5) + H + C;
-      Buffer = Buffer.drop_front();
-      continue;
-    }
-    H = caseFoldingDjbHashCharSlow(Buffer, H);
+    UTF32 C = foldCharDwarf(chopOneUTF32(Buffer));
+    StringRef Folded = toUTF8(C, Storage);
+    H = djbHash(Folded, H);
   }
   return H;
 }