From 24d58f9cbcdc09fae363e4b07b58cf504fa5b912 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 14 Jun 2017 20:37:11 +0000
Subject: [PATCH] [x86] avoid unnecessary shuffle mask math in combineX86ShufflesRecursively()

This is a follow-up to https://reviews.llvm.org/D34174 / https://reviews.llvm.org/rL305398.

We mentioned replacing the multiplies with shifts, but the real win seems to be in
bypassing the extra ops in the common case when the RootRatio and OpRatio are one.

This gives us another 1-2% overall win for the test in PR32037:
https://bugs.llvm.org/show_bug.cgi?id=32037

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305414 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c7e4f4dc14e..29b438e9bff 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -28005,10 +28005,10 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps,
       continue;
     }
 
-    // TODO: Here and below, we could convert multiply to shift-left for
-    // performance because we know that our mask sizes are power-of-2.
     unsigned RootMaskedIdx =
-        RootMask[RootIdx] * RootRatio + (i & (RootRatio - 1));
+        RootRatio == 1
+            ? RootMask[RootIdx]
+            : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
 
     // Just insert the scaled root mask value if it references an input other
     // than the SrcOp we're currently inserting.
@@ -28019,7 +28019,6 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps,
     }
 
     RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
-
     unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
     if (OpMask[OpIdx] < 0) {
       // The incoming lanes are zero or undef, it doesn't matter which ones we
@@ -28030,9 +28029,11 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps,
 
     // Ok, we have non-zero lanes, map them through to one of the Op's inputs.
     unsigned OpMaskedIdx =
-        OpMask[OpIdx] * OpRatio + (RootMaskedIdx & (OpRatio - 1));
-    OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
+        OpRatio == 1
+            ? OpMask[OpIdx]
+            : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));
 
+    OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
     if (OpMask[OpIdx] < (int)OpMask.size()) {
       assert(0 <= InputIdx0 && "Unknown target shuffle input");
       OpMaskedIdx += InputIdx0 * MaskWidth;
-- 
2.50.1