From 24d58f9cbcdc09fae363e4b07b58cf504fa5b912 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 14 Jun 2017 20:37:11 +0000
Subject: [PATCH] [x86] avoid unnecessary shuffle mask math in
 combineX86ShufflesRecursively()

This is a follow-up to https://reviews.llvm.org/D34174 / https://reviews.llvm.org/rL305398.

We mentioned replacing the multiplies with shifts, but the real win seems to be in
bypassing the extra ops in the common cases where RootRatio or OpRatio is one
(each ratio is checked independently).

This gives us another 1-2% overall win for the test in PR32037:
https://bugs.llvm.org/show_bug.cgi?id=32037


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305414 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c7e4f4dc14e..29b438e9bff 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -28005,10 +28005,10 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
       continue;
     }
 
-    // TODO: Here and below, we could convert multiply to shift-left for
-    // performance because we know that our mask sizes are power-of-2.
     unsigned RootMaskedIdx =
-        RootMask[RootIdx] * RootRatio + (i & (RootRatio - 1));
+        RootRatio == 1
+            ? RootMask[RootIdx]
+            : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
 
     // Just insert the scaled root mask value if it references an input other
     // than the SrcOp we're currently inserting.
@@ -28019,7 +28019,6 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
     }
 
     RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
-
     unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
     if (OpMask[OpIdx] < 0) {
       // The incoming lanes are zero or undef, it doesn't matter which ones we
@@ -28030,9 +28029,11 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
 
     // Ok, we have non-zero lanes, map them through to one of the Op's inputs.
     unsigned OpMaskedIdx =
-        OpMask[OpIdx] * OpRatio + (RootMaskedIdx & (OpRatio - 1));
-    OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
+        OpRatio == 1
+            ? OpMask[OpIdx]
+            : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));
 
+    OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
     if (OpMask[OpIdx] < (int)OpMask.size()) {
       assert(0 <= InputIdx0 && "Unknown target shuffle input");
       OpMaskedIdx += InputIdx0 * MaskWidth;
-- 
2.50.1