  return DAG.getBitcast(N->getValueType(0), Shift);
}
+// Given a non-indexed load that was lowered from a GEP IR instruction with a
+// single indexing dimension, return the index node.
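+// As an illustrative sketch of the shape this matches: the load's base
+// pointer is expected to look like
+//   (add (shl index, scale), base)
+// in which case the 'index' operand of the SHL is returned. Only the ADD and
+// SHL opcodes are checked here; the scale and base operands are not inspected.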
+static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) {
+  if (Ld->isIndexed())
+    return SDValue();
+
+  SDValue Base = Ld->getBasePtr();
+
+  if (Base.getOpcode() != ISD::ADD)
+    return SDValue();
+
+  SDValue ShiftedIndex = Base.getOperand(0);
+
+  if (ShiftedIndex.getOpcode() != ISD::SHL)
+    return SDValue();
+
+  return ShiftedIndex.getOperand(0);
+}
+
+static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
+  if (Subtarget.hasBMI2() && VT.isScalarInteger()) {
+    switch (VT.getSizeInBits()) {
+    default: return false;
+    case 64: return Subtarget.is64Bit();
+    case 32: return true;
+    }
+  }
+  return false;
+}
+
+// This function recognizes cases where the X86 bzhi instruction can replace an
+// 'and-load' sequence: loading an integer value from an array of constants
+// defined as
+//
+//   int array[SIZE] = {0x0, 0x1, 0x3, 0x7, 0xF, ..., 2^(SIZE-1) - 1}
+//
+// and then applying a bitwise AND between the loaded value and another input.
+// This is equivalent to performing bzhi (zero high bits) on the input, using
+// the load's index as the bit count.
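+//
+// For example (an illustrative sketch; the names are made up for this
+// comment), a function such as
+//
+//   int foo(int x, int i) { return x & array[i]; }
+//
+// masks 'x' with array[i] == (1 << i) - 1, keeping the low 'i' bits of 'x',
+// which is exactly what 'bzhi' computes from 'x' and 'i' directly.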
+static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
+                                    const X86Subtarget &Subtarget) {
+  MVT VT = Node->getSimpleValueType(0);
+  SDLoc dl(Node);
+
+  // Check if the subtarget has a BZHI instruction for the node's type.
+  if (!hasBZHI(Subtarget, VT))
+    return SDValue();
+
+  // Try matching the pattern for both operands.
+  for (unsigned i = 0; i < 2; i++) {
+    SDValue N = Node->getOperand(i);
+    LoadSDNode *Ld = dyn_cast<LoadSDNode>(N.getNode());
+
+    // Bail out if the operand is not a load instruction.
+    if (!Ld)
+      return SDValue();
+
+    const Value *MemOp = Ld->getMemOperand()->getValue();
+
+    if (!MemOp)
+      return SDValue();
+
+    if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
+      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {
+        if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+
+          Constant *Init = GV->getInitializer();
+          Type *Ty = Init->getType();
+          if (!isa<ConstantDataArray>(Init) ||
+              !Ty->getArrayElementType()->isIntegerTy() ||
+              Ty->getArrayElementType()->getScalarSizeInBits() !=
+                  VT.getSizeInBits() ||
+              Ty->getArrayNumElements() >
+                  Ty->getArrayElementType()->getScalarSizeInBits())
+            continue;
+
+          // Check that the array's constant elements match the expected
+          // pattern, i.e. arr[j] == (1 << j) - 1 for every j.
+          uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
+          bool ConstantsMatch = true;
+          for (uint64_t j = 0; j < ArrayElementCount; j++) {
+            ConstantInt *Elem =
+                cast<ConstantInt>(Init->getAggregateElement(j));
+            if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) {
+              ConstantsMatch = false;
+              break;
+            }
+          }
+          if (!ConstantsMatch)
+            continue;
+
+          // Do the transformation (for a 32-bit type):
+          //   -> (and (load arr[idx]), inp)
+          //   <- (and inp, (srl 0xFFFFFFFF, (sub 32, idx)))
+          // which will be selected as a single bzhi instruction.
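+          // For example, for idx == 5 both forms produce the same mask:
+          //   srl(0xFFFFFFFF, 32 - 5) == 0x0000001F == arr[5].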
+          SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0);
+          SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, VT);
+
+          // Get the node which indexes into the array.
+          SDValue Index = getIndexFromUnindexedLoad(Ld);
+          if (!Index)
+            return SDValue();
+          Index = DAG.getZExtOrTrunc(Index, dl, VT);
+
+          SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SizeC, Index);
+
+          SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
+          SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub);
+
+          return DAG.getNode(ISD::AND, dl, VT, Inp, LShr);
+        }
+      }
+    }
+  }
+  return SDValue();
+}
+
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
    return ShiftRight;
+  if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
+    return R;
+
  // Attempt to recursively combine a bitmask AND with shuffles.
  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
    SDValue Op(N, 0);
define i32 @f32_bzhi(i32 %x, i32 %y) local_unnamed_addr {
; CHECK-LABEL: f32_bzhi:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movslq %esi, %rax
-; CHECK-NEXT: andl fill_table32(,%rax,4), %edi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: bzhil %esi, %edi, %eax
+; CHECK-NEXT: retq
;
; CHECK32-LABEL: f32_bzhi:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movl fill_table32(,%eax,4), %eax
-; CHECK32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: ret{{[l|q]}}
+; CHECK32-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: retl
entry:
  %idxprom = sext i32 %y to i64
  %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @fill_table32, i64 0, i64 %idxprom
define i32 @f32_bzhi_partial(i32 %x, i32 %y) local_unnamed_addr {
; CHECK-LABEL: f32_bzhi_partial:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movslq %esi, %rax
-; CHECK-NEXT: andl fill_table32_partial(,%rax,4), %edi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: bzhil %esi, %edi, %eax
+; CHECK-NEXT: retq
;
; CHECK32-LABEL: f32_bzhi_partial:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movl fill_table32_partial(,%eax,4), %eax
-; CHECK32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: ret{{[l|q]}}
+; CHECK32-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: retl
entry:
  %idxprom = sext i32 %y to i64
  %arrayidx = getelementptr inbounds [17 x i32], [17 x i32]* @fill_table32_partial, i64 0, i64 %idxprom
define i64 @f64_bzhi(i64 %x, i64 %y) local_unnamed_addr {
; CHECK-LABEL: f64_bzhi:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andq fill_table64(,%rsi,8), %rdi
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
+; CHECK-NEXT: retq
;
; CHECK32-LABEL: f64_bzhi:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl fill_table64(,%eax,8), %eax
; CHECK32-NEXT: andl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: andl {{[0-9]+}}(%esp), %edx
-; CHECK32-NEXT: ret{{[l|q]}}
+; CHECK32-NEXT: retl
entry:
  %arrayidx = getelementptr inbounds [64 x i64], [64 x i64]* @fill_table64, i64 0, i64 %y
  %0 = load i64, i64* %arrayidx, align 8
define i64 @f64_bzhi_partial(i64 %x, i64 %y) local_unnamed_addr {
; CHECK-LABEL: f64_bzhi_partial:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andq fill_table64_partial(,%rsi,8), %rdi
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
+; CHECK-NEXT: retq
;
; CHECK32-LABEL: f64_bzhi_partial:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl fill_table64_partial(,%eax,8), %eax
; CHECK32-NEXT: andl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: andl {{[0-9]+}}(%esp), %edx
-; CHECK32-NEXT: ret{{[l|q]}}
+; CHECK32-NEXT: retl
entry:
  %arrayidx = getelementptr inbounds [51 x i64], [51 x i64]* @fill_table64_partial, i64 0, i64 %y
  %0 = load i64, i64* %arrayidx, align 8