; This test is exactly the same as splat_2xf64, except it adds two copies.
; These copies shouldn't get in the way of matching the dup pattern.
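; The two copies are %6 (a copy of the G_IMPLICIT_DEF) and %7 (a copy of the
; G_INSERT_VECTOR_ELT result); the selector is expected to look through both
; of them when matching the lane duplicate.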
+ ; CHECK-LABEL: name: splat_2xf64_copies
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
+ ; CHECK: $q0 = COPY [[DUPv2i64lane]]
+ ; CHECK: RET_ReallyLR implicit $q0
%0:fpr(s64) = COPY $d0
%2:fpr(<2 x s64>) = G_IMPLICIT_DEF
%0:fpr(s64) = COPY $d0 is splatted into both lanes; the copies sit between the
%6:fpr(<2 x s64>) = COPY %2
%3:gpr(s32) = G_CONSTANT i32 0
%5:fpr(<2 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32)
%1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s32)
%7:fpr(<2 x s64>) = COPY %1
%4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, %5(<2 x s32>)
$q0 = COPY %4(<2 x s64>)
RET_ReallyLR implicit $q0
+
+...
+---
+name: not_all_zeros
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+ ; Make sure that we don't select a DUP when the shuffle mask isn't all zeros.
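+ ; The mask here is built from the constants 0 and 1, i.e. <0, 1>, so the
+ ; shuffle isn't a splat of lane 0 and is instead selected as a table lookup
+ ; (TBLv16i8Two).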
+ ; CHECK-LABEL: name: not_all_zeros
+ ; CHECK: liveins: $x0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSvi64gpr:%[0-9]+]]:fpr128 = INSvi64gpr [[DEF]], 0, [[COPY]]
+ ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+ ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[INSvi64gpr]], %subreg.qsub0, [[DEF]], %subreg.qsub1
+ ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]]
+ ; CHECK: $q0 = COPY [[TBLv16i8Two]]
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:gpr(s64) = COPY $x0
+ %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
+ %3:gpr(s32) = G_CONSTANT i32 0
+ %6:gpr(s32) = G_CONSTANT i32 1
+ %5:fpr(<2 x s32>) = G_BUILD_VECTOR %3(s32), %6(s32)
+ %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+ %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, %5(<2 x s32>)
+ $q0 = COPY %4(<2 x s64>)
+ RET_ReallyLR implicit $q0