This is the result of discussions on the list about how to deal with intrinsics
whose integer and floating-point forms can only be disambiguated by codegen via
their overload types. That ambiguity causes problems for GlobalISel, because
some of the type information is lost during translation, whereas for other
operations, such as IR instructions, the information is encoded in the
instruction opcode.
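For illustration, the old intrinsic is overloaded over both element classes, so
the integer and FP pairwise adds share one intrinsic ID and differ only in
their overload types:

  declare <2 x i32>   @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>)
  declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)

After IRTranslator, a call to either overload becomes
G_INTRINSIC intrinsic(@llvm.aarch64.neon.addp) on <2 x s32> operands, since LLT
does not distinguish integer from FP elements, so instruction selection can no
longer tell ADDP from FADDP.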
This patch changes clang to emit the new faddp intrinsic when the vector
operands to the builtin have FP element types. LLVM IR AutoUpgrade has been
taught to upgrade existing calls to aarch64.neon.addp that have FP vector
arguments, and the workarounds introduced for GlobalISel in r355865 are
removed. This is a more permanent solution to PR40968.
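As a sketch of the clang-side change (assuming the usual arm_neon.h mapping of
vpadd_f32 onto the pairwise-add builtin), a v2f32 pairwise add now lowers to:

  %sum = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)

where clang previously emitted a call to @llvm.aarch64.neon.addp.v2f32; integer
builtins keep using the existing addp intrinsic.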
Differential Revision: https://reviews.llvm.org/D59655
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356722 91177308-0d34-0410-b5e6-96231b3b80d8
// Pairwise Add
def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
+ def int_aarch64_neon_faddp : AdvSIMD_2VectorArg_Intrinsic;
// Long Pairwise Add
// FIXME: In theory, we shouldn't need intrinsics for saddlp or
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
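+ // Upgrade calls to aarch64.neon.addp that take FP vectors to the new
+ // aarch64.neon.faddp intrinsic; integer overloads are left as-is.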
+ if (Name.startswith("aarch64.neon.addp")) {
+ if (F->arg_size() != 2)
+ break; // Invalid IR.
+ auto fArgs = F->getFunctionType()->params();
+ VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
+ if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::aarch64_neon_faddp, fArgs);
+ return true;
+ }
+ }
break;
}
}
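To illustrate the upgrade path (a minimal sketch mirroring the new AutoUpgrade
test added below), bitcode containing

  %res = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)

is rewritten on load into

  %res = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)

while calls with integer vector arguments are left untouched.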
defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
return std::make_pair(0, EltTy);
});
- // HACK: Check that the intrinsic isn't ambiguous.
- // (See: https://bugs.llvm.org/show_bug.cgi?id=40968)
- getActionDefinitionsBuilder(G_INTRINSIC)
- .custom();
-
getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s16, s32, s64})
.clampScalar(0, s16, s64)
return false;
case TargetOpcode::G_VAARG:
return legalizeVaArg(MI, MRI, MIRBuilder);
- case TargetOpcode::G_INTRINSIC:
- return legalizeIntrinsic(MI, MRI, MIRBuilder);
}
llvm_unreachable("expected switch to return");
}
-bool AArch64LegalizerInfo::legalizeIntrinsic(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- // HACK: Don't allow faddp/addp for now. We don't pass down the type info
- // necessary to get this right today.
- //
- // It looks like addp/faddp is the only intrinsic that's impacted by this.
- // All other intrinsics fully describe the required types in their names.
- //
- // (See: https://bugs.llvm.org/show_bug.cgi?id=40968)
- const MachineOperand &IntrinOp = MI.getOperand(1);
- if (IntrinOp.isIntrinsicID() &&
- IntrinOp.getIntrinsicID() == Intrinsic::aarch64_neon_addp)
- return false;
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
-
- bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
};
} // End llvm namespace.
#endif
+++ /dev/null
-# RUN: llc -mtriple aarch64-unknown-unknown -O0 -start-before=legalizer -pass-remarks-missed=gisel* %s -o - 2>&1 | FileCheck %s
-#
-# Check that we fall back on @llvm.aarch64.neon.addp and ensure that we get the
-# correct instruction.
-# https://bugs.llvm.org/show_bug.cgi?id=40968
-
---- |
- define <2 x float> @foo(<2 x float> %v1, <2 x float> %v2) {
- entry:
- %v3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %v1, <2 x float> %v2)
- ret <2 x float> %v3
- }
- declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)
-...
----
-name: foo
-alignment: 2
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $d0, $d1
- ; CHECK: remark:
- ; CHECK-SAME: unable to legalize instruction: %2:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.addp), %0:_(<2 x s32>), %1:_(<2 x s32>)
- ; CHECK: faddp
- ; CHECK-NOT: addp
- %0:_(<2 x s32>) = COPY $d0
- %1:_(<2 x s32>) = COPY $d1
- %2:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.addp), %0(<2 x s32>), %1(<2 x s32>)
- $d0 = COPY %2(<2 x s32>)
- RET_ReallyLR implicit $d0
-
-...
# DEBUG: .. the first uncovered type index: 1, OK
#
# DEBUG-NEXT: G_INTRINSIC (opcode {{[0-9]+}}): 0 type indices
-# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG: .. type index coverage check SKIPPED: no rules defined
#
# DEBUG-NEXT: G_INTRINSIC_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices
# DEBUG: .. type index coverage check SKIPPED: no rules defined
ret <2 x i64> %val
}
-declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>)
+declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_faddp_v2f32:
- %val = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+ %val = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: faddp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_faddp_v4f32:
- %val = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+ %val = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: faddp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_faddp_v2f64:
- %val = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+ %val = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: faddp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
;CHECK: faddp.2s
%tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = load <2 x float>, <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
;CHECK: faddp.4s
%tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = load <4 x float>, <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
;CHECK: faddp.2d
%tmp1 = load <2 x double>, <2 x double>* %A
%tmp2 = load <2 x double>, <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
-declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>) nounwind readnone
define <2 x i64> @uaddl_duprhs(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: uaddl_duprhs
--- /dev/null
+; RUN: opt -S < %s -mtriple=arm64 | FileCheck %s
+declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
+
+; CHECK: call <4 x float> @llvm.aarch64.neon.faddp.v4f32
+define <4 x float> @upgrade_aarch64_neon_addp_float(<4 x float> %a, <4 x float> %b) {
+ %res = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %res
+}
+