[AArch64] Skip isZIPMask check for masks with an odd number of elements.

author Florian Hahn <flo@fhahn.com>

Mon, 5 Aug 2019 11:12:23 +0000 (11:12 +0000)

committer Florian Hahn <flo@fhahn.com>

Mon, 5 Aug 2019 11:12:23 +0000 (11:12 +0000)
author Florian Hahn <flo@fhahn.com>
Mon, 5 Aug 2019 11:12:23 +0000 (11:12 +0000)
committer Florian Hahn <flo@fhahn.com>
Mon, 5 Aug 2019 11:12:23 +0000 (11:12 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 6a7fdd4f662a5593bd52d5eff7f1c0bdfd718587..d8c12eb9a05e97e8b8477948ed7cdc34220776ba 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6312,6 +6312,8 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  
  static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
    unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts % 2 != 0)
+    return false;
    WhichResult = (M[0] == 0 ? 0 : 1);
    unsigned Idx = WhichResult * NumElts / 2;
    for (unsigned i = 0; i != NumElts; i += 2) {
diff --git a/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll b/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll

index 7ed0e59e23cef82ad429ca3e08e762d215649bb5..2be8b014ebbeefb79f98a053c7d0345341b7d7bf 100644 (file)
--- a/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
+++ b/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
@@ -31,3 +31,29 @@ define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {
    %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
    ret <4 x i32> %s3
  }
+
+define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
+; CHECK-LABEL: zip_mask_check:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    trn2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    str s0, [x2]
+; CHECK-NEXT:    ret
+  %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
+  %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
+  %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
+  %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+  %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
+  %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
+  %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
+  %tmp12 = extractelement <16 x i32> %tmp11, i32 0
+  store i32 %tmp12, i32* %p3, align 4
+  ret void
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
author	Florian Hahn <flo@fhahn.com>
	Mon, 5 Aug 2019 11:12:23 +0000 (11:12 +0000)
committer	Florian Hahn <flo@fhahn.com>
	Mon, 5 Aug 2019 11:12:23 +0000 (11:12 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll		patch \| blob \| history