From: Florian Hahn Date: Mon, 5 Aug 2019 11:12:23 +0000 (+0000) Subject: [AArch64] Skip isZIPMask check for masks with an odd number of elements. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1b9dd625a5fe458fd86e4e2783e98c565ba110ca;p=llvm [AArch64] Skip isZIPMask check for masks with an odd number of elements. We process 2 elements at a time and expect the number of elements to be even. Similar to D60690. Reviewers: dmgreen, samparker, t.p.northover Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D65400 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367831 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 6a7fdd4f662..d8c12eb9a05 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6312,6 +6312,8 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; WhichResult = (M[0] == 0 ? 
0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { diff --git a/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll b/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll index 7ed0e59e23c..2be8b014ebb 100644 --- a/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll +++ b/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll @@ -31,3 +31,29 @@ define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) { %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> ret <4 x i32> %s3 } + +define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) { +; CHECK-LABEL: zip_mask_check: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s +; CHECK-NEXT: str s0, [x2] +; CHECK-NEXT: ret + %tmp3 = load <3 x float>, <3 x float>* %p1, align 16 + %tmp4 = load <3 x float>, <3 x float>* %p2, align 4 + %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> + %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> + %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> + %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef) + %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8) + %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> + %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32> + %tmp12 = extractelement <16 x i32> %tmp11, i32 0 + store i32 %tmp12, i32* %p3, align 4 + ret void +} + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1