From 373d814f41b20b4675753894999c3dadead8628b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 7 Jan 2017 22:20:23 +0000 Subject: [PATCH] [AVX-512] Add masked forms of the alternate MOVDDUP patterns. I'm not too sure how to get isel to select even all of the unmasked forms, but at least we have a consistent set now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291368 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 22 +++++++++++++++++ test/CodeGen/X86/vector-shuffle-masked.ll | 30 +++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 908053e1342..c8eebc1b88c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -8646,6 +8646,28 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; + +def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)), + (v2f64 VR128X:$src0)), + (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; +def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)), + (bitconvert (v4i32 immAllZerosV))), + (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; + +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), + (v2f64 VR128X:$src0)), + (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask, + (COPY_TO_REGCLASS FR64X:$src, VR128X))>; +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), + (bitconvert (v4i32 immAllZerosV))), + (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>; + +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), + (v2f64 VR128X:$src0)), + (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), + (bitconvert (v4i32 immAllZerosV))), + (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; } //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/vector-shuffle-masked.ll b/test/CodeGen/X86/vector-shuffle-masked.ll index 04d6b373324..d57dc7194e9 100644 --- a/test/CodeGen/X86/vector-shuffle-masked.ll +++ b/test/CodeGen/X86/vector-shuffle-masked.ll @@ -686,3 +686,33 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x doub %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru ret <2 x double> %res } + +define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) { +; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask: +; CHECK: # BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vblendmpd (%rdi){1to2}, %xmm0, %xmm0 {%k1} +; CHECK-NEXT: retq + %q = load double, double* %x, align 1 + %vecinit.i = insertelement <2 x double> undef, double %q, i32 0 + %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1 + %mask.cast = bitcast i8 %mask to <8 x i1> + %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> + %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru + ret <2 x double> %res +} + +define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) { +; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz: +; CHECK: # BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0] +; CHECK-NEXT: retq + %q = load double, double* %x, align 1 + %vecinit.i = insertelement <2 x double> undef, double %q, i32 0 + %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1 + %mask.cast = bitcast i8 %mask to <8 x i1> + %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> + %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer + ret <2 x double> %res +} -- 2.49.0