From fe4ad6d3ead205674c6c59f469a6ca6985ddb28b Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic
Date: Mon, 18 Jul 2016 15:30:00 +0000
Subject: [PATCH] [PowerPC] Remove redundant direct moves when extracting
 integers and converting to FP

This patch corresponds to review:
https://reviews.llvm.org/D21354

We use direct moves for extracting integer elements from vectors. We also use
direct moves when converting integers to FP. When these operations are chained,
we get a direct move out of a VSR followed by a direct move back into a VSR.
These are redundant - all we need to do is line up the element and convert.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275796 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer notes (this area is ignored when the patch is applied and is not
part of the commit message):
 * Every added pattern matches an fcfid/fcfidu/fcfids/fcfidus of a
   PPCmtvsra of an i64 vector_extract from a v2i64 and selects the
   corresponding XSCV[SU]XD[DS]P on the source register instead.
 * Little-endian element 0 and big-endian element 1 go through
   XXPERMDI $S, $S, 2 (the test expects xxswapd); the other element only
   needs a register-class copy (the test expects xxlor).
 * NOTE(review): line breaks and indentation were restored from a
   whitespace-collapsed copy of this patch. If hunk context fails to
   match, retry with "git apply --ignore-whitespace".

 lib/Target/PowerPC/PPCInstrVSX.td             |  43 +++++++
 .../CodeGen/PowerPC/remove-redundant-moves.ll | 107 ++++++++++++++++++
 2 files changed, 150 insertions(+)
 create mode 100644 test/CodeGen/PowerPC/remove-redundant-moves.ll

diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 40b7ae38530..a02ace00a76 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1029,6 +1029,28 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
 def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
           (XVRSQRTEDP $A)>;
 
+let Predicates = [IsLittleEndian] in {
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+} // IsLittleEndian
+
+let Predicates = [IsBigEndian] in {
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+          (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+          (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+} // IsBigEndian
+
 } // AddedComplexity
 } // HasVSX
 
@@ -1235,6 +1257,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
   def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
                           "xscvspdpn $XT, $XB", IIC_VecFP, []>;
 
+  let Predicates = [IsLittleEndian] in {
+  def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+  def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+  }
+
+  let Predicates = [IsBigEndian] in {
+  def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+            (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
+  def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+            (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
+  def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+  }
 } // AddedComplexity = 400
 } // HasP8Vector
 
diff --git a/test/CodeGen/PowerPC/remove-redundant-moves.ll b/test/CodeGen/PowerPC/remove-redundant-moves.ll
new file mode 100644
index 00000000000..6b845cbf380
--- /dev/null
+++ b/test/CodeGen/PowerPC/remove-redundant-moves.ll
@@ -0,0 +1,107 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+define double @test1(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvsxddp 1, [[SW]]
+; CHECK-BE-LABEL: test1
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvsxddp 1, [[CP]]
+  %0 = extractelement <2 x i64> %a, i32 0
+  %1 = sitofp i64 %0 to double
+  ret double %1
+}
+
+define double @test2(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvsxddp 1, [[CP]]
+; CHECK-BE-LABEL: test2
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvsxddp 1, [[SW]]
+  %0 = extractelement <2 x i64> %a, i32 1
+  %1 = sitofp i64 %0 to double
+  ret double %1
+}
+
+define float @test1f(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1f
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvsxdsp 1, [[SW]]
+; CHECK-BE-LABEL: test1f
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvsxdsp 1, [[CP]]
+  %0 = extractelement <2 x i64> %a, i32 0
+  %1 = sitofp i64 %0 to float
+  ret float %1
+}
+
+define float @test2f(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2f
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvsxdsp 1, [[CP]]
+; CHECK-BE-LABEL: test2f
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvsxdsp 1, [[SW]]
+  %0 = extractelement <2 x i64> %a, i32 1
+  %1 = sitofp i64 %0 to float
+  ret float %1
+}
+
+define double @test1u(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1u
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: test1u
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvuxddp 1, [[CP]]
+  %0 = extractelement <2 x i64> %a, i32 0
+  %1 = uitofp i64 %0 to double
+  ret double %1
+}
+
+define double @test2u(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2u
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvuxddp 1, [[CP]]
+; CHECK-BE-LABEL: test2u
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvuxddp 1, [[SW]]
+  %0 = extractelement <2 x i64> %a, i32 1
+  %1 = uitofp i64 %0 to double
+  ret double %1
+}
+
+define float @test1fu(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1fu
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvuxdsp 1, [[SW]]
+; CHECK-BE-LABEL: test1fu
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvuxdsp 1, [[CP]]
+  %0 = extractelement <2 x i64> %a, i32 0
+  %1 = uitofp i64 %0 to float
+  ret float %1
+}
+
+define float @test2fu(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2fu
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvuxdsp 1, [[CP]]
+; CHECK-BE-LABEL: test2fu
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvuxdsp 1, [[SW]]
+  %0 = extractelement <2 x i64> %a, i32 1
+  %1 = uitofp i64 %0 to float
+  ret float %1
+}
-- 
2.40.0