From: Ulrich Weigand
Date: Thu, 20 Dec 2018 13:05:03 +0000 (+0000)
Subject: [SystemZ] Make better use of VLLEZ
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=17177f1518b43142a8615831a6332f345aef011e;p=llvm

[SystemZ] Make better use of VLLEZ

This patch fixes two deficiencies in current code that recognizes
the VLLEZ idiom:

- For the floating-point versions, we have ISel patterns that match
  on a bitconvert as the top node. In more complex cases, that
  bitconvert may already have been merged into something else.
  Fix the patterns to match the inner nodes instead.

- For the 64-bit integer versions, depending on the surrounding code,
  we may get either a DAG tree based on JOIN_DWORDS or one based on
  INSERT_VECTOR_ELT. Use a PatFrags to simply match both variants.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349749 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index 8523af7e573..6c97b85277c 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -151,13 +151,13 @@ let Predicates = [FeatureVector] in {
   def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
   def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
   def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
-  def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
+  def : Pat<(z_vllezf32 bdxaddr12only:$addr),
             (VLLEZF bdxaddr12only:$addr)>;
-  def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+  def : Pat<(z_vllezf64 bdxaddr12only:$addr),
             (VLLEZG bdxaddr12only:$addr)>;
   let Predicates = [FeatureVectorEnhancements1] in {
     def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>;
-    def : Pat<(v4f32 (z_vllezlf32 bdxaddr12only:$addr)),
+    def : Pat<(z_vllezlf32 bdxaddr12only:$addr),
               (VLLEZLF bdxaddr12only:$addr)>;
   }
 
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index c55a6273f5e..626675bfb70 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -745,37 +745,37 @@ class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
 def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
 def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
 def z_vllezi32 : z_vllez<i32, load, 1>;
-def z_vllezi64 : PatFrag<(ops node:$addr),
-                         (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
+def z_vllezi64 : PatFrags<(ops node:$addr),
+                          [(z_vector_insert (z_vzero),
+                                            (i64 (load node:$addr)), (i32 0)),
+                           (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
 // We use high merges to form a v4f32 from four f32s. Propagating zero
 // into all elements but index 1 gives this expression.
 def z_vllezf32 : PatFrag<(ops node:$addr),
-                         (bitconvert
-                          (z_merge_high
-                           (v2i64
-                            (z_unpackl_high
-                             (v4i32
-                              (bitconvert
-                               (v4f32 (scalar_to_vector
-                                       (f32 (load node:$addr)))))))),
-                           (v2i64 (z_vzero))))>;
+                         (z_merge_high
+                          (v2i64
+                           (z_unpackl_high
+                            (v4i32
+                             (bitconvert
+                              (v4f32 (scalar_to_vector
+                                      (f32 (load node:$addr)))))))),
+                          (v2i64 (z_vzero)))>;
 def z_vllezf64 : PatFrag<(ops node:$addr),
                          (z_merge_high
-                          (scalar_to_vector (f64 (load node:$addr))),
+                          (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
                           (z_vzero))>;
 
 // Similarly for the high element of a zeroed vector.
 def z_vllezli32 : z_vllez<i32, load, 0>;
 def z_vllezlf32 : PatFrag<(ops node:$addr),
-                          (bitconvert
-                           (z_merge_high
-                            (v2i64
-                             (bitconvert
-                              (z_merge_high
-                               (v4f32 (scalar_to_vector
-                                       (f32 (load node:$addr)))),
-                               (v4f32 (z_vzero))))),
-                            (v2i64 (z_vzero))))>;
+                          (z_merge_high
+                           (v2i64
+                            (bitconvert
+                             (z_merge_high
+                              (v4f32 (scalar_to_vector
+                                      (f32 (load node:$addr)))),
+                              (v4f32 (z_vzero))))),
+                           (v2i64 (z_vzero)))>;
 
 // Store one element of a vector.
 class z_vste<ValueType scalartype, SDPatternOperator store>
diff --git a/test/CodeGen/SystemZ/vec-move-14.ll b/test/CodeGen/SystemZ/vec-move-14.ll
index e41eb9da034..e6415e84c9c 100644
--- a/test/CodeGen/SystemZ/vec-move-14.ll
+++ b/test/CodeGen/SystemZ/vec-move-14.ll
@@ -94,3 +94,45 @@ define <2 x double> @f9(double *%ptr) {
   %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0
   ret <2 x double> %ret
 }
+
+; Test VLLEZF with a float when the result is stored to memory.
+define void @f10(float *%ptr, <4 x float> *%res) {
+; CHECK-LABEL: f10:
+; CHECK: vllezf [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG]], 0(%r3)
+; CHECK: br %r14
+  %val = load float, float *%ptr
+  %ret = insertelement <4 x float> zeroinitializer, float %val, i32 1
+  store <4 x float> %ret, <4 x float> *%res
+  ret void
+}
+
+; Test VLLEZG with a double when the result is stored to memory.
+define void @f11(double *%ptr, <2 x double> *%res) {
+; CHECK-LABEL: f11:
+; CHECK: vllezg [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG]], 0(%r3)
+; CHECK: br %r14
+  %val = load double, double *%ptr
+  %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0
+  store <2 x double> %ret, <2 x double> *%res
+  ret void
+}
+
+; Test VLLEZG when the zeroinitializer is shared.
+define void @f12(i64 *%ptr, <2 x i64> *%res) {
+; CHECK-LABEL: f12:
+; CHECK: vllezg [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG]], 0(%r3)
+; CHECK: vllezg [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG1]], 0(%r3)
+; CHECK: br %r14
+  %val = load volatile i64, i64 *%ptr
+  %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
+  store volatile <2 x i64> %ret, <2 x i64> *%res
+  %val1 = load volatile i64, i64 *%ptr
+  %ret1 = insertelement <2 x i64> zeroinitializer, i64 %val1, i32 0
+  store volatile <2 x i64> %ret1, <2 x i64> *%res
+  ret void
+}
+
diff --git a/test/CodeGen/SystemZ/vec-move-18.ll b/test/CodeGen/SystemZ/vec-move-18.ll
index 5d3d09d83ef..9bb61934ff2 100644
--- a/test/CodeGen/SystemZ/vec-move-18.ll
+++ b/test/CodeGen/SystemZ/vec-move-18.ll
@@ -22,3 +22,15 @@ define <4 x float> @f2(float *%ptr) {
   ret <4 x float> %ret
 }
 
+; Test VLLEZLF with a float when the result is stored to memory.
+define void @f3(float *%ptr, <4 x float> *%res) {
+; CHECK-LABEL: f3:
+; CHECK: vllezlf [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG]], 0(%r3)
+; CHECK: br %r14
+  %val = load float, float *%ptr
+  %ret = insertelement <4 x float> zeroinitializer, float %val, i32 0
+  store <4 x float> %ret, <4 x float> *%res
+  ret void
+}
+