[SystemZ] Make better use of VLLEZ

author Ulrich Weigand <ulrich.weigand@de.ibm.com>

Thu, 20 Dec 2018 13:05:03 +0000 (13:05 +0000)

committer Ulrich Weigand <ulrich.weigand@de.ibm.com>

Thu, 20 Dec 2018 13:05:03 +0000 (13:05 +0000)
author Ulrich Weigand <ulrich.weigand@de.ibm.com>
Thu, 20 Dec 2018 13:05:03 +0000 (13:05 +0000)
committer Ulrich Weigand <ulrich.weigand@de.ibm.com>
Thu, 20 Dec 2018 13:05:03 +0000 (13:05 +0000)
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td

index 8523af7e57386519cf1f188e64091687d8801ae2..6c97b85277c32049c89466fecc5e34be0b26ae57 100644 (file)
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -151,13 +151,13 @@ let Predicates = [FeatureVector] in {
    def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
    def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
    def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
-  def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
+  def : Pat<(z_vllezf32 bdxaddr12only:$addr),
              (VLLEZF bdxaddr12only:$addr)>;
-  def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+  def : Pat<(z_vllezf64 bdxaddr12only:$addr),
              (VLLEZG bdxaddr12only:$addr)>;
    let Predicates = [FeatureVectorEnhancements1] in {
      def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>;
-    def : Pat<(v4f32 (z_vllezlf32 bdxaddr12only:$addr)),
+    def : Pat<(z_vllezlf32 bdxaddr12only:$addr),
                (VLLEZLF bdxaddr12only:$addr)>;
    }
  
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td

index c55a6273f5e447036540ca72c034fdc60cc0e659..626675bfb70c85a5303521cac8b7fa576a956ccb 100644 (file)
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -745,37 +745,37 @@ class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
  def z_vllezi8  : z_vllez<i32, anyextloadi8, 7>;
  def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
  def z_vllezi32 : z_vllez<i32, load, 1>;
-def z_vllezi64 : PatFrag<(ops node:$addr),
-                         (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
+def z_vllezi64 : PatFrags<(ops node:$addr),
+                          [(z_vector_insert (z_vzero),
+                                            (i64 (load node:$addr)), (i32 0)),
+                           (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
  // We use high merges to form a v4f32 from four f32s.  Propagating zero
  // into all elements but index 1 gives this expression.
  def z_vllezf32 : PatFrag<(ops node:$addr),
-                         (bitconvert
-                          (z_merge_high
-                           (v2i64
-                            (z_unpackl_high
-                             (v4i32
-                              (bitconvert
-                               (v4f32 (scalar_to_vector
-                                       (f32 (load node:$addr)))))))),
-                           (v2i64 (z_vzero))))>;
+                         (z_merge_high
+                          (v2i64
+                           (z_unpackl_high
+                            (v4i32
+                             (bitconvert
+                              (v4f32 (scalar_to_vector
+                                      (f32 (load node:$addr)))))))),
+                          (v2i64 (z_vzero)))>;
  def z_vllezf64 : PatFrag<(ops node:$addr),
                           (z_merge_high
-                          (scalar_to_vector (f64 (load node:$addr))),
+                          (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
                            (z_vzero))>;
  
  // Similarly for the high element of a zeroed vector.
  def z_vllezli32 : z_vllez<i32, load, 0>;
  def z_vllezlf32 : PatFrag<(ops node:$addr),
-                          (bitconvert
-                           (z_merge_high
-                            (v2i64
-                             (bitconvert
-                              (z_merge_high
-                               (v4f32 (scalar_to_vector
-                                       (f32 (load node:$addr)))),
-                               (v4f32 (z_vzero))))),
-                            (v2i64 (z_vzero))))>;
+                          (z_merge_high
+                           (v2i64
+                            (bitconvert
+                             (z_merge_high
+                              (v4f32 (scalar_to_vector
+                                      (f32 (load node:$addr)))),
+                              (v4f32 (z_vzero))))),
+                           (v2i64 (z_vzero)))>;
  
  // Store one element of a vector.
  class z_vste<ValueType scalartype, SDPatternOperator store>
diff --git a/test/CodeGen/SystemZ/vec-move-14.ll b/test/CodeGen/SystemZ/vec-move-14.ll

index e41eb9da034653dd9f21ed6b6c19a6df05af7974..e6415e84c9c403896d85a7e537ab281440e99ac1 100644 (file)
--- a/test/CodeGen/SystemZ/vec-move-14.ll
+++ b/test/CodeGen/SystemZ/vec-move-14.ll
@@ -94,3 +94,45 @@ define <2 x double> @f9(double *%ptr) {
    %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0
    ret <2 x double> %ret
  }
+
+; Test VLLEZF with a float when the result is stored to memory.
+define void @f10(float *%ptr, <4 x float> *%res) {
+; CHECK-LABEL: f10:
+; CHECK: vllezf [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG]], 0(%r3)
+; CHECK: br %r14
+  %val = load float, float *%ptr
+  %ret = insertelement <4 x float> zeroinitializer, float %val, i32 1
+  store <4 x float> %ret, <4 x float> *%res
+  ret void
+}
+
+; Test VLLEZG with a double when the result is stored to memory.
+define void @f11(double *%ptr, <2 x double> *%res) {
+; CHECK-LABEL: f11:
+; CHECK: vllezg [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG]], 0(%r3)
+; CHECK: br %r14
+  %val = load double, double *%ptr
+  %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0
+  store <2 x double> %ret, <2 x double> *%res
+  ret void
+}
+
+; Test VLLEZG when the zeroinitializer is shared.
+define void @f12(i64 *%ptr, <2 x i64> *%res) {
+; CHECK-LABEL: f12:
+; CHECK: vllezg [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG]], 0(%r3)
+; CHECK: vllezg [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG1]], 0(%r3)
+; CHECK: br %r14
+  %val = load volatile i64, i64 *%ptr
+  %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
+  store volatile <2 x i64> %ret, <2 x i64> *%res
+  %val1 = load volatile i64, i64 *%ptr
+  %ret1 = insertelement <2 x i64> zeroinitializer, i64 %val1, i32 0
+  store volatile <2 x i64> %ret1, <2 x i64> *%res
+  ret void
+}
+
diff --git a/test/CodeGen/SystemZ/vec-move-18.ll b/test/CodeGen/SystemZ/vec-move-18.ll

index 5d3d09d83ef154005c2a1b802a721030edfad334..9bb61934ff2658f5ac7665de6f6a85def494ceec 100644 (file)
--- a/test/CodeGen/SystemZ/vec-move-18.ll
+++ b/test/CodeGen/SystemZ/vec-move-18.ll
@@ -22,3 +22,15 @@ define <4 x float> @f2(float *%ptr) {
    ret <4 x float> %ret
  }
  
+; Test VLLEZLF with a float when the result is stored to memory.
+define void @f3(float *%ptr, <4 x float> *%res) {
+; CHECK-LABEL: f3:
+; CHECK: vllezlf [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: vst [[REG]], 0(%r3)
+; CHECK: br %r14
+  %val = load float, float *%ptr
+  %ret = insertelement <4 x float> zeroinitializer, float %val, i32 0
+  store <4 x float> %ret, <4 x float> *%res
+  ret void
+}
+
author	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Thu, 20 Dec 2018 13:05:03 +0000 (13:05 +0000)
committer	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Thu, 20 Dec 2018 13:05:03 +0000 (13:05 +0000)
lib/Target/SystemZ/SystemZInstrVector.td		patch | blob | history
lib/Target/SystemZ/SystemZOperators.td		patch | blob | history
test/CodeGen/SystemZ/vec-move-14.ll		patch | blob | history
test/CodeGen/SystemZ/vec-move-18.ll		patch | blob | history