[MIPS MSA] Avoid some DAG combines for vector shifts

author Petar Avramovic <Petar.Avramovic@rt-rk.com>

Wed, 20 Feb 2019 13:42:44 +0000 (13:42 +0000)

committer Petar Avramovic <Petar.Avramovic@rt-rk.com>

Wed, 20 Feb 2019 13:42:44 +0000 (13:42 +0000)
author Petar Avramovic <Petar.Avramovic@rt-rk.com>
Wed, 20 Feb 2019 13:42:44 +0000 (13:42 +0000)
committer Petar Avramovic <Petar.Avramovic@rt-rk.com>
Wed, 20 Feb 2019 13:42:44 +0000 (13:42 +0000)
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp

index 103d1fcb44bbd2e542d07ce299008de524e4a63a..d69effb62240f5877223befe2e352f0d044bbc4f 100644 (file)
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -1190,6 +1190,13 @@ bool MipsTargetLowering::isCheapToSpeculateCtlz() const {
    return Subtarget.hasMips32();
  }
  
+bool MipsTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
+                                                   CombineLevel Level) const {
+  // Answer false when the node's first operand is vector-typed, true otherwise.
+  const bool IsVectorShift = N->getOperand(0).getValueType().isVector();
+  return !IsVectorShift;
+}
+
  void
  MipsTargetLowering::LowerOperationWrapper(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h

index e25810c87416cf0a7cd2709cf8f9486306571585..3a9e61b97b4aab1466174d86accafeeed1012808 100644 (file)
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -284,6 +284,8 @@ class TargetRegisterClass;
  
      bool isCheapToSpeculateCttz() const override;
      bool isCheapToSpeculateCtlz() const override;
+    bool shouldFoldShiftPairToMask(const SDNode *N,
+                                   CombineLevel Level) const override;
  
      /// Return the register type for a given MVT, ensuring vectors are treated
      /// as a series of gpr sized integers.
diff --git a/test/CodeGen/Mips/msa/vector_shift_combines.ll b/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll

similarity index 55%

rename from test/CodeGen/Mips/msa/vector_shift_combines.ll

rename to test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll

index 2cd97d2bcac94243dbffbd4e096de0db12c28dd8..04633cb575ce5b028ccd3b03ac93f8081deea383 100644 (file)
--- a/test/CodeGen/Mips/msa/vector_shift_combines.ll
+++ b/test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll
@@ -8,21 +8,18 @@ declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32)
  declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32)
  declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32)
  
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
+; do not fold (shl (srl x, c2), c1) -> (and (srl x, (sub c2, c1)), MASK) if C1 < C2
  ; MASK_TYPE1 = C2-C1 0s | 1s | ends with C1 0s
-define void @combine_shifts_to_shift_plus_and_mask_type1_i64(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64:
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
  ; MIPSEL64R6:       # %bb.0: # %entry
  ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 1
-; MIPSEL64R6-NEXT:    lui $1, 32760
-; MIPSEL64R6-NEXT:    dsll32 $1, $1, 0
-; MIPSEL64R6-NEXT:    fill.d $w1, $1
-; MIPSEL64R6-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 52
+; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 51
  ; MIPSEL64R6-NEXT:    jr $ra
  ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
  ;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64:
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64:
  ; MIPSEL32R5:       # %bb.0: # %entry
  ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
  ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 52
@@ -37,23 +34,17 @@ entry:
    ret void
  }
  
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
-define void @combine_shifts_to_shift_plus_and_mask_type1_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64_long:
+; do not fold (shl (srl x, c2), c1) -> (and (srl x, (sub c2, c1)), MASK) if C1 < C2
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
  ; MIPSEL64R6:       # %bb.0: # %entry
-; MIPSEL64R6-NEXT:    lui $1, 65535
-; MIPSEL64R6-NEXT:    ori $1, $1, 65520
-; MIPSEL64R6-NEXT:    lui $2, 16383
-; MIPSEL64R6-NEXT:    ori $2, $2, 65535
-; MIPSEL64R6-NEXT:    dinsu $1, $2, 32, 32
  ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 2
-; MIPSEL64R6-NEXT:    fill.d $w1, $1
-; MIPSEL64R6-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 6
+; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 4
  ; MIPSEL64R6-NEXT:    jr $ra
  ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
  ;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i64_long:
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i64_long:
  ; MIPSEL32R5:       # %bb.0: # %entry
  ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
  ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 6
@@ -68,19 +59,18 @@ entry:
    ret void
  }
  
-; fold (shl (srl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) if C1 >= C2
+; do not fold (shl (srl x, c2), c1) -> (and (shl x, (sub c1, c2)), MASK) if C1 >= C2
  ; MASK_TYPE2 = 1s | C1 zeros
-define void @combine_shifts_to_shift_plus_and_mask_type2_i32(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type2_i32:
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
  ; MIPSEL64R6:       # %bb.0: # %entry
  ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 2
-; MIPSEL64R6-NEXT:    ldi.d $w1, -64
-; MIPSEL64R6-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 4
+; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 6
  ; MIPSEL64R6-NEXT:    jr $ra
  ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
  ;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type2_i32:
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type2_i32:
  ; MIPSEL32R5:       # %bb.0: # %entry
  ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
  ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 4
@@ -95,27 +85,21 @@ entry:
    ret void
  }
  
-; fold (shl (srl x, c1), c2) -> (and (srl x, (sub c1, c2), MASK) if C1 < C2
-define void @combine_shifts_to_shift_plus_and_mask_type1_i32_long(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i32_long:
+; do not fold (shl (srl x, c2), c1) -> (and (srl x, (sub c2, c1)), MASK) if C1 < C2
+define void @avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
  ; MIPSEL64R6:       # %bb.0: # %entry
  ; MIPSEL64R6-NEXT:    ld.w $w0, 0($4)
-; MIPSEL64R6-NEXT:    srli.w $w0, $w0, 4
-; MIPSEL64R6-NEXT:    lui $1, 4095
-; MIPSEL64R6-NEXT:    ori $1, $1, 65528
-; MIPSEL64R6-NEXT:    fill.w $w1, $1
-; MIPSEL64R6-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL64R6-NEXT:    srli.w $w0, $w0, 7
+; MIPSEL64R6-NEXT:    slli.w $w0, $w0, 3
  ; MIPSEL64R6-NEXT:    jr $ra
  ; MIPSEL64R6-NEXT:    st.w $w0, 0($5)
  ;
-; MIPSEL32R5-LABEL: combine_shifts_to_shift_plus_and_mask_type1_i32_long:
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_shift_plus_and_mask_type1_i32_long:
  ; MIPSEL32R5:       # %bb.0: # %entry
  ; MIPSEL32R5-NEXT:    ld.w $w0, 0($4)
-; MIPSEL32R5-NEXT:    srli.w $w0, $w0, 4
-; MIPSEL32R5-NEXT:    lui $1, 4095
-; MIPSEL32R5-NEXT:    ori $1, $1, 65528
-; MIPSEL32R5-NEXT:    fill.w $w1, $1
-; MIPSEL32R5-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL32R5-NEXT:    srli.w $w0, $w0, 7
+; MIPSEL32R5-NEXT:    slli.w $w0, $w0, 3
  ; MIPSEL32R5-NEXT:    jr $ra
  ; MIPSEL32R5-NEXT:    st.w $w0, 0($5)
  entry:
@@ -126,20 +110,17 @@ entry:
    ret void
  }
  
-; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type2_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type2_i64_long:
+; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type2_i64_long(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
  ; MIPSEL64R6:       # %bb.0: # %entry
-; MIPSEL64R6-NEXT:    lui $1, 65535
-; MIPSEL64R6-NEXT:    ori $1, $1, 65472
-; MIPSEL64R6-NEXT:    dsll32 $1, $1, 0
  ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT:    fill.d $w1, $1
-; MIPSEL64R6-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 38
+; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 38
  ; MIPSEL64R6-NEXT:    jr $ra
  ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
  ;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type2_i64_long:
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64_long:
  ; MIPSEL32R5:       # %bb.0: # %entry
  ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
  ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 38
@@ -154,17 +135,17 @@ entry:
    ret void
  }
  
-; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type2_i64(<2 x i64>* %a, <2 x i64>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type2_i64:
+; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type2_i64(<2 x i64>* %a, <2 x i64>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
  ; MIPSEL64R6:       # %bb.0: # %entry
  ; MIPSEL64R6-NEXT:    ld.d $w0, 0($4)
-; MIPSEL64R6-NEXT:    ldi.d $w1, -8
-; MIPSEL64R6-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL64R6-NEXT:    srli.d $w0, $w0, 3
+; MIPSEL64R6-NEXT:    slli.d $w0, $w0, 3
  ; MIPSEL64R6-NEXT:    jr $ra
  ; MIPSEL64R6-NEXT:    st.d $w0, 0($5)
  ;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type2_i64:
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type2_i64:
  ; MIPSEL32R5:       # %bb.0: # %entry
  ; MIPSEL32R5-NEXT:    ld.d $w0, 0($4)
  ; MIPSEL32R5-NEXT:    srli.d $w0, $w0, 3
@@ -179,21 +160,21 @@ entry:
    ret void
  }
  
-; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type1_long_i32_a(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type1_long_i32_a:
+; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_a(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
  ; MIPSEL64R6:       # %bb.0: # %entry
  ; MIPSEL64R6-NEXT:    ld.w $w0, 0($4)
-; MIPSEL64R6-NEXT:    ldi.w $w1, -32
-; MIPSEL64R6-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL64R6-NEXT:    srli.w $w0, $w0, 5
+; MIPSEL64R6-NEXT:    slli.w $w0, $w0, 5
  ; MIPSEL64R6-NEXT:    jr $ra
  ; MIPSEL64R6-NEXT:    st.w $w0, 0($5)
  ;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type1_long_i32_a:
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_a:
  ; MIPSEL32R5:       # %bb.0: # %entry
  ; MIPSEL32R5-NEXT:    ld.w $w0, 0($4)
-; MIPSEL32R5-NEXT:    ldi.w $w1, -32
-; MIPSEL32R5-NEXT:    and.v $w0, $w0, $w1
+; MIPSEL32R5-NEXT:    srli.w $w0, $w0, 5
+; MIPSEL32R5-NEXT:    slli.w $w0, $w0, 5
  ; MIPSEL32R5-NEXT:    jr $ra
  ; MIPSEL32R5-NEXT:    st.w $w0, 0($5)
  entry:
@@ -204,23 +185,21 @@ entry:
    ret void
  }
  
-; fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-define void @combine_shifts_to_and_mask_type1_long_i32_b(<4 x i32>* %a, <4 x i32>* %b) {
-; MIPSEL64R6-LABEL: combine_shifts_to_and_mask_type1_long_i32_b:
+; do not fold (shl (srl x, c1), c1) -> (and x, (shl -1, c1))
+define void @avoid_to_combine_shifts_to_and_mask_type1_long_i32_b(<4 x i32>* %a, <4 x i32>* %b) {
+; MIPSEL64R6-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
  ; MIPSEL64R6:       # %bb.0: # %entry
-; MIPSEL64R6-NEXT:    lui $1, 49152
-; MIPSEL64R6-NEXT:    fill.w $w0, $1
-; MIPSEL64R6-NEXT:    ld.w $w1, 0($4)
-; MIPSEL64R6-NEXT:    and.v $w0, $w1, $w0
+; MIPSEL64R6-NEXT:    ld.w $w0, 0($4)
+; MIPSEL64R6-NEXT:    srli.w $w0, $w0, 30
+; MIPSEL64R6-NEXT:    slli.w $w0, $w0, 30
  ; MIPSEL64R6-NEXT:    jr $ra
  ; MIPSEL64R6-NEXT:    st.w $w0, 0($5)
  ;
-; MIPSEL32R5-LABEL: combine_shifts_to_and_mask_type1_long_i32_b:
+; MIPSEL32R5-LABEL: avoid_to_combine_shifts_to_and_mask_type1_long_i32_b:
  ; MIPSEL32R5:       # %bb.0: # %entry
-; MIPSEL32R5-NEXT:    lui $1, 49152
-; MIPSEL32R5-NEXT:    fill.w $w0, $1
-; MIPSEL32R5-NEXT:    ld.w $w1, 0($4)
-; MIPSEL32R5-NEXT:    and.v $w0, $w1, $w0
+; MIPSEL32R5-NEXT:    ld.w $w0, 0($4)
+; MIPSEL32R5-NEXT:    srli.w $w0, $w0, 30
+; MIPSEL32R5-NEXT:    slli.w $w0, $w0, 30
  ; MIPSEL32R5-NEXT:    jr $ra
  ; MIPSEL32R5-NEXT:    st.w $w0, 0($5)
  entry:
author	Petar Avramovic <Petar.Avramovic@rt-rk.com>
	Wed, 20 Feb 2019 13:42:44 +0000 (13:42 +0000)
committer	Petar Avramovic <Petar.Avramovic@rt-rk.com>
	Wed, 20 Feb 2019 13:42:44 +0000 (13:42 +0000)
lib/Target/Mips/MipsISelLowering.cpp		patch \| blob \| history
lib/Target/Mips/MipsISelLowering.h		patch \| blob \| history
test/CodeGen/Mips/msa/avoid_vector_shift_combines.ll	[moved from test/CodeGen/Mips/msa/vector_shift_combines.ll with 55% similarity]	patch \| blob \| history