[ARM] Check the right order for halves of VZIP/VUZP if both parts are used

author Martin Storsjo <martin@martin.st>

Sat, 19 Aug 2017 19:47:48 +0000 (19:47 +0000)

committer Martin Storsjo <martin@martin.st>

Sat, 19 Aug 2017 19:47:48 +0000 (19:47 +0000)
author Martin Storsjo <martin@martin.st>
Sat, 19 Aug 2017 19:47:48 +0000 (19:47 +0000)
committer Martin Storsjo <martin@martin.st>
Sat, 19 Aug 2017 19:47:48 +0000 (19:47 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index ff8491d2e62ec0d3be688fae75484e4fb017154d..320d7ccfbc2684b4d017e981d4804051b7168985 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -5901,7 +5901,10 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
      return false;
  
    for (unsigned i = 0; i < M.size(); i += NumElts) {
-    WhichResult = M[i] == 0 ? 0 : 1;
+    if (M.size() == NumElts * 2)
+      WhichResult = i / NumElts;
+    else
+      WhichResult = M[i] == 0 ? 0 : 1;
      for (unsigned j = 0; j < NumElts; ++j) {
        if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
          return false;
@@ -5932,7 +5935,10 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
  
    unsigned Half = NumElts / 2;
    for (unsigned i = 0; i < M.size(); i += NumElts) {
-    WhichResult = M[i] == 0 ? 0 : 1;
+    if (M.size() == NumElts * 2)
+      WhichResult = i / NumElts;
+    else
+      WhichResult = M[i] == 0 ? 0 : 1;
      for (unsigned j = 0; j < NumElts; j += Half) {
        unsigned Idx = WhichResult;
        for (unsigned k = 0; k < Half; ++k) {
@@ -5972,7 +5978,10 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
      return false;
  
    for (unsigned i = 0; i < M.size(); i += NumElts) {
-    WhichResult = M[i] == 0 ? 0 : 1;
+    if (M.size() == NumElts * 2)
+      WhichResult = i / NumElts;
+    else
+      WhichResult = M[i] == 0 ? 0 : 1;
      unsigned Idx = WhichResult * NumElts / 2;
      for (unsigned j = 0; j < NumElts; j += 2) {
        if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
@@ -6005,7 +6014,10 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
      return false;
  
    for (unsigned i = 0; i < M.size(); i += NumElts) {
-    WhichResult = M[i] == 0 ? 0 : 1;
+    if (M.size() == NumElts * 2)
+      WhichResult = i / NumElts;
+    else
+      WhichResult = M[i] == 0 ? 0 : 1;
      unsigned Idx = WhichResult * NumElts / 2;
      for (unsigned j = 0; j < NumElts; j += 2) {
        if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll

index 771bf5f05215cb1ca292fc07e5cbb2b739f9036a..06b49ab94053d3dc6cbe75f99dbc3046362359b2 100644 (file)
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -282,6 +282,25 @@ entry:
    ret <8 x i16> %0
  }
  
+; NOTE: The mask here looks like something that could be lowered to a vzip,
+; but the current handling of two-result vzip cannot match it, so it ends up
+; being lowered as a vtrn instead.
+define <8 x i16> @vzip_lower_shufflemask_undef_rev(<4 x i16>* %A, <4 x i16>* %B) {
+; CHECK-LABEL: vzip_lower_shufflemask_undef_rev:
+; CHECK:       @ BB#0: @ %entry
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d19, [r0]
+; CHECK-NEXT:    vtrn.16 d19, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
+entry:
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 4, i32 undef, i32 undef>
+  ret <8 x i16> %0
+}
+
  define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
  ; CHECK-LABEL: vzip_lower_shufflemask_zeroed:
  ; CHECK:       @ BB#0: @ %entry
author	Martin Storsjo <martin@martin.st>
	Sat, 19 Aug 2017 19:47:48 +0000 (19:47 +0000)
committer	Martin Storsjo <martin@martin.st>
	Sat, 19 Aug 2017 19:47:48 +0000 (19:47 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/vzip.ll		patch \| blob \| history