[ARM] Prevent PerformVCVTCombine from combining a vmul/vcvt with 8 lanes

author Bradley Smith <bradley.smith@arm.com>

Tue, 16 Dec 2014 10:59:27 +0000 (10:59 +0000)

committer Bradley Smith <bradley.smith@arm.com>

Tue, 16 Dec 2014 10:59:27 +0000 (10:59 +0000)
author Bradley Smith <bradley.smith@arm.com>
Tue, 16 Dec 2014 10:59:27 +0000 (10:59 +0000)
committer Bradley Smith <bradley.smith@arm.com>
Tue, 16 Dec 2014 10:59:27 +0000 (10:59 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 3fce38e2e3de69733432581d3eab5d7ca85cb8d4..e908c42e97521062d9e71c07cf94186ca6034acb 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -9355,16 +9355,18 @@ static SDValue PerformVCVTCombine(SDNode *N,
  
    MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
    MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
-  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+  unsigned NumLanes = Op.getValueType().getVectorNumElements();
+  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
+      NumLanes > 4) {
      // These instructions only exist converting from f32 to i32. We can handle
      // smaller integers by generating an extra truncate, but larger ones would
-    // be lossy.
+    // be lossy. We also can't handle more than 4 lanes, since these
+    // instructions only support v2i32/v4i32 types.
      return SDValue();
    }
  
    unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
      Intrinsic::arm_neon_vcvtfp2fxu;
-  unsigned NumLanes = Op.getValueType().getVectorNumElements();
    SDValue FixConv =  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
                                   NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
                                   DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
diff --git a/test/CodeGen/ARM/isel-v8i32-crash.ll b/test/CodeGen/ARM/isel-v8i32-crash.ll

new file mode 100644 (file)

index 0000000..0116fe8
--- /dev/null
+++ b/test/CodeGen/ARM/isel-v8i32-crash.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=armv7-linux-gnu | FileCheck %s
+
+; Check we don't crash when trying to combine:
+;   (d1 = <float 8.000000e+00, float 8.000000e+00, ...>) (power of 2)
+;   vmul.f32        d0, d1, d0
+;   vcvt.s32.f32    d0, d0
+; into:
+;   vcvt.s32.f32    d0, d0, #3
+; when we have a vector length of 8, due to use of v8i32 types.
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK: func:
+; CHECK: vcvt.s32.f32  q[[R:[0-9]]], q[[R]], #3
+define void @func(i16* nocapture %pb, float* nocapture readonly %pf) #0 {
+entry:
+  %0 = bitcast float* %pf to <8 x float>*
+  %1 = load <8 x float>* %0, align 4
+  %2 = fmul <8 x float> %1, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+  %3 = fptosi <8 x float> %2 to <8 x i16>
+  %4 = bitcast i16* %pb to <8 x i16>*
+  store <8 x i16> %3, <8 x i16>* %4, align 2
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
author	Bradley Smith <bradley.smith@arm.com>
	Tue, 16 Dec 2014 10:59:27 +0000 (10:59 +0000)
committer	Bradley Smith <bradley.smith@arm.com>
	Tue, 16 Dec 2014 10:59:27 +0000 (10:59 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/isel-v8i32-crash.ll	[new file with mode: 0644]	patch \| blob