[SLP]Update test checks for the SPL vectorizer, NFC.

author Alexey Bataev <a.bataev@hotmail.com>

Fri, 11 Jan 2019 20:21:14 +0000 (20:21 +0000)

committer Alexey Bataev <a.bataev@hotmail.com>

Fri, 11 Jan 2019 20:21:14 +0000 (20:21 +0000)
author Alexey Bataev <a.bataev@hotmail.com>
Fri, 11 Jan 2019 20:21:14 +0000 (20:21 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Fri, 11 Jan 2019 20:21:14 +0000 (20:21 +0000)
diff --git a/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll b/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll

index edc8042a217df59cda07404679e7f408640ce6db..ad970b2bec1bf4da5790682a2e9a96af4ab35684 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=generic < %s | FileCheck %s
  ; RUN: opt -S -slp-vectorizer -mtriple=aarch64-apple-ios -mcpu=cyclone < %s | FileCheck %s
  ; Currently disabled for a few subtargets (e.g. Kryo):
@@ -5,14 +6,36 @@
  ; RUN: opt -S -slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=generic -slp-min-reg-size=128 < %s | FileCheck --check-prefix=NO_SLP %s
  
  define void @f(float* %r, float* %w) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[R0:%.*]] = getelementptr inbounds float, float* [[R:%.*]], i64 0
+; CHECK-NEXT:    [[R1:%.*]] = getelementptr inbounds float, float* [[R]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[R0]] to <2 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[W0:%.*]] = getelementptr inbounds float, float* [[W:%.*]], i64 0
+; CHECK-NEXT:    [[W1:%.*]] = getelementptr inbounds float, float* [[W]], i64 1
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[W0]] to <2 x float>*
+; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
+; NO_SLP-LABEL: @f(
+; NO_SLP-NEXT:    [[R0:%.*]] = getelementptr inbounds float, float* [[R:%.*]], i64 0
+; NO_SLP-NEXT:    [[R1:%.*]] = getelementptr inbounds float, float* [[R]], i64 1
+; NO_SLP-NEXT:    [[F0:%.*]] = load float, float* [[R0]]
+; NO_SLP-NEXT:    [[F1:%.*]] = load float, float* [[R1]]
+; NO_SLP-NEXT:    [[ADD0:%.*]] = fadd float [[F0]], [[F0]]
+; NO_SLP-NEXT:    [[ADD1:%.*]] = fadd float [[F1]], [[F1]]
+; NO_SLP-NEXT:    [[W0:%.*]] = getelementptr inbounds float, float* [[W:%.*]], i64 0
+; NO_SLP-NEXT:    [[W1:%.*]] = getelementptr inbounds float, float* [[W]], i64 1
+; NO_SLP-NEXT:    store float [[ADD0]], float* [[W0]]
+; NO_SLP-NEXT:    store float [[ADD1]], float* [[W1]]
+; NO_SLP-NEXT:    ret void
+;
    %r0 = getelementptr inbounds float, float* %r, i64 0
    %r1 = getelementptr inbounds float, float* %r, i64 1
    %f0 = load float, float* %r0
    %f1 = load float, float* %r1
    %add0 = fadd float %f0, %f0
-; CHECK:  fadd <2 x float>
-; NO_SLP: fadd float
-; NO_SLP: fadd float
    %add1 = fadd float %f1, %f1
    %w0 = getelementptr inbounds float, float* %w, i64 0
    %w1 = getelementptr inbounds float, float* %w, i64 1
diff --git a/test/Transforms/SLPVectorizer/AArch64/commute.ll b/test/Transforms/SLPVectorizer/AArch64/commute.ll

index 2bce59c62000350ead31c1a09ace739f2b411acb..3f831fd2cc7d8ed2f372f63130241e5278ef789b 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/commute.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/commute.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
  target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
  target triple = "aarch64--linux-gnu"
@@ -5,17 +6,27 @@ target triple = "aarch64--linux-gnu"
  %structA = type { [2 x float] }
  
  define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
-; CHECK-LABEL: test1
-; CHECK: %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
-; CHECK: %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
-; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
-; CHECK: %4 = load <2 x float>, <2 x float>* %3, align 4
-; CHECK: %5 = fsub fast <2 x float> %2, %4
-; CHECK: %6 = fmul fast <2 x float> %5, %5
-; CHECK: %7 = extractelement <2 x float> %6, i32 0
-; CHECK: %8 = extractelement <2 x float> %6, i32 1
-; CHECK: %add = fadd fast float %7, %8
-; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[XMIN:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
+; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH:%.*]]
+; CHECK:       for.body3.lr.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCTA]], %structA* [[J]], i64 0, i32 0, i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
+; CHECK:       for.end27:
+; CHECK-NEXT:    ret void
+;
  
  entry:
    br label %for.body3.lr.ph
@@ -40,17 +51,27 @@ for.end27:
  }
  
  define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
-; CHECK-LABEL: test2
-; CHECK: %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
-; CHECK: %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
-; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
-; CHECK: %4 = load <2 x float>, <2 x float>* %3, align 4
-; CHECK: %5 = fsub fast <2 x float> %2, %4
-; CHECK: %6 = fmul fast <2 x float> %5, %5
-; CHECK: %7 = extractelement <2 x float> %6, i32 0
-; CHECK: %8 = extractelement <2 x float> %6, i32 1
-; CHECK: %add = fadd fast float %8, %7
-; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[XMIN:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
+; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH:%.*]]
+; CHECK:       for.body3.lr.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCTA]], %structA* [[J]], i64 0, i32 0, i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
+; CHECK:       for.end27:
+; CHECK-NEXT:    ret void
+;
  
  entry:
    br label %for.body3.lr.ph
diff --git a/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll

index a4c655ca3cb0b74d99ac09829dbfe3698cda6187..401776aa270d250547ef727f603dd10fa638b5a6 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -S -slp-vectorizer -instcombine -pass-remarks-output=%t | FileCheck %s
  ; RUN: cat %t | FileCheck -check-prefix=REMARK %s
  ; RUN: opt < %s -S -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
@@ -6,25 +7,25 @@
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
  target triple = "aarch64--linux-gnu"
  
-; CHECK-LABEL:  @gather_multiple_use(
-; CHECK-NEXT:     [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
-; CHECK-NEXT:     [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
-; CHECK-NEXT:     [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 2
-; CHECK-NEXT:     [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i32 3
-; CHECK-NEXT:     [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:     [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:     [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], <i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:     [[TMP8:%.*]] = add <4 x i32> [[TMP4]], [[TMP7]]
-; CHECK-NEXT:     [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]]
-; CHECK-NEXT:     [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP9]])
-; CHECK-NEXT:     ret i32 [[TMP10]]
-;
  ; REMARK-LABEL: Function: gather_multiple_use
  ; REMARK:       Args:
  ; REMARK-NEXT:    - String: 'Vectorized horizontal reduction with cost '
  ; REMARK-NEXT:    - Cost: '-7'
  ;
  define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @gather_multiple_use(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 65537, i32 65537, i32 65537, i32 65537>
+; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP4]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP9]])
+; CHECK-NEXT:    ret i32 [[TMP10]]
+;
    %tmp00 = lshr i32 %a, 15
    %tmp01 = and i32 %tmp00, 65537
    %tmp02 = mul nuw i32 %tmp01, 65535
diff --git a/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll

index 3f61875b91825a94de2356c46c12cafdfe84b089..db02f55dcc7734ddb86c9dbbd53db4dfaa3e7163 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine -pass-remarks-output=%t < %s | FileCheck %s
  ; RUN: cat %t | FileCheck -check-prefix=YAML %s
  ; RUN: opt -S -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-18 -pass-remarks-output=%t < %s | FileCheck %s
@@ -22,12 +23,6 @@ target triple = "aarch64--linux-gnu"
  ; }
  ;
  
-; CHECK-LABEL: @getelementptr_4x32
-;
-; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <4 x i32>
-; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
-; CHECK: sext i32 [[X]] to i64
-
  ; YAML:      --- !Passed
  ; YAML-NEXT: Pass:            slp-vectorizer
  ; YAML-NEXT: Name:            VectorizedList
@@ -49,6 +44,56 @@ target triple = "aarch64--linux-gnu"
  ; YAML-NEXT:   - TreeSize:        '3'
  
  define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @getelementptr_4x32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, i32 [[X:%.*]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[Y:%.*]], i32 2
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[Z:%.*]], i32 3
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP21:%.*]], i32 1
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP21]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[T4:%.*]] = shl nsw i32 [[TMP5]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP13]]
+; CHECK-NEXT:    [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP15]]
+; CHECK-NEXT:    [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
+; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP17]]
+; CHECK-NEXT:    [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> <i32 1, i32 undef>, i32 [[ADD11]], i32 1
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[T12]], i32 1
+; CHECK-NEXT:    [[TMP21]] = add nsw <2 x i32> [[TMP18]], [[TMP20]]
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP22]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
  entry:
    %cmp31 = icmp sgt i32 %n, 0
    br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
@@ -88,12 +133,6 @@ for.body:
    br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
  }
  
-; CHECK-LABEL: @getelementptr_2x32
-;
-; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <2 x i32>
-; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
-; CHECK: sext i32 [[X]] to i64
-
  ; YAML:      --- !Passed
  ; YAML-NEXT: Pass:            slp-vectorizer
  ; YAML-NEXT: Name:            VectorizedList
@@ -115,6 +154,54 @@ for.body:
  ; YAML-NEXT:   - TreeSize:        '3'
  
  define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @getelementptr_2x32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP18:%.*]], i32 1
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP18]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
+; CHECK-NEXT:    [[T4:%.*]] = shl nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[T4]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP5]]
+; CHECK-NEXT:    [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP6]]
+; CHECK-NEXT:    [[T7:%.*]] = or i32 [[T4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[T7]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP7]]
+; CHECK-NEXT:    [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[T4]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i32> [[TMP1]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]]
+; CHECK-NEXT:    [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]]
+; CHECK-NEXT:    [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i32> <i32 1, i32 undef>, i32 [[ADD11]], i32 1
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[T12]], i32 1
+; CHECK-NEXT:    [[TMP18]] = add nsw <2 x i32> [[TMP15]], [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP19]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
  entry:
    %cmp31 = icmp sgt i32 %n, 0
    br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
diff --git a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll

index 02cf09d3434bc2404947356baa5017bafd7fdc95..c4a584fcbf3fa109388aa45f638d0ec3eb054ad5 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -1,6 +1,5 @@
-; RUN: opt -slp-vectorizer -slp-threshold=-6 -S -pass-remarks-output=%t <  %s | FileCheck %s
-; RUN: cat %t | FileCheck -check-prefix=YAML %s
-; RUN: opt -passes=slp-vectorizer -slp-threshold=-6 -S -pass-remarks-output=%t <  %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -slp-threshold=-6 -S -pass-remarks-output=%t < %s | FileCheck %s
  ; RUN: cat %t | FileCheck -check-prefix=YAML %s
  
  
@@ -10,11 +9,6 @@
  target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
  target triple = "aarch64--linux"
  
-; CHECK-LABEL: test_select
-; CHECK: load <4 x i32>
-; CHECK: load <4 x i32>
-; CHECK: select <4 x i1>
-
  ; YAML:      --- !Passed
  ; YAML-NEXT: Pass:            slp-vectorizer
  ; YAML-NEXT: Name:            VectorizedHorizontalReduction
@@ -26,6 +20,49 @@ target triple = "aarch64--linux"
  ; YAML-NEXT:   - TreeSize:        '8'
  
  define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h) {
+; CHECK-LABEL: @test_select(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_22:%.*]] = icmp sgt i32 [[H:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP_22]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[S_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[J_025:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[P2_024:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR29:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[P1_023:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[P1_023]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[P2_024]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[P1_023]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[P2_024]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[P1_023]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P1_023]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[P2_024]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[P2_024]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_026]]
+; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD]], undef
+; CHECK-NEXT:    [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP7]])
+; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
+; CHECK-NEXT:    [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_025]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[H]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[S_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[S_0_LCSSA]]
+;
  entry:
    %cmp.22 = icmp sgt i32 %h, 0
    br i1 %cmp.22, label %for.body.lr.ph, label %for.end
@@ -104,11 +141,6 @@ for.end:                                          ; preds = %for.end.loopexit, %
  ;;   p2 += lx;
  ;; }
  define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) {
-; CHECK-LABEL: reduction_with_br
-; CHECK: load <4 x i32>
-; CHECK: load <4 x i32>
-; CHECK: mul nsw <4 x i32>
-
  ; YAML:      --- !Passed
  ; YAML-NEXT: Pass:            slp-vectorizer
  ; YAML-NEXT: Name:            VectorizedHorizontalReduction
@@ -118,7 +150,49 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
  ; YAML-NEXT:   - Cost:            '-11'
  ; YAML-NEXT:   - String:          ' and with tree size '
  ; YAML-NEXT:   - TreeSize:        '3'
-
+; CHECK-LABEL: @reduction_with_br(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_16:%.*]] = icmp sgt i32 [[H:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP_16]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[S_020:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[J_019:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[P2_018:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR16:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[P1_017:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P1_017]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[P2_018]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[P1_017]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[P2_018]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[P1_017]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P1_017]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[P2_018]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_020]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD]], undef
+; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef
+; CHECK-NEXT:    [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
+; CHECK-NEXT:    br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_017]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR16]] = getelementptr inbounds i32, i32* [[P2_018]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_019]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[H]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[S_1]]
+;
  entry:
    %cmp.16 = icmp sgt i32 %h, 0
    br i1 %cmp.16, label %for.body.lr.ph, label %for.end
@@ -172,11 +246,6 @@ for.end:                                          ; preds = %for.end.loopexit, %
    ret i32 %s.1
  }
  
-; CHECK: test_unrolled_select
-; CHECK: load <8 x i8>
-; CHECK: load <8 x i8>
-; CHECK: select <8 x i1>
-
  ; YAML:      --- !Passed
  ; YAML-NEXT: Pass:            slp-vectorizer
  ; YAML-NEXT: Name:            VectorizedHorizontalReduction
@@ -188,6 +257,66 @@ for.end:                                          ; preds = %for.end.loopexit, %
  ; YAML-NEXT:   - TreeSize:        '10'
  
  define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) #0 {
+; CHECK-LABEL: @test_unrolled_select(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_43:%.*]] = icmp sgt i32 [[H:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP_43]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[LX:%.*]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[S_047:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[IF_END_86:%.*]] ]
+; CHECK-NEXT:    [[J_046:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END_86]] ]
+; CHECK-NEXT:    [[P2_045:%.*]] = phi i8* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
+; CHECK-NEXT:    [[P1_044:%.*]] = phi i8* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX50:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX63:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX72:%.*]] = getelementptr inbounds i8, i8* [[P1_044]], i64 7
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P1_044]] to <8 x i8>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i32>
+; CHECK-NEXT:    [[ARRAYIDX74:%.*]] = getelementptr inbounds i8, i8* [[P2_045]], i64 7
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[P2_045]] to <8 x i8>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_047]]
+; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[ADD]], undef
+; CHECK-NEXT:    [[ADD27:%.*]] = add nsw i32 [[ADD16]], undef
+; CHECK-NEXT:    [[ADD38:%.*]] = add nsw i32 [[ADD27]], undef
+; CHECK-NEXT:    [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef
+; CHECK-NEXT:    [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef
+; CHECK-NEXT:    [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP9]])
+; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
+; CHECK-NEXT:    [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef
+; CHECK-NEXT:    [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
+; CHECK-NEXT:    br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK:       if.end.86:
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[P1_044]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR88]] = getelementptr inbounds i8, i8* [[P2_045]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_046]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[H]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[S_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[S_1]]
+;
  entry:
    %cmp.43 = icmp sgt i32 %h, 0
    br i1 %cmp.43, label %for.body.lr.ph, label %for.end
diff --git a/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll b/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll

index 7e1d670956645eaca91b703fab843d236f2a4dd6..0429665c0aef271623446e1c0d774bfda231dfcf 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
  
  target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
@@ -7,10 +8,19 @@ target triple = "aarch64--linux-gnu"
  ; should not compute a smaller size for %k.13 since it is in a use-def cycle
  ; and cannot be demoted.
  ;
-; CHECK-LABEL: @PR26364
-; CHECK: %k.13 = phi i32
-;
  define fastcc void @PR26364() {
+; CHECK-LABEL: @PR26364(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END11:%.*]], label [[FOR_COND4:%.*]]
+; CHECK:       for.cond4:
+; CHECK-NEXT:    [[K_13:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[K_3:%.*]], [[FOR_COND4]] ]
+; CHECK-NEXT:    [[E_02:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 0, [[FOR_COND4]] ]
+; CHECK-NEXT:    [[E_1:%.*]] = select i1 undef, i32 [[E_02]], i32 0
+; CHECK-NEXT:    [[K_3]] = select i1 undef, i32 [[K_13]], i32 undef
+; CHECK-NEXT:    br label [[FOR_COND4]]
+; CHECK:       for.end11:
+; CHECK-NEXT:    ret void
+;
  entry:
    br i1 undef, label %for.end11, label %for.cond4
  
@@ -29,10 +39,25 @@ for.end11:
  ; every root in the vectorizable tree when computing minimum sizes since one
  ; root may require fewer bits than another.
  ;
-; CHECK-LABEL: @PR26629
-; CHECK-NOT: {{.*}} and <2 x i72>
-;
  define void @PR26629(i32* %c) {
+; CHECK-LABEL: @PR26629(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[FOR_PH:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C:%.*]], align 4
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[D:%.*]] = phi i72 [ 576507472957710340, [[FOR_PH]] ], [ [[BF_SET17:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP0]], undef
+; CHECK-NEXT:    [[BF_CLEAR13:%.*]] = and i72 [[D]], -576460748008464384
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[SUB]] to i72
+; CHECK-NEXT:    [[BF_VALUE15:%.*]] = and i72 [[TMP1]], 8191
+; CHECK-NEXT:    [[BF_CLEAR16:%.*]] = or i72 [[BF_VALUE15]], [[BF_CLEAR13]]
+; CHECK-NEXT:    [[BF_SET17]] = or i72 [[BF_CLEAR16]], undef
+; CHECK-NEXT:    br label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
  entry:
    br i1 undef, label %for.ph, label %for.end
  
diff --git a/test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll b/test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll

index 3d6da124fc45ff4dc047fd8b528eff09397229d3..64b8743f8bd2a97a8818b79de20859270ddce90d 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll
@@ -1,11 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -slp-vectorizer %s | FileCheck %s
  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
  target triple = "arm64-apple-ios5.0.0"
  
  define i64 @mismatched_intrinsics(<4 x i32> %in1, <2 x i32> %in2) nounwind {
-; CHECK-LABEL: @mismatched_intrinsics
-; CHECK: call i64 @llvm.arm64.neon.saddlv.i64.v4i32
-; CHECK: call i64 @llvm.arm64.neon.saddlv.i64.v2i32
+; CHECK-LABEL: @mismatched_intrinsics(
+; CHECK-NEXT:    [[VADDLVQ_S32_I:%.*]] = tail call i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> [[IN1:%.*]])
+; CHECK-NEXT:    [[VADDLV_S32_I:%.*]] = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> [[IN2:%.*]])
+; CHECK-NEXT:    [[TST:%.*]] = icmp sgt i64 [[VADDLVQ_S32_I]], [[VADDLV_S32_I]]
+; CHECK-NEXT:    [[EQUAL:%.*]] = sext i1 [[TST]] to i64
+; CHECK-NEXT:    ret i64 [[EQUAL]]
+;
  
    %vaddlvq_s32.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %in1) #2
    %vaddlv_s32.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %in2) #2
diff --git a/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll b/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll

index 87d021d534cff274ce4cfdb80cf9376d3c7fa605..98c033262a080b64ff4b37b1621661fe1f23b4bc 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
@@ -1,12 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -basicaa -slp-vectorizer -dce < %s | FileCheck %s
  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
  target triple = "arm64-apple-ios5.0.0"
  
-; CHECK-LABEL: @foo
  define void @foo(float* noalias %a, float* noalias %b, float* noalias %c) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !nontemporal !0
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4, !nontemporal !0
+; CHECK-NEXT:    ret void
+;
  entry:
  ; Check that we don't lose !nontemporal hint when vectorizing loads.
-; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
    %b1 = load float, float* %b, align 4, !nontemporal !0
    %arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
    %b2 = load float, float* %arrayidx.1, align 4, !nontemporal !0
@@ -16,7 +26,6 @@ entry:
    %b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0
  
  ; Check that we don't introduce !nontemporal hint when the original scalar loads didn't have it.
-; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
    %c1 = load float, float* %c, align 4
    %arrayidx2.1 = getelementptr inbounds float, float* %c, i64 1
    %c2 = load float, float* %arrayidx2.1, align 4
@@ -31,7 +40,6 @@ entry:
    %a4 = fadd float %b4, %c4
  
  ; Check that we don't lose !nontemporal hint when vectorizing stores.
-; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
    store float %a1, float* %a, align 4, !nontemporal !0
    %arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
    store float %a2, float* %arrayidx3.1, align 4, !nontemporal !0
@@ -40,16 +48,21 @@ entry:
    %arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
    store float %a4, float* %arrayidx3.3, align 4, !nontemporal !0
  
-; CHECK: ret void
    ret void
  }
  
-; CHECK-LABEL: @foo2
  define void @foo2(float* noalias %a, float* noalias %b) {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
  ; Check that we don't mark vector load with !nontemporal attribute if some of
  ; the original scalar loads don't have it.
-; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
    %b1 = load float, float* %b, align 4, !nontemporal !0
    %arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
    %b2 = load float, float* %arrayidx.1, align 4
@@ -60,7 +73,6 @@ entry:
  
  ; Check that we don't mark vector store with !nontemporal attribute if some of
  ; the original scalar stores don't have it.
-; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4{{$}}
    store float %b1, float* %a, align 4, !nontemporal !0
    %arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
    store float %b2, float* %arrayidx3.1, align 4
@@ -69,7 +81,6 @@ entry:
    %arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
    store float %b4, float* %arrayidx3.3, align 4, !nontemporal !0
  
-; CHECK: ret void
    ret void
  }
  
diff --git a/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll

index 72c70823e6988e667410dbbfc6c587909c7df8cd..8311f20514927478c9d27f70baf544265f45801c 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
@@ -1,13 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
  target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
  target triple = "aarch64--linux-gnu"
  
-; CHECK-LABEL: @test1
-; CHECK: load <4 x i32>
-; CHECK: add nsw <4 x i32>
-; CHECK: sdiv <4 x i32>
-
  define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[C]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %0 = load i32, i32* %b, align 4
    %1 = load i32, i32* %c, align 4
diff --git a/test/Transforms/SLPVectorizer/ARM/memory.ll b/test/Transforms/SLPVectorizer/ARM/memory.ll

index 57d7cceac6b9bee2d667f6039f702e323d18742f..70e87033207a4e8ccca53e3746de29411ddcc449 100644 (file)
--- a/test/Transforms/SLPVectorizer/ARM/memory.ll
+++ b/test/Transforms/SLPVectorizer/ARM/memory.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
@@ -5,10 +6,17 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
  ; On swift unaligned <2 x double> stores need 4uops and it is there for cheaper
  ; to do this scalar.
  
-; CHECK-LABEL: expensive_double_store
-; CHECK-NOT: load <2 x double>
-; CHECK-NOT: store <2 x double>
  define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) {
+; CHECK-LABEL: @expensive_double_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[SRC:%.*]], align 8
+; CHECK-NEXT:    store double [[TMP0]], double* [[DST:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[SRC]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[DST]], i64 1
+; CHECK-NEXT:    store double [[TMP1]], double* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %0 = load double, double* %src, align 8
    store double %0, double* %dst, align 8
diff --git a/test/Transforms/SLPVectorizer/ARM/sroa.ll b/test/Transforms/SLPVectorizer/ARM/sroa.ll

index 65e02608b2905a90b1f719d923e20190e7db5f4c..c43e8f10a59b8b01dbd00ad0d994513b30137d33 100644 (file)
--- a/test/Transforms/SLPVectorizer/ARM/sroa.ll
+++ b/test/Transforms/SLPVectorizer/ARM/sroa.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -mcpu=swift -mtriple=thumbv7-apple-ios -basicaa -slp-vectorizer < %s | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
@@ -8,11 +9,45 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
  ; because the scalar version of the shl/or are handled by the
  ; backend and disappear, the vectorized code stays.
  
-; CHECK-LABEL: SROAed
-; CHECK-NOT: shl nuw <2 x i64>
-; CHECK-NOT: or <2 x i64>
-
  define void @SROAed(%class.Complex* noalias nocapture sret %agg.result, [4 x i32] %a.coerce, [4 x i32] %b.coerce) {
+; CHECK-LABEL: @SROAed(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i32] [[A_COERCE:%.*]], 0
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[A_COERCE_FCA_0_EXTRACT]] to i64
+; CHECK-NEXT:    [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i32] [[A_COERCE]], 1
+; CHECK-NEXT:    [[A_SROA_0_4_INSERT_EXT:%.*]] = zext i32 [[A_COERCE_FCA_1_EXTRACT]] to i64
+; CHECK-NEXT:    [[A_SROA_0_4_INSERT_SHIFT:%.*]] = shl nuw i64 [[A_SROA_0_4_INSERT_EXT]], 32
+; CHECK-NEXT:    [[A_SROA_0_4_INSERT_INSERT:%.*]] = or i64 [[A_SROA_0_4_INSERT_SHIFT]], [[A_SROA_0_0_INSERT_EXT]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_SROA_0_4_INSERT_INSERT]] to double
+; CHECK-NEXT:    [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i32] [[A_COERCE]], 2
+; CHECK-NEXT:    [[A_SROA_3_8_INSERT_EXT:%.*]] = zext i32 [[A_COERCE_FCA_2_EXTRACT]] to i64
+; CHECK-NEXT:    [[A_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i32] [[A_COERCE]], 3
+; CHECK-NEXT:    [[A_SROA_3_12_INSERT_EXT:%.*]] = zext i32 [[A_COERCE_FCA_3_EXTRACT]] to i64
+; CHECK-NEXT:    [[A_SROA_3_12_INSERT_SHIFT:%.*]] = shl nuw i64 [[A_SROA_3_12_INSERT_EXT]], 32
+; CHECK-NEXT:    [[A_SROA_3_12_INSERT_INSERT:%.*]] = or i64 [[A_SROA_3_12_INSERT_SHIFT]], [[A_SROA_3_8_INSERT_EXT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64 [[A_SROA_3_12_INSERT_INSERT]] to double
+; CHECK-NEXT:    [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i32] [[B_COERCE:%.*]], 0
+; CHECK-NEXT:    [[B_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[B_COERCE_FCA_0_EXTRACT]] to i64
+; CHECK-NEXT:    [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i32] [[B_COERCE]], 1
+; CHECK-NEXT:    [[B_SROA_0_4_INSERT_EXT:%.*]] = zext i32 [[B_COERCE_FCA_1_EXTRACT]] to i64
+; CHECK-NEXT:    [[B_SROA_0_4_INSERT_SHIFT:%.*]] = shl nuw i64 [[B_SROA_0_4_INSERT_EXT]], 32
+; CHECK-NEXT:    [[B_SROA_0_4_INSERT_INSERT:%.*]] = or i64 [[B_SROA_0_4_INSERT_SHIFT]], [[B_SROA_0_0_INSERT_EXT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[B_SROA_0_4_INSERT_INSERT]] to double
+; CHECK-NEXT:    [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i32] [[B_COERCE]], 2
+; CHECK-NEXT:    [[B_SROA_3_8_INSERT_EXT:%.*]] = zext i32 [[B_COERCE_FCA_2_EXTRACT]] to i64
+; CHECK-NEXT:    [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i32] [[B_COERCE]], 3
+; CHECK-NEXT:    [[B_SROA_3_12_INSERT_EXT:%.*]] = zext i32 [[B_COERCE_FCA_3_EXTRACT]] to i64
+; CHECK-NEXT:    [[B_SROA_3_12_INSERT_SHIFT:%.*]] = shl nuw i64 [[B_SROA_3_12_INSERT_EXT]], 32
+; CHECK-NEXT:    [[B_SROA_3_12_INSERT_INSERT:%.*]] = or i64 [[B_SROA_3_12_INSERT_SHIFT]], [[B_SROA_3_8_INSERT_EXT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[B_SROA_3_12_INSERT_INSERT]] to double
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[ADD3:%.*]] = fadd double [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[RE_I_I:%.*]] = getelementptr inbounds [[CLASS_COMPLEX:%.*]], %class.Complex* [[AGG_RESULT:%.*]], i32 0, i32 0
+; CHECK-NEXT:    store double [[ADD]], double* [[RE_I_I]], align 4
+; CHECK-NEXT:    [[IM_I_I:%.*]] = getelementptr inbounds [[CLASS_COMPLEX]], %class.Complex* [[AGG_RESULT]], i32 0, i32 1
+; CHECK-NEXT:    store double [[ADD3]], double* [[IM_I_I]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %a.coerce.fca.0.extract = extractvalue [4 x i32] %a.coerce, 0
    %a.sroa.0.0.insert.ext = zext i32 %a.coerce.fca.0.extract to i64
diff --git a/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll b/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll

index d8b80f437b92195848866963623d31493daec823..7038b0f8e2762ec3c946f38dbd2e6523e696ca3f 100644 (file)
--- a/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
+++ b/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
@@ -1,20 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 | FileCheck %s
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_40 | FileCheck %s -check-prefix=NOVECTOR
  
-; CHECK-LABEL: @fusion
-; CHECK: load <2 x half>, <2 x half>*
-; CHECK: fmul fast <2 x half>
-; CHECK: fadd fast <2 x half>
-; CHECK: store <2 x half> %4, <2 x half>
-
-; NOVECTOR-LABEL: @fusion
-; NOVECTOR: load half
-; NOVECTOR: fmul fast half
-; NOVECTOR: fadd fast half
-; NOVECTOR: fmul fast half
-; NOVECTOR: fadd fast half
-; NOVECTOR: store half
  define void @fusion(i8* noalias nocapture align 256 dereferenceable(19267584) %arg, i8* noalias nocapture readonly align 256 dereferenceable(19267584) %arg1, i32 %arg2, i32 %arg3) local_unnamed_addr #0 {
+; CHECK-LABEL: @fusion(
+; CHECK-NEXT:    [[TMP:%.*]] = shl nuw nsw i32 [[ARG2:%.*]], 6
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP]], [[ARG3:%.*]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ARG1:%.*]] to half*
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8* [[ARG:%.*]] to half*
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[TMP11]] to <2 x half>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x half> <half 0xH5380, half 0xH5380>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x half> <half 0xH57F0, half 0xH57F0>, [[TMP3]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast half* [[TMP16]] to <2 x half>*
+; CHECK-NEXT:    store <2 x half> [[TMP4]], <2 x half>* [[TMP5]], align 8
+; CHECK-NEXT:    ret void
+;
+; NOVECTOR-LABEL: @fusion(
+; NOVECTOR-NEXT:    [[TMP:%.*]] = shl nuw nsw i32 [[ARG2:%.*]], 6
+; NOVECTOR-NEXT:    [[TMP4:%.*]] = or i32 [[TMP]], [[ARG3:%.*]]
+; NOVECTOR-NEXT:    [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2
+; NOVECTOR-NEXT:    [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
+; NOVECTOR-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], 1
+; NOVECTOR-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ARG1:%.*]] to half*
+; NOVECTOR-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP6]]
+; NOVECTOR-NEXT:    [[TMP12:%.*]] = load half, half* [[TMP11]], align 8
+; NOVECTOR-NEXT:    [[TMP13:%.*]] = fmul fast half [[TMP12]], 0xH5380
+; NOVECTOR-NEXT:    [[TMP14:%.*]] = fadd fast half [[TMP13]], 0xH57F0
+; NOVECTOR-NEXT:    [[TMP15:%.*]] = bitcast i8* [[ARG:%.*]] to half*
+; NOVECTOR-NEXT:    [[TMP16:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP6]]
+; NOVECTOR-NEXT:    store half [[TMP14]], half* [[TMP16]], align 8
+; NOVECTOR-NEXT:    [[TMP17:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP7]]
+; NOVECTOR-NEXT:    [[TMP18:%.*]] = load half, half* [[TMP17]], align 2
+; NOVECTOR-NEXT:    [[TMP19:%.*]] = fmul fast half [[TMP18]], 0xH5380
+; NOVECTOR-NEXT:    [[TMP20:%.*]] = fadd fast half [[TMP19]], 0xH57F0
+; NOVECTOR-NEXT:    [[TMP21:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP7]]
+; NOVECTOR-NEXT:    store half [[TMP20]], half* [[TMP21]], align 2
+; NOVECTOR-NEXT:    ret void
+;
    %tmp = shl nuw nsw i32 %arg2, 6
    %tmp4 = or i32 %tmp, %arg3
    %tmp5 = shl nuw nsw i32 %tmp4, 2
diff --git a/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll b/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll

index dabb3380ef1c08453d3780664b0cda5573f3f9e9..7f7df82a38fc3bc17d1ac6f703db791bd97fa6cf 100644 (file)
--- a/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
+++ b/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
@@ -1,8 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s
  
  %struct.A = type { i8*, i8* }
  
  define i64 @foo(%struct.A* nocapture readonly %this) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[END_I:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], %struct.A* [[THIS:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8** [[END_I]] to i64*
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %struct.A* [[THIS]] to i64*
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8
+; CHECK-NEXT:    [[SUB_PTR_SUB_I:%.*]] = sub i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[SUB_PTR_SUB_I]], 9
+; CHECK-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK:       lor.lhs.false:
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i8*
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP1]] to i8*
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i8* [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP2]], i64 2, i64 -1
+; CHECK-NEXT:    ret i64 [[DOT]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i64 2
+;
  entry:
    %end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1
    %0 = bitcast i8** %end.i to i64*
@@ -24,6 +44,3 @@ return:
    ret i64 2
  }
  
-; CHECK: load i64
-; CHECK-NOT: load <2 x i64>
-; CHECK-NOT: extractelement
diff --git a/test/Transforms/SLPVectorizer/X86/align.ll b/test/Transforms/SLPVectorizer/X86/align.ll

index b74b70900ee96b809464b0998dfb740c97a952c4..5c7c4ceae8173a0f7398b1a48f306f9be2e9f3e2 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/align.ll
+++ b/test/Transforms/SLPVectorizer/X86/align.ll
@@ -1,16 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  ; Simple 3-pair chain with loads and stores
-; CHECK-LABEL: @test1
  define void @test1(double* %a, double* %b, double* %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AGG_TMP_I_I_SROA_0:%.*]] = alloca [3 x double], align 16
+; CHECK-NEXT:    [[STORE1:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[AGG_TMP_I_I_SROA_0]], i64 0, i64 1
+; CHECK-NEXT:    [[STORE2:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[AGG_TMP_I_I_SROA_0]], i64 0, i64 2
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[STORE1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16
-; CHECK: %[[V0:[0-9]+]] = load <2 x double>, <2 x double>* %[[V2:[0-9]+]], align 8
-  %i0 = load double, double* %a 
-  %i1 = load double, double* %b 
+  %i0 = load double, double* %a
+  %i1 = load double, double* %b
    %mul = fmul double %i0, %i1
    %store1 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 1
    %store2 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 2
@@ -19,23 +34,29 @@ entry:
    %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
    %i4 = load double, double* %arrayidx4, align 8
    %mul5 = fmul double %i3, %i4
-; CHECK: store <2 x double> %[[V1:[0-9]+]], <2 x double>* %[[V2:[0-9]+]], align 8
    store double %mul, double* %store1
    store double %mul5, double* %store2, align 16
-; CHECK: ret
    ret void
  }
  
  ; Float has 4 byte abi alignment on x86_64. We must use the alignmnet of the
  ; value being loaded/stored not the alignment of the pointer type.
  
-; CHECK-LABEL: @test2
-; CHECK-NOT: align 8
-; CHECK: load <4 x float>{{.*}}, align 4
-; CHECK: store <4 x float>{{.*}}, align 4
-; CHECK: ret
-
  define void @test2(float * %a, float * %b) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[B1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
+; CHECK-NEXT:    [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %l0 = load float, float* %a
    %a1 = getelementptr inbounds float, float* %a, i64 1
diff --git a/test/Transforms/SLPVectorizer/X86/atomics.ll b/test/Transforms/SLPVectorizer/X86/atomics.ll

index a48b793922f4b599f021056c3e2304404dcd97e7..c7f05491a18597221ff74b2419a960a61e7d1fe0 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/atomics.ll
+++ b/test/Transforms/SLPVectorizer/X86/atomics.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  
@@ -7,16 +8,19 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  ; The SLPVectorizer should not vectorize atomic stores and it should not
  ; schedule regular stores around atomic stores.
  
-; CHECK-LABEL: test
-; CHECK: store i32
-; CHECK: store atomic i32
-; CHECK: store i32
-; CHECK: store atomic i32
-; CHECK: store i32
-; CHECK: store atomic i32
-; CHECK: store i32
-; CHECK: store atomic i32
  define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), align 16
+; CHECK-NEXT:    store atomic i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 0) release, align 16
+; CHECK-NEXT:    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 1), align 4
+; CHECK-NEXT:    store atomic i32 1, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 1) release, align 4
+; CHECK-NEXT:    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 2), align 8
+; CHECK-NEXT:    store atomic i32 2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 2) release, align 8
+; CHECK-NEXT:    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 3), align 4
+; CHECK-NEXT:    store atomic i32 3, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 3) release, align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), align 16
    store atomic i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 0) release, align 16
diff --git a/test/Transforms/SLPVectorizer/X86/bad_types.ll b/test/Transforms/SLPVectorizer/X86/bad_types.ll

index 98c29068bb96aa88792e309e4558dd805d0bf722..d229acd17894a749efb1924b5b89ebf3a907fc41 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/bad_types.ll
+++ b/test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -7,10 +8,17 @@ define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
  ; Ensure we can handle x86_mmx values which are primitive and can be bitcast
  ; with integer types but can't be put into a vector.
  ;
-; CHECK-LABEL: @test1
-; CHECK:         store i64
-; CHECK:         store i64
-; CHECK:         ret void
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_CAST:%.*]] = bitcast x86_mmx [[A:%.*]] to i64
+; CHECK-NEXT:    [[B_CAST:%.*]] = bitcast x86_mmx [[B:%.*]] to i64
+; CHECK-NEXT:    [[A_AND:%.*]] = and i64 [[A_CAST]], 42
+; CHECK-NEXT:    [[B_AND:%.*]] = and i64 [[B_CAST]], 42
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[PTR:%.*]], i32 1
+; CHECK-NEXT:    store i64 [[A_AND]], i64* [[PTR]]
+; CHECK-NEXT:    store i64 [[B_AND]], i64* [[GEP]]
+; CHECK-NEXT:    ret void
+;
  entry:
    %a.cast = bitcast x86_mmx %a to i64
    %b.cast = bitcast x86_mmx %b to i64
@@ -26,10 +34,21 @@ define void @test2(x86_mmx %a, x86_mmx %b) {
  ; Same as @test1 but using phi-input vectorization instead of store
  ; vectorization.
  ;
-; CHECK-LABEL: @test2
-; CHECK:         and i64
-; CHECK:         and i64
-; CHECK:         ret void
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[A_CAST:%.*]] = bitcast x86_mmx [[A:%.*]] to i64
+; CHECK-NEXT:    [[B_CAST:%.*]] = bitcast x86_mmx [[B:%.*]] to i64
+; CHECK-NEXT:    [[A_AND:%.*]] = and i64 [[A_CAST]], 42
+; CHECK-NEXT:    [[B_AND:%.*]] = and i64 [[B_CAST]], 42
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[A_PHI:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[A_AND]], [[IF_THEN]] ]
+; CHECK-NEXT:    [[B_PHI:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[B_AND]], [[IF_THEN]] ]
+; CHECK-NEXT:    tail call void @f(i64 [[A_PHI]], i64 [[B_PHI]])
+; CHECK-NEXT:    ret void
+;
  entry:
    br i1 undef, label %if.then, label %exit
  
@@ -50,9 +69,26 @@ exit:
  define i8 @test3(i8 *%addr) {
  ; Check that we do not vectorize types that are padded to a bigger ones.
  ;
-; CHECK-LABEL: @test3
-; CHECK-NOT:   <4 x i2>
-; CHECK:       ret i8
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[ADDR:%.*]] to i2*
+; CHECK-NEXT:    [[A0:%.*]] = getelementptr inbounds i2, i2* [[A]], i64 0
+; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds i2, i2* [[A]], i64 1
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i2, i2* [[A]], i64 2
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i2, i2* [[A]], i64 3
+; CHECK-NEXT:    [[L0:%.*]] = load i2, i2* [[A0]], align 1
+; CHECK-NEXT:    [[L1:%.*]] = load i2, i2* [[A1]], align 1
+; CHECK-NEXT:    [[L2:%.*]] = load i2, i2* [[A2]], align 1
+; CHECK-NEXT:    [[L3:%.*]] = load i2, i2* [[A3]], align 1
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[P0:%.*]] = phi i2 [ [[L0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[P1:%.*]] = phi i2 [ [[L1]], [[ENTRY]] ]
+; CHECK-NEXT:    [[P2:%.*]] = phi i2 [ [[L2]], [[ENTRY]] ]
+; CHECK-NEXT:    [[P3:%.*]] = phi i2 [ [[L3]], [[ENTRY]] ]
+; CHECK-NEXT:    [[R:%.*]] = zext i2 [[P2]] to i8
+; CHECK-NEXT:    ret i8 [[R]]
+;
  entry:
    %a = bitcast i8* %addr to i2*
    %a0 = getelementptr inbounds i2, i2* %a, i64 0
diff --git a/test/Transforms/SLPVectorizer/X86/barriercall.ll b/test/Transforms/SLPVectorizer/X86/barriercall.ll

index 382a43fddf588ee4de7628628e241cc0da18e768..7378b8bcb1c93bd816b4e32912161edf3db452f2 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -1,12 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
-;CHECK-LABEL: @foo(
-;CHECK: store <4 x i32>
-;CHECK: ret
  define i32 @foo(i32* nocapture %A, i32 %n) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @bar()
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], <i32 5, i32 9, i32 3, i32 10>
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], <i32 5, i32 9, i32 3, i32 10>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> <i32 9, i32 9, i32 9, i32 9>, [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
  entry:
    %call = tail call i32 (...) @bar() #2
    %mul = mul nsw i32 %n, 5
diff --git a/test/Transforms/SLPVectorizer/X86/call.ll b/test/Transforms/SLPVectorizer/X86/call.ll

index 8397d348483cf4c908178c64f12a4a9f139b23a5..c93397c0337509d1b64655abdc3626d80e4c2310 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/test/Transforms/SLPVectorizer/X86/call.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -13,10 +14,10 @@ declare i64 @round(i64)
  
  define void @sin_libm(double* %a, double* %b) {
  ; CHECK-LABEL: @sin_libm(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
  ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
  ; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
  ; CHECK-NEXT:    ret void
  ;
@@ -33,10 +34,10 @@ define void @sin_libm(double* %a, double* %b) {
  
  define void @cos_libm(double* %a, double* %b) {
  ; CHECK-LABEL: @cos_libm(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
  ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
  ; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
  ; CHECK-NEXT:    ret void
  ;
@@ -53,10 +54,10 @@ define void @cos_libm(double* %a, double* %b) {
  
  define void @pow_libm(double* %a, double* %b) {
  ; CHECK-LABEL: @pow_libm(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
  ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
  ; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
  ; CHECK-NEXT:    ret void
  ;
@@ -73,10 +74,10 @@ define void @pow_libm(double* %a, double* %b) {
  
  define void @exp_libm(double* %a, double* %b) {
  ; CHECK-LABEL: @exp_libm(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
  ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
  ; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
  ; CHECK-NEXT:    ret void
  ;
@@ -91,15 +92,15 @@ define void @exp_libm(double* %a, double* %b) {
    ret void
  }
  
-; No fast-math-flags are required to convert sqrt library calls to an intrinsic. 
+; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
  ; We just need to know that errno is not set (readnone).
  
  define void @sqrt_libm_no_errno(double* %a, double* %b) {
  ; CHECK-LABEL: @sqrt_libm_no_errno(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* %a to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
  ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* %b to <2 x double>*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
  ; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
  ; CHECK-NEXT:    ret void
  ;
@@ -116,18 +117,18 @@ define void @sqrt_libm_no_errno(double* %a, double* %b) {
  
  ; The sqrt intrinsic does not set errno, but a non-constant sqrt call might, so this can't vectorize.
  ; The nnan on the call does not matter because there's no guarantee in the C standard that a negative
-; input would result in a nan output ("On a domain error, the function returns an 
+; input would result in a nan output ("On a domain error, the function returns an
  ; implementation-defined value.")
  
  define void @sqrt_libm_errno(double* %a, double* %b) {
  ; CHECK-LABEL: @sqrt_libm_errno(
-; CHECK-NEXT:    [[A0:%.*]] = load double, double* %a, align 8
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* %a, i64 1
+; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
  ; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDX1]], align 8
  ; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #2
  ; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #2
-; CHECK-NEXT:    store double [[SQRT1]], double* %b, align 8
-; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* %b, i64 1
+; CHECK-NEXT:    store double [[SQRT1]], double* [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
  ; CHECK-NEXT:    store double [[SQRT2]], double* [[IDX2]], align 8
  ; CHECK-NEXT:    ret void
  ;
@@ -145,13 +146,13 @@ define void @sqrt_libm_errno(double* %a, double* %b) {
  ; Negative test case
  define void @round_custom(i64* %a, i64* %b) {
  ; CHECK-LABEL: @round_custom(
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* %a, align 8
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, i64* %a, i64 1
+; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
  ; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
  ; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #3
  ; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #3
-; CHECK-NEXT:    store i64 [[ROUND1]], i64* %b, align 8
-; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, i64* %b, i64 1
+; CHECK-NEXT:    store i64 [[ROUND1]], i64* [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
  ; CHECK-NEXT:    store i64 [[ROUND2]], i64* [[IDX2]], align 8
  ; CHECK-NEXT:    ret void
  ;
diff --git a/test/Transforms/SLPVectorizer/X86/commutativity.ll b/test/Transforms/SLPVectorizer/X86/commutativity.ll

index 2798ccb15e48ab131c192284023a7f89c0f3bc6f..9af59efd345349b8000390c051e5000a703942ae 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/commutativity.ll
+++ b/test/Transforms/SLPVectorizer/X86/commutativity.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -slp-vectorizer < %s -S | FileCheck %s
  
  ; Verify that the SLP vectorizer is able to figure out that commutativity
@@ -16,9 +17,31 @@ target triple = "x86_64-apple-macosx10.11.0"
  ; Check that we correctly detect a splat/broadcast by leveraging the
  ; commutativity property of `xor`.
  
-; CHECK-LABEL:  @splat
-; CHECK:  store <16 x i8>
  define void @splat(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: @splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[C]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[C]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[C]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 [[C]], i32 4
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[C]], i32 5
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 [[C]], i32 6
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 [[C]], i32 7
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[C]], i32 8
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i8> [[TMP9]], i8 [[C]], i32 9
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x i8> [[TMP10]], i8 [[C]], i32 10
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i8> [[TMP11]], i8 [[C]], i32 11
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x i8> [[TMP12]], i8 [[C]], i32 12
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[C]], i32 13
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[C]], i32 14
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[C]], i32 15
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <2 x i8> undef, i8 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x i8> [[TMP17]], i8 [[B:%.*]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[TMP18]], <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[TMP19:%.*]] = xor <16 x i8> [[TMP16]], [[SHUFFLE]]
+; CHECK-NEXT:    store <16 x i8> [[TMP19]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16
+; CHECK-NEXT:    ret void
+;
    %1 = xor i8 %c, %a
    store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
    %2 = xor i8 %a, %c
@@ -59,9 +82,24 @@ define void @splat(i8 %a, i8 %b, i8 %c) {
  ; Check that we correctly detect that we can have the same opcode on one side by
  ; leveraging the commutativity property of `xor`.
  
-; CHECK-LABEL:  @same_opcode_on_one_side
-; CHECK:  store <4 x i32>
  define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @same_opcode_on_one_side(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[C]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP4]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[B:%.*]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[C]], i32 2
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[A]], i32 3
+; CHECK-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], [[TMP9]]
+; CHECK-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
+; CHECK-NEXT:    ret void
+;
    %add1 = add i32 %c, %a
    %add2 = add i32 %c, %a
    %add3 = add i32 %a, %c
diff --git a/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/test/Transforms/SLPVectorizer/X86/consecutive-access.ll

index aa682fc09f68c71675730697152fdc4c61103432..f394dc7439791289a6034b97eb3608c8a1132208 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
+++ b/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.9.0"
@@ -11,10 +12,38 @@ target triple = "x86_64-apple-macosx10.9.0"
  ; A[3*i], A[3*i+1] and A[3*i+2] are consecutive, but in future
  ; that would hopefully be fixed. For now, check that this isn't
  ; vectorized.
-; CHECK-LABEL: foo_3double
-; CHECK-NOT: x double>
  ; Function Attrs: nounwind ssp uwtable
  define void @foo_3double(i32 %u) #0 {
+; CHECK-LABEL: @foo_3double(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[U]], 3
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[ADD5:%.*]] = fadd double [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    store double [[ADD5]], double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[MUL]], 1
+; CHECK-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[ADD11]] to i64
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load double, double* [[ARRAYIDX13]], align 8
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load double, double* [[ARRAYIDX17]], align 8
+; CHECK-NEXT:    [[ADD18:%.*]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    store double [[ADD18]], double* [[ARRAYIDX13]], align 8
+; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[MUL]], 2
+; CHECK-NEXT:    [[IDXPROM25:%.*]] = sext i32 [[ADD24]] to i64
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM25]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX26]], align 8
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM25]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load double, double* [[ARRAYIDX30]], align 8
+; CHECK-NEXT:    [[ADD31:%.*]] = fadd double [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    store double [[ADD31]], double* [[ARRAYIDX26]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %u.addr = alloca i32, align 4
    store i32 %u, i32* %u.addr, align 4
@@ -48,10 +77,29 @@ entry:
  ; SCEV should be able to tell that accesses A[C1 + C2*i], A[C1 + C2*i], ...
  ; A[C1 + C2*i] are consecutive, if C2 is a power of 2, and C2 > C1 > 0.
  ; Thus, the following code should be vectorized.
-; CHECK-LABEL: foo_2double
-; CHECK: x double>
  ; Function Attrs: nounwind ssp uwtable
  define void @foo_2double(i32 %u) #0 {
+; CHECK-LABEL: @foo_2double(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[U]], 2
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[MUL]], 1
+; CHECK-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[ADD11]] to i64
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %u.addr = alloca i32, align 4
    store i32 %u, i32* %u.addr, align 4
@@ -75,10 +123,37 @@ entry:
  }
  
  ; Similar to the previous test, but with different datatype.
-; CHECK-LABEL: foo_4float
-; CHECK: x float>
  ; Function Attrs: nounwind ssp uwtable
  define void @foo_4float(i32 %u) #0 {
+; CHECK-LABEL: @foo_4float(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[U]], 4
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[MUL]], 1
+; CHECK-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[ADD11]] to i64
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[MUL]], 2
+; CHECK-NEXT:    [[IDXPROM25:%.*]] = sext i32 [[ADD24]] to i64
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM25]]
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM25]]
+; CHECK-NEXT:    [[ADD37:%.*]] = add nsw i32 [[MUL]], 3
+; CHECK-NEXT:    [[IDXPROM38:%.*]] = sext i32 [[ADD37]] to i64
+; CHECK-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM38]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM38]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX4]] to <4 x float>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %u.addr = alloca i32, align 4
    store i32 %u, i32* %u.addr, align 4
@@ -118,10 +193,51 @@ entry:
  }
  
  ; Similar to the previous tests, but now we are dealing with AddRec SCEV.
-; CHECK-LABEL: foo_loop
-; CHECK: x double>
  ; Function Attrs: nounwind ssp uwtable
  define i32 @foo_loop(double* %A, i32 %n) #0 {
+; CHECK-LABEL: @foo_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
+; CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[SUM:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store double* [[A:%.*]], double** [[A_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_ADDR]], align 4
+; CHECK-NEXT:    store double 0.000000e+00, double* [[SUM]], align 8
+; CHECK-NEXT:    store i32 0, i32* [[I]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[N]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 2
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[MUL]], 1
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> <double 7.000000e+00, double 7.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT:    [[ADD6:%.*]] = fadd double [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[ADD7]] = fadd double [[TMP1]], [[ADD6]]
+; CHECK-NEXT:    store double [[ADD7]], double* [[SUM]], align 8
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
+; CHECK:       for.cond.for.end_crit_edge:
+; CHECK-NEXT:    [[SPLIT:%.*]] = phi double [ [[ADD7]], [[FOR_BODY]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi double [ [[SPLIT]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[DOTLCSSA]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
  entry:
    %A.addr = alloca double*, align 8
    %n.addr = alloca i32, align 4
@@ -170,11 +286,30 @@ for.end:                                          ; preds = %for.cond.for.end_cr
  
  ; Similar to foo_2double but with a non-power-of-2 factor and potential
  ; wrapping (both indices wrap or both don't in the same time)
-; CHECK-LABEL: foo_2double_non_power_of_2
-; CHECK: load <2 x double>
-; CHECK: load <2 x double>
  ; Function Attrs: nounwind ssp uwtable
  define void @foo_2double_non_power_of_2(i32 %u) #0 {
+; CHECK-LABEL: @foo_2double_non_power_of_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[U]], 6
+; CHECK-NEXT:    [[ADD6:%.*]] = add i32 [[MUL]], 6
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ADD7:%.*]] = add i32 [[MUL]], 7
+; CHECK-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[ADD7]] to i64
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %u.addr = alloca i32, align 4
    store i32 %u, i32* %u.addr, align 4
@@ -199,11 +334,30 @@ entry:
  }
  
  ; Similar to foo_2double_non_power_of_2 but with zext's instead of sext's
-; CHECK-LABEL: foo_2double_non_power_of_2_zext
-; CHECK: load <2 x double>
-; CHECK: load <2 x double>
  ; Function Attrs: nounwind ssp uwtable
  define void @foo_2double_non_power_of_2_zext(i32 %u) #0 {
+; CHECK-LABEL: @foo_2double_non_power_of_2_zext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[U]], 6
+; CHECK-NEXT:    [[ADD6:%.*]] = add i32 [[MUL]], 6
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[ADD6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ADD7:%.*]] = add i32 [[MUL]], 7
+; CHECK-NEXT:    [[IDXPROM12:%.*]] = zext i32 [[ADD7]] to i64
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %u.addr = alloca i32, align 4
    store i32 %u, i32* %u.addr, align 4
@@ -230,10 +384,52 @@ entry:
  ; Similar to foo_2double_non_power_of_2, but now we are dealing with AddRec SCEV.
  ; Alternatively, this is like foo_loop, but with a non-power-of-2 factor and
  ; potential wrapping (both indices wrap or both don't in the same time)
-; CHECK-LABEL: foo_loop_non_power_of_2
-; CHECK: <2 x double>
  ; Function Attrs: nounwind ssp uwtable
  define i32 @foo_loop_non_power_of_2(double* %A, i32 %n) #0 {
+; CHECK-LABEL: @foo_loop_non_power_of_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
+; CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[SUM:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store double* [[A:%.*]], double** [[A_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_ADDR]], align 4
+; CHECK-NEXT:    store double 0.000000e+00, double* [[SUM]], align 8
+; CHECK-NEXT:    store i32 0, i32* [[I]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[N]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TMP0]], 12
+; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[MUL]], 5
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD_5]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 [[MUL]], 6
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[ADD_6]] to i64
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> <double 7.000000e+00, double 7.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT:    [[ADD6:%.*]] = fadd double [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[ADD7]] = fadd double [[TMP1]], [[ADD6]]
+; CHECK-NEXT:    store double [[ADD7]], double* [[SUM]], align 8
+; CHECK-NEXT:    [[INC]] = add i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
+; CHECK:       for.cond.for.end_crit_edge:
+; CHECK-NEXT:    [[SPLIT:%.*]] = phi double [ [[ADD7]], [[FOR_BODY]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi double [ [[SPLIT]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[DOTLCSSA]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
  entry:
    %A.addr = alloca double*, align 8
    %n.addr = alloca i32, align 4
@@ -299,9 +495,32 @@ for.end:                                          ; preds = %for.cond.for.end_cr
  ;
  ; Make sure we are able to vectorize this from now on:
  ;
-; CHECK-LABEL: @bar
-; CHECK: load <2 x double>
  define double @bar(double* nocapture readonly %a, i32 %n) local_unnamed_addr #0 {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp eq i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
+; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret double [[MUL]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_018:%.*]] = phi i32 [ [[ADD5:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x double> [ [[TMP6]], [[FOR_BODY]] ], [ zeroinitializer, [[ENTRY]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_018]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ADD1:%.*]] = or i32 [[I_018]], 1
+; CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[ADD1]] to i64
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6]] = fadd <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[ADD5]] = add i32 [[I_018]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ADD5]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]]
+;
  entry:
    %cmp15 = icmp eq i32 %n, 0
    br i1 %cmp15, label %for.cond.cleanup, label %for.body
diff --git a/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll b/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll

index ecae70ecc918568fe7c33b113295de28f83b6e22..060cb054b7f00e1fc45c6b17619e67d99b46907c 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
+++ b/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
@@ -1,13 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  ; We will keep trying to vectorize the basic block even we already find vectorized store.
-; CHECK: test1
-; CHECK: store <2 x double>
-; CHECK: ret
  define void @test1(double* %a, double* %b, double* %c, double* %d) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[A]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[B]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = mul <4 x i32> [[TMP7]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[D:%.*]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP10]], <4 x i32>* [[TMP11]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load double, double* %a, align 8
    %i1 = load double, double* %b, align 8
diff --git a/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/test/Transforms/SLPVectorizer/X86/crash_7zip.ll

index 54e5d5a5308021cc4e9658ddf215d3b673539c72..e7bff49c09f033342c720d63885e7927ed06b61f 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -7,6 +8,32 @@ target triple = "x86_64-apple-macosx10.8.0"
  %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333 = type { i32, i32, i32, i32 }
  
  define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p) {
+; CHECK-LABEL: @LzmaDec_DecodeReal2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4
+; CHECK-NEXT:    [[CODE21_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P]], i64 0, i32 5
+; CHECK-NEXT:    br label [[DO_BODY66_I:%.*]]
+; CHECK:       do.body66.i:
+; CHECK-NEXT:    [[RANGE_2_I:%.*]] = phi i32 [ [[RANGE_4_I:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[CODE_2_I:%.*]] = phi i32 [ [[CODE_4_I:%.*]], [[DO_COND_I]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    [[DOTRANGE_2_I:%.*]] = select i1 undef, i32 undef, i32 [[RANGE_2_I]]
+; CHECK-NEXT:    [[DOTCODE_2_I:%.*]] = select i1 undef, i32 undef, i32 [[CODE_2_I]]
+; CHECK-NEXT:    br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]]
+; CHECK:       if.else.i:
+; CHECK-NEXT:    [[SUB91_I:%.*]] = sub i32 [[DOTRANGE_2_I]], undef
+; CHECK-NEXT:    [[SUB92_I:%.*]] = sub i32 [[DOTCODE_2_I]], undef
+; CHECK-NEXT:    br label [[DO_COND_I]]
+; CHECK:       do.cond.i:
+; CHECK-NEXT:    [[RANGE_4_I]] = phi i32 [ [[SUB91_I]], [[IF_ELSE_I]] ], [ undef, [[DO_BODY66_I]] ]
+; CHECK-NEXT:    [[CODE_4_I]] = phi i32 [ [[SUB92_I]], [[IF_ELSE_I]] ], [ [[DOTCODE_2_I]], [[DO_BODY66_I]] ]
+; CHECK-NEXT:    br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]]
+; CHECK:       do.end1006.i:
+; CHECK-NEXT:    [[DOTRANGE_4_I:%.*]] = select i1 undef, i32 undef, i32 [[RANGE_4_I]]
+; CHECK-NEXT:    [[DOTCODE_4_I:%.*]] = select i1 undef, i32 undef, i32 [[CODE_4_I]]
+; CHECK-NEXT:    store i32 [[DOTRANGE_4_I]], i32* [[RANGE20_I]], align 4
+; CHECK-NEXT:    store i32 [[DOTCODE_4_I]], i32* [[CODE21_I]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %range20.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334, %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 4
    %code21.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334, %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 5
diff --git a/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll b/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll

index 9046c35628216fbfb8482ed1ebec606156818a45..eec53739b6796f6658a83eafb0dc6617ae4299e1 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-darwin13.3.0"
@@ -6,6 +7,28 @@ target triple = "x86_64-apple-darwin13.3.0"
  @a = common global double 0.000000e+00, align 8
  
  define i32 @fn1() {
+; CHECK-LABEL: @fn1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INIT:%.*]] = load double, double* @a, align 8
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi double [ [[ADD2:%.*]], [[LOOP]] ], [ [[INIT]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[POSTADD1_PHI:%.*]] = phi double [ [[POSTADD1:%.*]], [[LOOP]] ], [ [[INIT]], [[ENTRY]] ]
+; CHECK-NEXT:    [[POSTADD2_PHI:%.*]] = phi double [ [[POSTADD2:%.*]], [[LOOP]] ], [ [[INIT]], [[ENTRY]] ]
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[POSTADD1_PHI]], undef
+; CHECK-NEXT:    [[ADD2]] = fadd double [[POSTADD2_PHI]], [[PHI]]
+; CHECK-NEXT:    [[MUL2:%.*]] = fmul double [[ADD2]], 0.000000e+00
+; CHECK-NEXT:    [[BINARYOP_B:%.*]] = fadd double [[POSTADD1_PHI]], [[MUL2]]
+; CHECK-NEXT:    [[MUL1:%.*]] = fmul double [[ADD1]], 0.000000e+00
+; CHECK-NEXT:    [[TMP:%.*]] = fadd double [[POSTADD2_PHI]], 0.000000e+00
+; CHECK-NEXT:    [[BINARY_V:%.*]] = fadd double [[MUL1]], [[BINARYOP_B]]
+; CHECK-NEXT:    [[POSTADD1]] = fadd double [[BINARY_V]], 0.000000e+00
+; CHECK-NEXT:    [[POSTADD2]] = fadd double [[TMP]], 1.000000e+00
+; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[POSTADD1]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 1
+;
  entry:
    %init = load double, double* @a, align 8
    br label %loop
diff --git a/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/test/Transforms/SLPVectorizer/X86/crash_bullet.ll

index 1bad671fd82414ec63e37b3bcfd768f2aaca9e43..a1a3f50765a413af4546f632649a0895855bd408 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -6,6 +7,32 @@ target triple = "x86_64-apple-macosx10.8.0"
  %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960" = type { i32, i32 }
  
  define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* nocapture %info) {
+; CHECK-LABEL: @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    ret void
+; CHECK:       if.else:
+; CHECK-NEXT:    [[M_NUMCONSTRAINTROWS4:%.*]] = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* [[INFO:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[NUB5:%.*]] = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* [[INFO]], i64 0, i32 1
+; CHECK-NEXT:    br i1 undef, label [[LAND_LHS_TRUE_I_1:%.*]], label [[IF_THEN7_1:%.*]]
+; CHECK:       land.lhs.true.i.1:
+; CHECK-NEXT:    br i1 undef, label [[FOR_INC_1:%.*]], label [[IF_THEN7_1]]
+; CHECK:       if.then7.1:
+; CHECK-NEXT:    [[INC_1:%.*]] = add nsw i32 0, 1
+; CHECK-NEXT:    store i32 [[INC_1]], i32* [[M_NUMCONSTRAINTROWS4]], align 4
+; CHECK-NEXT:    [[DEC_1:%.*]] = add nsw i32 6, -1
+; CHECK-NEXT:    store i32 [[DEC_1]], i32* [[NUB5]], align 4
+; CHECK-NEXT:    br label [[FOR_INC_1]]
+; CHECK:       for.inc.1:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[DEC_1]], [[IF_THEN7_1]] ], [ 6, [[LAND_LHS_TRUE_I_1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[INC_1]], [[IF_THEN7_1]] ], [ 0, [[LAND_LHS_TRUE_I_1]] ]
+; CHECK-NEXT:    [[INC_2:%.*]] = add nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[INC_2]], i32* [[M_NUMCONSTRAINTROWS4]], align 4
+; CHECK-NEXT:    [[DEC_2:%.*]] = add nsw i32 [[TMP0]], -1
+; CHECK-NEXT:    store i32 [[DEC_2]], i32* [[NUB5]], align 4
+; CHECK-NEXT:    unreachable
+;
  entry:
    br i1 undef, label %if.else, label %if.then
  
@@ -42,6 +69,30 @@ for.inc.1:                                        ; preds = %if.then7.1, %land.l
  %class.btVector4.7.32.67.92.117.142.177.187.262.282.331 = type { %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 }
  
  define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this) {
+; CHECK-LABEL: @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [[CLASS_GIM_TRIANGLE_CALCULATION_CACHE_9_34_69_94_119_144_179_189_264_284_332:%.*]], %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* [[THIS:%.*]], i64 0, i32 2, i64 0, i32 0, i64 1
+; CHECK-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [[CLASS_GIM_TRIANGLE_CALCULATION_CACHE_9_34_69_94_119_144_179_189_264_284_332]], %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* [[THIS]], i64 0, i32 2, i64 0, i32 0, i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX36]], align 4
+; CHECK-NEXT:    [[ADD587:%.*]] = fadd float undef, undef
+; CHECK-NEXT:    [[SUB600:%.*]] = fsub float [[ADD587]], undef
+; CHECK-NEXT:    store float [[SUB600]], float* undef, align 4
+; CHECK-NEXT:    [[SUB613:%.*]] = fsub float [[ADD587]], [[SUB600]]
+; CHECK-NEXT:    store float [[SUB613]], float* [[ARRAYIDX26]], align 4
+; CHECK-NEXT:    [[ADD626:%.*]] = fadd float [[TMP0]], undef
+; CHECK-NEXT:    [[SUB639:%.*]] = fsub float [[ADD626]], undef
+; CHECK-NEXT:    [[SUB652:%.*]] = fsub float [[ADD626]], [[SUB639]]
+; CHECK-NEXT:    store float [[SUB652]], float* [[ARRAYIDX36]], align 4
+; CHECK-NEXT:    br i1 undef, label [[IF_ELSE1609:%.*]], label [[IF_THEN1595:%.*]]
+; CHECK:       if.then1595:
+; CHECK-NEXT:    br i1 undef, label [[RETURN:%.*]], label [[FOR_BODY_LR_PH_I_I1702:%.*]]
+; CHECK:       for.body.lr.ph.i.i1702:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else1609:
+; CHECK-NEXT:    unreachable
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
  entry:
    %arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 1
    %arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 2
@@ -71,6 +122,40 @@ return:                                           ; preds = %if.then1595
  }
  
  define void @_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE() {
+; CHECK-LABEL: @_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END111:%.*]]
+; CHECK:       if.end111:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END136:%.*]]
+; CHECK:       if.end136:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END162:%.*]]
+; CHECK:       if.end162:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END189:%.*]]
+; CHECK:       if.end189:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END216:%.*]]
+; CHECK:       if.end216:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN218:%.*]], label [[IF_END225:%.*]]
+; CHECK:       if.then218:
+; CHECK-NEXT:    br label [[IF_END225]]
+; CHECK:       if.end225:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END248:%.*]]
+; CHECK:       if.end248:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END304:%.*]]
+; CHECK:       if.end304:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END361:%.*]]
+; CHECK:       if.end361:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN370:%.*]], label [[IF_END395:%.*]]
+; CHECK:       if.then370:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN374:%.*]], label [[IF_END395]]
+; CHECK:       if.then374:
+; CHECK-NEXT:    br label [[IF_END395]]
+; CHECK:       if.end395:
+; CHECK-NEXT:    unreachable
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
  entry:
    %add8.i2343 = fadd float undef, undef
    %add8.i2381 = fadd float undef, undef
diff --git a/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll

index 8102769295307d5e6a1ca61bd2f882d62aba8014..7ec4fc18598a4ef861324ad6a70f03cb16523a4f 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -7,6 +8,62 @@ target triple = "x86_64-apple-macosx10.8.0"
  
  ; Function Attrs: ssp uwtable
  define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices) #0 align 2 {
+; CHECK-LABEL: @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN17_1:%.*]], label [[IF_END22_1:%.*]]
+; CHECK:       for.end36:
+; CHECK-NEXT:    br label [[FOR_BODY144:%.*]]
+; CHECK:       for.body144:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END227:%.*]], label [[FOR_BODY144]]
+; CHECK:       for.end227:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END271:%.*]], label [[FOR_BODY233:%.*]]
+; CHECK:       for.body233:
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY233]], label [[FOR_END271]]
+; CHECK:       for.end271:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi float [ 0x47EFFFFFE0000000, [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ 0x47EFFFFFE0000000, [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ]
+; CHECK-NEXT:    [[SUB275:%.*]] = fsub float undef, [[TMP1]]
+; CHECK-NEXT:    [[SUB279:%.*]] = fsub float undef, [[TMP0]]
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN291:%.*]], label [[RETURN]]
+; CHECK:       if.then291:
+; CHECK-NEXT:    [[MUL292:%.*]] = fmul float [[SUB275]], 5.000000e-01
+; CHECK-NEXT:    [[ADD294:%.*]] = fadd float [[TMP1]], [[MUL292]]
+; CHECK-NEXT:    [[MUL295:%.*]] = fmul float [[SUB279]], 5.000000e-01
+; CHECK-NEXT:    [[ADD297:%.*]] = fadd float [[TMP0]], [[MUL295]]
+; CHECK-NEXT:    br i1 undef, label [[IF_END332:%.*]], label [[IF_ELSE319:%.*]]
+; CHECK:       if.else319:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN325:%.*]], label [[IF_END327:%.*]]
+; CHECK:       if.then325:
+; CHECK-NEXT:    br label [[IF_END327]]
+; CHECK:       if.end327:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN329:%.*]], label [[IF_END332]]
+; CHECK:       if.then329:
+; CHECK-NEXT:    br label [[IF_END332]]
+; CHECK:       if.end332:
+; CHECK-NEXT:    [[DX272_1:%.*]] = phi float [ [[SUB275]], [[IF_THEN329]] ], [ [[SUB275]], [[IF_END327]] ], [ 0x3F847AE140000000, [[IF_THEN291]] ]
+; CHECK-NEXT:    [[DY276_1:%.*]] = phi float [ undef, [[IF_THEN329]] ], [ undef, [[IF_END327]] ], [ 0x3F847AE140000000, [[IF_THEN291]] ]
+; CHECK-NEXT:    [[SUB334:%.*]] = fsub float [[ADD294]], [[DX272_1]]
+; CHECK-NEXT:    [[SUB338:%.*]] = fsub float [[ADD297]], [[DY276_1]]
+; CHECK-NEXT:    [[ARRAYIDX_I_I606:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113:%.*]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT:    store float [[SUB334]], float* [[ARRAYIDX_I_I606]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3_I607:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES]], i64 0, i32 0, i64 1
+; CHECK-NEXT:    store float [[SUB338]], float* [[ARRAYIDX3_I607]], align 4
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+; CHECK:       if.then17.1:
+; CHECK-NEXT:    br label [[IF_END22_1]]
+; CHECK:       if.end22.1:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN17_2:%.*]], label [[IF_END22_2:%.*]]
+; CHECK:       if.then17.2:
+; CHECK-NEXT:    br label [[IF_END22_2]]
+; CHECK:       if.end22.2:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END36:%.*]], label [[FOR_BODY]]
+;
  entry:
    br i1 undef, label %return, label %if.end
  
diff --git a/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll

index f10c8626d417099e0bbebeaefa5c52c7f622529c..5cee363de16edfcacfe69d3d9c3c3b70401d7766 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
@@ -1,9 +1,50 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -S
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
  
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.10.0"
  
  define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
+; CHECK-LABEL: @testfunc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x float> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x float> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[TMP9]], [[TMP8]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fcmp olt <2 x float> [[TMP10]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP12:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP10]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP13:%.*]] = fcmp olt <2 x float> [[TMP12]], <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    [[TMP14:%.*]] = fmul <2 x float> zeroinitializer, [[TMP12]]
+; CHECK-NEXT:    [[TMP15:%.*]] = select <2 x i1> [[TMP13]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1
+; CHECK-NEXT:    [[ADD13]] = fadd float [[TMP16]], [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x float> undef, float [[TMP17]], i32 0
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x float> [[TMP18]], float [[ADD13]], i32 1
+; CHECK-NEXT:    [[TMP20:%.*]] = fcmp olt <2 x float> [[TMP19]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP21:%.*]] = select <2 x i1> [[TMP20]], <2 x float> [[TMP19]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP22:%.*]] = fcmp olt <2 x float> [[TMP21]], <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    [[TMP23]] = select <2 x i1> [[TMP22]], <2 x float> <float -1.000000e+00, float -1.000000e+00>, <2 x float> [[TMP21]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
  entry:
    br label %for.body
  
diff --git a/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll b/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll

index 28b7aa3c4de3ca298a4d53601525b227e209d06a..b5f736af62445f0efdf1ea3e822b0f4a7d629942 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -6,6 +7,33 @@ target triple = "x86_64-apple-macosx10.8.0"
  
  ; Function Attrs: nounwind ssp uwtable
  define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* nocapture %__last) {
+; CHECK-LABEL: @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[_M_CUR2_I_I:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* [[__FIRST:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_CUR2_I_I]], align 8
+; CHECK-NEXT:    [[_M_FIRST3_I_I:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* [[__FIRST]], i64 0, i32 1
+; CHECK-NEXT:    [[_M_CUR2_I_I81:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* [[__LAST:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load double*, double** [[_M_CUR2_I_I81]], align 8
+; CHECK-NEXT:    [[_M_FIRST3_I_I83:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* [[__LAST]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load double*, double** [[_M_FIRST3_I_I83]], align 8
+; CHECK-NEXT:    br i1 undef, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT:%.*]], label [[WHILE_COND_I_PREHEADER:%.*]]
+; CHECK:       while.cond.i.preheader:
+; CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
+; CHECK:       while.cond.i:
+; CHECK-NEXT:    br i1 undef, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT]], label [[WHILE_BODY_I:%.*]]
+; CHECK:       while.body.i:
+; CHECK-NEXT:    br i1 undef, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT]], label [[WHILE_COND_I]]
+; CHECK:       _ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi double* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[TMP2]], [[WHILE_COND_I]] ], [ undef, [[WHILE_BODY_I]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi double* [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[WHILE_COND_I]] ], [ undef, [[WHILE_BODY_I]] ]
+; CHECK-NEXT:    store double* [[TMP4]], double** [[_M_CUR2_I_I]], align 8
+; CHECK-NEXT:    store double* [[TMP3]], double** [[_M_FIRST3_I_I]], align 8
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN_I55:%.*]], label [[WHILE_COND:%.*]]
+; CHECK:       if.then.i55:
+; CHECK-NEXT:    br label [[WHILE_COND]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    br label [[WHILE_COND]]
+;
  entry:
    %_M_cur2.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 0
    %0 = load double*, double** %_M_cur2.i.i, align 8
diff --git a/test/Transforms/SLPVectorizer/X86/crash_flop7.ll b/test/Transforms/SLPVectorizer/X86/crash_flop7.ll

index e11be488f795b23a98b578dddaa6eb9c5682d819..d149c27b7c7ee3342db619d00e57cf944320de4e 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
@@ -1,10 +1,41 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  ; Function Attrs: nounwind ssp uwtable
  define void @main() #0 {
+; CHECK-LABEL: @main(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    unreachable
+; CHECK:       while.end:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END80:%.*]], label [[FOR_BODY75_LR_PH:%.*]]
+; CHECK:       for.body75.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY75:%.*]]
+; CHECK:       for.body75:
+; CHECK-NEXT:    br label [[FOR_BODY75]]
+; CHECK:       for.end80:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END300:%.*]], label [[FOR_BODY267_LR_PH:%.*]]
+; CHECK:       for.body267.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY267:%.*]]
+; CHECK:       for.body267:
+; CHECK-NEXT:    [[S_71010:%.*]] = phi double [ 0.000000e+00, [[FOR_BODY267_LR_PH]] ], [ [[ADD297:%.*]], [[FOR_BODY267]] ]
+; CHECK-NEXT:    [[MUL269:%.*]] = fmul double undef, undef
+; CHECK-NEXT:    [[MUL270:%.*]] = fmul double [[MUL269]], [[MUL269]]
+; CHECK-NEXT:    [[ADD282:%.*]] = fadd double undef, undef
+; CHECK-NEXT:    [[MUL283:%.*]] = fmul double [[MUL269]], [[ADD282]]
+; CHECK-NEXT:    [[ADD293:%.*]] = fadd double undef, undef
+; CHECK-NEXT:    [[MUL294:%.*]] = fmul double [[MUL270]], [[ADD293]]
+; CHECK-NEXT:    [[ADD295:%.*]] = fadd double undef, [[MUL294]]
+; CHECK-NEXT:    [[DIV296:%.*]] = fdiv double [[MUL283]], [[ADD295]]
+; CHECK-NEXT:    [[ADD297]] = fadd double [[S_71010]], [[DIV296]]
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY267]], label [[FOR_END300]]
+; CHECK:       for.end300:
+; CHECK-NEXT:    unreachable
+;
  entry:
    br i1 undef, label %while.body, label %while.end
  
diff --git a/test/Transforms/SLPVectorizer/X86/crash_gep.ll b/test/Transforms/SLPVectorizer/X86/crash_gep.ll

index bd1e8f7cc19d010a4c7a6c496175707e5ed7d302..eca21e7d3a1a01c5b931c8f2eb6f61bb058ca108 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_gep.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_gep.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
  
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
@@ -7,6 +8,17 @@ target triple = "x86_64-unknown-linux-gnu"
  
  ; Function Attrs: nounwind uwtable
  define i32 @fn1() {
+; CHECK-LABEL: @fn1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** @a, align 8
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i64* [[ADD_PTR]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 2
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i64* [[ARRAYIDX]] to i64
+; CHECK-NEXT:    store i64 [[TMP2]], i64* [[ADD_PTR]], align 8
+; CHECK-NEXT:    ret i32 undef
+;
  entry:
    %0 = load i64*, i64** @a, align 8
    %add.ptr = getelementptr inbounds i64, i64* %0, i64 1
diff --git a/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/test/Transforms/SLPVectorizer/X86/crash_lencod.ll

index 70b13fd75f14912304c820b17fb0be45bcd1545f..eb1cb32a6387177546c095068fbb223441edfea7 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -1,10 +1,45 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  ; Function Attrs: nounwind ssp uwtable
  define void @RCModelEstimator() {
+; CHECK-LABEL: @RCModelEstimator(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END_THREAD:%.*]]
+; CHECK:       for.end.thread:
+; CHECK-NEXT:    unreachable
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY3:%.*]], label [[IF_END103:%.*]]
+; CHECK:       for.cond14.preheader:
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY16_LR_PH:%.*]], label [[IF_END103]]
+; CHECK:       for.body16.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY16:%.*]]
+; CHECK:       for.body3:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN7:%.*]], label [[FOR_INC11:%.*]]
+; CHECK:       if.then7:
+; CHECK-NEXT:    br label [[FOR_INC11]]
+; CHECK:       for.inc11:
+; CHECK-NEXT:    br i1 false, label [[FOR_COND14_PREHEADER:%.*]], label [[FOR_BODY3]]
+; CHECK:       for.body16:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END39:%.*]], label [[FOR_BODY16]]
+; CHECK:       for.end39:
+; CHECK-NEXT:    br i1 undef, label [[IF_END103]], label [[FOR_COND45_PREHEADER:%.*]]
+; CHECK:       for.cond45.preheader:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN88:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then88:
+; CHECK-NEXT:    br label [[IF_END103]]
+; CHECK:       if.else:
+; CHECK-NEXT:    br label [[IF_END103]]
+; CHECK:       if.end103:
+; CHECK-NEXT:    ret void
+;
  entry:
    br i1 undef, label %for.body.lr.ph, label %for.end.thread
  
@@ -66,6 +101,17 @@ if.end103:                                        ; preds = %if.else, %if.then88
  
  
  define void @intrapred_luma() {
+; CHECK-LABEL: @intrapred_luma(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV153:%.*]] = trunc i32 undef to i16
+; CHECK-NEXT:    [[ARRAYIDX154:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 12
+; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX154]], align 8
+; CHECK-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 11
+; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX155]], align 2
+; CHECK-NEXT:    [[ARRAYIDX156:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 10
+; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX156]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %conv153 = trunc i32 undef to i16
    %arrayidx154 = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 12
@@ -78,6 +124,18 @@ entry:
  }
  
  define fastcc void @dct36(double* %inbuf) {
+; CHECK-LABEL: @dct36(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[ARRAYIDX44]], align 8
+; CHECK-NEXT:    [[ADD46:%.*]] = fadd double [[TMP0]], undef
+; CHECK-NEXT:    store double [[ADD46]], double* [[ARRAYIDX41]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[INBUF]], align 8
+; CHECK-NEXT:    [[ADD49:%.*]] = fadd double [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store double [[ADD49]], double* [[ARRAYIDX44]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %arrayidx41 = getelementptr inbounds double, double* %inbuf, i64 2
    %arrayidx44 = getelementptr inbounds double, double* %inbuf, i64 1
diff --git a/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll

index f82343fb433f503f051937ae27c3a26fdee5cd4d..f12de2ad199c44f02c9fbf3305a3fbb3ff3b686d 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
@@ -1,9 +1,46 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  define void @main() {
+; CHECK-LABEL: @main(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br label [[FOR_COND4_PREHEADER:%.*]]
+; CHECK:       for.cond4.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY6:%.*]]
+; CHECK:       for.body6:
+; CHECK-NEXT:    br label [[FOR_BODY12:%.*]]
+; CHECK:       for.body12:
+; CHECK-NEXT:    [[FZIMG_069:%.*]] = phi double [ undef, [[FOR_BODY6]] ], [ [[ADD19:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[FZREAL_068:%.*]] = phi double [ undef, [[FOR_BODY6]] ], [ [[ADD20:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[MUL13:%.*]] = fmul double [[FZREAL_068]], [[FZREAL_068]]
+; CHECK-NEXT:    [[MUL14:%.*]] = fmul double [[FZIMG_069]], [[FZIMG_069]]
+; CHECK-NEXT:    [[ADD15:%.*]] = fadd double [[MUL13]], [[MUL14]]
+; CHECK-NEXT:    [[CMP16:%.*]] = fcmp ogt double [[ADD15]], 4.000000e+00
+; CHECK-NEXT:    br i1 [[CMP16]], label [[FOR_INC21:%.*]], label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[MUL18:%.*]] = fmul double undef, [[FZIMG_069]]
+; CHECK-NEXT:    [[ADD19]] = fadd double undef, [[MUL18]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[MUL13]], [[MUL14]]
+; CHECK-NEXT:    [[ADD20]] = fadd double undef, [[SUB]]
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY12]], label [[FOR_INC21]]
+; CHECK:       for.inc21:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END23:%.*]], label [[FOR_BODY6]]
+; CHECK:       for.end23:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN25:%.*]], label [[IF_THEN26:%.*]]
+; CHECK:       if.then25:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END44:%.*]], label [[FOR_COND4_PREHEADER]]
+; CHECK:       if.then26:
+; CHECK-NEXT:    unreachable
+; CHECK:       for.end44:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END48:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end48:
+; CHECK-NEXT:    ret void
+;
  entry:
    br label %for.body
  
@@ -54,6 +91,26 @@ for.end48:                                        ; preds = %for.end44
  %struct.hoge = type { double, double, double}
  
  define void @zot(%struct.hoge* %arg) {
+; CHECK-LABEL: @zot(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[ARG:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TMP7]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    br i1 undef, label [[BB11:%.*]], label [[BB12:%.*]]
+; CHECK:       bb11:
+; CHECK-NEXT:    br label [[BB14:%.*]]
+; CHECK:       bb12:
+; CHECK-NEXT:    br label [[BB14]]
+; CHECK:       bb14:
+; CHECK-NEXT:    ret void
+;
  bb:
    %tmp = load double, double* undef, align 8
    %tmp1 = fsub double %tmp, undef
@@ -85,6 +142,22 @@ bb14:                                             ; preds = %bb12, %bb11
  %struct.rc4_state.0.24 = type { i32, i32, [256 x i32] }
  
  define void @rc4_crypt(%struct.rc4_state.0.24* nocapture %s) {
+; CHECK-LABEL: @rc4_crypt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds [[STRUCT_RC4_STATE_0_24:%.*]], %struct.rc4_state.0.24* [[S:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[Y2:%.*]] = getelementptr inbounds [[STRUCT_RC4_STATE_0_24]], %struct.rc4_state.0.24* [[S]], i64 0, i32 1
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CONV4:%.*]] = and i32 undef, 255
+; CHECK-NEXT:    [[CONV7:%.*]] = and i32 undef, 255
+; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[X_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[CONV4]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Y_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[CONV7]], [[FOR_BODY]] ]
+; CHECK-NEXT:    store i32 [[X_0_LCSSA]], i32* [[X1]], align 4
+; CHECK-NEXT:    store i32 [[Y_0_LCSSA]], i32* [[Y2]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %x1 = getelementptr inbounds %struct.rc4_state.0.24, %struct.rc4_state.0.24* %s, i64 0, i32 0
    %y2 = getelementptr inbounds %struct.rc4_state.0.24, %struct.rc4_state.0.24* %s, i64 0, i32 1
diff --git a/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll

index e1df98d83bdbded0a63182422addefb4d727cd06..a52ec6ba9de005bf804b1bea8b59ee9237b3bac7 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -12,6 +13,30 @@ target triple = "x86_64-apple-macosx10.8.0"
  @e = common global i32 0, align 4
  
  define i32 @fn1() {
+; CHECK-LABEL: @fn1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 0), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* @d, align 4
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[SW_BB:%.*]], label [[SAVE_STATE_AND_RETURN:%.*]]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* @c, align 4
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP3]], 7
+; CHECK-NEXT:    store i32 [[AND]], i32* @a, align 4
+; CHECK-NEXT:    switch i32 [[AND]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 7, label [[SAVE_STATE_AND_RETURN]]
+; CHECK-NEXT:    i32 0, label [[SAVE_STATE_AND_RETURN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[SAVE_STATE_AND_RETURN]]
+; CHECK:       save_state_and_return:
+; CHECK-NEXT:    [[T_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP0]], [[SW_BB]] ], [ [[TMP0]], [[SW_BB]] ]
+; CHECK-NEXT:    [[F_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[TMP1]], [[ENTRY]] ], [ 0, [[SW_BB]] ], [ 0, [[SW_BB]] ]
+; CHECK-NEXT:    store i32 [[T_0]], i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 0), align 4
+; CHECK-NEXT:    store i32 [[F_0]], i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 1), align 4
+; CHECK-NEXT:    ret i32 undef
+;
  entry:
    %0 = load i32, i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 0), align 4
    %1 = load i32, i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 1), align 4
@@ -24,8 +49,8 @@ sw.bb:                                            ; preds = %entry
    %and = and i32 %3, 7
    store i32 %and, i32* @a, align 4
    switch i32 %and, label %if.end [
-    i32 7, label %save_state_and_return
-    i32 0, label %save_state_and_return
+  i32 7, label %save_state_and_return
+  i32 0, label %save_state_and_return
    ]
  
  if.end:                                           ; preds = %sw.bb
diff --git a/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll

index 916772ca8ccd55422c4eac3563ac90ca86c571af..9108e84a3a64da6cd38971f0aa577a910e9c8467 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -1,9 +1,41 @@
-; RUN: opt < %s -basicaa -disable-verify -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -disable-verify -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-darwin13.3.0"
  
  define void @_foo(double %p1, double %p2, double %p3) #0 {
+; CHECK-LABEL: @_foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TAB1:%.*]] = alloca [256 x i32], align 16
+; CHECK-NEXT:    [[TAB2:%.*]] = alloca [256 x i32], align 16
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04
+; CHECK-NEXT:    [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
+; CHECK-NEXT:    [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[T_0259]], i32 0
+; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
+; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa !0
+; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[P3_ADDR_0258]], i32 0
+; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
+; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
+; CHECK-NEXT:    [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
  entry:
    %tab1 = alloca [256 x i32], align 16
    %tab2 = alloca [256 x i32], align 16
diff --git a/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll

index 5a576c2a7222ebcbe3226855d4383fc1dbc4d41c..9a07cfc8431b3df65be775a7a723417ca81fc964 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -6,6 +7,83 @@ target triple = "x86_64-apple-macosx10.8.0"
  %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171 = type { i32, i32, i32, i32, i32, i32, [8 x i8] }
  
  define void @SIM4() {
+; CHECK-LABEL: @SIM4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK:       lor.lhs.false:
+; CHECK-NEXT:    br i1 undef, label [[RETURN]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END605:%.*]], label [[FOR_BODY_LR_PH:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br i1 undef, label [[FOR_INC603:%.*]], label [[IF_END12:%.*]]
+; CHECK:       if.end12:
+; CHECK-NEXT:    br i1 undef, label [[LAND_LHS_TRUE:%.*]], label [[LAND_LHS_TRUE167:%.*]]
+; CHECK:       land.lhs.true:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN17:%.*]], label [[LAND_LHS_TRUE167]]
+; CHECK:       if.then17:
+; CHECK-NEXT:    br i1 undef, label [[IF_END98:%.*]], label [[LAND_RHS_LR_PH:%.*]]
+; CHECK:       land.rhs.lr.ph:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end98:
+; CHECK-NEXT:    [[FROM299:%.*]] = getelementptr inbounds [[STRUCT__EXON_T_12_103_220_363_480_649_740_857_1039_1065_1078_1091_1117_1130_1156_1169_1195_1221_1234_1286_1299_1312_1338_1429_1455_1468_1494_1520_1884_1897_1975_2066_2105_2170_2171:%.*]], %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 1
+; CHECK-NEXT:    br i1 undef, label [[LAND_LHS_TRUE167]], label [[IF_THEN103:%.*]]
+; CHECK:       if.then103:
+; CHECK-NEXT:    [[DOTSUB100:%.*]] = select i1 undef, i32 250, i32 undef
+; CHECK-NEXT:    [[MUL114:%.*]] = shl nsw i32 [[DOTSUB100]], 2
+; CHECK-NEXT:    [[FROM1115:%.*]] = getelementptr inbounds [[STRUCT__EXON_T_12_103_220_363_480_649_740_857_1039_1065_1078_1091_1117_1130_1156_1169_1195_1221_1234_1286_1299_1312_1338_1429_1455_1468_1494_1520_1884_1897_1975_2066_2105_2170_2171]], %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 0
+; CHECK-NEXT:    [[COND125:%.*]] = select i1 undef, i32 undef, i32 [[MUL114]]
+; CHECK-NEXT:    br label [[FOR_COND_I:%.*]]
+; CHECK:       for.cond.i:
+; CHECK-NEXT:    [[ROW_0_I:%.*]] = phi i32 [ undef, [[LAND_RHS_I874:%.*]] ], [ [[DOTSUB100]], [[IF_THEN103]] ]
+; CHECK-NEXT:    [[COL_0_I:%.*]] = phi i32 [ undef, [[LAND_RHS_I874]] ], [ [[COND125]], [[IF_THEN103]] ]
+; CHECK-NEXT:    br i1 undef, label [[LAND_RHS_I874]], label [[FOR_END_I:%.*]]
+; CHECK:       land.rhs.i874:
+; CHECK-NEXT:    br i1 undef, label [[FOR_COND_I]], label [[FOR_END_I]]
+; CHECK:       for.end.i:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN_I:%.*]], label [[IF_END_I:%.*]]
+; CHECK:       if.then.i:
+; CHECK-NEXT:    [[ADD14_I:%.*]] = add nsw i32 [[ROW_0_I]], undef
+; CHECK-NEXT:    [[ADD15_I:%.*]] = add nsw i32 [[COL_0_I]], undef
+; CHECK-NEXT:    br label [[EXTEND_BW_EXIT:%.*]]
+; CHECK:       if.end.i:
+; CHECK-NEXT:    [[ADD16_I:%.*]] = add i32 [[COND125]], [[DOTSUB100]]
+; CHECK-NEXT:    [[CMP26514_I:%.*]] = icmp slt i32 [[ADD16_I]], 0
+; CHECK-NEXT:    br i1 [[CMP26514_I]], label [[FOR_END33_I:%.*]], label [[FOR_BODY28_LR_PH_I:%.*]]
+; CHECK:       for.body28.lr.ph.i:
+; CHECK-NEXT:    br label [[FOR_END33_I]]
+; CHECK:       for.end33.i:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END58_I:%.*]], label [[FOR_BODY52_LR_PH_I:%.*]]
+; CHECK:       for.body52.lr.ph.i:
+; CHECK-NEXT:    br label [[FOR_END58_I]]
+; CHECK:       for.end58.i:
+; CHECK-NEXT:    br label [[WHILE_COND260_I:%.*]]
+; CHECK:       while.cond260.i:
+; CHECK-NEXT:    br i1 undef, label [[LAND_RHS263_I:%.*]], label [[WHILE_END275_I:%.*]]
+; CHECK:       land.rhs263.i:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND260_I]], label [[WHILE_END275_I]]
+; CHECK:       while.end275.i:
+; CHECK-NEXT:    br label [[EXTEND_BW_EXIT]]
+; CHECK:       extend_bw.exit:
+; CHECK-NEXT:    [[ADD14_I1262:%.*]] = phi i32 [ [[ADD14_I]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ]
+; CHECK-NEXT:    [[ADD15_I1261:%.*]] = phi i32 [ [[ADD15_I]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ]
+; CHECK-NEXT:    br i1 false, label [[IF_THEN157:%.*]], label [[LAND_LHS_TRUE167]]
+; CHECK:       if.then157:
+; CHECK-NEXT:    [[ADD158:%.*]] = add nsw i32 [[ADD14_I1262]], 1
+; CHECK-NEXT:    store i32 [[ADD158]], i32* [[FROM299]], align 4
+; CHECK-NEXT:    [[ADD160:%.*]] = add nsw i32 [[ADD15_I1261]], 1
+; CHECK-NEXT:    store i32 [[ADD160]], i32* [[FROM1115]], align 4
+; CHECK-NEXT:    br label [[LAND_LHS_TRUE167]]
+; CHECK:       land.lhs.true167:
+; CHECK-NEXT:    unreachable
+; CHECK:       for.inc603:
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY]], label [[FOR_END605]]
+; CHECK:       for.end605:
+; CHECK-NEXT:    unreachable
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
  entry:
    br i1 undef, label %return, label %lor.lhs.false
  
diff --git a/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll

index 273584c11a9c5aa9150f9568b6119da5483c1e6f..e2d36376f5ea78526c099421f5793cb7644ac475 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -8,6 +9,45 @@ target triple = "x86_64-apple-macosx10.8.0"
  
  ; Function Attrs: ssp uwtable
  define void @main() #0 {
+; CHECK-LABEL: @main(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    br label [[INVOKE_CONT:%.*]]
+; CHECK:       invoke.cont:
+; CHECK-NEXT:    br i1 undef, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
+; CHECK:       arrayctor.cont:
+; CHECK-NEXT:    [[AGG_TMP99208_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_113_119_137_149_185_329_389_416:%.*]], %struct.Ray.5.11.53.113.119.137.149.185.329.389.416* undef, i64 0, i32 0, i32 0
+; CHECK-NEXT:    [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_113_119_137_149_185_329_389_416]], %struct.Ray.5.11.53.113.119.137.149.185.329.389.416* undef, i64 0, i32 1, i32 0
+; CHECK-NEXT:    br label [[FOR_COND36_PREHEADER:%.*]]
+; CHECK:       for.cond36.preheader:
+; CHECK-NEXT:    br i1 undef, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
+; CHECK:       cond.false51.us:
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.true48.us:
+; CHECK-NEXT:    br i1 undef, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
+; CHECK:       cond.false66.us:
+; CHECK-NEXT:    [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[ADD_I276_US]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0xBFA5CC2D1960285F, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> <double 0.000000e+00, double undef>, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> <double 1.400000e+02, double 1.400000e+02>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> <double 5.000000e+01, double 5.200000e+01>, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> undef, [[TMP2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[AGG_TMP101211_SROA_0_0_IDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.true63.us:
+; CHECK-NEXT:    unreachable
+; CHECK:       for.body42.lr.ph.us:
+; CHECK-NEXT:    br i1 undef, label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
+; CHECK:       _Z5clampd.exit.1:
+; CHECK-NEXT:    br label [[FOR_COND36_PREHEADER]]
+;
  entry:
    br i1 undef, label %cond.true, label %cond.end
  
@@ -67,6 +107,27 @@ _Z5clampd.exit.1:                                 ; preds = %for.cond36.preheade
  %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 = type { double, double, double }
  
  define void @_Z8radianceRK3RayiPt() #0 {
+; CHECK-LABEL: @_Z8radianceRK3RayiPt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
+; CHECK:       if.then38:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double undef, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]]
+; CHECK-NEXT:    [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       if.then78:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
  entry:
    br i1 undef, label %if.then78, label %if.then38
  
diff --git a/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll

index 45ca99a3ea161a29fa9a83ae509b3d795ac98ea9..24f5bad039894a290b168540b5ee91f3da245242 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
@@ -1,14 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -basicaa -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -mcpu=corei7-avx -S < %s | FileCheck %s
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.9.0"
  
  
  ; This test used to crash because we were following phi chains incorrectly.
-; We used indices to get the incoming value of two phi nodes rather than 
+; We used indices to get the incoming value of two phi nodes rather than
  ; incoming block lookup.
  ; This can give wrong results when the ordering of incoming
  ; edges in the two phi nodes don't match.
-;CHECK-LABEL: bar
  
  %0 = type { %1, %2 }
  %1 = type { double, double }
@@ -17,6 +17,36 @@ target triple = "x86_64-apple-macosx10.9.0"
  
  ;define fastcc void @bar() {
  define void @bar() {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* undef, i64 0, i32 1, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 1
+; CHECK-NEXT:    br label [[TMP7:%.*]]
+; CHECK:         [[TMP8:%.*]] = phi <2 x double> [ <double 1.800000e+01, double 2.800000e+01>, [[TMP0]] ], [ [[TMP11:%.*]], [[TMP21:%.*]] ], [ [[TMP11]], [[TMP18:%.*]] ], [ [[TMP11]], [[TMP18]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
+; CHECK-NEXT:    [[TMP11]] = load <2 x double>, <2 x double>* [[TMP10]], align 8
+; CHECK-NEXT:    br i1 undef, label [[TMP12:%.*]], label [[TMP13:%.*]]
+; CHECK:         ret void
+; CHECK:         [[TMP14:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8
+; CHECK-NEXT:    br i1 undef, label [[TMP15:%.*]], label [[TMP16:%.*]]
+; CHECK:         br label [[TMP16]]
+; CHECK:         br i1 undef, label [[TMP17:%.*]], label [[TMP18]]
+; CHECK:         unreachable
+; CHECK:         [[TMP19:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
+; CHECK-NEXT:    switch i32 undef, label [[TMP21]] [
+; CHECK-NEXT:    i32 32, label [[TMP7]]
+; CHECK-NEXT:    i32 103, label [[TMP7]]
+; CHECK-NEXT:    ]
+; CHECK:         br i1 undef, label [[TMP7]], label [[TMP22:%.*]]
+; CHECK:         unreachable
+;
    %1 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 0
    %2 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 1
    %3 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 0
@@ -53,8 +83,8 @@ define void @bar() {
  
  ; <label>:17                                      ; preds = %15
    switch i32 undef, label %18 [
-    i32 32, label %7
-    i32 103, label %7
+  i32 32, label %7
+  i32 103, label %7
    ]
  
  ; <label>:18                                      ; preds = %17
diff --git a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll

index eac20a00645007f932d0cabf6848e3c27c2c2e51..98db3edd90ead0d11dc06a6b2841aafc59cd1b1e 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
+++ b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -16,15 +17,26 @@ target triple = "x86_64-apple-macosx10.8.0"
  ; }
  
  
-;CHECK-LABEL: @foo(
-;CHECK: load <2 x float>
-;CHECK: fadd <2 x float>
-;CHECK: call i32
-;CHECK: load <2 x double>
-;CHECK: fadd <2 x double>
-;CHECK: store <2 x double>
-;CHECK: ret
  define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <2 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> <float 5.000000e+00, float 8.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[G:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @bar()
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[A]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    ret i32 undef
+;
  entry:
    %0 = load float, float* %B, align 4
    %arrayidx1 = getelementptr inbounds float, float* %B, i64 1
diff --git a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll

index 0a4e961c2e88540b13d31174711e514112ed47dc..ac6933304780fe0957cb4c58eefd3fd4e135c839 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
+++ b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,17 +12,28 @@ target triple = "x86_64-apple-macosx10.9.0"
  ;   A[0] = r; A[1] = g; A[2] = b; A[3] = a;
  ; }
  
-;CHECK-LABEL: @foo
-;CHECK: bitcast i32* %A to <4 x i32>*
-;CHECK-NEXT: load <4 x i32>
-;CHECK: phi <4 x i32>
-;CHECK-NEXT: mul nsw <4 x i32>
-;CHECK-NOT: mul
-;CHECK: phi <4 x i32>
-;CHECK: bitcast i32* %A to <4 x i32>*
-;CHECK-NEXT: store <4 x i32>
-;CHECK-NEXT:ret i32 undef
  define i32 @foo(i32* nocapture %A) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_029:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <4 x i32> [ [[TMP4:%.*]], [[FOR_BODY]] ], [ [[TMP1]], [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP4]] = mul nsw <4 x i32> <i32 18, i32 19, i32 12, i32 9>, [[TMP3]]
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_029]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ [[TMP1]], [[ENTRY]] ], [ [[TMP4]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
  entry:
    %0 = load i32, i32* %A, align 4
    %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 1
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll

index cdf2455d66598a734aa41b53634e8ec5a5d3ebb2..0fe399e725d724ed8d1a885816d2d99b1bb4dc73 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -12,16 +13,28 @@ target triple = "x86_64-apple-macosx10.7.0"
  ;   A[8] = y0; A[8+1] = y1;
  ; }
  
-;CHECK: @depth
-;CHECK: getelementptr inbounds {{.*}}, !dbg ![[LOC:[0-9]+]]
-;CHECK: bitcast double* {{.*}}, !dbg ![[LOC]]
-;CHECK: load <2 x double>, <2 x double>* {{.*}}, !dbg ![[LOC]]
-;CHECK: store <2 x double> {{.*}}, !dbg ![[LOC2:[0-9]+]]
-;CHECK: ret
-;CHECK: ![[LOC]] = !DILocation(line: 4, scope:
-;CHECK: ![[LOC2]] = !DILocation(line: 7, scope:
-
  define i32 @depth(double* nocapture %A, i32 %m) #0 !dbg !4 {
+; CHECK-LABEL: @depth(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double* [[A:%.*]], metadata !12, metadata !DIExpression()), !dbg !18
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[M:%.*]], metadata !13, metadata !DIExpression()), !dbg !18
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 0.000000e+00, metadata !14, metadata !DIExpression()), !dbg !19
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 2.000000e-01, metadata !15, metadata !DIExpression()), !dbg !19
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 0, metadata !16, metadata !DIExpression()), !dbg !20
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[M]], 0, !dbg !20
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]], !dbg !20
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 4, !dbg !21
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*, !dbg !21
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !dbg !21
+; CHECK-NEXT:    br label [[FOR_END]], !dbg !20
+; CHECK:       for.end:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[FOR_BODY_LR_PH]] ], [ <double 0.000000e+00, double 1.000000e+00>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 8, !dbg !23
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*, !dbg !23
+; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8, !dbg !23
+; CHECK-NEXT:    ret i32 undef, !dbg !24
+;
  entry:
    tail call void @llvm.dbg.value(metadata double* %A, i64 0, metadata !12, metadata !DIExpression()), !dbg !19
    tail call void @llvm.dbg.value(metadata i32 %m, i64 0, metadata !13, metadata !DIExpression()), !dbg !19
diff --git a/test/Transforms/SLPVectorizer/X86/diamond.ll b/test/Transforms/SLPVectorizer/X86/diamond.ll

index 4e2c02f6965a808fab125230555ca0386073a39e..3cba8ea00f7cdc12b35db905fb12810da51aca4d 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/diamond.ll
+++ b/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,12 +12,27 @@ target triple = "x86_64-apple-macosx10.8.0"
  ;   return 0;
  ; }
  
-; CHECK-LABEL: @foo(
-; CHECK: load <4 x i32>
-; CHECK: mul <4 x i32>
-; CHECK: store <4 x i32>
-; CHECK: ret
  define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    ret i32 0
+;
  entry:
    %0 = load i32, i32* %A, align 4
    %mul238 = add i32 %m, %n
@@ -49,12 +65,28 @@ entry:
  ;   return A[0];
  ; }
  
-; CHECK-LABEL: @extr_user(
-; CHECK: load <4 x i32>
-; CHECK: store <4 x i32>
-; CHECK: extractelement <4 x i32>
-; CHECK-NEXT: ret
  define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+; CHECK-LABEL: @extr_user(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+; CHECK-NEXT:    ret i32 [[TMP8]]
+;
  entry:
    %0 = load i32, i32* %A, align 4
    %mul238 = add i32 %m, %n
@@ -79,12 +111,28 @@ entry:
  }
  
  ; In this example we have an external user that is not the first element in the vector.
-; CHECK-LABEL: @extr_user1(
-; CHECK: load <4 x i32>
-; CHECK: store <4 x i32>
-; CHECK: extractelement <4 x i32>
-; CHECK-NEXT: ret
  define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+; CHECK-LABEL: @extr_user1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
+; CHECK-NEXT:    ret i32 [[TMP8]]
+;
  entry:
    %0 = load i32, i32* %A, align 4
    %mul238 = add i32 %m, %n
diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll b/test/Transforms/SLPVectorizer/X86/external_user.ll

index bf2febda86b43aeea396176b27d647fd5138ddf3..8ee644f939ba015dbc1fa11091821abe9ec03f30 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -22,16 +23,27 @@ target triple = "x86_64-apple-macosx10.8.0"
  ;   return x; <-- must extract here!
  ; }
  
-;CHECK: ext_user
-;CHECK: phi <2 x double>
-;CHECK: fadd <2 x double>
-;CHECK: fmul <2 x double>
-;CHECK: br
-;CHECK: store <2 x double>
-;CHECK: extractelement <2 x double>
-;CHECK: ret double
-
  define double @ext_user(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) {
+; CHECK-LABEL: @ext_user(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_020:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> <double 1.000000e+01, double 1.000000e+01>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> <double 4.000000e+00, double 4.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP5]] = fadd <2 x double> <double 4.000000e+00, double 4.000000e+00>, [[TMP4]]
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_020]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; CHECK-NEXT:    ret double [[TMP7]]
+;
  entry:
    %arrayidx = getelementptr inbounds double, double* %A, i64 1
    %0 = load double, double* %arrayidx, align 8
@@ -65,9 +77,33 @@ for.end:                                          ; preds = %for.body
  ; This test would assert because we would keep the scalar fpext and fadd alive.
  ; PR18129
  
-; CHECK-LABEL: needtogather
  define i32 @needtogather(double *noalias %a, i32 *noalias %b,  float * noalias %c,
-                i32 * noalias %d) {
+; CHECK-LABEL: @needtogather(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[C:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub float 0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUB]], 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[CONV]], [[MUL]]
+; CHECK-NEXT:    [[CONV1:%.*]] = fpext float [[ADD]] to double
+; CHECK-NEXT:    [[SUB3:%.*]] = fsub float 1.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[MUL4:%.*]] = fmul float [[SUB3]], 0.000000e+00
+; CHECK-NEXT:    [[ADD5:%.*]] = fadd float [[CONV]], [[MUL4]]
+; CHECK-NEXT:    [[CONV6:%.*]] = fpext float [[ADD5]] to double
+; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[ADD]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi double [ [[CONV6]], [[IF_THEN]] ], [ [[CONV1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[E_0:%.*]] = phi double [ [[CONV1]], [[IF_THEN]] ], [ [[CONV6]], [[ENTRY]] ]
+; CHECK-NEXT:    store double [[STOREMERGE]], double* [[A:%.*]], align 8
+; CHECK-NEXT:    [[CONV7:%.*]] = fptosi double [[E_0]] to i32
+; CHECK-NEXT:    store i32 [[CONV7]], i32* [[B:%.*]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+  i32 * noalias %d) {
  entry:
    %0 = load i32, i32* %d, align 4
    %conv = sitofp i32 %0 to float
diff --git a/test/Transforms/SLPVectorizer/X86/extractcost.ll b/test/Transforms/SLPVectorizer/X86/extractcost.ll

index 164ddf38c1929e1d445be91a168d4e4ac99a8190..c9fae4460e5708ec2435641722d9d1abb6279d74 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/extractcost.ll
+++ b/test/Transforms/SLPVectorizer/X86/extractcost.ll
@@ -1,12 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
-;CHECK-LABEL: @foo(
-;CHECK: store <4 x i32>
-;CHECK: ret
  define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], <i32 5, i32 9, i32 3, i32 10>
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], <i32 5, i32 9, i32 3, i32 10>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> <i32 9, i32 9, i32 9, i32 9>, [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0
+; CHECK-NEXT:    [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP9]], [[M:%.*]]
+; CHECK-NEXT:    [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP9]], [[M]]
+; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 [[EXTERNALUSE1]], [[EXTERNALUSE2]]
+; CHECK-NEXT:    ret i32 [[ADD10]]
+;
  entry:
    %mul = mul nsw i32 %n, 5
    %add = add nsw i32 %mul, 9
@@ -23,7 +39,7 @@ entry:
    %add8 = add nsw i32 %mul7, 9
    %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 3
    store i32 %add8, i32* %arrayidx9, align 4
-  %externaluse1 = add nsw i32 %add, %m  
+  %externaluse1 = add nsw i32 %add, %m
    %externaluse2 = mul nsw i32 %add, %m  ; we should add the extract cost only once and the store will be vectorized
    %add10 = add nsw i32 %externaluse1, %externaluse2
    ret i32 %add10
diff --git a/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll b/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll

index add4858b7c53bcb3430aeb70b26b38c09f8913c1..25394f488493fbf0bbbf1a1ecc3e5697fd8401db 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll
+++ b/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll
@@ -1,14 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; Regression test from https://bugs.llvm.org/show_bug.cgi?id=39168
  ; Based on code from `compiler-rt/lib/builtins/multc3.c`
  ; On plaforms where fp128 lowers to an interger type (soft-fp) we
  ; shouldn't be calling isFAbsFree() on the legalized type.
  
  ; RUN: opt -slp-vectorizer -slp-threshold=-10 -S %s | FileCheck %s
-; CHECK: call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128
  
  target triple = "i686-unknown-linux-gnu"
  
  define void @vectorize_fp128(fp128 %c, fp128 %d) #0 {
+; CHECK-LABEL: @vectorize_fp128(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x fp128> undef, fp128 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x fp128> [[TMP0]], fp128 [[D:%.*]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oeq <2 x fp128> [[TMP2]], <fp128 0xL00000000000000007FFF000000000000, fp128 0xL00000000000000007FFF000000000000>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
+; CHECK-NEXT:    [[OR_COND39:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    br i1 [[OR_COND39]], label [[IF_THEN13:%.*]], label [[IF_END24:%.*]]
+; CHECK:       if.then13:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end24:
+; CHECK-NEXT:    ret void
+;
  entry:
    %0 = tail call fp128 @llvm.fabs.f128(fp128 %c)
    %cmpinf10 = fcmp oeq fp128 %0, 0xL00000000000000007FFF000000000000
diff --git a/test/Transforms/SLPVectorizer/X86/flag.ll b/test/Transforms/SLPVectorizer/X86/flag.ll

index 7db8d75c20a7f1d9794818e4bba44ecf6fe1f20a..e695c02ef1042d71f622a1ac35692b0ab8394c5d 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/flag.ll
+++ b/test/Transforms/SLPVectorizer/X86/flag.ll
@@ -1,14 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=1000 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  ; Check that the command line flag works.
-;CHECK:rollable
-;CHECK-NOT:load <4 x i32>
-;CHECK: ret
-
  define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i64 %n) {
+; CHECK-LABEL: @rollable(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH:%.*]]
+; CHECK:       .lr.ph:
+; CHECK-NEXT:    [[I_019:%.*]] = phi i64 [ [[TMP26:%.*]], [[DOTLR_PH]] ], [ 0, [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[I_019]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP2]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP2]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP4]], 7
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], 7
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP7]], 7
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 14
+; CHECK-NEXT:    [[TMP18:%.*]] = mul i32 [[TMP10]], 7
+; CHECK-NEXT:    [[TMP19:%.*]] = add i32 [[TMP18]], 21
+; CHECK-NEXT:    [[TMP20:%.*]] = mul i32 [[TMP13]], 7
+; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TMP20]], 28
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    store i32 [[TMP15]], i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP5]]
+; CHECK-NEXT:    store i32 [[TMP17]], i32* [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP8]]
+; CHECK-NEXT:    store i32 [[TMP19]], i32* [[TMP24]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP11]]
+; CHECK-NEXT:    store i32 [[TMP21]], i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP26]] = add i64 [[I_019]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP26]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]]
+; CHECK:       ._crit_edge:
+; CHECK-NEXT:    ret i32 undef
+;
    %1 = icmp eq i64 %n, 0
    br i1 %1, label %._crit_edge, label %.lr.ph
  
diff --git a/test/Transforms/SLPVectorizer/X86/gep.ll b/test/Transforms/SLPVectorizer/X86/gep.ll

index 60b0a114c516c70e8d2d8b73c49a2c924e0b6e46..a3cce8b0c5ee0e03107fe4e75259bed02f43236d 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/gep.ll
+++ b/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s
  ; RUN: opt < %s -aa-pipeline=basic-aa -passes=slp-vectorizer -S |FileCheck %s
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -8,9 +9,19 @@ target triple = "x86_64-unknown-unknown"
  ;   x->first  = y->first  + 16
  ;   x->second = y->second + 16
  
-; CHECK-LABEL: foo1
-; CHECK: <2 x i32*>
  define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
+; CHECK-LABEL: @foo1(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, <2 x i32*> [[TMP5]], <2 x i64> <i64 16, i64 16>
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32** [[TMP2]] to <2 x i32*>*
+; CHECK-NEXT:    store <2 x i32*> [[TMP6]], <2 x i32*>* [[TMP8]], align 8
+; CHECK-NEXT:    ret void
+;
    %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0
    %2 = load i32*, i32** %1, align 8
    %3 = getelementptr inbounds i32, i32* %2, i64 16
@@ -26,9 +37,20 @@ define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
  
  ; Test that we don't vectorize GEP expressions if indexes are not constants.
  ; We can't produce an efficient code in that case.
-; CHECK-LABEL: foo2
-; CHECK-NOT: <2 x i32*>
  define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[I:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i32* [[TMP3]], i32** [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[I]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X]], i64 0, i32 1
+; CHECK-NEXT:    store i32* [[TMP7]], i32** [[TMP8]], align 8
+; CHECK-NEXT:    ret void
+;
    %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0
    %2 = load i32*, i32** %1, align 8
    %3 = getelementptr inbounds i32, i32* %2, i32 %i
diff --git a/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll b/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll

index 1cd28a909f79076f7f0d90df49a8b6e9184db45f..f9b9995066a22e26de4b62169bf24866e0f5855c 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
+++ b/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -S -slp-vectorizer
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -slp-vectorizer | FileCheck %s
  
  ; This code has GEPs with different index types, which should not
  ; matter for the SLPVectorizer.
@@ -6,6 +7,19 @@
  target triple = "x86_64--linux"
  
  define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[LS1_PH:%.*]] = phi float* [ [[_TMP1:%.*]], [[BB1]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LS2_PH:%.*]] = phi float* [ [[_TMP2:%.*]], [[BB1]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    store float undef, float* [[LS1_PH]]
+; CHECK-NEXT:    [[_TMP1]] = getelementptr float, float* [[LS1_PH]], i32 1
+; CHECK-NEXT:    [[_TMP2]] = getelementptr float, float* [[LS2_PH]], i64 4
+; CHECK-NEXT:    br i1 false, label [[BB1]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret void
+;
  entry:
    br label %bb1
  
diff --git a/test/Transforms/SLPVectorizer/X86/implicitfloat.ll b/test/Transforms/SLPVectorizer/X86/implicitfloat.ll

index f7283f0d0271918bb00b4fed18280abd57445d57..2fbb8f0477f23ae434369218ba8d00810ddc0338 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
+++ b/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
@@ -1,13 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  ; Don't vectorize when noimplicitfloat is used.
-; CHECK: test1
-; CHECK-NOT: store <2 x double>
-; CHECK: ret
  define void @test1(double* %a, double* %b, double* %c) noimplicitfloat { ; <------ noimplicitfloat attribute here!
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load double, double* [[A:%.*]], align 8
+; CHECK-NEXT:    [[I1:%.*]] = load double, double* [[B:%.*]], align 8
+; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[I0]], [[I1]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
+; CHECK-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
+; CHECK-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[MUL5:%.*]] = fmul double [[I3]], [[I4]]
+; CHECK-NEXT:    store double [[MUL]], double* [[C:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C]], i64 1
+; CHECK-NEXT:    store double [[MUL5]], double* [[ARRAYIDX5]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load double, double* %a, align 8
    %i1 = load double, double* %b, align 8
diff --git a/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/test/Transforms/SLPVectorizer/X86/intrinsic.ll

index cc5a4afe43d91499b4ab3935e8c52ec3ce7ea7f1..fbce7551b00b2c872736d12cbb244b4f79578df0 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/intrinsic.ll
+++ b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -5,13 +6,19 @@ target triple = "x86_64-apple-macosx10.8.0"
  
  declare double @llvm.fabs.f64(double) nounwind readnone
  
-;CHECK-LABEL: @vec_fabs_f64(
-;CHECK: load <2 x double>
-;CHECK: load <2 x double>
-;CHECK: call <2 x double> @llvm.fabs.v2f64
-;CHECK: store <2 x double>
-;CHECK: ret
  define void @vec_fabs_f64(double* %a, double* %b, double* %c) {
+; CHECK-LABEL: @vec_fabs_f64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load double, double* %a, align 8
    %i1 = load double, double* %b, align 8
@@ -31,13 +38,18 @@ entry:
  
  declare float @llvm.copysign.f32(float, float) nounwind readnone
  
-;CHECK-LABEL: @vec_copysign_f32(
-;CHECK: load <4 x float>
-;CHECK: load <4 x float>
-;CHECK: call <4 x float> @llvm.copysign.v4f32
-;CHECK: store <4 x float>
-;CHECK: ret
  define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) {
+; CHECK-LABEL: @vec_copysign_f32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %0 = load float, float* %a, align 4
    %1 = load float, float* %b, align 4
@@ -74,6 +86,18 @@ entry:
  declare i32 @llvm.bswap.i32(i32) nounwind readnone
  
  define void @vec_bswap_i32(i32* %a, i32* %b, i32* %c) {
+; CHECK-LABEL: @vec_bswap_i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load i32, i32* %a, align 4
    %i1 = load i32, i32* %b, align 4
@@ -110,17 +134,23 @@ entry:
    store i32 %call4, i32* %arrayidx10, align 4
    ret void
  
-; CHECK-LABEL: @vec_bswap_i32(
-; CHECK: load <4 x i32>
-; CHECK: load <4 x i32>
-; CHECK: call <4 x i32> @llvm.bswap.v4i32
-; CHECK: store <4 x i32>
-; CHECK: ret
  }
  
  declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
  
  define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
+; CHECK-LABEL: @vec_ctlz_i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP5]], i1 true)
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load i32, i32* %a, align 4
    %i1 = load i32, i32* %b, align 4
@@ -157,15 +187,42 @@ entry:
    store i32 %call4, i32* %arrayidx10, align 4
    ret void
  
-; CHECK-LABEL: @vec_ctlz_i32(
-; CHECK: load <4 x i32>
-; CHECK: load <4 x i32>
-; CHECK: call <4 x i32> @llvm.ctlz.v4i32
-; CHECK: store <4 x i32>
-; CHECK: ret
  }
  
  define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+; CHECK-LABEL: @vec_ctlz_i32_neg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD1]], i1 true) #3
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 1
+; CHECK-NEXT:    [[I3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD2]], i1 false) #3
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
+; CHECK-NEXT:    [[I5:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD3]], i1 true) #3
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
+; CHECK-NEXT:    [[I7:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
+; CHECK-NEXT:    [[CALL4:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD4]], i1 false) #3
+; CHECK-NEXT:    store i32 [[CALL1]], i32* [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 1
+; CHECK-NEXT:    store i32 [[CALL2]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 2
+; CHECK-NEXT:    store i32 [[CALL3]], i32* [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 3
+; CHECK-NEXT:    store i32 [[CALL4]], i32* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load i32, i32* %a, align 4
    %i1 = load i32, i32* %b, align 4
@@ -202,8 +259,6 @@ entry:
    store i32 %call4, i32* %arrayidx10, align 4
    ret void
  
-; CHECK-LABEL: @vec_ctlz_i32_neg(
-; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
  
  }
  
@@ -211,6 +266,18 @@ entry:
  declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
  
  define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
+; CHECK-LABEL: @vec_cttz_i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP5]], i1 true)
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load i32, i32* %a, align 4
    %i1 = load i32, i32* %b, align 4
@@ -247,15 +314,42 @@ entry:
    store i32 %call4, i32* %arrayidx10, align 4
    ret void
  
-; CHECK-LABEL: @vec_cttz_i32(
-; CHECK: load <4 x i32>
-; CHECK: load <4 x i32>
-; CHECK: call <4 x i32> @llvm.cttz.v4i32
-; CHECK: store <4 x i32>
-; CHECK: ret
  }
  
  define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+; CHECK-LABEL: @vec_cttz_i32_neg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD1]], i1 true) #3
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 1
+; CHECK-NEXT:    [[I3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD2]], i1 false) #3
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
+; CHECK-NEXT:    [[I5:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD3]], i1 true) #3
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
+; CHECK-NEXT:    [[I7:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
+; CHECK-NEXT:    [[CALL4:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD4]], i1 false) #3
+; CHECK-NEXT:    store i32 [[CALL1]], i32* [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 1
+; CHECK-NEXT:    store i32 [[CALL2]], i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 2
+; CHECK-NEXT:    store i32 [[CALL3]], i32* [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 3
+; CHECK-NEXT:    store i32 [[CALL4]], i32* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load i32, i32* %a, align 4
    %i1 = load i32, i32* %b, align 4
@@ -292,13 +386,23 @@ entry:
    store i32 %call4, i32* %arrayidx10, align 4
    ret void
  
-; CHECK-LABEL: @vec_cttz_i32_neg(
-; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
  }
  
  
  declare float @llvm.powi.f32(float, i32)
  define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
+; CHECK-LABEL: @vec_powi_f32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.powi.v4f32(<4 x float> [[TMP4]], i32 [[P:%.*]])
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load float, float* %a, align 4
    %i1 = load float, float* %b, align 4
@@ -335,16 +439,43 @@ entry:
    store float %call4, float* %arrayidx10, align 4
    ret void
  
-; CHECK-LABEL: @vec_powi_f32(
-; CHECK: load <4 x float>
-; CHECK: load <4 x float>
-; CHECK: call <4 x float> @llvm.powi.v4f32
-; CHECK: store <4 x float>
-; CHECK: ret
  }
  
  
  define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
+; CHECK-LABEL: @vec_powi_f32_neg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load float, float* [[A:%.*]], align 4
+; CHECK-NEXT:    [[I1:%.*]] = load float, float* [[B:%.*]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[I0]], [[I1]]
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call float @llvm.powi.f32(float [[ADD1]], i32 [[P:%.*]]) #3
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i32 1
+; CHECK-NEXT:    [[I3:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[I2]], [[I3]]
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call float @llvm.powi.f32(float [[ADD2]], i32 [[Q:%.*]]) #3
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load float, float* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[B]], i32 2
+; CHECK-NEXT:    [[I5:%.*]] = load float, float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[I4]], [[I5]]
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call float @llvm.powi.f32(float [[ADD3]], i32 [[P]]) #3
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load float, float* [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[B]], i32 3
+; CHECK-NEXT:    [[I7:%.*]] = load float, float* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[I6]], [[I7]]
+; CHECK-NEXT:    [[CALL4:%.*]] = tail call float @llvm.powi.f32(float [[ADD4]], i32 [[Q]]) #3
+; CHECK-NEXT:    store float [[CALL1]], float* [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C]], i32 1
+; CHECK-NEXT:    store float [[CALL2]], float* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2
+; CHECK-NEXT:    store float [[CALL3]], float* [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3
+; CHECK-NEXT:    store float [[CALL4]], float* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load float, float* %a, align 4
    %i1 = load float, float* %b, align 4
@@ -381,6 +512,4 @@ entry:
    store float %call4, float* %arrayidx10, align 4
    ret void
  
-; CHECK-LABEL: @vec_powi_f32_neg(
-; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
  }
diff --git a/test/Transforms/SLPVectorizer/X86/long_chains.ll b/test/Transforms/SLPVectorizer/X86/long_chains.ll

index f87dabf4c9feb1e469e1d31a182412da5f767887..99b340addb925182cfcf53ef964ef3a1f3aece8b 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/long_chains.ll
+++ b/test/Transforms/SLPVectorizer/X86/long_chains.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -5,13 +6,31 @@ target triple = "x86_64-apple-macosx10.8.0"
  
  ; At this point we can't vectorize only parts of the tree.
  
-; CHECK: test
-; CHECK: insertelement <2 x i8>
-; CHECK: insertelement <2 x i8>
-; CHECK: sitofp <2 x i8>
-; CHECK: fmul <2 x double>
-; CHECK: ret
  define i32 @test(double* nocapture %A, i8* nocapture %B) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> <i8 3, i8 3>, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i8> undef, i8 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8 [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double>
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = fmul <2 x double> [[TMP13]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = fadd <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = fadd <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP17]], <2 x double>* [[TMP18]], align 8
+; CHECK-NEXT:    ret i32 undef
+;
  entry:
    %0 = load i8, i8* %B, align 1
    %arrayidx1 = getelementptr inbounds i8, i8* %B, i64 1
@@ -19,7 +38,7 @@ entry:
    %add = add i8 %0, 3
    %add4 = add i8 %1, 3
    %conv6 = sitofp i8 %add to double
-  %conv7 = sitofp i8 %add4 to double 
+  %conv7 = sitofp i8 %add4 to double
    %mul = fmul double %conv6, %conv6
    %add8 = fadd double %mul, 1.000000e+00
    %mul9 = fmul double %conv7, %conv7
diff --git a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll

index dace4b35b8711d6248aec8934b5c8fba50a14503..1b19aeae03776ec8385f71f43ba6ffb6b1ec6caa 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
+++ b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -1,14 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
-;CHECK-LABEL: @foo(
-;CHECK: load <8 x i32>
-;CHECK: add nsw <8 x i32>
-;CHECK: store <8 x i32>
-;CHECK: ret
  define i32 @foo(i32* nocapture %A, i32 %n) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP62:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP62]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[INDVARS_IV]], 5
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[INDVARS_IV]], 6
+; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[INDVARS_IV]], 7
+; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
+; CHECK-NEXT:    [[TMP8:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> undef, i32 [[N]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[N]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[N]], i32 2
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[N]], i32 3
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x i32> [[TMP12]], i32 [[N]], i32 4
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x i32> [[TMP13]], i32 [[N]], i32 5
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[N]], i32 6
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x i32> [[TMP15]], i32 [[N]], i32 7
+; CHECK-NEXT:    [[TMP17:%.*]] = add nsw <8 x i32> [[TMP16]], [[TMP8]]
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
+; CHECK-NEXT:    store <8 x i32> [[TMP17]], <8 x i32>* [[TMP18]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8
+; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP19]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 undef
+;
  entry:
    %cmp62 = icmp sgt i32 %n, 0
    br i1 %cmp62, label %for.body, label %for.end
diff --git a/test/Transforms/SLPVectorizer/X86/metadata.ll b/test/Transforms/SLPVectorizer/X86/metadata.ll

index ebef6b53c6a4c70e2e108bf5429061058fcc9988..fdfd032f3ce421a587f3124b73f6d3f5a9fc2712 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/metadata.ll
+++ b/test/Transforms/SLPVectorizer/X86/metadata.ll
@@ -1,16 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
-;CHECK-LABEL: test1
-;CHECK: load <2 x double>{{.*}}!tbaa ![[TBAA:[0-9]+]]
-;CHECK: load <2 x double>{{.*}}!tbaa ![[TBAA]]
-;CHECK: fmul <2 x double>{{.*}}!fpmath ![[FP1:[0-9]+]]
-;CHECK: store <2 x double>{{.*}}!tbaa ![[TBAA]]
-;CHECK: ret void
-
  define void @test1(double* %a, double* %b, double* %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !4
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa !0
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load double, double* %a, align 8, !tbaa !4
    %i1 = load double, double* %b, align 8, !tbaa !4
@@ -26,14 +31,19 @@ entry:
    ret void
  }
  
-;CHECK-LABEL: test2
-;CHECK: load <2 x double>{{.*}}!tbaa ![[TBAA]]
-;CHECK: load <2 x double>{{.*}}!tbaa ![[TBAA]]
-;CHECK: fmul <2 x double>{{.*}}!fpmath ![[FP2:[0-9]+]]
-;CHECK: store <2 x double>{{.*}}!tbaa ![[TBAA]]
-;CHECK: ret void
-
  define void @test2(double* %a, double* %b, i8* %e) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !5
+; CHECK-NEXT:    [[C:%.*]] = bitcast i8* [[E:%.*]] to double*
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa !0
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load double, double* %a, align 8, !tbaa !4
    %i1 = load double, double* %b, align 8, !tbaa !4
@@ -51,9 +61,9 @@ entry:
    ret void
  }
  
-;CHECK-DAG: ![[TBAA]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
-;CHECK-DAG: ![[FP1]] = !{float 5.000000e+00}
-;CHECK-DAG: ![[FP2]] = !{float 2.500000e+00}
+;CHECK-DAG: !0 = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
+;CHECK-DAG: !4 = !{float 5.000000e+00}
+;CHECK-DAG: !5 = !{float 2.500000e+00}
  !0 = !{ float 5.0 }
  !1 = !{ float 2.5 }
  !2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/SLPVectorizer/X86/multi_block.ll b/test/Transforms/SLPVectorizer/X86/multi_block.ll

index b381d06688bdeae48b5bfd4a43907c62ee1a05d8..d0216103d42ae46dd648a43821aa58121762a41d 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/multi_block.ll
+++ b/test/Transforms/SLPVectorizer/X86/multi_block.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -16,15 +17,23 @@ target triple = "x86_64-apple-macosx10.7.0"
  ; }
  
  
-;CHECK-LABEL: @bar(
-;CHECK: load <2 x double>
-;CHECK: fptrunc <2 x double>
-;CHECK: call i32
-;CHECK: fadd <2 x float>
-;CHECK: fpext <2 x float>
-;CHECK: store <2 x double>
-;CHECK: ret
  define i32 @bar(double* nocapture %A, i32 %d) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[D:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP7:%.*]], label [[TMP5:%.*]]
+; CHECK:         [[TMP6:%.*]] = tail call i32 (...) @foo()
+; CHECK-NEXT:    br label [[TMP7]]
+; CHECK:         [[TMP8:%.*]] = fadd <2 x float> <float 4.000000e+00, float 5.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, double* [[A]], i64 8
+; CHECK-NEXT:    [[TMP10:%.*]] = fpext <2 x float> [[TMP8]] to <2 x double>
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> <double 9.000000e+00, double 5.000000e+00>, [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP9]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8
+; CHECK-NEXT:    ret i32 undef
+;
    %1 = load double, double* %A, align 8
    %2 = getelementptr inbounds double, double* %A, i64 1
    %3 = load double, double* %2, align 8
diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll

index 3197f6db266f24dd74996c5de3fb03ff6fb1dfe4..ce8594ea84d77c9ff8134526424860d45b015b77 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/multi_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,13 +12,26 @@ target triple = "x86_64-apple-macosx10.7.0"
  ;  A[4] += n * 5 + 11;
  ;}
  
-;CHECK-LABEL: @foo(
-;CHECK: insertelement <4 x i32>
-;CHECK: load <4 x i32>
-;CHECK: add nsw <4 x i32>
-;CHECK: store <4 x i32>
-;CHECK: ret
  define i32 @foo(i32* nocapture %A, i32 %n) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw i32 [[N:%.*]], 5
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP1]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> <i32 7, i32 8, i32 9, i32 10>, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP1]], 11
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = add nsw i32 [[TMP11]], [[TMP13]]
+; CHECK-NEXT:    store i32 [[TMP14]], i32* [[TMP12]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
    %1 = mul nsw i32 %n, 5
    %2 = add nsw i32 %1, 7
    %3 = load i32, i32* %A, align 4
diff --git a/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll b/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll

index b250735874c5881a645b0d7ed11aec2f98eed2c8..3eae2d0b010896182b578f6814bfe3a7e776a20b 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll
+++ b/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000 | FileCheck %s
  
  target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
  target triple = "nvptx--nvidiacl"
@@ -9,6 +10,16 @@ target triple = "nvptx--nvidiacl"
  ; Test causes an assert if LLVM tries to vectorize CTLZ.
  
  define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
+; CHECK-LABEL: @cltz_test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[CALL_I:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP0]], i1 false)
+; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i8> undef, i8 [[CALL_I]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i8> [[X]], i32 1
+; CHECK-NEXT:    [[CALL_I4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <2 x i8> [[VECINIT]], i8 [[CALL_I4]], i32 1
+; CHECK-NEXT:    ret <2 x i8> [[VECINIT2]]
+;
  entry:
    %0 = extractelement <2 x i8> %x, i32 0
    %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
@@ -20,6 +31,16 @@ entry:
  }
  
  define <2 x i8> @cltz_test2(<2 x i8> %x) #1 {
+; CHECK-LABEL: @cltz_test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i8> [[X]], i32 1
+; CHECK-NEXT:    [[CALL_I:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP0]], i1 false)
+; CHECK-NEXT:    [[CALL_I4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i8> undef, i8 [[CALL_I]], i32 0
+; CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <2 x i8> [[VECINIT]], i8 [[CALL_I4]], i32 1
+; CHECK-NEXT:    ret <2 x i8> [[VECINIT2]]
+;
  entry:
    %0 = extractelement <2 x i8> %x, i32 0
    %1 = extractelement <2 x i8> %x, i32 1
diff --git a/test/Transforms/SLPVectorizer/X86/operandorder.ll b/test/Transforms/SLPVectorizer/X86/operandorder.ll

index dab7f296e20a6b7650e14c183932d56d80756b1d..2354ebd29879fd225374c1827cdf689f06ade9b3 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/operandorder.ll
+++ b/test/Transforms/SLPVectorizer/X86/operandorder.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -instcombine -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
@@ -7,12 +8,18 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
  ; Make sure we order the operands of commutative operations so that we get
  ; bigger vectorizable trees.
  
-; CHECK-LABEL: shuffle_operands1
-; CHECK:         load <2 x double>
-; CHECK:         fadd <2 x double>
-
  define void @shuffle_operands1(double * noalias %from, double * noalias %to,
-                               double %v1, double %v2) {
+; CHECK-LABEL: @shuffle_operands1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double [[V1:%.*]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
+  double %v1, double %v2) {
    %from_1 = getelementptr double, double *%from, i64 1
    %v0_1 = load double , double * %from
    %v0_2 = load double , double * %from_1
@@ -24,12 +31,28 @@ define void @shuffle_operands1(double * noalias %from, double * noalias %to,
    ret void
  }
  
-; CHECK-LABEL: shuffle_preserve_broadcast
-; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
  define void @shuffle_preserve_broadcast(double * noalias %from,
-                                        double * noalias %to,
-                                        double %v1, double %v2) {
+; CHECK-LABEL: @shuffle_preserve_broadcast(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LP:%.*]]
+; CHECK:       lp:
+; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
+; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
+; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0_1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[P]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V0_2]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
+; CHECK:       ext:
+; CHECK-NEXT:    ret void
+;
+  double * noalias %to,
+  double %v1, double %v2) {
  entry:
  br label %lp
  
@@ -49,12 +72,28 @@ ext:
    ret void
  }
  
-; CHECK-LABEL: shuffle_preserve_broadcast2
-; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
  define void @shuffle_preserve_broadcast2(double * noalias %from,
-                                        double * noalias %to,
-                                        double %v1, double %v2) {
+; CHECK-LABEL: @shuffle_preserve_broadcast2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LP:%.*]]
+; CHECK:       lp:
+; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
+; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
+; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[P]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V0_2]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[V0_1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
+; CHECK:       ext:
+; CHECK-NEXT:    ret void
+;
+  double * noalias %to,
+  double %v1, double %v2) {
  entry:
  br label %lp
  
@@ -74,12 +113,28 @@ ext:
    ret void
  }
  
-; CHECK-LABEL: shuffle_preserve_broadcast3
-; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
  define void @shuffle_preserve_broadcast3(double * noalias %from,
-                                        double * noalias %to,
-                                        double %v1, double %v2) {
+; CHECK-LABEL: @shuffle_preserve_broadcast3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LP:%.*]]
+; CHECK:       lp:
+; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
+; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
+; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[P]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V0_2]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[V0_1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
+; CHECK:       ext:
+; CHECK-NEXT:    ret void
+;
+  double * noalias %to,
+  double %v1, double %v2) {
  entry:
  br label %lp
  
@@ -100,12 +155,28 @@ ext:
  }
  
  
-; CHECK-LABEL: shuffle_preserve_broadcast4
-; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
  define void @shuffle_preserve_broadcast4(double * noalias %from,
-                                        double * noalias %to,
-                                        double %v1, double %v2) {
+; CHECK-LABEL: @shuffle_preserve_broadcast4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LP:%.*]]
+; CHECK:       lp:
+; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
+; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
+; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0_2]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[V0_1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
+; CHECK:       ext:
+; CHECK-NEXT:    ret void
+;
+  double * noalias %to,
+  double %v1, double %v2) {
  entry:
  br label %lp
  
@@ -125,12 +196,28 @@ ext:
    ret void
  }
  
-; CHECK-LABEL: shuffle_preserve_broadcast5
-; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
  define void @shuffle_preserve_broadcast5(double * noalias %from,
-                                        double * noalias %to,
-                                        double %v1, double %v2) {
+; CHECK-LABEL: @shuffle_preserve_broadcast5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LP:%.*]]
+; CHECK:       lp:
+; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
+; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
+; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0_1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[V0_2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[P]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
+; CHECK:       ext:
+; CHECK-NEXT:    ret void
+;
+  double * noalias %to,
+  double %v1, double %v2) {
  entry:
  br label %lp
  
@@ -151,12 +238,28 @@ ext:
  }
  
  
-; CHECK-LABEL: shuffle_preserve_broadcast6
-; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
  define void @shuffle_preserve_broadcast6(double * noalias %from,
-                                        double * noalias %to,
-                                        double %v1, double %v2) {
+; CHECK-LABEL: @shuffle_preserve_broadcast6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LP:%.*]]
+; CHECK:       lp:
+; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
+; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
+; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0_1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[V0_2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[P]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
+; CHECK:       ext:
+; CHECK-NEXT:    ret void
+;
+  double * noalias %to,
+  double %v1, double %v2) {
  entry:
  br label %lp
  
@@ -179,16 +282,46 @@ ext:
  ; Make sure we don't scramble operands when we reorder them and destroy
  ; 'good' source order.
  
-; CHECK-LABEL: good_load_order
-
-; CHECK: %[[V1:[0-9]+]] = load <4 x float>, <4 x float>*
-; CHECK: %[[V2:[0-9]+]] = insertelement <4 x float> undef, float %1, i32 0
-; CHECK: %[[V3:[0-9]+]] = shufflevector <4 x float> %[[V2]], <4 x float> %[[V1]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
-; CHECK:                = fmul <4 x float> %[[V1]], %[[V3]]
-
  @a = common global [32000 x float] zeroinitializer, align 16
  
  define void @good_load_order() {
+; CHECK-LABEL: @good_load_order(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
+; CHECK:       for.cond1.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i32 0, i32 0), align 16
+; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
+; CHECK:       for.body3:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP14:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 4
+; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[TMP8]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[ARRAYIDX5]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP11]], <4 x float>* [[TMP12]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP13]]
+; CHECK-NEXT:    [[TMP14]] = load float, float* [[ARRAYIDX41]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
+; CHECK-NEXT:    [[MUL45:%.*]] = fmul float [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    store float [[MUL45]], float* [[ARRAYIDX31]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP16]], 31995
+; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
  entry:
    br label %for.cond1.preheader
  
@@ -237,10 +370,17 @@ for.end:
  ;  c[0] = a[0]+b[0];
  ;  c[1] = b[1]+a[1]; // swapped b[1] and a[1]
  
-; CHECK-LABEL: load_reorder_double
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: fadd <2 x double>
  define void @load_reorder_double(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b){
+; CHECK-LABEL: @load_reorder_double(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
    %1 = load double, double* %a
    %2 = load double, double* %b
    %3 = fadd double %1, %2
@@ -261,10 +401,17 @@ define void @load_reorder_double(double* nocapture %c, double* noalias nocapture
  ;  c[2] = a[2]+b[2];
  ;  c[3] = a[3]+b[3];
  
-; CHECK-LABEL: load_reorder_float
-; CHECK: load <4 x float>, <4 x float>*
-; CHECK: fadd <4 x float>
  define void @load_reorder_float(float* nocapture %c, float* noalias nocapture readonly %a, float* noalias nocapture readonly %b){
+; CHECK-LABEL: @load_reorder_float(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
    %1 = load float, float* %a
    %2 = load float, float* %b
    %3 = fadd float %1, %2
@@ -299,11 +446,21 @@ define void @load_reorder_float(float* nocapture %c, float* noalias nocapture re
  ; a[2] = (b[2]+c[2])+d[2];
  ; a[3] = (b[3]+c[3])+d[3];
  
-; CHECK-LABEL: opcode_reorder
-; CHECK: load <4 x float>, <4 x float>*
-; CHECK: fadd <4 x float>
-define void @opcode_reorder(float* noalias nocapture %a, float* noalias nocapture readonly %b, 
-                            float* noalias nocapture readonly %c,float* noalias nocapture readonly %d){
+define void @opcode_reorder(float* noalias nocapture %a, float* noalias nocapture readonly %b,
+; CHECK-LABEL: @opcode_reorder(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[D:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP8]], <4 x float>* [[TMP9]], align 4
+; CHECK-NEXT:    ret void
+;
+  float* noalias nocapture readonly %c,float* noalias nocapture readonly %d){
    %1 = load float, float* %b
    %2 = load float, float* %c
    %3 = fadd float %1, %2
diff --git a/test/Transforms/SLPVectorizer/X86/opt.ll b/test/Transforms/SLPVectorizer/X86/opt.ll

index 824e9992af03c04dbb136b926fe0d45862f2b889..97aa601b071f8effb82ec895c4363309ee0d98ae 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/opt.ll
+++ b/test/Transforms/SLPVectorizer/X86/opt.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -O3 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=SLP
  ; RUN: opt < %s -O3 -disable-slp-vectorization -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP
  
@@ -6,14 +7,33 @@ target triple = "x86_64-apple-macosx10.8.0"
  
  ; Make sure we can disable slp vectorization in opt.
  
-; SLP-LABEL: test1
-; SLP: store <2 x double>
-
-; NOSLP-LABEL: test1
-; NOSLP-NOT: store <2 x double>
-
-
  define void @test1(double* %a, double* %b, double* %c) {
+; SLP-LABEL: @test1(
+; SLP-NEXT:  entry:
+; SLP-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; SLP-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; SLP-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
+; SLP-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; SLP-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; SLP-NEXT:    [[TMP5:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
+; SLP-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; SLP-NEXT:    ret void
+;
+; NOSLP-LABEL: @test1(
+; NOSLP-NEXT:  entry:
+; NOSLP-NEXT:    [[I0:%.*]] = load double, double* [[A:%.*]], align 8
+; NOSLP-NEXT:    [[I1:%.*]] = load double, double* [[B:%.*]], align 8
+; NOSLP-NEXT:    [[MUL:%.*]] = fmul double [[I0]], [[I1]]
+; NOSLP-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
+; NOSLP-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
+; NOSLP-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
+; NOSLP-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; NOSLP-NEXT:    [[MUL5:%.*]] = fmul double [[I3]], [[I4]]
+; NOSLP-NEXT:    store double [[MUL]], double* [[C:%.*]], align 8
+; NOSLP-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C]], i64 1
+; NOSLP-NEXT:    store double [[MUL5]], double* [[ARRAYIDX5]], align 8
+; NOSLP-NEXT:    ret void
+;
  entry:
    %i0 = load double, double* %a, align 8
    %i1 = load double, double* %b, align 8
diff --git a/test/Transforms/SLPVectorizer/X86/ordering.ll b/test/Transforms/SLPVectorizer/X86/ordering.ll

index 11f5a3ddc74181770170c4cb1f05ca0b2c66aa65..bc6ef182c98d0706007ac03ef57cf1d748239c66 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/ordering.ll
+++ b/test/Transforms/SLPVectorizer/X86/ordering.ll
@@ -1,9 +1,14 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  define void @updateModelQPFrame(i32 %m_Bits) {
+; CHECK-LABEL: @updateModelQPFrame(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
  entry:
    %0 = load double, double* undef, align 8
    %mul = fmul double undef, %0
@@ -22,35 +27,52 @@ declare i8* @objc_msgSend(i8*, i8*, ...)
  declare i32 @personality_v0(...)
  
  define void @invoketest() personality i8* bitcast (i32 (...)* @personality_v0 to i8*) {
+; CHECK-LABEL: @invoketest(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[CALL49:%.*]] = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+; CHECK-NEXT:    to label [[COND_TRUE54:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[CALL51:%.*]] = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+; CHECK-NEXT:    to label [[COND_FALSE57:%.*]] unwind label [[LPAD]]
+; CHECK:       cond.true54:
+; CHECK-NEXT:    [[CALL56:%.*]] = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+; CHECK-NEXT:    to label [[COND_END60:%.*]] unwind label [[LPAD]]
+; CHECK:       cond.false57:
+; CHECK-NEXT:    [[CALL59:%.*]] = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+; CHECK-NEXT:    to label [[COND_END60]] unwind label [[LPAD]]
+; CHECK:       cond.end60:
+; CHECK-NEXT:    br i1 undef, label [[IF_END98:%.*]], label [[IF_THEN63:%.*]]
+; CHECK:       if.then63:
+; CHECK-NEXT:    br label [[IF_END98]]
+; CHECK:       lpad:
+; CHECK-NEXT:    [[L:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    resume { i8*, i32 } [[L]]
+; CHECK:       if.end98:
+; CHECK-NEXT:    br label [[IF_END99:%.*]]
+; CHECK:       if.end99:
+; CHECK-NEXT:    ret void
+;
  entry:
    br i1 undef, label %cond.true, label %cond.false
  
  cond.true:
-  %call49 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) 
-          to label %cond.true54 unwind label %lpad
+  %call49 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+  to label %cond.true54 unwind label %lpad
  
  cond.false:
    %call51 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
-          to label %cond.false57 unwind label %lpad
+  to label %cond.false57 unwind label %lpad
  
  cond.true54:
-  %call56 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) 
-          to label %cond.end60 unwind label %lpad
+  %call56 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+  to label %cond.end60 unwind label %lpad
  
  cond.false57:
    %call59 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
-          to label %cond.end60 unwind label %lpad
-
-; Make sure we don't vectorize these phis - they have invokes as inputs.
-
-; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
-
-; CHECK-LABEL: invoketest
-
-; CHECK-LABEL: cond.end60
-; CHECK-NOT: phi <2 x double>
-; CHECK: insertelement
-; CHECK-LABEL: if.then63
+  to label %cond.end60 unwind label %lpad
  
  cond.end60:
    %cond126 = phi double [ %call49, %cond.true54 ], [ %call51, %cond.false57 ]
@@ -68,7 +90,7 @@ if.then63:
  
  lpad:
    %l = landingpad { i8*, i32 }
-          cleanup
+  cleanup
    resume { i8*, i32 } %l
  
  if.end98:
diff --git a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll

index ef94467f509a1a9f28f2b1eb5ed714acdf0067b6..a0a13b2b5aac0373f6fcdcc6e02f3e235ed608d8 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
@@ -18,12 +19,22 @@ target triple = "i386-apple-macosx10.9.0"
  ;}
  
  
-;CHECK: i32 @foo
-;CHECK: load <2 x double>
-;CHECK: phi <2 x double>
-;CHECK: store <2 x double>
-;CHECK: ret i32 undef
  define i32 @foo(double* nocapture %A, i32 %k) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[K:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 10
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[IF_ELSE]] ], [ <double 3.000000e+00, double 5.000000e+00>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[A]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT:    ret i32 undef
+;
  entry:
    %tobool = icmp eq i32 %k, 0
    br i1 %tobool, label %if.else, label %if.end
@@ -61,13 +72,26 @@ if.end:                                           ; preds = %entry, %if.else
  ;  return 0;
  ;}
  
-;CHECK: foo2
-;CHECK: load <2 x double>
-;CHECK: phi <2 x double>
-;CHECK: fmul <2 x double>
-;CHECK: store <2 x double>
-;CHECK: ret
  define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) #0 {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> <double 1.000000e+01, double 1.000000e+01>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> <double 4.000000e+00, double 4.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP5]] = fadd <2 x double> <double 4.000000e+00, double 4.000000e+00>, [[TMP4]]
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_019]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    ret i32 0
+;
  entry:
    %arrayidx = getelementptr inbounds double, double* %A, i64 1
    %0 = load double, double* %arrayidx, align 8
@@ -113,15 +137,55 @@ for.end:                                          ; preds = %for.body
  ;   return R+G+B+Y+P;
  ; }
  
-;CHECK: foo3
-;CHECK: phi <4 x float>
-;CHECK: fmul <4 x float>
-;CHECK: fadd <4 x float>
-;CHECK-NOT: phi <5 x float>
-;CHECK-NOT: fmul <5 x float>
-;CHECK-NOT: fadd <5 x float>
-
  define float @foo3(float* nocapture readonly %A) #0 {
+; CHECK-LABEL: @foo3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[A:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX1]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[REORDER_SHUFFLE]], i32 3
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00
+; CHECK-NEXT:    [[ADD6]] = fadd float [[R_052]], [[MUL]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[ARRAYIDX14]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>*
+; CHECK-NEXT:    [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4
+; CHECK-NEXT:    [[REORDER_SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> <float 1.100000e+01, float 1.000000e+01, float 9.000000e+00, float undef>, float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP12]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> undef, float [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 1
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float 8.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul <4 x float> [[TMP11]], [[TMP17]]
+; CHECK-NEXT:    [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 3
+; CHECK-NEXT:    [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]]
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 2
+; CHECK-NEXT:    [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]]
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 1
+; CHECK-NEXT:    [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 0
+; CHECK-NEXT:    [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]]
+; CHECK-NEXT:    ret float [[ADD31]]
+;
  entry:
    %0 = load float, float* %A, align 4
    %arrayidx1 = getelementptr inbounds float, float* %A, i64 1
@@ -176,11 +240,35 @@ for.end:                                          ; preds = %for.body
  
  ; Make sure the order of phi nodes of different types does not prevent
  ; vectorization of same typed phi nodes.
-; CHECK-LABEL: sort_phi_type
-; CHECK: phi <4 x float>
-; CHECK: fmul <4 x float>
-
  define float @sort_phi_type(float* nocapture readonly %A) {
+; CHECK-LABEL: @sort_phi_type(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x float> [ <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3
+; CHECK-NEXT:    [[TMP9]] = fmul <4 x float> <float 8.000000e+00, float 9.000000e+00, float 1.000000e+02, float 1.110000e+02>, [[TMP8]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1
+; CHECK-NEXT:    [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2
+; CHECK-NEXT:    [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3
+; CHECK-NEXT:    [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]]
+; CHECK-NEXT:    ret float [[ADD31]]
+;
  entry:
    br label %for.body
  
@@ -208,20 +296,33 @@ for.end:                                          ; preds = %for.body
  
  define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
  ; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I1_0:%.*]] = load x86_fp80, x86_fp80* [[I1:%.*]], align 16
+; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr x86_fp80, x86_fp80* [[I1]], i64 1
+; CHECK-NEXT:    [[I1_1:%.*]] = load x86_fp80, x86_fp80* [[I1_GEP1]], align 16
+; CHECK-NEXT:    br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[I2_GEP0:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[I2:%.*]], i64 0
+; CHECK-NEXT:    [[I2_0:%.*]] = load x86_fp80, x86_fp80* [[I2_GEP0]], align 16
+; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[I2]], i64 1
+; CHECK-NEXT:    [[I2_1:%.*]] = load x86_fp80, x86_fp80* [[I2_GEP1]], align 16
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI0:%.*]] = phi x86_fp80 [ [[I1_0]], [[ENTRY:%.*]] ], [ [[I2_0]], [[THEN]] ]
+; CHECK-NEXT:    [[PHI1:%.*]] = phi x86_fp80 [ [[I1_1]], [[ENTRY]] ], [ [[I2_1]], [[THEN]] ]
+; CHECK-NEXT:    store x86_fp80 [[PHI0]], x86_fp80* [[O:%.*]], align 16
+; CHECK-NEXT:    [[O_GEP1:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[O]], i64 1
+; CHECK-NEXT:    store x86_fp80 [[PHI1]], x86_fp80* [[O_GEP1]], align 16
+; CHECK-NEXT:    ret void
  ;
  ; Test that we correctly recognize the discontiguous memory in arrays where the
  ; size is less than the alignment, and through various different GEP formations.
-;
-; We disable the vectorization of x86_fp80 for now. 
+; We disable the vectorization of x86_fp80 for now.
  
  entry:
    %i1.0 = load x86_fp80, x86_fp80* %i1, align 16
    %i1.gep1 = getelementptr x86_fp80, x86_fp80* %i1, i64 1
    %i1.1 = load x86_fp80, x86_fp80* %i1.gep1, align 16
-; CHECK: load x86_fp80, x86_fp80*
-; CHECK: load x86_fp80, x86_fp80*
-; CHECK-NOT: insertelement <2 x x86_fp80>
-; CHECK-NOT: insertelement <2 x x86_fp80>
    br i1 undef, label %then, label %end
  
  then:
@@ -229,18 +330,11 @@ then:
    %i2.0 = load x86_fp80, x86_fp80* %i2.gep0, align 16
    %i2.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 1
    %i2.1 = load x86_fp80, x86_fp80* %i2.gep1, align 16
-; CHECK: load x86_fp80, x86_fp80*
-; CHECK: load x86_fp80, x86_fp80*
-; CHECK-NOT: insertelement <2 x x86_fp80>
-; CHECK-NOT: insertelement <2 x x86_fp80>
    br label %end
  
  end:
    %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]
    %phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ]
-; CHECK-NOT: phi <2 x x86_fp80>
-; CHECK-NOT: extractelement <2 x x86_fp80>
-; CHECK-NOT: extractelement <2 x x86_fp80>
    store x86_fp80 %phi0, x86_fp80* %o, align 16
    %o.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %o, i64 1
    store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16
diff --git a/test/Transforms/SLPVectorizer/X86/phi3.ll b/test/Transforms/SLPVectorizer/X86/phi3.ll

index 61628301aec79de300790f5ebe8bccab6124de9d..b08d9abf9ff55d55e713930cf00af859b553a8a1 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/phi3.ll
+++ b/test/Transforms/SLPVectorizer/X86/phi3.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -10,6 +11,24 @@ target triple = "x86_64-apple-macosx10.8.0"
  declare %struct.GPar.0.16.26* @Rf_gpptr(...)
  
  define void @Rf_GReset() {
+; CHECK-LABEL: @Rf_GReset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @d, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
+; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN6:%.*]], label [[IF_END7]]
+; CHECK:       if.then6:
+; CHECK-NEXT:    br label [[IF_END7]]
+; CHECK:       if.end7:
+; CHECK-NEXT:    ret void
+;
  entry:
    %sub = fsub double -0.000000e+00, undef
    %0 = load double, double* @d, align 8
diff --git a/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll

index b47a6ce2a263a4a11a2d9b97deaeac82afe2bd50..0a752889065f0ff14600ac20d68b1c6f7a023a11 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
+++ b/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
@@ -1,18 +1,35 @@
-; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -disable-output
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -S -o - | FileCheck %s
  
  target datalayout = "f64:64:64-v64:64:64"
  
  define void @test_phi_in_landingpad() personality i8*
-          bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @test_phi_in_landingpad(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    invoke void @foo()
+; CHECK-NEXT:    to label [[INNER:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    invoke void @foo()
+; CHECK-NEXT:    to label [[DONE:%.*]] unwind label [[LPAD]]
+; CHECK:       lpad:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ undef, [[ENTRY:%.*]] ], [ undef, [[INNER]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    catch i8* null
+; CHECK-NEXT:    br label [[DONE]]
+; CHECK:       done:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ undef, [[INNER]] ], [ [[TMP0]], [[LPAD]] ]
+; CHECK-NEXT:    ret void
+;
+  bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  entry:
    invoke void @foo()
-          to label %inner unwind label %lpad
+  to label %inner unwind label %lpad
  
  inner:
    %x0 = fsub double undef, undef
    %y0 = fsub double undef, undef
    invoke void @foo()
-          to label %done unwind label %lpad
+  to label %done unwind label %lpad
  
  lpad:
    %x1 = phi double [ undef, %entry ], [ undef, %inner ]
diff --git a/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll b/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll

index fa08effcd648ba6154493e473e4683d8a92dc54c..f708341877af55e8d977c1af2305a9173bf3d3a6 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
+++ b/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
@@ -1,12 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
-; We purposely over-align f64 to 128bit here. 
+; We purposely over-align f64 to 128bit here.
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:128:128-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
  target triple = "i386-apple-macosx10.9.0"
  
  
  define void @test(double* %i1, double* %i2, double* %o) {
  ; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I1_0:%.*]] = load double, double* [[I1:%.*]], align 16
+; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr double, double* [[I1]], i64 1
+; CHECK-NEXT:    [[I1_1:%.*]] = load double, double* [[I1_GEP1]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double [[I1_0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[I1_1]], i32 1
+; CHECK-NEXT:    br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[I2_GEP0:%.*]] = getelementptr inbounds double, double* [[I2:%.*]], i64 0
+; CHECK-NEXT:    [[I2_0:%.*]] = load double, double* [[I2_GEP0]], align 16
+; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds double, double* [[I2]], i64 1
+; CHECK-NEXT:    [[I2_1:%.*]] = load double, double* [[I2_GEP1]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[I2_0]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[I2_1]], i32 1
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3]], [[THEN]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; CHECK-NEXT:    store double [[TMP5]], double* [[O:%.*]], align 16
+; CHECK-NEXT:    [[O_GEP1:%.*]] = getelementptr inbounds double, double* [[O]], i64 1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT:    store double [[TMP6]], double* [[O_GEP1]], align 16
+; CHECK-NEXT:    ret void
  ;
  ; Test that we correctly recognize the discontiguous memory in arrays where the
  ; size is less than the alignment, and through various different GEP formations.
@@ -15,10 +39,6 @@ entry:
    %i1.0 = load double, double* %i1, align 16
    %i1.gep1 = getelementptr double, double* %i1, i64 1
    %i1.1 = load double, double* %i1.gep1, align 16
-; CHECK: load double, double*
-; CHECK: load double, double*
-; CHECK: insertelement <2 x double>
-; CHECK: insertelement <2 x double>
    br i1 undef, label %then, label %end
  
  then:
@@ -26,18 +46,11 @@ then:
    %i2.0 = load double, double* %i2.gep0, align 16
    %i2.gep1 = getelementptr inbounds double, double* %i2, i64 1
    %i2.1 = load double, double* %i2.gep1, align 16
-; CHECK: load double, double*
-; CHECK: load double, double*
-; CHECK: insertelement <2 x double>
-; CHECK: insertelement <2 x double>
    br label %end
  
  end:
    %phi0 = phi double [ %i1.0, %entry ], [ %i2.0, %then ]
    %phi1 = phi double [ %i1.1, %entry ], [ %i2.1, %then ]
-; CHECK: phi <2 x double>
-; CHECK: extractelement <2 x double>
-; CHECK: extractelement <2 x double>
    store double %phi0, double* %o, align 16
    %o.gep1 = getelementptr inbounds double, double* %o, i64 1
    store double %phi1, double* %o.gep1, align 16
diff --git a/test/Transforms/SLPVectorizer/X86/pr16628.ll b/test/Transforms/SLPVectorizer/X86/pr16628.ll

index 06abe9127e6f1ca804062f57a66a05e6d4bccc8e..9f195649c6330945db1c491dd05977e064e3370a 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/pr16628.ll
+++ b/test/Transforms/SLPVectorizer/X86/pr16628.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.9.0"
@@ -9,6 +10,21 @@ target triple = "x86_64-apple-macosx10.9.0"
  
  ; Function Attrs: nounwind ssp uwtable
  define void @f() {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @g()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @c, align 4
+; CHECK-NEXT:    [[LNOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[LNOT]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* @a, align 2
+; CHECK-NEXT:    [[LNOT2:%.*]] = icmp eq i16 [[TMP1]], 0
+; CHECK-NEXT:    [[LNOT_EXT3:%.*]] = zext i1 [[LNOT2]] to i32
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[LNOT_EXT3]], [[LNOT_EXT]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], [[OR]]
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    store i16 [[CONV4]], i16* @b, align 2
+; CHECK-NEXT:    ret void
+;
  entry:
    %call = tail call i32 (...) @g()
    %0 = load i32, i32* @c, align 4
diff --git a/test/Transforms/SLPVectorizer/X86/pr16899.ll b/test/Transforms/SLPVectorizer/X86/pr16899.ll

index 0de14ec3585696415ba095c151e9c2cd2b8d6e4d..5b91c3072b1981000345f2229ea49c5f1ddfea92 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/pr16899.ll
+++ b/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s  -slp-vectorizer -S -mtriple=i386--netbsd -mcpu=i486
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s  -slp-vectorizer -S -mtriple=i386--netbsd -mcpu=i486 | FileCheck %s
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
  target triple = "i386--netbsd"
  
@@ -6,6 +7,20 @@ target triple = "i386--netbsd"
  
  ; Function Attrs: noreturn nounwind readonly
  define i32 @fn1() #0 {
+; CHECK-LABEL: @fn1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @a, align 4, !tbaa !0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa !4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4, !tbaa !4
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[C_0:%.*]] = phi i32 [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[B_0:%.*]] = phi i32 [ [[TMP1]], [[ENTRY]] ], [ [[ADD:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[B_0]], [[C_0]]
+; CHECK-NEXT:    [[ADD2]] = add nsw i32 [[ADD]], 1
+; CHECK-NEXT:    br label [[DO_BODY]]
+;
  entry:
    %0 = load i32*, i32** @a, align 4, !tbaa !4
    %1 = load i32, i32* %0, align 4, !tbaa !5
diff --git a/test/Transforms/SLPVectorizer/X86/pr18060.ll b/test/Transforms/SLPVectorizer/X86/pr18060.ll

index e6813f3b315d5547c043545242d93d9c06761b4f..0af5d0fa0531db367e3380810e8369bc088c108a 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/pr18060.ll
+++ b/test/Transforms/SLPVectorizer/X86/pr18060.ll
@@ -1,19 +1,53 @@
-; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
  target triple = "i386-pc-linux"
  
  ; Function Attrs: nounwind
  define i32 @_Z16adjustFixupValueyj(i64 %Value, i32 %Kind) {
+; CHECK-LABEL: @_Z16adjustFixupValueyj(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXTRACT_T:%.*]] = trunc i64 [[VALUE:%.*]] to i32
+; CHECK-NEXT:    [[EXTRACT:%.*]] = lshr i64 [[VALUE]], 12
+; CHECK-NEXT:    [[EXTRACT_T6:%.*]] = trunc i64 [[EXTRACT]] to i32
+; CHECK-NEXT:    switch i32 [[KIND:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 1, label [[RETURN]]
+; CHECK-NEXT:    i32 129, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 130, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.default:
+; CHECK-NEXT:    call void @_Z25llvm_unreachable_internalv()
+; CHECK-NEXT:    unreachable
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[VALUE]], 16
+; CHECK-NEXT:    [[EXTRACT_T5:%.*]] = trunc i64 [[SHR]] to i32
+; CHECK-NEXT:    [[EXTRACT7:%.*]] = lshr i64 [[VALUE]], 28
+; CHECK-NEXT:    [[EXTRACT_T8:%.*]] = trunc i64 [[EXTRACT7]] to i32
+; CHECK-NEXT:    br label [[SW_BB2]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    [[VALUE_ADDR_0_OFF0:%.*]] = phi i32 [ [[EXTRACT_T]], [[ENTRY:%.*]] ], [ [[EXTRACT_T5]], [[SW_BB1]] ]
+; CHECK-NEXT:    [[VALUE_ADDR_0_OFF12:%.*]] = phi i32 [ [[EXTRACT_T6]], [[ENTRY]] ], [ [[EXTRACT_T8]], [[SW_BB1]] ]
+; CHECK-NEXT:    [[CONV6:%.*]] = and i32 [[VALUE_ADDR_0_OFF0]], 4095
+; CHECK-NEXT:    [[CONV4:%.*]] = shl i32 [[VALUE_ADDR_0_OFF12]], 16
+; CHECK-NEXT:    [[SHL:%.*]] = and i32 [[CONV4]], 983040
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[CONV6]]
+; CHECK-NEXT:    [[OR11:%.*]] = or i32 [[OR]], 8388608
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[OR11]], [[SW_BB2]] ], [ [[EXTRACT_T]], [[ENTRY]] ], [ [[EXTRACT_T]], [[ENTRY]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
  entry:
    %extract.t = trunc i64 %Value to i32
    %extract = lshr i64 %Value, 12
    %extract.t6 = trunc i64 %extract to i32
    switch i32 %Kind, label %sw.default [
-    i32 0, label %return
-    i32 1, label %return
-    i32 129, label %sw.bb1
-    i32 130, label %sw.bb2
+  i32 0, label %return
+  i32 1, label %return
+  i32 129, label %sw.bb1
+  i32 130, label %sw.bb2
    ]
  
  sw.default:                                       ; preds = %entry
diff --git a/test/Transforms/SLPVectorizer/X86/pr23510.ll b/test/Transforms/SLPVectorizer/X86/pr23510.ll

index efdb0ecd9996d72dfaf678521dd5a576ae11177a..420fdde3631bbd8afc2ef70e91f821b99e1c15b9 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/pr23510.ll
+++ b/test/Transforms/SLPVectorizer/X86/pr23510.ll
@@ -1,16 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; PR23510
  ; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
  
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
  
-; CHECK-LABEL: @_Z3fooPml(
-; CHECK: lshr <2 x i64>
-; CHECK: lshr <2 x i64>
-
  @total = global i64 0, align 8
  
  define void @_Z3fooPml(i64* nocapture %a, i64 %i) {
+; CHECK-LABEL: @_Z3fooPml(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[A]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 4, i64 4>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[A]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* @total, align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    store i64 [[ADD]], i64* @total, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[A]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr <2 x i64> [[TMP5]], <i64 4, i64 4>
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[A]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load i64, i64* @total, align 8
+; CHECK-NEXT:    [[ADD9:%.*]] = add i64 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    store i64 [[ADD9]], i64* @total, align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %tmp = load i64, i64* %a, align 8
    %shr = lshr i64 %tmp, 4
diff --git a/test/Transforms/SLPVectorizer/X86/pr27163.ll b/test/Transforms/SLPVectorizer/X86/pr27163.ll

index 2b8480ef82c6d998915be1769aa6e9baa7f6d32a..b1c1d9540d01fea6536627e01ac022bd2ea2dd5a 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/pr27163.ll
+++ b/test/Transforms/SLPVectorizer/X86/pr27163.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -slp-vectorizer -S < %s | FileCheck %s
  target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-pc-windows-msvc18.0.0"
@@ -5,6 +6,31 @@ target triple = "x86_64-pc-windows-msvc18.0.0"
  %struct.B = type { i64, i64 }
  
  define void @test1(%struct.B* %p) personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  invoke.cont:
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], %struct.B* [[P:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_B]], %struct.B* [[P]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[GEP1]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[GEP1]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP1]], <2 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    invoke void @throw()
+; CHECK-NEXT:    to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
+; CHECK:       catch.dispatch:
+; CHECK-NEXT:    [[CS:%.*]] = catchswitch within none [label %invoke.cont1] unwind label [[EHCLEANUP:%.*]]
+; CHECK:       invoke.cont1:
+; CHECK-NEXT:    [[CATCH:%.*]] = catchpad within [[CS]] [i8* null, i32 64, i8* null]
+; CHECK-NEXT:    invoke void @throw() [ "funclet"(token [[CATCH]]) ]
+; CHECK-NEXT:    to label [[UNREACHABLE]] unwind label [[EHCLEANUP]]
+; CHECK:       ehcleanup:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[TMP2]], [[CATCH_DISPATCH]] ], [ 9, [[INVOKE_CONT1:%.*]] ]
+; CHECK-NEXT:    [[CLEANUP:%.*]] = cleanuppad within none []
+; CHECK-NEXT:    call void @release(i64 [[PHI]]) [ "funclet"(token [[CLEANUP]]) ]
+; CHECK-NEXT:    cleanupret from [[CLEANUP]] unwind to caller
+; CHECK:       unreachable:
+; CHECK-NEXT:    unreachable
+;
  invoke.cont:
    %gep1 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 0
    %gep2 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 1
@@ -13,7 +39,7 @@ invoke.cont:
    store i64 %load1, i64* %gep1, align 8
    store i64 %load2, i64* %gep2, align 8
    invoke void @throw()
-          to label %unreachable unwind label %catch.dispatch
+  to label %unreachable unwind label %catch.dispatch
  
  catch.dispatch:                                   ; preds = %invoke.cont
    %cs = catchswitch within none [label %invoke.cont1] unwind label %ehcleanup
@@ -21,7 +47,7 @@ catch.dispatch:                                   ; preds = %invoke.cont
  invoke.cont1:                                     ; preds = %catch.dispatch
    %catch = catchpad within %cs [i8* null, i32 64, i8* null]
    invoke void @throw() [ "funclet"(token %catch) ]
-          to label %unreachable unwind label %ehcleanup
+  to label %unreachable unwind label %ehcleanup
  
  ehcleanup:                                        ; preds = %invoke.cont1, %catch.dispatch
    %phi = phi i64 [ %load1, %catch.dispatch ], [ 9, %invoke.cont1 ]
@@ -33,16 +59,6 @@ unreachable:                                      ; preds = %invoke.cont1, %invo
    unreachable
  }
  
-
-; CHECK-LABEL: define void @test1(
-; CHECK: %[[gep:.*]] = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 0
-; CHECK: %[[bc:.*]]  = bitcast i64* %[[gep]] to <2 x i64>*
-; CHECK: %[[ld:.*]]  = load <2 x i64>, <2 x i64>* %[[bc]], align 8
-; CHECK: %[[ee:.*]]  = extractelement <2 x i64> %[[ld]], i32 0
-
-; CHECK: %[[phi:.*]] = phi i64 [ %[[ee]], {{.*}} ], [ 9, {{.*}} ]
-; CHECK: call void @release(i64 %[[phi]])
-
  declare i32 @__CxxFrameHandler3(...)
  
  declare void @throw()
diff --git a/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll

index 28217fcba17e174dbc38b36714d6d94ed934fc6e..7cc0194c730200a24ea8f4650cb22896f813b42d 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
  
  ; Check propagation of optional IR flags (PR20802). For a flag to
@@ -7,9 +8,19 @@
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-unknown"
  
-; CHECK-LABEL: @exact(
-; CHECK: lshr exact <4 x i32>
  define void @exact(i32* %x) {
+; CHECK-LABEL: @exact(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -33,9 +44,19 @@ define void @exact(i32* %x) {
    ret void
  }
  
-; CHECK-LABEL: @not_exact(
-; CHECK: lshr <4 x i32>
  define void @not_exact(i32* %x) {
+; CHECK-LABEL: @not_exact(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -59,9 +80,19 @@ define void @not_exact(i32* %x) {
    ret void
  }
  
-; CHECK-LABEL: @nsw(
-; CHECK: add nsw <4 x i32>
  define void @nsw(i32* %x) {
+; CHECK-LABEL: @nsw(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -85,9 +116,19 @@ define void @nsw(i32* %x) {
    ret void
  }
  
-; CHECK-LABEL: @not_nsw(
-; CHECK: add <4 x i32>
  define void @not_nsw(i32* %x) {
+; CHECK-LABEL: @not_nsw(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -111,9 +152,19 @@ define void @not_nsw(i32* %x) {
    ret void
  }
  
-; CHECK-LABEL: @nuw(
-; CHECK: add nuw <4 x i32>
  define void @nuw(i32* %x) {
+; CHECK-LABEL: @nuw(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -136,10 +187,20 @@ define void @nuw(i32* %x) {
  
    ret void
  }
- 
-; CHECK-LABEL: @not_nuw(
-; CHECK: add <4 x i32>
+
  define void @not_nuw(i32* %x) {
+; CHECK-LABEL: @not_nuw(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -162,10 +223,20 @@ define void @not_nuw(i32* %x) {
  
    ret void
  }
- 
-; CHECK-LABEL: @nnan(
-; CHECK: fadd nnan <4 x float>
+
  define void @nnan(float* %x) {
+; CHECK-LABEL: @nnan(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd nnan <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds float, float* %x, i64 0
    %idx2 = getelementptr inbounds float, float* %x, i64 1
    %idx3 = getelementptr inbounds float, float* %x, i64 2
@@ -188,10 +259,20 @@ define void @nnan(float* %x) {
  
    ret void
  }
- 
-; CHECK-LABEL: @not_nnan(
-; CHECK: fadd <4 x float>
+
  define void @not_nnan(float* %x) {
+; CHECK-LABEL: @not_nnan(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds float, float* %x, i64 0
    %idx2 = getelementptr inbounds float, float* %x, i64 1
    %idx3 = getelementptr inbounds float, float* %x, i64 2
@@ -214,10 +295,20 @@ define void @not_nnan(float* %x) {
  
    ret void
  }
- 
-; CHECK-LABEL: @only_fast(
-; CHECK: fadd fast <4 x float>
+
  define void @only_fast(float* %x) {
+; CHECK-LABEL: @only_fast(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds float, float* %x, i64 0
    %idx2 = getelementptr inbounds float, float* %x, i64 1
    %idx3 = getelementptr inbounds float, float* %x, i64 2
@@ -240,10 +331,20 @@ define void @only_fast(float* %x) {
  
    ret void
  }
- 
-; CHECK-LABEL: @only_arcp(
-; CHECK: fadd arcp <4 x float>
+
  define void @only_arcp(float* %x) {
+; CHECK-LABEL: @only_arcp(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd arcp <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds float, float* %x, i64 0
    %idx2 = getelementptr inbounds float, float* %x, i64 1
    %idx3 = getelementptr inbounds float, float* %x, i64 2
@@ -267,10 +368,21 @@ define void @only_arcp(float* %x) {
    ret void
  }
  
-; CHECK-LABEL: @addsub_all_nsw
-; CHECK: add nsw <4 x i32>
-; CHECK: sub nsw <4 x i32>
  define void @addsub_all_nsw(i32* %x) {
+; CHECK-LABEL: @addsub_all_nsw(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -293,11 +405,22 @@ define void @addsub_all_nsw(i32* %x) {
  
    ret void
  }
- 
-; CHECK-LABEL: @addsub_some_nsw
-; CHECK: add nsw <4 x i32>
-; CHECK: sub <4 x i32>
+
  define void @addsub_some_nsw(i32* %x) {
+; CHECK-LABEL: @addsub_some_nsw(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -320,11 +443,22 @@ define void @addsub_some_nsw(i32* %x) {
  
    ret void
  }
- 
-; CHECK-LABEL: @addsub_no_nsw
-; CHECK: add <4 x i32>
-; CHECK: sub <4 x i32>
+
  define void @addsub_no_nsw(i32* %x) {
+; CHECK-LABEL: @addsub_no_nsw(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds i32, i32* %x, i64 0
    %idx2 = getelementptr inbounds i32, i32* %x, i64 1
    %idx3 = getelementptr inbounds i32, i32* %x, i64 2
@@ -347,11 +481,20 @@ define void @addsub_no_nsw(i32* %x) {
  
    ret void
  }
- 
-; CHECK-LABEL: @fcmp_fast
-; CHECK: fcmp fast oge <2 x double>
-; CHECK: sub fast <2 x double>
+
  define void @fcmp_fast(double* %x) #1 {
+; CHECK-LABEL: @fcmp_fast(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds double, double* %x, i64 0
    %idx2 = getelementptr inbounds double, double* %x, i64 1
  
@@ -373,10 +516,19 @@ define void @fcmp_fast(double* %x) #1 {
    ret void
  }
  
-; CHECK-LABEL: @fcmp_no_fast
-; CHECK: fcmp oge <2 x double>
-; CHECK: sub <2 x double>
  define void @fcmp_no_fast(double* %x) #1 {
+; CHECK-LABEL: @fcmp_no_fast(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds double, double* %x, i64 0
    %idx2 = getelementptr inbounds double, double* %x, i64 1
  
@@ -400,9 +552,17 @@ define void @fcmp_no_fast(double* %x) #1 {
  
  declare double @llvm.fabs.f64(double) nounwind readnone
  
-;CHECK-LABEL: @call_fast(
-;CHECK: call fast <2 x double> @llvm.fabs.v2f64
  define void @call_fast(double* %x) {
+; CHECK-LABEL: @call_fast(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds double, double* %x, i64 0
    %idx2 = getelementptr inbounds double, double* %x, i64 1
  
@@ -418,9 +578,17 @@ define void @call_fast(double* %x) {
    ret void
  }
  
-;CHECK-LABEL: @call_no_fast(
-;CHECK: call <2 x double> @llvm.fabs.v2f64
  define void @call_no_fast(double* %x) {
+; CHECK-LABEL: @call_no_fast(
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    ret void
+;
    %idx1 = getelementptr inbounds double, double* %x, i64 0
    %idx2 = getelementptr inbounds double, double* %x, i64 1
  
diff --git a/test/Transforms/SLPVectorizer/X86/reduction.ll b/test/Transforms/SLPVectorizer/X86/reduction.ll

index 4c5f1266336b61cd08f0335c2276ec29d544c040..03b7f67ae4cabe5f06377b8b4ef715a02cf6f272 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/reduction.ll
+++ b/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
@@ -10,11 +11,33 @@ target triple = "i386-apple-macosx10.8.0"
  ;   return sum;
  ; }
  
-;CHECK: reduce
-;CHECK: load <2 x double>
-;CHECK: fmul <2 x double>
-;CHECK: ret
  define i32 @reduce(double* nocapture %A, i32 %n, i32 %m) {
+; CHECK-LABEL: @reduce(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP13:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP13]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_015:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SUM_014:%.*]] = phi double [ [[ADD6:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[I_015]], 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i32 [[MUL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> <double 7.000000e+00, double 7.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
+; CHECK-NEXT:    [[ADD5:%.*]] = fadd double [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[ADD6]] = fadd double [[SUM_014]], [[ADD5]]
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_015]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
+; CHECK:       for.cond.for.end_crit_edge:
+; CHECK-NEXT:    [[PHITMP:%.*]] = fptosi double [[ADD6]] to i32
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
+;
  entry:
    %cmp13 = icmp sgt i32 %n, 0
    br i1 %cmp13, label %for.body, label %for.end
diff --git a/test/Transforms/SLPVectorizer/X86/reduction2.ll b/test/Transforms/SLPVectorizer/X86/reduction2.ll

index 507a61aa16f479203a4d371bd103ebe27c369307..87a6af792f50c4f71cbb810cbe56a231be6cf4fa 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -1,12 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
  target triple = "i386-apple-macosx10.8.0"
  
-;CHECK-LABEL: @foo(
-;CHECK: load <2 x double>
-;CHECK: ret
  define double @foo(double* nocapture %D) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    br label [[TMP1:%.*]]
+; CHECK:         [[I_02:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP12:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[SUM_01:%.*]] = phi double [ 0.000000e+00, [[TMP0]] ], [ [[TMP11:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw i32 [[I_02]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd double [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11]] = fadd double [[SUM_01]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12]] = add nsw i32 [[I_02]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP12]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[TMP13:%.*]], label [[TMP1]]
+; CHECK:         ret double [[TMP11]]
+;
    br label %1
  
  ; <label>:1                                       ; preds = %1, %0
diff --git a/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/test/Transforms/SLPVectorizer/X86/remark_horcost.ll

index 96c8d7f3b68753af8f5457497b8450c1d45441ca..27997f6af3d73b988c32b2d2df8a1add92fcb546 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
+++ b/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -1,7 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
  ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
  
  define i32 @foo(i32* %diff) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M2:%.*]] = alloca [8 x [8 x i32]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [8 x [8 x i32]]* [[M2]] to i8*
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DIFF:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP2]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[TMP1]], 1
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP1]], 5
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP1]], 2
+; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP1]], 6
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP1]], 3
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = or i64 [[TMP1]], 7
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP9]]
+; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 undef, [[A_088]]
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1
+; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[ADD10]], undef
+; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2
+; CHECK-NEXT:    [[ADD38:%.*]] = add nsw i32 [[ADD24]], undef
+; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP13]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]]
+; CHECK-NEXT:    [[ADD52:%.*]] = add nsw i32 [[ADD38]], undef
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
+;
  entry:
    %m2 = alloca [8 x [8 x i32]], align 16
    %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
@@ -19,7 +71,7 @@ for.body:                                         ; preds = %for.body, %entry
    %add3 = add nsw i32 %4, %2
    %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
    store i32 %add3, i32* %arrayidx6, align 16
- 
+
    %add10 = add nsw i32 %add3, %a.088
    %5 = or i64 %1, 1
    %arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
@@ -30,7 +82,7 @@ for.body:                                         ; preds = %for.body, %entry
    %add17 = add nsw i32 %8, %6
    %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
    store i32 %add17, i32* %arrayidx20, align 4
-  
+
    %add24 = add nsw i32 %add10, %add17
    %9 = or i64 %1, 2
    %arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9
@@ -41,7 +93,7 @@ for.body:                                         ; preds = %for.body, %entry
    %add31 = add nsw i32 %12, %10
    %arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
    store i32 %add31, i32* %arrayidx34, align 8
-  
+
    %add38 = add nsw i32 %add24, %add31
    %13 = or i64 %1, 3
    %arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13
@@ -49,35 +101,33 @@ for.body:                                         ; preds = %for.body, %entry
    %15 = or i64 %1, 7
    %arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15
    %16 = load i32, i32* %arrayidx44, align 4
-  
+
    %add45 = add nsw i32 %16, %14
    %arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
    store i32 %add45, i32* %arrayidx48, align 4
-  
+
    %add52 = add nsw i32 %add38, %add45
- ; CHECK: add nsw <{{[0-9]+}} x i32>
- ; CHECK: add nsw <{{[0-9]+}} x i32>
- 
- ; YAML:      --- !Passed
- ; YAML-NEXT: Pass:            slp-vectorizer
- ; YAML-NEXT: Name:            StoresVectorized
- ; YAML-NEXT: Function:        foo
- ; YAML-NEXT: Args:
- ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
- ; YAML-NEXT:   - Cost:            '-8'
- ; YAML-NEXT:   - String:          ' and with tree size '
- ; YAML-NEXT:   - TreeSize:        '4'
  
- ; YAML:      --- !Passed
- ; YAML-NEXT: Pass:            slp-vectorizer
- ; YAML-NEXT: Name:            VectorizedHorizontalReduction
- ; YAML-NEXT: Function:        foo
- ; YAML-NEXT: Args:
- ; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
- ; YAML-NEXT:   - Cost:            '-2'
- ; YAML-NEXT:   - String:          ' and with tree size '
- ; YAML-NEXT:   - TreeSize:        '1'
- 
+  ; YAML:      --- !Passed
+  ; YAML-NEXT: Pass:            slp-vectorizer
+  ; YAML-NEXT: Name:            StoresVectorized
+  ; YAML-NEXT: Function:        foo
+  ; YAML-NEXT: Args:
+  ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
+  ; YAML-NEXT:   - Cost:            '-8'
+  ; YAML-NEXT:   - String:          ' and with tree size '
+  ; YAML-NEXT:   - TreeSize:        '4'
+
+  ; YAML:      --- !Passed
+  ; YAML-NEXT: Pass:            slp-vectorizer
+  ; YAML-NEXT: Name:            VectorizedHorizontalReduction
+  ; YAML-NEXT: Function:        foo
+  ; YAML-NEXT: Args:
+  ; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
+  ; YAML-NEXT:   - Cost:            '-2'
+  ; YAML-NEXT:   - String:          ' and with tree size '
+  ; YAML-NEXT:   - TreeSize:        '1'
+
    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    %exitcond = icmp eq i64 %indvars.iv.next, 8
    br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/Transforms/SLPVectorizer/X86/remark_listcost.ll b/test/Transforms/SLPVectorizer/X86/remark_listcost.ll

index 6f6e00f9c89bfa31b2d13d18dbcb48f893c00270..ed2cd870cf6f0c56edba2117cf20f9c9fff2fa30 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
+++ b/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
@@ -1,7 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
  ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
  
  define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
+; CHECK-LABEL: @vsub2_test(
+; CHECK-NEXT:    br label [[TMP1:%.*]]
+; CHECK:         [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[PO_03:%.*]] = phi i32* [ [[POUT:%.*]], [[TMP0]] ], [ [[TMP7:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[PTMPI2_02:%.*]] = phi i32* [ [[PIN2:%.*]], [[TMP0]] ], [ [[TMP4:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[PTMPI1_01:%.*]] = phi i32* [ [[PIN1:%.*]], [[TMP0]] ], [ [[TMP2:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[TMP2]] = getelementptr inbounds i32, i32* [[PTMPI1_01]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[PTMPI1_01]], align 4, !tbaa !1
+; CHECK-NEXT:    [[TMP4]] = getelementptr inbounds i32, i32* [[PTMPI2_02]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[PTMPI2_02]], align 4, !tbaa !1
+; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw i32 [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7]] = getelementptr inbounds i32, i32* [[PO_03]], i64 1
+; CHECK-NEXT:    store i32 [[TMP6]], i32* [[PO_03]], align 4, !tbaa !1
+; CHECK-NEXT:    [[TMP8]] = add nuw nsw i32 [[IDX_04]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP8]], 64
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[TMP9:%.*]], label [[TMP1]], !llvm.loop !5
+; CHECK:         ret void
+;
    br label %1
  
    %idx.04 = phi i32 [ 0, %0 ], [ %8, %1 ]
@@ -14,15 +33,14 @@ define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
    %5 = load i32, i32* %ptmpi2.02, align 4, !tbaa !1
    %6 = sub nsw i32 %3, %5
    %7 = getelementptr inbounds i32, i32* %po.03, i64 1
- ; CHECK-NOT: <{{[0-9]+}} x i32>
- ; YAML:      Pass:            slp-vectorizer
- ; YAML-NEXT: Name:            NotBeneficial
- ; YAML-NEXT: Function:        vsub2_test
- ; YAML-NEXT: Args:
- ; YAML-NEXT:   - String:          'List vectorization was possible but not beneficial with cost '
- ; YAML-NEXT:   - Cost:            '0'
- ; YAML-NEXT:   - String:          ' >= '
- ; YAML-NEXT:   - Treshold:        '0'
+  ; YAML:      Pass:            slp-vectorizer
+  ; YAML-NEXT: Name:            NotBeneficial
+  ; YAML-NEXT: Function:        vsub2_test
+  ; YAML-NEXT: Args:
+  ; YAML-NEXT:   - String:          'List vectorization was possible but not beneficial with cost '
+  ; YAML-NEXT:   - Cost:            '0'
+  ; YAML-NEXT:   - String:          ' >= '
+  ; YAML-NEXT:   - Treshold:        '0'
    store i32 %6, i32* %po.03, align 4, !tbaa !1
    %8 = add nuw nsw i32 %idx.04, 1
    %exitcond = icmp eq i32 %8, 64
diff --git a/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll b/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll

index 59d4bb0710e65c3f5fe7a59e20dfd1f8bf27e507..a0d3c939d4ff2981946d49b6c8084589ba037987 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
+++ b/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
@@ -1,7 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
  ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
  
  define i32 @foo(i32* nocapture readonly %diff) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M2:%.*]] = alloca [8 x [8 x i32]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [8 x [8 x i32]]* [[M2]] to i8*
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD24:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DIFF:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[TMP1]], 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
+; CHECK-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX6]], align 16
+; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 [[ADD3]], [[A_088]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP1]], 1
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP1]], 5
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
+; CHECK-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP8]], [[TMP6]]
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1
+; CHECK-NEXT:    store i32 [[ADD17]], i32* [[ARRAYIDX20]], align 4
+; CHECK-NEXT:    [[ADD24]] = add nsw i32 [[ADD10]], [[ADD17]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 0
+; CHECK-NEXT:    ret i32 [[ADD24]]
+;
  entry:
    %m2 = alloca [8 x [8 x i32]], align 16
    %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
@@ -31,13 +67,12 @@ for.body:                                         ; preds = %for.body, %entry
    store i32 %add17, i32* %arrayidx20, align 4
    %add24 = add nsw i32 %add10, %add17
  
- ; CHECK-NOT: add nsw <{{[0-9]+}} x i32> 
- ; YAML:      Pass:            slp-vectorizer
- ; YAML-NEXT: Name:            NotPossible
- ; YAML-NEXT: Function:        foo
- ; YAML-NEXT: Args:
- ; YAML-NEXT:   - String:          'Cannot SLP vectorize list: vectorization was impossible'
- ; YAML-NEXT:   - String:          ' with available vectorization factors'
+  ; YAML:      Pass:            slp-vectorizer
+  ; YAML-NEXT: Name:            NotPossible
+  ; YAML-NEXT: Function:        foo
+  ; YAML-NEXT: Args:
+  ; YAML-NEXT:   - String:          'Cannot SLP vectorize list: vectorization was impossible'
+  ; YAML-NEXT:   - String:          ' with available vectorization factors'
  
    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    %exitcond = icmp eq i64 %indvars.iv.next, 8
diff --git a/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll b/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll

index d78d122cef7661f9f424bffded2669dc30aa4f27..a134aec00bbbcaba13a5cc175558903d97aeab57 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll
+++ b/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll
@@ -1,9 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
  ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
  
  ; This type is not supported by SLP
  define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
-
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I1_0:%.*]] = load x86_fp80, x86_fp80* [[I1:%.*]], align 16
+; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr x86_fp80, x86_fp80* [[I1]], i64 1
+; CHECK-NEXT:    [[I1_1:%.*]] = load x86_fp80, x86_fp80* [[I1_GEP1]], align 16
+; CHECK-NEXT:    br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[I2_GEP0:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[I2:%.*]], i64 0
+; CHECK-NEXT:    [[I2_0:%.*]] = load x86_fp80, x86_fp80* [[I2_GEP0]], align 16
+; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[I2]], i64 1
+; CHECK-NEXT:    [[I2_1:%.*]] = load x86_fp80, x86_fp80* [[I2_GEP1]], align 16
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI0:%.*]] = phi x86_fp80 [ [[I1_0]], [[ENTRY:%.*]] ], [ [[I2_0]], [[THEN]] ]
+; CHECK-NEXT:    [[PHI1:%.*]] = phi x86_fp80 [ [[I1_1]], [[ENTRY]] ], [ [[I2_1]], [[THEN]] ]
+; CHECK-NEXT:    store x86_fp80 [[PHI0]], x86_fp80* [[O:%.*]], align 16
+; CHECK-NEXT:    [[O_GEP1:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[O]], i64 1
+; CHECK-NEXT:    store x86_fp80 [[PHI1]], x86_fp80* [[O_GEP1]], align 16
+; CHECK-NEXT:    ret void
+;
  entry:
    %i1.0 = load x86_fp80, x86_fp80* %i1, align 16
    %i1.gep1 = getelementptr x86_fp80, x86_fp80* %i1, i64 1
@@ -22,13 +42,12 @@ end:
    store x86_fp80 %phi0, x86_fp80* %o, align 16
    %o.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %o, i64 1
    store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16
- ; CHECK-NOT: <{{[0-9]+}} x x86_fp80>
- ; YAML:      Pass:            slp-vectorizer
- ; YAML-NEXT: Name:            UnsupportedType
- ; YAML-NEXT: Function:        test
- ; YAML-NEXT: Args:
- ; YAML-NEXT:   - String:          'Cannot SLP vectorize list: type '
- ; YAML-NEXT:   - String:          x86_fp80 is unsupported by vectorizer
+  ; YAML:      Pass:            slp-vectorizer
+  ; YAML-NEXT: Name:            UnsupportedType
+  ; YAML-NEXT: Function:        test
+  ; YAML-NEXT: Args:
+  ; YAML-NEXT:   - String:          'Cannot SLP vectorize list: type '
+  ; YAML-NEXT:   - String:          x86_fp80 is unsupported by vectorizer
  
    ret void
  }
diff --git a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll

index 0bdb7dab172684ae15d69427bf4b819e7e9cd325..c7e419b5f562cf18ad89f0cf074cf66c2ddb1078 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
+++ b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
@@ -19,19 +20,46 @@ target triple = "i386-apple-macosx10.9.0"
  ;   return R+G+B;
  ; }
  
-;CHECK-LABEL: @foo(
-;CHECK: br
-;CHECK-NOT: phi <3 x float>
-;CHECK-NOT: fmul <3 x float>
-;CHECK-NOT: fadd <3 x float>
-; At the moment we don't sink extractelements.
-;CHECK: br
-;CHECK-NOT: extractelement
-;CHECK-NOT: extractelement
-;CHECK-NOT: extractelement
-;CHECK: ret
-
  define float @foo(float* nocapture readonly %A) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[A:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi float [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
+; CHECK-NEXT:    [[B_032:%.*]] = phi float [ [[TMP2]], [[ENTRY]] ], [ [[ADD14:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
+; CHECK-NEXT:    [[G_031:%.*]] = phi float [ [[TMP1]], [[ENTRY]] ], [ [[ADD9:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
+; CHECK-NEXT:    [[R_030:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD4:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP3]], 7.000000e+00
+; CHECK-NEXT:    [[ADD4]] = fadd float [[R_030]], [[MUL]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[MUL8:%.*]] = fmul float [[TMP5]], 8.000000e+00
+; CHECK-NEXT:    [[ADD9]] = fadd float [[G_031]], [[MUL8]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4
+; CHECK-NEXT:    [[MUL13:%.*]] = fmul float [[TMP7]], 9.000000e+00
+; CHECK-NEXT:    [[ADD14]] = fadd float [[B_032]], [[MUL13]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP8]], 121
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]], label [[FOR_END:%.*]]
+; CHECK:       for.body.for.body_crit_edge:
+; CHECK-NEXT:    [[ARRAYIDX3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[DOTPRE]] = load float, float* [[ARRAYIDX3_PHI_TRANS_INSERT]], align 4
+; CHECK-NEXT:    br label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[ADD16:%.*]] = fadd float [[ADD4]], [[ADD9]]
+; CHECK-NEXT:    [[ADD17:%.*]] = fadd float [[ADD16]], [[ADD14]]
+; CHECK-NEXT:    ret float [[ADD17]]
+;
  entry:
    %0 = load float, float* %A, align 4
    %arrayidx1 = getelementptr inbounds float, float* %A, i64 1
diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll

index a9ca093c0cdf8cc4ca8dfbec595d08d73c398b2c..f2f858e3c7dd6bebefe3b363cd353f5c46a7eb58 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -1,14 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
  ; SLP vectorization example from http://cs.stanford.edu/people/eschkufz/research/asplos291-schkufza.pdf
-;CHECK: SAXPY
-;CHECK: mul nsw <4 x i32>
-;CHECK: ret
-
  define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, i64 %i) {
+; CHECK-LABEL: @SAXPY(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4
+; CHECK-NEXT:    ret void
+;
    %1 = getelementptr inbounds i32, i32* %x, i64 %i
    %2 = load i32, i32* %1, align 4
    %3 = mul nsw i32 %2, %a
@@ -45,6 +59,21 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a,
  
  ; Make sure we don't crash on this one.
  define void @SAXPY_crash(i32* noalias nocapture %x, i32* noalias nocapture %y, i64 %i) {
+; CHECK-LABEL: @SAXPY_crash(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[I:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 undef, [[TMP4]]
+; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[I]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 undef, [[TMP9]]
+; CHECK-NEXT:    store i32 [[TMP10]], i32* [[TMP7]], align 4
+; CHECK-NEXT:    ret void
+;
    %1 = add i64 %i, 1
    %2 = getelementptr inbounds i32, i32* %x, i64 %1
    %3 = getelementptr inbounds i32, i32* %y, i64 %1
diff --git a/test/Transforms/SLPVectorizer/X86/schedule_budget.ll b/test/Transforms/SLPVectorizer/X86/schedule_budget.ll

index 2cb2373381c7f79a5ba502d6e143da1ee8695e1e..0cd08f0e8c338eca210a309d93d1bbfcb3575da9 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
+++ b/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -S  -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -8,19 +9,63 @@ target triple = "x86_64-apple-macosx10.9.0"
  
  declare void @unknown()
  
-; CHECK-LABEL: @test
-; CHECK: load float
-; CHECK: load float
-; CHECK: load float
-; CHECK: load float
-; CHECK: call void @unknown
-; CHECK: store float
-; CHECK: store float
-; CHECK: store float
-; CHECK: store float
-; CHECK: load <4 x float>
-; CHECK: store <4 x float>
  define void @test(float * %a, float * %b, float * %c, float * %d) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L0:%.*]] = load float, float* [[A:%.*]]
+; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
+; CHECK-NEXT:    [[L1:%.*]] = load float, float* [[A1]]
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
+; CHECK-NEXT:    [[L2:%.*]] = load float, float* [[A2]]
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
+; CHECK-NEXT:    [[L3:%.*]] = load float, float* [[A3]]
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    store float [[L0]], float* [[B:%.*]]
+; CHECK-NEXT:    [[B1:%.*]] = getelementptr inbounds float, float* [[B]], i64 1
+; CHECK-NEXT:    store float [[L1]], float* [[B1]]
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
+; CHECK-NEXT:    store float [[L2]], float* [[B2]]
+; CHECK-NEXT:    [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
+; CHECK-NEXT:    store float [[L3]], float* [[B3]]
+; CHECK-NEXT:    [[C1:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 1
+; CHECK-NEXT:    [[C2:%.*]] = getelementptr inbounds float, float* [[C]], i64 2
+; CHECK-NEXT:    [[C3:%.*]] = getelementptr inbounds float, float* [[C]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds float, float* [[D:%.*]], i64 1
+; CHECK-NEXT:    [[D2:%.*]] = getelementptr inbounds float, float* [[D]], i64 2
+; CHECK-NEXT:    [[D3:%.*]] = getelementptr inbounds float, float* [[D]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[D]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    ; Don't vectorize these loads.
    %l0 = load float, float* %a
diff --git a/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/test/Transforms/SLPVectorizer/X86/simple-loop.ll

index c9bb88497ac10eb5d2f5583b3646e4fc3c61b8ee..975a1af7576a3ab4514d1c9821e0b5cad67f6ffe 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/simple-loop.ll
+++ b/test/Transforms/SLPVectorizer/X86/simple-loop.ll
@@ -1,10 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
-;CHECK:rollable
  define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i64 %n) {
+; CHECK-LABEL: @rollable(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH:%.*]]
+; CHECK:       .lr.ph:
+; CHECK-NEXT:    [[I_019:%.*]] = phi i64 [ [[TMP10:%.*]], [[DOTLR_PH]] ], [ 0, [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[I_019]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> <i32 7, i32 7, i32 7, i32 7>, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i32> <i32 7, i32 14, i32 21, i32 28>, [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10]] = add i64 [[I_019]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP10]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]]
+; CHECK:       ._crit_edge:
+; CHECK-NEXT:    ret i32 undef
+;
    %1 = icmp eq i64 %n, 0
    br i1 %1, label %._crit_edge, label %.lr.ph
  
@@ -12,7 +32,6 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
    %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
    %2 = shl i64 %i.019, 2
    %3 = getelementptr inbounds i32, i32* %in, i64 %2
-;CHECK:load <4 x i32>
    %4 = load i32, i32* %3, align 4
    %5 = or i64 %2, 1
    %6 = getelementptr inbounds i32, i32* %in, i64 %5
@@ -23,9 +42,7 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
    %11 = or i64 %2, 3
    %12 = getelementptr inbounds i32, i32* %in, i64 %11
    %13 = load i32, i32* %12, align 4
-;CHECK:mul <4 x i32>
    %14 = mul i32 %4, 7
-;CHECK:add <4 x i32>
    %15 = add i32 %14, 7
    %16 = mul i32 %7, 7
    %17 = add i32 %16, 14
@@ -34,7 +51,6 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
    %20 = mul i32 %13, 7
    %21 = add i32 %20, 28
    %22 = getelementptr inbounds i32, i32* %out, i64 %2
-;CHECK:store <4 x i32>
    store i32 %15, i32* %22, align 4
    %23 = getelementptr inbounds i32, i32* %out, i64 %5
    store i32 %17, i32* %23, align 4
@@ -47,14 +63,50 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
    br i1 %exitcond, label %._crit_edge, label %.lr.ph
  
  ._crit_edge:                                      ; preds = %.lr.ph, %0
-;CHECK: ret
    ret i32 undef
  }
  
-;CHECK:unrollable
-;CHECK-NOT: <4 x i32>
-;CHECK: ret
  define i32 @unrollable(i32* %in, i32* %out, i64 %n) nounwind ssp uwtable {
+; CHECK-LABEL: @unrollable(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH:%.*]]
+; CHECK:       .lr.ph:
+; CHECK-NEXT:    [[I_019:%.*]] = phi i64 [ [[TMP26:%.*]], [[DOTLR_PH]] ], [ 0, [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[I_019]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP2]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP2]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP4]], 7
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], 7
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP7]], 7
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 14
+; CHECK-NEXT:    [[TMP18:%.*]] = mul i32 [[TMP10]], 7
+; CHECK-NEXT:    [[TMP19:%.*]] = add i32 [[TMP18]], 21
+; CHECK-NEXT:    [[TMP20:%.*]] = mul i32 [[TMP13]], 7
+; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TMP20]], 28
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    store i32 [[TMP15]], i32* [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP5]]
+; CHECK-NEXT:    store i32 [[TMP17]], i32* [[TMP23]], align 4
+; CHECK-NEXT:    [[BARRIER:%.*]] = call i32 @goo(i32 0)
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP8]]
+; CHECK-NEXT:    store i32 [[TMP19]], i32* [[TMP24]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP11]]
+; CHECK-NEXT:    store i32 [[TMP21]], i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP26]] = add i64 [[I_019]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP26]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]]
+; CHECK:       ._crit_edge:
+; CHECK-NEXT:    ret i32 undef
+;
    %1 = icmp eq i64 %n, 0
    br i1 %1, label %._crit_edge, label %.lr.ph
  
diff --git a/test/Transforms/SLPVectorizer/X86/unreachable.ll b/test/Transforms/SLPVectorizer/X86/unreachable.ll

index f29f69d7e82f281d1f4d7196f0446aa6d0fa5358..cc1f1fca8cac6cda3a3173959564807f2eee3006 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/unreachable.ll
+++ b/test/Transforms/SLPVectorizer/X86/unreachable.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
  
  ; Check if the SLPVectorizer does not crash when handling
  ; unreachable blocks with unscheduleable instructions.
@@ -7,6 +8,34 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.9.0"
  
  define void @foo(i32* nocapture %x) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 4
+; CHECK-NEXT:    [[T4:%.*]] = load i32, i32* [[T3]], align 4
+; CHECK-NEXT:    [[T5:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 5
+; CHECK-NEXT:    [[T6:%.*]] = load i32, i32* [[T5]], align 4
+; CHECK-NEXT:    [[BAD:%.*]] = fadd float [[BAD]], 0.000000e+00
+; CHECK-NEXT:    [[T7:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 6
+; CHECK-NEXT:    [[T8:%.*]] = load i32, i32* [[T7]], align 4
+; CHECK-NEXT:    [[T9:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 7
+; CHECK-NEXT:    [[T10:%.*]] = load i32, i32* [[T9]], align 4
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[T1_0:%.*]] = phi i32 [ [[T4]], [[BB1:%.*]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[T2_0:%.*]] = phi i32 [ [[T6]], [[BB1]] ], [ 2, [[ENTRY]] ]
+; CHECK-NEXT:    [[T3_0:%.*]] = phi i32 [ [[T8]], [[BB1]] ], [ 2, [[ENTRY]] ]
+; CHECK-NEXT:    [[T4_0:%.*]] = phi i32 [ [[T10]], [[BB1]] ], [ 2, [[ENTRY]] ]
+; CHECK-NEXT:    store i32 [[T1_0]], i32* [[X]], align 4
+; CHECK-NEXT:    [[T12:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    store i32 [[T2_0]], i32* [[T12]], align 4
+; CHECK-NEXT:    [[T13:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
+; CHECK-NEXT:    store i32 [[T3_0]], i32* [[T13]], align 4
+; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
+; CHECK-NEXT:    store i32 [[T4_0]], i32* [[T14]], align 4
+; CHECK-NEXT:    ret void
+;
  entry:
    br label %bb2
  
@@ -16,7 +45,7 @@ bb1:                                    ; an unreachable block
    %t5 = getelementptr inbounds i32, i32* %x, i64 5
    %t6 = load i32, i32* %t5, align 4
    %bad = fadd float %bad, 0.000000e+00  ; <- an instruction with self dependency,
-                                        ;    but legal in unreachable code
+  ;    but legal in unreachable code
    %t7 = getelementptr inbounds i32, i32* %x, i64 6
    %t8 = load i32, i32* %t7, align 4
    %t9 = getelementptr inbounds i32, i32* %x, i64 7
diff --git a/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/test/Transforms/SLPVectorizer/X86/vector_gep.ll

index 595a77f0cfbf412520a9a0a5afb69b9ab77cba3e..436f091d7242433fe9b0a09012d685d6f13950cb 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/vector_gep.ll
+++ b/test/Transforms/SLPVectorizer/X86/vector_gep.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ;RUN: opt < %s -slp-vectorizer -S | FileCheck %s
  
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -8,9 +9,14 @@ target triple = "x86_64-unknown-linux-gnu"
  
  ; Function Attrs: noreturn readonly uwtable
  define void @_Z3fn1v(i32 %x, <16 x i32*>%y) local_unnamed_addr #0 {
-; CHECK-LABEL: _Z3fn1v
-; CHECK: getelementptr i32, <16 x i32*>
-; CHECK: getelementptr i32, <16 x i32*>
+; CHECK-LABEL: @_Z3fn1v(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV42_LE:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[CONV36109_LE:%.*]] = zext i32 2 to i64
+; CHECK-NEXT:    [[VECTORGEP:%.*]] = getelementptr i32, <16 x i32*> [[Y:%.*]], i64 [[CONV36109_LE]]
+; CHECK-NEXT:    [[VECTORGEP208:%.*]] = getelementptr i32, <16 x i32*> [[Y]], i64 [[CONV42_LE]]
+; CHECK-NEXT:    unreachable
+;
  
  entry:
    %conv42.le = sext i32 %x to i64
diff --git a/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll b/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll

index efd5386f520e0c7b49068bd6dd227b6bad0966db..d84a4993ec304fdf44c062fbaced7cd15ae40c9e 100644 (file)
--- a/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
+++ b/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
@@ -1,12 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=xcore  | FileCheck %s
  
  target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
  target triple = "xcore"
  
  ; Simple 3-pair chain with loads and stores
-; CHECK: test1
-; CHECK-NOT: <2 x double>
  define void @test1(double* %a, double* %b, double* %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I0:%.*]] = load double, double* [[A:%.*]], align 8
+; CHECK-NEXT:    [[I1:%.*]] = load double, double* [[B:%.*]], align 8
+; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[I0]], [[I1]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
+; CHECK-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
+; CHECK-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[MUL5:%.*]] = fmul double [[I3]], [[I4]]
+; CHECK-NEXT:    store double [[MUL]], double* [[C:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C]], i64 1
+; CHECK-NEXT:    store double [[MUL5]], double* [[ARRAYIDX5]], align 8
+; CHECK-NEXT:    ret void
+;
  entry:
    %i0 = load double, double* %a, align 8
    %i1 = load double, double* %b, align 8
diff --git a/test/Transforms/SLPVectorizer/int_sideeffect.ll b/test/Transforms/SLPVectorizer/int_sideeffect.ll

index a6123c11d92569337a17a6145dd5cb7aadbbdb8d..aab3f028f8d165dc506672461661726904b553d6 100644 (file)
--- a/test/Transforms/SLPVectorizer/int_sideeffect.ll
+++ b/test/Transforms/SLPVectorizer/int_sideeffect.ll
@@ -1,25 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -S < %s -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=128 | FileCheck %s
  
  declare void @llvm.sideeffect()
  
  ; SLP vectorization across a @llvm.sideeffect.
  
-; CHECK-LABEL: test
-; CHECK: store <4 x float>
  define void @test(float* %p) {
-    %p0 = getelementptr float, float* %p, i64 0
-    %p1 = getelementptr float, float* %p, i64 1
-    %p2 = getelementptr float, float* %p, i64 2
-    %p3 = getelementptr float, float* %p, i64 3
-    %l0 = load float, float* %p0
-    %l1 = load float, float* %p1
-    %l2 = load float, float* %p2
-    call void @llvm.sideeffect()
-    %l3 = load float, float* %p3
-    store float %l0, float* %p0
-    call void @llvm.sideeffect()
-    store float %l1, float* %p1
-    store float %l2, float* %p2
-    store float %l3, float* %p3
-    ret void
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr float, float* [[P]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr float, float* [[P]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr float, float* [[P]], i64 3
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr float, float* %p, i64 0
+  %p1 = getelementptr float, float* %p, i64 1
+  %p2 = getelementptr float, float* %p, i64 2
+  %p3 = getelementptr float, float* %p, i64 3
+  %l0 = load float, float* %p0
+  %l1 = load float, float* %p1
+  %l2 = load float, float* %p2
+  call void @llvm.sideeffect()
+  %l3 = load float, float* %p3
+  store float %l0, float* %p0
+  call void @llvm.sideeffect()
+  store float %l1, float* %p1
+  store float %l2, float* %p2
+  store float %l3, float* %p3
+  ret void
  }
author	Alexey Bataev <a.bataev@hotmail.com>
	Fri, 11 Jan 2019 20:21:14 +0000 (20:21 +0000)
committer	Alexey Bataev <a.bataev@hotmail.com>
	Fri, 11 Jan 2019 20:21:14 +0000 (20:21 +0000)
test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/commute.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/gather-cost.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/getelementptr.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/horizontal.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/nontemporal.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/ARM/memory.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/ARM/sroa.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/NVPTX/v2f16.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/PowerPC/pr27897.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/align.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/atomics.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/bad_types.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/barriercall.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/call.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/commutativity.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/consecutive-access.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_7zip.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_binaryop.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_bullet.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_bullet3.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_cmpop.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_dequeue.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_flop7.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_gep.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_lencod.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_scheduling.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_smallpt.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/cross_block_slp.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/cycle_dup.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/debug_info.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/diamond.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/external_user.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/extractcost.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/flag.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/gep.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/gep_mismatch.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/implicitfloat.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/intrinsic.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/long_chains.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/loopinvariant.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/metadata.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/multi_block.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/multi_user.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/operandorder.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/opt.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/ordering.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/phi.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/phi3.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/phi_landingpad.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/pr16628.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/pr16899.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/pr18060.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/pr23510.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/pr27163.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/reduction.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/reduction2.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/remark_horcost.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/remark_listcost.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/remark_unsupported.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/rgb_phi.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/saxpy.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/schedule_budget.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/simple-loop.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/unreachable.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/vector_gep.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/int_sideeffect.ll		patch \| blob \| history