From: Alexey Bataev Date: Tue, 12 Sep 2017 16:15:04 +0000 (+0000) Subject: [SLP] Test with mutiple uses of conditional op and wrong parent. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fb7fead2f10c060e7c8a56ad915cf3f034d9d730;p=llvm [SLP] Test with mutiple uses of conditional op and wrong parent. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313042 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index df6205065f3..2f1bac07a09 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -2,10 +2,11 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S | FileCheck %s --check-prefix=SKX +; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefix=SKX @arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16 @arr1 = local_unnamed_addr global [32 x float] zeroinitializer, align 16 +@var = global i32 zeroinitializer, align 8 define i32 @maxi8(i32) { ; CHECK-LABEL: @maxi8( @@ -35,45 +36,111 @@ define i32 @maxi8(i32) { ; ; AVX-LABEL: @maxi8( ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; AVX: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> -; AVX-NEXT: [[TMP24:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[BIN_RDX:%.*]] = select <8 x i1> [[TMP24]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; AVX-NEXT: [[TMP25:%.*]] = icmp sgt <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX-NEXT: [[BIN_RDX2:%.*]] = select <8 x i1> [[TMP25]], <8 x i32> [[BIN_RDX]], <8 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; AVX-NEXT: [[TMP26:%.*]] = icmp sgt <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX-NEXT: [[BIN_RDX4:%.*]] = select <8 x i1> [[TMP26]], <8 x i32> [[BIN_RDX2]], <8 x i32> [[RDX_SHUF3]] -; AVX-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; AVX: ret i32 [[TMP27]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 +; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 +; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; AVX-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 +; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; AVX-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] +; AVX-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; AVX-NEXT: ret i32 [[TMP24]] ; ; AVX2-LABEL: @maxi8( ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; AVX2: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> -; AVX2-NEXT: [[TMP24:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[BIN_RDX:%.*]] = select <8 x i1> [[TMP24]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX2-NEXT: [[BIN_RDX2:%.*]] = select <8 x i1> [[TMP25]], <8 x i32> [[BIN_RDX]], <8 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX2-NEXT: [[BIN_RDX4:%.*]] = select <8 x i1> [[TMP26]], <8 x i32> [[BIN_RDX2]], <8 x i32> [[RDX_SHUF3]] -; AVX2-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; AVX2: ret i32 [[TMP27]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 +; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 +; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; AVX2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 +; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 +; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; AVX2-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 +; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; AVX2-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] +; AVX2-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; AVX2-NEXT: ret i32 [[TMP24]] ; ; SKX-LABEL: @maxi8( ; SKX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; SKX: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> -; SKX-NEXT: [[TMP24:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[BIN_RDX:%.*]] = select <8 x i1> [[TMP24]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; SKX-NEXT: [[BIN_RDX2:%.*]] = select <8 x i1> [[TMP25]], <8 x i32> [[BIN_RDX]], <8 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; SKX-NEXT: [[TMP26:%.*]] = icmp sgt <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; SKX-NEXT: [[BIN_RDX4:%.*]] = select <8 x i1> [[TMP26]], <8 x i32> [[BIN_RDX2]], <8 x i32> [[RDX_SHUF3]] -; SKX-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; SKX: ret i32 [[TMP27]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 +; SKX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 +; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; SKX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 +; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; SKX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 +; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; SKX-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 +; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 +; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; SKX-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 +; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; SKX-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 +; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] +; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] +; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] +; SKX-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; SKX-NEXT: ret i32 [[TMP24]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -152,54 +219,192 @@ define i32 @maxi16(i32) { ; ; AVX-LABEL: @maxi16( ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; AVX: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> -; AVX-NEXT: [[TMP48:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[BIN_RDX:%.*]] = select <16 x i1> [[TMP48]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> -; AVX-NEXT: [[TMP49:%.*]] = icmp sgt <16 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX-NEXT: [[BIN_RDX2:%.*]] = select <16 x i1> [[TMP49]], <16 x i32> [[BIN_RDX]], <16 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x i32> [[BIN_RDX2]], <16 x i32> undef, <16 x i32> -; AVX-NEXT: [[TMP50:%.*]] = icmp sgt <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX-NEXT: [[BIN_RDX4:%.*]] = select <16 x i1> [[TMP50]], <16 x i32> [[BIN_RDX2]], <16 x i32> [[RDX_SHUF3]] -; AVX-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> -; AVX-NEXT: [[TMP51:%.*]] = icmp sgt <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; AVX-NEXT: [[BIN_RDX6:%.*]] = select <16 x i1> [[TMP51]], <16 x i32> [[BIN_RDX4]], <16 x i32> [[RDX_SHUF5]] -; AVX-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 -; AVX: ret i32 [[TMP52]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 +; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 +; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 +; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; AVX-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; AVX-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 +; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef +; AVX-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 +; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef +; AVX-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 +; AVX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef +; AVX-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 +; AVX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef +; AVX-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 +; AVX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] +; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef +; AVX-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i32 13 +; AVX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef +; AVX-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 +; AVX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef +; AVX-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 +; AVX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> undef, <16 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> undef, <16 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] +; AVX-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef +; AVX-NEXT: ret i32 [[TMP48]] ; ; AVX2-LABEL: @maxi16( ; AVX2-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; AVX2: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> -; AVX2-NEXT: [[TMP48:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[BIN_RDX:%.*]] = select <16 x i1> [[TMP48]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> -; AVX2-NEXT: [[TMP49:%.*]] = icmp sgt <16 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX2-NEXT: [[BIN_RDX2:%.*]] = select <16 x i1> [[TMP49]], <16 x i32> [[BIN_RDX]], <16 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x i32> [[BIN_RDX2]], <16 x i32> undef, <16 x i32> -; AVX2-NEXT: [[TMP50:%.*]] = icmp sgt <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX2-NEXT: [[BIN_RDX4:%.*]] = select <16 x i1> [[TMP50]], <16 x i32> [[BIN_RDX2]], <16 x i32> [[RDX_SHUF3]] -; AVX2-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> -; AVX2-NEXT: [[TMP51:%.*]] = icmp sgt <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; AVX2-NEXT: [[BIN_RDX6:%.*]] = select <16 x i1> [[TMP51]], <16 x i32> [[BIN_RDX4]], <16 x i32> [[RDX_SHUF5]] -; AVX2-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 -; AVX2: ret i32 [[TMP52]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 +; AVX2-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 +; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; AVX2-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 +; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; AVX2-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 +; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; AVX2-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 +; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; AVX2-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; AVX2-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 +; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef +; AVX2-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 +; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef +; AVX2-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 +; AVX2-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef +; AVX2-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 +; AVX2-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef +; AVX2-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 +; AVX2-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] +; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef +; AVX2-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i32 13 +; AVX2-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef +; AVX2-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 +; AVX2-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef +; AVX2-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 +; AVX2-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> undef, <16 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> undef, <16 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] +; AVX2-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX2-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef +; AVX2-NEXT: ret i32 [[TMP48]] ; ; SKX-LABEL: @maxi16( ; SKX-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; SKX: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> -; SKX-NEXT: [[TMP48:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[BIN_RDX:%.*]] = select <16 x i1> [[TMP48]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> -; SKX-NEXT: [[TMP49:%.*]] = icmp sgt <16 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; SKX-NEXT: [[BIN_RDX2:%.*]] = select <16 x i1> [[TMP49]], <16 x i32> [[BIN_RDX]], <16 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x i32> [[BIN_RDX2]], <16 x i32> undef, <16 x i32> -; SKX-NEXT: [[TMP50:%.*]] = icmp sgt <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; SKX-NEXT: [[BIN_RDX4:%.*]] = select <16 x i1> [[TMP50]], <16 x i32> [[BIN_RDX2]], <16 x i32> [[RDX_SHUF3]] -; SKX-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> -; SKX-NEXT: [[TMP51:%.*]] = icmp sgt <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; SKX-NEXT: [[BIN_RDX6:%.*]] = select <16 x i1> [[TMP51]], <16 x i32> [[BIN_RDX4]], <16 x i32> [[RDX_SHUF5]] -; SKX-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 -; SKX: ret i32 [[TMP52]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 +; SKX-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 +; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; SKX-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 +; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; SKX-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 +; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; SKX-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 +; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; SKX-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 +; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; SKX-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 +; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; SKX-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 +; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; SKX-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 +; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef +; SKX-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 +; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef +; SKX-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 +; SKX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef +; SKX-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 +; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef +; SKX-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 +; SKX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] +; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef +; SKX-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i32 13 +; SKX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef +; SKX-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 +; SKX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef +; SKX-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 +; SKX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] +; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] +; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> undef, <16 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> undef, <16 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] +; SKX-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; SKX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef +; SKX-NEXT: ret i32 [[TMP48]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -253,83 +458,459 @@ define i32 @maxi16(i32) { define i32 @maxi32(i32) { ; CHECK-LABEL: @maxi32( ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; CHECK: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> -; CHECK-NEXT: [[TMP96:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] -; CHECK-NEXT: [[BIN_RDX:%.*]] = select <32 x i1> [[TMP96]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32> -; CHECK-NEXT: [[TMP97:%.*]] = icmp sgt <32 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[BIN_RDX2:%.*]] = select <32 x i1> [[TMP97]], <32 x i32> [[BIN_RDX]], <32 x i32> [[RDX_SHUF1]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x i32> [[BIN_RDX2]], <32 x i32> undef, <32 x i32> -; CHECK-NEXT: [[TMP98:%.*]] = icmp sgt <32 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; CHECK-NEXT: [[BIN_RDX4:%.*]] = select <32 x i1> [[TMP98]], <32 x i32> [[BIN_RDX2]], <32 x i32> [[RDX_SHUF3]] -; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x i32> [[BIN_RDX4]], <32 x i32> undef, <32 x i32> -; CHECK-NEXT: [[TMP99:%.*]] = icmp sgt <32 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; CHECK-NEXT: [[BIN_RDX6:%.*]] = select <32 x i1> [[TMP99]], <32 x i32> [[BIN_RDX4]], <32 x i32> [[RDX_SHUF5]] -; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = icmp sgt <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]] -; CHECK-NEXT: [[BIN_RDX8:%.*]] = select <32 x i1> [[TMP100]], <32 x i32> [[BIN_RDX6]], <32 x i32> [[RDX_SHUF7]] -; CHECK-NEXT: [[TMP101:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0 -; CHECK: ret i32 [[TMP101]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 +; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 +; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 +; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 +; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 +; CHECK-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 +; CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 +; CHECK-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 +; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 +; CHECK-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 +; CHECK-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 +; CHECK-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 +; CHECK-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 +; CHECK-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 +; CHECK-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 +; CHECK-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 +; CHECK-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 +; CHECK-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] +; CHECK-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 +; CHECK-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] +; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 +; CHECK-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] +; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 +; CHECK-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 +; CHECK-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] +; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 +; CHECK-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 +; CHECK-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] +; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 +; CHECK-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 +; CHECK-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] +; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 +; CHECK-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> undef, <32 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> undef, <32 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> undef, <32 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> [[RDX_SHUF7]] +; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] +; CHECK-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; CHECK-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef +; CHECK-NEXT: ret i32 [[TMP96]] ; ; AVX-LABEL: @maxi32( ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; AVX: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[TMP96:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[BIN_RDX:%.*]] = select <32 x i1> [[TMP96]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[TMP97:%.*]] = icmp sgt <32 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX-NEXT: [[BIN_RDX2:%.*]] = select <32 x i1> [[TMP97]], <32 x i32> [[BIN_RDX]], <32 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x i32> [[BIN_RDX2]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[TMP98:%.*]] = icmp sgt <32 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX-NEXT: [[BIN_RDX4:%.*]] = select <32 x i1> [[TMP98]], <32 x i32> [[BIN_RDX2]], <32 x i32> [[RDX_SHUF3]] -; AVX-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x i32> [[BIN_RDX4]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[TMP99:%.*]] = icmp sgt <32 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; AVX-NEXT: [[BIN_RDX6:%.*]] = select <32 x i1> [[TMP99]], <32 x i32> [[BIN_RDX4]], <32 x i32> [[RDX_SHUF5]] -; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[TMP100:%.*]] = icmp sgt <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]] -; AVX-NEXT: [[BIN_RDX8:%.*]] = select <32 x i1> [[TMP100]], <32 x i32> [[BIN_RDX6]], <32 x i32> [[RDX_SHUF7]] -; AVX-NEXT: [[TMP101:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0 -; AVX: ret i32 [[TMP101]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; AVX-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 +; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; AVX-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 +; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; AVX-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 +; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; AVX-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; AVX-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 +; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef +; AVX-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 +; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef +; AVX-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 +; AVX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef +; AVX-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 +; AVX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef +; AVX-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 +; AVX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] +; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef +; AVX-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 +; AVX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef +; AVX-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 +; AVX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef +; AVX-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 +; AVX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef +; AVX-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 +; AVX-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +; AVX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef +; AVX-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 +; AVX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] +; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef +; AVX-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 +; AVX-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] +; AVX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef +; AVX-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 +; AVX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] +; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef +; AVX-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 +; AVX-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] +; AVX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef +; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 +; AVX-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] +; AVX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef +; AVX-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 +; AVX-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] +; AVX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef +; AVX-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 +; AVX-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] +; AVX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef +; AVX-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 +; AVX-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] +; AVX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef +; AVX-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 +; AVX-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] +; AVX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef +; AVX-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 +; AVX-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] +; AVX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef +; AVX-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 +; AVX-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] +; AVX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef +; AVX-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 +; AVX-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] +; AVX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef +; AVX-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 +; AVX-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] +; AVX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef +; AVX-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 +; AVX-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] +; AVX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef +; AVX-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 +; AVX-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> [[RDX_SHUF7]] +; AVX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] +; AVX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] +; AVX-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef +; AVX-NEXT: ret i32 [[TMP96]] ; ; AVX2-LABEL: @maxi32( ; AVX2-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; AVX2: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[TMP96:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[BIN_RDX:%.*]] = select <32 x i1> [[TMP96]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[TMP97:%.*]] = icmp sgt <32 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX2-NEXT: [[BIN_RDX2:%.*]] = select <32 x i1> [[TMP97]], <32 x i32> [[BIN_RDX]], <32 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x i32> [[BIN_RDX2]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[TMP98:%.*]] = icmp sgt <32 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX2-NEXT: [[BIN_RDX4:%.*]] = select <32 x i1> [[TMP98]], <32 x i32> [[BIN_RDX2]], <32 x i32> [[RDX_SHUF3]] -; AVX2-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x i32> [[BIN_RDX4]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[TMP99:%.*]] = icmp sgt <32 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; AVX2-NEXT: [[BIN_RDX6:%.*]] = select <32 x i1> [[TMP99]], <32 x i32> [[BIN_RDX4]], <32 x i32> [[RDX_SHUF5]] -; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[TMP100:%.*]] = icmp sgt <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]] -; AVX2-NEXT: [[BIN_RDX8:%.*]] = select <32 x i1> [[TMP100]], <32 x i32> [[BIN_RDX6]], <32 x i32> [[RDX_SHUF7]] -; AVX2-NEXT: [[TMP101:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0 -; AVX2: ret i32 [[TMP101]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 +; AVX2-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 +; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; AVX2-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 +; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; AVX2-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 +; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; AVX2-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 +; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; AVX2-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; AVX2-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 +; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef +; AVX2-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 +; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef +; AVX2-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 +; AVX2-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef +; AVX2-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 +; AVX2-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef +; AVX2-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 +; AVX2-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] +; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef +; AVX2-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 +; AVX2-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef +; AVX2-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 +; AVX2-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef +; AVX2-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 +; AVX2-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef +; AVX2-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 +; AVX2-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +; AVX2-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef +; AVX2-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 +; AVX2-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] +; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef +; AVX2-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 +; AVX2-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] +; AVX2-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef +; AVX2-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 +; AVX2-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] +; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef +; AVX2-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 +; AVX2-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] +; AVX2-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef +; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 +; AVX2-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] +; AVX2-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef +; AVX2-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 +; AVX2-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] +; AVX2-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef +; AVX2-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 +; AVX2-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] +; AVX2-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef +; AVX2-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 +; AVX2-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] +; AVX2-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef +; AVX2-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 +; AVX2-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] +; AVX2-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef +; AVX2-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 +; AVX2-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] +; AVX2-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef +; AVX2-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 +; AVX2-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] +; AVX2-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef +; AVX2-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 +; AVX2-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] +; AVX2-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef +; AVX2-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 +; AVX2-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] +; AVX2-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef +; AVX2-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 +; AVX2-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] +; AVX2-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef +; AVX2-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 +; AVX2-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] +; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> [[RDX_SHUF7]] +; AVX2-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] +; AVX2-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX2-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef +; AVX2-NEXT: ret i32 [[TMP96]] ; ; SKX-LABEL: @maxi32( ; SKX-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; SKX: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[TMP96:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[BIN_RDX:%.*]] = select <32 x i1> [[TMP96]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[TMP97:%.*]] = icmp sgt <32 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; SKX-NEXT: [[BIN_RDX2:%.*]] = select <32 x i1> [[TMP97]], <32 x i32> [[BIN_RDX]], <32 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x i32> [[BIN_RDX2]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[TMP98:%.*]] = icmp sgt <32 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; SKX-NEXT: [[BIN_RDX4:%.*]] = select <32 x i1> [[TMP98]], <32 x i32> [[BIN_RDX2]], <32 x i32> [[RDX_SHUF3]] -; SKX-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x i32> [[BIN_RDX4]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[TMP99:%.*]] = icmp sgt <32 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; SKX-NEXT: [[BIN_RDX6:%.*]] = select <32 x i1> [[TMP99]], <32 x i32> [[BIN_RDX4]], <32 x i32> [[RDX_SHUF5]] -; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[TMP100:%.*]] = icmp sgt <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]] -; SKX-NEXT: [[BIN_RDX8:%.*]] = select <32 x i1> [[TMP100]], <32 x i32> [[BIN_RDX6]], <32 x i32> [[RDX_SHUF7]] -; SKX-NEXT: [[TMP101:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0 -; SKX: ret i32 [[TMP101]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 +; SKX-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 +; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; SKX-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 +; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; SKX-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 +; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; SKX-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 +; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; SKX-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 +; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; SKX-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 +; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef +; SKX-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 +; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef +; SKX-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 +; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef +; SKX-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 +; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef +; SKX-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 +; SKX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef +; SKX-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 +; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef +; SKX-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 +; SKX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] +; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef +; SKX-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 +; SKX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef +; SKX-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 +; SKX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef +; SKX-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 +; SKX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef +; SKX-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 +; SKX-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +; SKX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef +; SKX-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 +; SKX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] +; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef +; SKX-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 +; SKX-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] +; SKX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef +; SKX-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 +; SKX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] +; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef +; SKX-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 +; SKX-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] +; SKX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef +; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 +; SKX-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] +; SKX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef +; SKX-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 +; SKX-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] +; SKX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef +; SKX-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 +; SKX-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] +; SKX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef +; SKX-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 +; SKX-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] +; SKX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef +; SKX-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 +; SKX-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] +; SKX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef +; SKX-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 +; SKX-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] +; SKX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef +; SKX-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 +; SKX-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] +; SKX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef +; SKX-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 +; SKX-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] +; SKX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef +; SKX-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 +; SKX-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] +; SKX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef +; SKX-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 +; SKX-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] +; SKX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef +; SKX-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 +; SKX-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] +; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] +; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] +; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> [[RDX_SHUF7]] +; SKX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] +; SKX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] +; SKX-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; SKX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef +; SKX-NEXT: ret i32 [[TMP96]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -456,45 +1037,111 @@ define float @maxf8(float) { ; ; AVX-LABEL: @maxf8( ; AVX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; AVX: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> -; AVX-NEXT: [[TMP24:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[BIN_RDX:%.*]] = select <8 x i1> [[TMP24]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> -; AVX-NEXT: [[TMP25:%.*]] = fcmp fast ogt <8 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX-NEXT: [[BIN_RDX2:%.*]] = select <8 x i1> [[TMP25]], <8 x float> [[BIN_RDX]], <8 x float> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> -; AVX-NEXT: [[TMP26:%.*]] = fcmp fast ogt <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX-NEXT: [[BIN_RDX4:%.*]] = select <8 x i1> [[TMP26]], <8 x float> [[BIN_RDX2]], <8 x float> [[RDX_SHUF3]] -; AVX-NEXT: [[TMP27:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 -; AVX: ret float [[TMP27]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP2]], i32 2 +; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3 +; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; AVX-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4 +; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5 +; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; AVX-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6 +; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; AVX-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7 +; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] +; AVX-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; AVX-NEXT: ret float [[TMP24]] ; ; AVX2-LABEL: @maxf8( ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; AVX2: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> -; AVX2-NEXT: [[TMP24:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[BIN_RDX:%.*]] = select <8 x i1> [[TMP24]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> -; AVX2-NEXT: [[TMP25:%.*]] = fcmp fast ogt <8 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX2-NEXT: [[BIN_RDX2:%.*]] = select <8 x i1> [[TMP25]], <8 x float> [[BIN_RDX]], <8 x float> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> -; AVX2-NEXT: [[TMP26:%.*]] = fcmp fast ogt <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX2-NEXT: [[BIN_RDX4:%.*]] = select <8 x i1> [[TMP26]], <8 x float> [[BIN_RDX2]], <8 x float> [[RDX_SHUF3]] -; AVX2-NEXT: [[TMP27:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 -; AVX2: ret float [[TMP27]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0 +; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1 +; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; AVX2-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP2]], i32 2 +; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3 +; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; AVX2-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4 +; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5 +; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; AVX2-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6 +; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; AVX2-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7 +; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] +; AVX2-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; AVX2-NEXT: ret float [[TMP24]] ; ; SKX-LABEL: @maxf8( ; SKX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; SKX: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> -; SKX-NEXT: [[TMP24:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[BIN_RDX:%.*]] = select <8 x i1> [[TMP24]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> -; SKX-NEXT: [[TMP25:%.*]] = fcmp fast ogt <8 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; SKX-NEXT: [[BIN_RDX2:%.*]] = select <8 x i1> [[TMP25]], <8 x float> [[BIN_RDX]], <8 x float> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> -; SKX-NEXT: [[TMP26:%.*]] = fcmp fast ogt <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; SKX-NEXT: [[BIN_RDX4:%.*]] = select <8 x i1> [[TMP26]], <8 x float> [[BIN_RDX2]], <8 x float> [[RDX_SHUF3]] -; SKX-NEXT: [[TMP27:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 -; SKX: ret float [[TMP27]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0 +; SKX-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1 +; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; SKX-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP2]], i32 2 +; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; SKX-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3 +; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; SKX-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4 +; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5 +; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; SKX-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6 +; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; SKX-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7 +; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] +; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] +; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] +; SKX-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; SKX-NEXT: ret float [[TMP24]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -573,54 +1220,192 @@ define float @maxf16(float) { ; ; AVX-LABEL: @maxf16( ; AVX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; AVX: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> -; AVX-NEXT: [[TMP48:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[BIN_RDX:%.*]] = select <16 x i1> [[TMP48]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> -; AVX-NEXT: [[TMP49:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX-NEXT: [[BIN_RDX2:%.*]] = select <16 x i1> [[TMP49]], <16 x float> [[BIN_RDX]], <16 x float> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> -; AVX-NEXT: [[TMP50:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX-NEXT: [[BIN_RDX4:%.*]] = select <16 x i1> [[TMP50]], <16 x float> [[BIN_RDX2]], <16 x float> [[RDX_SHUF3]] -; AVX-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> -; AVX-NEXT: [[TMP51:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]] -; AVX-NEXT: [[BIN_RDX6:%.*]] = select <16 x i1> [[TMP51]], <16 x float> [[BIN_RDX4]], <16 x float> [[RDX_SHUF5]] -; AVX-NEXT: [[TMP52:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0 -; AVX: ret float [[TMP52]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 +; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 +; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 +; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP2]], i32 5 +; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 +; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; AVX-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 +; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; AVX-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 +; AVX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] +; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef +; AVX-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 +; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef +; AVX-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 +; AVX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] +; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef +; AVX-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 +; AVX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] +; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef +; AVX-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 +; AVX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] +; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef +; AVX-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13 +; AVX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] +; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef +; AVX-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14 +; AVX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] +; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef +; AVX-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15 +; AVX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] +; AVX-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef +; AVX-NEXT: ret float [[TMP48]] ; ; AVX2-LABEL: @maxf16( ; AVX2-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; AVX2: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> -; AVX2-NEXT: [[TMP48:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[BIN_RDX:%.*]] = select <16 x i1> [[TMP48]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> -; AVX2-NEXT: [[TMP49:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX2-NEXT: [[BIN_RDX2:%.*]] = select <16 x i1> [[TMP49]], <16 x float> [[BIN_RDX]], <16 x float> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> -; AVX2-NEXT: [[TMP50:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX2-NEXT: [[BIN_RDX4:%.*]] = select <16 x i1> [[TMP50]], <16 x float> [[BIN_RDX2]], <16 x float> [[RDX_SHUF3]] -; AVX2-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> -; AVX2-NEXT: [[TMP51:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]] -; AVX2-NEXT: [[BIN_RDX6:%.*]] = select <16 x i1> [[TMP51]], <16 x float> [[BIN_RDX4]], <16 x float> [[RDX_SHUF5]] -; AVX2-NEXT: [[TMP52:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0 -; AVX2: ret float [[TMP52]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 +; AVX2-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 +; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; AVX2-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 +; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; AVX2-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 +; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; AVX2-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 +; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP2]], i32 5 +; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; AVX2-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 +; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; AVX2-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 +; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; AVX2-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 +; AVX2-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] +; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef +; AVX2-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 +; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef +; AVX2-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 +; AVX2-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] +; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef +; AVX2-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 +; AVX2-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] +; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef +; AVX2-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 +; AVX2-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] +; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef +; AVX2-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13 +; AVX2-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] +; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef +; AVX2-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14 +; AVX2-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] +; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef +; AVX2-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15 +; AVX2-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] +; AVX2-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX2-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef +; AVX2-NEXT: ret float [[TMP48]] ; ; SKX-LABEL: @maxf16( ; SKX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; SKX: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> -; SKX-NEXT: [[TMP48:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[BIN_RDX:%.*]] = select <16 x i1> [[TMP48]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> -; SKX-NEXT: [[TMP49:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; SKX-NEXT: [[BIN_RDX2:%.*]] = select <16 x i1> [[TMP49]], <16 x float> [[BIN_RDX]], <16 x float> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> -; SKX-NEXT: [[TMP50:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; SKX-NEXT: [[BIN_RDX4:%.*]] = select <16 x i1> [[TMP50]], <16 x float> [[BIN_RDX2]], <16 x float> [[RDX_SHUF3]] -; SKX-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> -; SKX-NEXT: [[TMP51:%.*]] = fcmp fast ogt <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]] -; SKX-NEXT: [[BIN_RDX6:%.*]] = select <16 x i1> [[TMP51]], <16 x float> [[BIN_RDX4]], <16 x float> [[RDX_SHUF5]] -; SKX-NEXT: [[TMP52:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0 -; SKX: ret float [[TMP52]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 +; SKX-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 +; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; SKX-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 +; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; SKX-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 +; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; SKX-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 +; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; SKX-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP2]], i32 5 +; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; SKX-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 +; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; SKX-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 +; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; SKX-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 +; SKX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] +; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef +; SKX-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 +; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef +; SKX-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 +; SKX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] +; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef +; SKX-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 +; SKX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] +; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef +; SKX-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 +; SKX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] +; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef +; SKX-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13 +; SKX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] +; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef +; SKX-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14 +; SKX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] +; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef +; SKX-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15 +; SKX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] +; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] +; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] +; SKX-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; SKX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef +; SKX-NEXT: ret float [[TMP48]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -771,63 +1556,345 @@ define float @maxf32(float) { ; ; AVX-LABEL: @maxf32( ; AVX-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; AVX: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> -; AVX-NEXT: [[TMP96:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[BIN_RDX:%.*]] = select <32 x i1> [[TMP96]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> -; AVX-NEXT: [[TMP97:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX-NEXT: [[BIN_RDX2:%.*]] = select <32 x i1> [[TMP97]], <32 x float> [[BIN_RDX]], <32 x float> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> -; AVX-NEXT: [[TMP98:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX-NEXT: [[BIN_RDX4:%.*]] = select <32 x i1> [[TMP98]], <32 x float> [[BIN_RDX2]], <32 x float> [[RDX_SHUF3]] -; AVX-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> -; AVX-NEXT: [[TMP99:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] -; AVX-NEXT: [[BIN_RDX6:%.*]] = select <32 x i1> [[TMP99]], <32 x float> [[BIN_RDX4]], <32 x float> [[RDX_SHUF5]] -; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> -; AVX-NEXT: [[TMP100:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] -; AVX-NEXT: [[BIN_RDX8:%.*]] = select <32 x i1> [[TMP100]], <32 x float> [[BIN_RDX6]], <32 x float> [[RDX_SHUF7]] -; AVX-NEXT: [[TMP101:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 -; AVX: ret float [[TMP101]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; AVX-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2 +; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; AVX-NEXT: [[TMP10:%.*]] = extractelement <32 x float> [[TMP2]], i32 3 +; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; AVX-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4 +; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; AVX-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5 +; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; AVX-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6 +; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; AVX-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7 +; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; AVX-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 +; AVX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] +; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef +; AVX-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 +; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef +; AVX-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 +; AVX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] +; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef +; AVX-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 +; AVX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] +; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef +; AVX-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 +; AVX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] +; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef +; AVX-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 +; AVX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] +; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef +; AVX-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 +; AVX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] +; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef +; AVX-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 +; AVX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef +; AVX-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 +; AVX-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] +; AVX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef +; AVX-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 +; AVX-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] +; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef +; AVX-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 +; AVX-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] +; AVX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef +; AVX-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 +; AVX-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] +; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef +; AVX-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 +; AVX-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] +; AVX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef +; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 +; AVX-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] +; AVX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef +; AVX-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 +; AVX-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] +; AVX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef +; AVX-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 +; AVX-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] +; AVX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef +; AVX-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 +; AVX-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] +; AVX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef +; AVX-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 +; AVX-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] +; AVX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef +; AVX-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 +; AVX-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] +; AVX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef +; AVX-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 +; AVX-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] +; AVX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef +; AVX-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 +; AVX-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] +; AVX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef +; AVX-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 +; AVX-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] +; AVX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef +; AVX-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 +; AVX-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] +; AVX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef +; AVX-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 +; AVX-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT]], <32 x float> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x float> [[RDX_MINMAX_SELECT]], <32 x float> [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT3]], <32 x float> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x float> [[RDX_MINMAX_SELECT3]], <32 x float> [[RDX_SHUF4]] +; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT6]], <32 x float> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x float> [[RDX_MINMAX_SELECT6]], <32 x float> [[RDX_SHUF7]] +; AVX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] +; AVX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] +; AVX-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef +; AVX-NEXT: ret float [[TMP96]] ; ; AVX2-LABEL: @maxf32( ; AVX2-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; AVX2: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> -; AVX2-NEXT: [[TMP96:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[BIN_RDX:%.*]] = select <32 x i1> [[TMP96]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> -; AVX2-NEXT: [[TMP97:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; AVX2-NEXT: [[BIN_RDX2:%.*]] = select <32 x i1> [[TMP97]], <32 x float> [[BIN_RDX]], <32 x float> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> -; AVX2-NEXT: [[TMP98:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; AVX2-NEXT: [[BIN_RDX4:%.*]] = select <32 x i1> [[TMP98]], <32 x float> [[BIN_RDX2]], <32 x float> [[RDX_SHUF3]] -; AVX2-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> -; AVX2-NEXT: [[TMP99:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] -; AVX2-NEXT: [[BIN_RDX6:%.*]] = select <32 x i1> [[TMP99]], <32 x float> [[BIN_RDX4]], <32 x float> [[RDX_SHUF5]] -; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> -; AVX2-NEXT: [[TMP100:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] -; AVX2-NEXT: [[BIN_RDX8:%.*]] = select <32 x i1> [[TMP100]], <32 x float> [[BIN_RDX6]], <32 x float> [[RDX_SHUF7]] -; AVX2-NEXT: [[TMP101:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 -; AVX2: ret float [[TMP101]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0 +; AVX2-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1 +; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; AVX2-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2 +; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; AVX2-NEXT: [[TMP10:%.*]] = extractelement <32 x float> [[TMP2]], i32 3 +; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; AVX2-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4 +; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5 +; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; AVX2-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6 +; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; AVX2-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7 +; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; AVX2-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 +; AVX2-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] +; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef +; AVX2-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 +; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef +; AVX2-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 +; AVX2-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] +; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef +; AVX2-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 +; AVX2-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] +; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef +; AVX2-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 +; AVX2-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] +; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef +; AVX2-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 +; AVX2-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] +; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef +; AVX2-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 +; AVX2-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] +; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef +; AVX2-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 +; AVX2-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef +; AVX2-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 +; AVX2-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] +; AVX2-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef +; AVX2-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 +; AVX2-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] +; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef +; AVX2-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 +; AVX2-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] +; AVX2-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef +; AVX2-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 +; AVX2-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] +; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef +; AVX2-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 +; AVX2-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] +; AVX2-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef +; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 +; AVX2-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] +; AVX2-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef +; AVX2-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 +; AVX2-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] +; AVX2-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef +; AVX2-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 +; AVX2-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] +; AVX2-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef +; AVX2-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 +; AVX2-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] +; AVX2-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef +; AVX2-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 +; AVX2-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] +; AVX2-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef +; AVX2-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 +; AVX2-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] +; AVX2-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef +; AVX2-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 +; AVX2-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] +; AVX2-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef +; AVX2-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 +; AVX2-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] +; AVX2-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef +; AVX2-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 +; AVX2-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] +; AVX2-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef +; AVX2-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 +; AVX2-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] +; AVX2-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef +; AVX2-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 +; AVX2-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] +; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT]], <32 x float> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x float> [[RDX_MINMAX_SELECT]], <32 x float> [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT3]], <32 x float> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x float> [[RDX_MINMAX_SELECT3]], <32 x float> [[RDX_SHUF4]] +; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT6]], <32 x float> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x float> [[RDX_MINMAX_SELECT6]], <32 x float> [[RDX_SHUF7]] +; AVX2-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] +; AVX2-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX2-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef +; AVX2-NEXT: ret float [[TMP96]] ; ; SKX-LABEL: @maxf32( ; SKX-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; SKX: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> -; SKX-NEXT: [[TMP96:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[BIN_RDX:%.*]] = select <32 x i1> [[TMP96]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> -; SKX-NEXT: [[TMP97:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; SKX-NEXT: [[BIN_RDX2:%.*]] = select <32 x i1> [[TMP97]], <32 x float> [[BIN_RDX]], <32 x float> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> -; SKX-NEXT: [[TMP98:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] -; SKX-NEXT: [[BIN_RDX4:%.*]] = select <32 x i1> [[TMP98]], <32 x float> [[BIN_RDX2]], <32 x float> [[RDX_SHUF3]] -; SKX-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> -; SKX-NEXT: [[TMP99:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] -; SKX-NEXT: [[BIN_RDX6:%.*]] = select <32 x i1> [[TMP99]], <32 x float> [[BIN_RDX4]], <32 x float> [[RDX_SHUF5]] -; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> -; SKX-NEXT: [[TMP100:%.*]] = fcmp fast ogt <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] -; SKX-NEXT: [[BIN_RDX8:%.*]] = select <32 x i1> [[TMP100]], <32 x float> [[BIN_RDX6]], <32 x float> [[RDX_SHUF7]] -; SKX-NEXT: [[TMP101:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 -; SKX: ret float [[TMP101]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0 +; SKX-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1 +; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef +; SKX-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2 +; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] +; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef +; SKX-NEXT: [[TMP10:%.*]] = extractelement <32 x float> [[TMP2]], i32 3 +; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef +; SKX-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4 +; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] +; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef +; SKX-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5 +; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef +; SKX-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6 +; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef +; SKX-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7 +; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef +; SKX-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 +; SKX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] +; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef +; SKX-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 +; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef +; SKX-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 +; SKX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] +; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef +; SKX-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 +; SKX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] +; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef +; SKX-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 +; SKX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] +; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef +; SKX-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 +; SKX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] +; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef +; SKX-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 +; SKX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] +; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef +; SKX-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 +; SKX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef +; SKX-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 +; SKX-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] +; SKX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef +; SKX-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 +; SKX-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] +; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef +; SKX-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 +; SKX-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] +; SKX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef +; SKX-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 +; SKX-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] +; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef +; SKX-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 +; SKX-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] +; SKX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef +; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 +; SKX-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] +; SKX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef +; SKX-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 +; SKX-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] +; SKX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef +; SKX-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 +; SKX-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] +; SKX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef +; SKX-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 +; SKX-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] +; SKX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef +; SKX-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 +; SKX-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] +; SKX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef +; SKX-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 +; SKX-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] +; SKX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef +; SKX-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 +; SKX-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] +; SKX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef +; SKX-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 +; SKX-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] +; SKX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef +; SKX-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 +; SKX-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] +; SKX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef +; SKX-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 +; SKX-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] +; SKX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef +; SKX-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 +; SKX-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] +; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] +; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] +; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT]], <32 x float> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x float> [[RDX_MINMAX_SELECT]], <32 x float> [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT3]], <32 x float> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x float> [[RDX_MINMAX_SELECT3]], <32 x float> [[RDX_SHUF4]] +; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT6]], <32 x float> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x float> [[RDX_MINMAX_SELECT6]], <32 x float> [[RDX_SHUF7]] +; SKX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] +; SKX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] +; SKX-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; SKX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef +; SKX-NEXT: ret float [[TMP96]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -926,3 +1993,368 @@ define float @maxf32(float) { ret float %95 } +define i32 @maxi8_mutiple_uses(i32) { +; CHECK-LABEL: @maxi8_mutiple_uses( +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; CHECK-NEXT: store i32 [[TMP24]], i32* @var, align 8 +; CHECK-NEXT: ret i32 [[TMP23]] +; +; AVX-LABEL: @maxi8_mutiple_uses( +; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16 +; AVX-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 +; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; AVX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] +; AVX-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] +; AVX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] +; AVX-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] +; AVX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] +; AVX-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] +; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] +; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] +; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] +; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] +; AVX-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] +; AVX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] +; AVX-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; AVX-NEXT: store i32 [[TMP32]], i32* @var, align 8 +; AVX-NEXT: ret i32 [[TMP31]] +; +; AVX2-LABEL: @maxi8_mutiple_uses( +; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16 +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 +; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 +; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 +; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; AVX2-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] +; AVX2-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] +; AVX2-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] +; AVX2-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] +; AVX2-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] +; AVX2-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] +; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] +; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] +; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] +; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] +; AVX2-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] +; AVX2-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] +; AVX2-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; AVX2-NEXT: store i32 [[TMP32]], i32* @var, align 8 +; AVX2-NEXT: ret i32 [[TMP31]] +; +; SKX-LABEL: @maxi8_mutiple_uses( +; SKX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16 +; SKX-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 +; SKX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 +; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef +; SKX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 +; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef +; SKX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 +; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; SKX-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] +; SKX-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] +; SKX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] +; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] +; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] +; SKX-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] +; SKX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] +; SKX-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] +; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] +; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] +; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] +; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] +; SKX-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SKX-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] +; SKX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] +; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; SKX-NEXT: store i32 [[TMP32]], i32* @var, align 8 +; SKX-NEXT: ret i32 [[TMP31]] +; + %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 + %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 + %4 = icmp sgt i32 %2, %3 + %5 = select i1 %4, i32 %2, i32 %3 + %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 + %7 = icmp sgt i32 %5, %6 + %8 = select i1 %7, i32 %5, i32 %6 + %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 + %10 = icmp sgt i32 %8, %9 + %11 = select i1 %10, i32 %8, i32 %9 + %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 + %13 = icmp sgt i32 %11, %12 + %14 = select i1 %13, i32 %11, i32 %12 + %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 + %16 = icmp sgt i32 %14, %15 + %17 = select i1 %16, i32 %14, i32 %15 + %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 + %19 = icmp sgt i32 %17, %18 + %20 = select i1 %19, i32 %17, i32 %18 + %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 + %22 = icmp sgt i32 %20, %21 + %23 = select i1 %22, i32 %20, i32 %21 + %24 = select i1 %4, i32 3, i32 4 + store i32 %24, i32* @var, align 8 + ret i32 %23 +} + +define i32 @maxi8_wrong_parent(i32) { +; CHECK-LABEL: @maxi8_wrong_parent( +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br label [[PP:%.*]] +; CHECK: pp: +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] +; CHECK-NEXT: ret i32 [[TMP23]] +; +; AVX-LABEL: @maxi8_wrong_parent( +; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 +; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 +; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] +; AVX-NEXT: br label [[PP:%.*]] +; AVX: pp: +; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] +; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 +; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP5]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP5]], i32 undef +; AVX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 +; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; AVX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] +; AVX-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] +; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] +; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> +; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] +; AVX-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP19]] +; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP19]] +; AVX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], [[TMP22]] +; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 [[TMP22]] +; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP5]] +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP5]] +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 [[TMP22]] +; AVX-NEXT: ret i32 [[OP_EXTRA]] +; +; AVX2-LABEL: @maxi8_wrong_parent( +; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 +; AVX2-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 +; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] +; AVX2-NEXT: br label [[PP:%.*]] +; AVX2: pp: +; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] +; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 +; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 +; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP5]], [[TMP7]] +; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP5]], i32 undef +; AVX2-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 +; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef +; AVX2-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] +; AVX2-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] +; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> +; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] +; AVX2-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP19]] +; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP19]] +; AVX2-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], [[TMP22]] +; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 [[TMP22]] +; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP5]] +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP5]] +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 [[TMP22]] +; AVX2-NEXT: ret i32 [[OP_EXTRA]] +; +; SKX-LABEL: @maxi8_wrong_parent( +; SKX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16 +; SKX-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 +; SKX-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 +; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] +; SKX-NEXT: br label [[PP:%.*]] +; SKX: pp: +; SKX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 +; SKX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 +; SKX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SKX-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] +; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] +; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> +; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] +; SKX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP8]] +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP8]] +; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], [[TMP9]] +; SKX-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> undef, i1 [[TMP13]], i32 0 +; SKX-NEXT: [[TMP15:%.*]] = insertelement <2 x i1> [[TMP14]], i1 [[TMP5]], i32 1 +; SKX-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0 +; SKX-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP3]], i32 1 +; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP9]], i32 0 +; SKX-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP4]], i32 1 +; SKX-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP17]], <2 x i32> [[TMP19]] +; SKX-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP20]], i32 1 +; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP7]] +; SKX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 undef +; SKX-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 +; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP23]], i32 undef +; SKX-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 +; SKX-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP26]], [[TMP27]] +; SKX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP26]], i32 undef +; SKX-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 +; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP29]], i32 undef +; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP8]] +; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP8]] +; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], [[TMP9]] +; SKX-NEXT: [[TMP36:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0 +; SKX-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], [[TMP21]] +; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 [[TMP21]] +; SKX-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 [[TMP9]] +; SKX-NEXT: ret i32 [[OP_EXTRA]] +; + %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 + %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 + %4 = icmp sgt i32 %2, %3 + br label %pp + +pp: + %5 = select i1 %4, i32 %2, i32 %3 + %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 + %7 = icmp sgt i32 %5, %6 + %8 = select i1 %7, i32 %5, i32 %6 + %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 + %10 = icmp sgt i32 %8, %9 + %11 = select i1 %10, i32 %8, i32 %9 + %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 + %13 = icmp sgt i32 %11, %12 + %14 = select i1 %13, i32 %11, i32 %12 + %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 + %16 = icmp sgt i32 %14, %15 + %17 = select i1 %16, i32 %14, i32 %15 + %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 + %19 = icmp sgt i32 %17, %18 + %20 = select i1 %19, i32 %17, i32 %18 + %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 + %22 = icmp sgt i32 %20, %21 + %23 = select i1 %22, i32 %20, i32 %21 + ret i32 %23 +} +