From 7359f171d76bbafdf680f447f088d236ff449aab Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 20 Jun 2017 17:04:46 +0000 Subject: [PATCH] [CostModel][X86] Declare costs variables based on type The alphabetical progression isn't that useful git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305808 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Analysis/CostModel/X86/arith.ll | 940 +++++++++++++-------------- 1 file changed, 470 insertions(+), 470 deletions(-) diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll index b7a615f55cd..75ee0f71db6 100644 --- a/test/Analysis/CostModel/X86/arith.ll +++ b/test/Analysis/CostModel/X86/arith.ll @@ -11,506 +11,506 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK-LABEL: 'add' define i32 @add(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = add - ; SSE42: cost of 1 {{.*}} %A = add - ; AVX: cost of 1 {{.*}} %A = add - ; AVX2: cost of 1 {{.*}} %A = add - ; AVX512: cost of 1 {{.*}} %A = add - %A = add <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = add - ; SSE42: cost of 2 {{.*}} %B = add - ; AVX: cost of 4 {{.*}} %B = add - ; AVX2: cost of 1 {{.*}} %B = add - ; AVX512: cost of 1 {{.*}} %B = add - %B = add <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = add - ; SSE42: cost of 4 {{.*}} %C = add - ; AVX: cost of 8 {{.*}} %C = add - ; AVX2: cost of 2 {{.*}} %C = add - ; AVX512: cost of 1 {{.*}} %C = add - %C = add <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = add - ; SSE42: cost of 1 {{.*}} %D = add - ; AVX: cost of 1 {{.*}} %D = add - ; AVX2: cost of 1 {{.*}} %D = add - ; AVX512: cost of 1 {{.*}} %D = add - %D = add <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = add - ; SSE42: cost of 2 {{.*}} %E = add - ; AVX: cost of 4 {{.*}} %E = add - ; AVX2: cost of 1 {{.*}} %E = add - ; AVX512: cost of 1 {{.*}} %E = add - %E = add <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = add - ; SSE42: cost of 4 {{.*}} %F = add - ; AVX: cost of 8 {{.*}} %F = add - ; AVX2: cost of 2 {{.*}} %F = add - ; AVX512: cost of 1 {{.*}} %F = add - %F = add <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = add - ; SSE42: cost of 1 {{.*}} %G = add - ; AVX: cost of 1 {{.*}} %G = add - ; AVX2: cost of 1 {{.*}} %G = add - ; AVX512: cost of 1 {{.*}} %G = add - %G = add <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = add - ; SSE42: cost of 2 {{.*}} %H = add - ; AVX: cost of 4 {{.*}} %H = add - ; AVX2: cost of 1 {{.*}} %H = add - ; AVX512: cost of 1 {{.*}} %H = add - %H = add <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = add - ; SSE42: cost of 4 {{.*}} %I = add - ; AVX: cost of 8 {{.*}} %I = add - ; AVX2: cost of 2 {{.*}} %I = add - ; AVX512F: cost of 2 {{.*}} %I = add - ; AVX512BW: cost of 1 {{.*}} %I = add - %I = add <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = add - ; SSE42: cost of 1 {{.*}} %J = add - ; AVX: cost of 1 {{.*}} %J = add - ; AVX2: cost of 1 {{.*}} %J = add - ; AVX512: cost of 1 {{.*}} %J = add - %J = add <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = add - ; SSE42: cost of 2 {{.*}} %K = add - ; AVX: cost of 4 {{.*}} %K = add - ; AVX2: cost of 1 {{.*}} %K = add - ; AVX512: cost of 1 {{.*}} %K = add - %K = add <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = add - ; SSE42: cost of 4 {{.*}} %L = add - ; AVX: cost of 8 {{.*}} %L = add - ; AVX2: cost of 2 {{.*}} %L = add - ; AVX512F: cost of 2 {{.*}} %L = add - ; AVX512BW: cost of 1 {{.*}} %L = add - %L = add <64 x i8> undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = add + ; SSE42: cost of 1 {{.*}} %V2I64 = add + ; AVX: cost of 1 {{.*}} %V2I64 = add + ; AVX2: cost of 1 {{.*}} %V2I64 = add + ; AVX512: cost of 1 {{.*}} %V2I64 = add + %V2I64 = add <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = add + ; SSE42: cost of 2 {{.*}} %V4I64 = add + ; AVX: cost of 4 {{.*}} %V4I64 = add + ; AVX2: cost of 1 {{.*}} %V4I64 = add + ; AVX512: cost of 1 {{.*}} %V4I64 = add + %V4I64 = add <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = add + ; SSE42: cost of 4 {{.*}} %V8I64 = add + ; AVX: cost of 8 {{.*}} %V8I64 = add + ; AVX2: cost of 2 {{.*}} %V8I64 = add + ; AVX512: cost of 1 {{.*}} %V8I64 = add + %V8I64 = add <8 x i64> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V4I32 = add + ; SSE42: cost of 1 {{.*}} %V4I32 = add + ; AVX: cost of 1 {{.*}} %V4I32 = add + ; AVX2: cost of 1 {{.*}} %V4I32 = add + ; AVX512: cost of 1 {{.*}} %V4I32 = add + %V4I32 = add <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = add + ; SSE42: cost of 2 {{.*}} %V8I32 = add + ; AVX: cost of 4 {{.*}} %V8I32 = add + ; AVX2: cost of 1 {{.*}} %V8I32 = add + ; AVX512: cost of 1 {{.*}} %V8I32 = add + %V8I32 = add <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = add + ; SSE42: cost of 4 {{.*}} %V16I32 = add + ; AVX: cost of 8 {{.*}} %V16I32 = add + ; AVX2: cost of 2 {{.*}} %V16I32 = add + ; AVX512: cost of 1 {{.*}} %V16I32 = add + %V16I32 = add <16 x i32> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V8I16 = add + ; SSE42: cost of 1 {{.*}} %V8I16 = add + ; AVX: cost of 1 {{.*}} %V8I16 = add + ; AVX2: cost of 1 {{.*}} %V8I16 = add + ; AVX512: cost of 1 {{.*}} %V8I16 = add + %V8I16 = add <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = add + ; SSE42: cost of 2 {{.*}} %V16I16 = add + ; AVX: cost of 4 {{.*}} %V16I16 = add + ; AVX2: cost of 1 {{.*}} %V16I16 = add + ; AVX512: cost of 1 {{.*}} %V16I16 = add + %V16I16 = add <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = add + ; SSE42: cost of 4 {{.*}} %V32I16 = add + ; AVX: cost of 8 {{.*}} %V32I16 = add + ; AVX2: cost of 2 {{.*}} %V32I16 = add + ; AVX512F: cost of 2 {{.*}} %V32I16 = add + ; AVX512BW: cost of 1 {{.*}} %V32I16 = add + %V32I16 = add <32 x i16> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V16I8 = add + ; SSE42: cost of 1 {{.*}} %V16I8 = add + ; AVX: cost of 1 {{.*}} %V16I8 = add + ; AVX2: cost of 1 {{.*}} %V16I8 = add + ; AVX512: cost of 1 {{.*}} %V16I8 = add + %V16I8 = add <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = add + ; SSE42: cost of 2 {{.*}} %V32I8 = add + ; AVX: cost of 4 {{.*}} %V32I8 = add + ; AVX2: cost of 1 {{.*}} %V32I8 = add + ; AVX512: cost of 1 {{.*}} %V32I8 = add + %V32I8 = add <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = add + ; SSE42: cost of 4 {{.*}} %V64I8 = add + ; AVX: cost of 8 {{.*}} %V64I8 = add + ; AVX2: cost of 2 {{.*}} %V64I8 = add + ; AVX512F: cost of 2 {{.*}} %V64I8 = add + ; AVX512BW: cost of 1 {{.*}} %V64I8 = add + %V64I8 = add <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'sub' define i32 @sub(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = sub - ; SSE42: cost of 1 {{.*}} %A = sub - ; AVX: cost of 1 {{.*}} %A = sub - ; AVX2: cost of 1 {{.*}} %A = sub - ; AVX512: cost of 1 {{.*}} %A = sub - %A = sub <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = sub - ; SSE42: cost of 2 {{.*}} %B = sub - ; AVX: cost of 4 {{.*}} %B = sub - ; AVX2: cost of 1 {{.*}} %B = sub - ; AVX512: cost of 1 {{.*}} %B = sub - %B = sub <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = sub - ; SSE42: cost of 4 {{.*}} %C = sub - ; AVX: cost of 8 {{.*}} %C = sub - ; AVX2: cost of 2 {{.*}} %C = sub - ; AVX512: cost of 1 {{.*}} %C = sub - %C = sub <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = sub - ; SSE42: cost of 1 {{.*}} %D = sub - ; AVX: cost of 1 {{.*}} %D = sub - ; AVX2: cost of 1 {{.*}} %D = sub - ; AVX512: cost of 1 {{.*}} %D = sub - %D = sub <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = sub - ; SSE42: cost of 2 {{.*}} %E = sub - ; AVX: cost of 4 {{.*}} %E = sub - ; AVX2: cost of 1 {{.*}} %E = sub - ; AVX512: cost of 1 {{.*}} %E = sub - %E = sub <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = sub - ; SSE42: cost of 4 {{.*}} %F = sub - ; AVX: cost of 8 {{.*}} %F = sub - ; AVX2: cost of 2 {{.*}} %F = sub - ; AVX512: cost of 1 {{.*}} %F = sub - %F = sub <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = sub - ; SSE42: cost of 1 {{.*}} %G = sub - ; AVX: cost of 1 {{.*}} %G = sub - ; AVX2: cost of 1 {{.*}} %G = sub - ; AVX512: cost of 1 {{.*}} %G = sub - %G = sub <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = sub - ; SSE42: cost of 2 {{.*}} %H = sub - ; AVX: cost of 4 {{.*}} %H = sub - ; AVX2: cost of 1 {{.*}} %H = sub - ; AVX512: cost of 1 {{.*}} %H = sub - %H = sub <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = sub - ; SSE42: cost of 4 {{.*}} %I = sub - ; AVX: cost of 8 {{.*}} %I = sub - ; AVX2: cost of 2 {{.*}} %I = sub - ; AVX512F: cost of 2 {{.*}} %I = sub - ; AVX512BW: cost of 1 {{.*}} %I = sub - %I = sub <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = sub - ; SSE42: cost of 1 {{.*}} %J = sub - ; AVX: cost of 1 {{.*}} %J = sub - ; AVX2: cost of 1 {{.*}} %J = sub - ; AVX512: cost of 1 {{.*}} %J = sub - %J = sub <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = sub - ; SSE42: cost of 2 {{.*}} %K = sub - ; AVX: cost of 4 {{.*}} %K = sub - ; AVX2: cost of 1 {{.*}} %K = sub - ; AVX512: cost of 1 {{.*}} %K = sub - %K = sub <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = sub - ; SSE42: cost of 4 {{.*}} %L = sub - ; AVX: cost of 8 {{.*}} %L = sub - ; AVX2: cost of 2 {{.*}} %L = sub - ; AVX512F: cost of 2 {{.*}} %L = sub - ; AVX512BW: cost of 1 {{.*}} %L = sub - %L = sub <64 x i8> undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = sub + ; SSE42: cost of 1 {{.*}} %V2I64 = sub + ; AVX: cost of 1 {{.*}} %V2I64 = sub + ; AVX2: cost of 1 {{.*}} %V2I64 = sub + ; AVX512: cost of 1 {{.*}} %V2I64 = sub + %V2I64 = sub <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = sub + ; SSE42: cost of 2 {{.*}} %V4I64 = sub + ; AVX: cost of 4 {{.*}} %V4I64 = sub + ; AVX2: cost of 1 {{.*}} %V4I64 = sub + ; AVX512: cost of 1 {{.*}} %V4I64 = sub + %V4I64 = sub <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = sub + ; SSE42: cost of 4 {{.*}} %V8I64 = sub + ; AVX: cost of 8 {{.*}} %V8I64 = sub + ; AVX2: cost of 2 {{.*}} %V8I64 = sub + ; AVX512: cost of 1 {{.*}} %V8I64 = sub + %V8I64 = sub <8 x i64> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V4I32 = sub + ; SSE42: cost of 1 {{.*}} %V4I32 = sub + ; AVX: cost of 1 {{.*}} %V4I32 = sub + ; AVX2: cost of 1 {{.*}} %V4I32 = sub + ; AVX512: cost of 1 {{.*}} %V4I32 = sub + %V4I32 = sub <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = sub + ; SSE42: cost of 2 {{.*}} %V8I32 = sub + ; AVX: cost of 4 {{.*}} %V8I32 = sub + ; AVX2: cost of 1 {{.*}} %V8I32 = sub + ; AVX512: cost of 1 {{.*}} %V8I32 = sub + %V8I32 = sub <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = sub + ; SSE42: cost of 4 {{.*}} %V16I32 = sub + ; AVX: cost of 8 {{.*}} %V16I32 = sub + ; AVX2: cost of 2 {{.*}} %V16I32 = sub + ; AVX512: cost of 1 {{.*}} %V16I32 = sub + %V16I32 = sub <16 x i32> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V8I16 = sub + ; SSE42: cost of 1 {{.*}} %V8I16 = sub + ; AVX: cost of 1 {{.*}} %V8I16 = sub + ; AVX2: cost of 1 {{.*}} %V8I16 = sub + ; AVX512: cost of 1 {{.*}} %V8I16 = sub + %V8I16 = sub <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = sub + ; SSE42: cost of 2 {{.*}} %V16I16 = sub + ; AVX: cost of 4 {{.*}} %V16I16 = sub + ; AVX2: cost of 1 {{.*}} %V16I16 = sub + ; AVX512: cost of 1 {{.*}} %V16I16 = sub + %V16I16 = sub <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = sub + ; SSE42: cost of 4 {{.*}} %V32I16 = sub + ; AVX: cost of 8 {{.*}} %V32I16 = sub + ; AVX2: cost of 2 {{.*}} %V32I16 = sub + ; AVX512F: cost of 2 {{.*}} %V32I16 = sub + ; AVX512BW: cost of 1 {{.*}} %V32I16 = sub + %V32I16 = sub <32 x i16> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V16I8 = sub + ; SSE42: cost of 1 {{.*}} %V16I8 = sub + ; AVX: cost of 1 {{.*}} %V16I8 = sub + ; AVX2: cost of 1 {{.*}} %V16I8 = sub + ; AVX512: cost of 1 {{.*}} %V16I8 = sub + %V16I8 = sub <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = sub + ; SSE42: cost of 2 {{.*}} %V32I8 = sub + ; AVX: cost of 4 {{.*}} %V32I8 = sub + ; AVX2: cost of 1 {{.*}} %V32I8 = sub + ; AVX512: cost of 1 {{.*}} %V32I8 = sub + %V32I8 = sub <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = sub + ; SSE42: cost of 4 {{.*}} %V64I8 = sub + ; AVX: cost of 8 {{.*}} %V64I8 = sub + ; AVX2: cost of 2 {{.*}} %V64I8 = sub + ; AVX512F: cost of 2 {{.*}} %V64I8 = sub + ; AVX512BW: cost of 1 {{.*}} %V64I8 = sub + %V64I8 = sub <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'or' define i32 @or(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = or - ; SSE42: cost of 1 {{.*}} %A = or - ; AVX: cost of 1 {{.*}} %A = or - ; AVX2: cost of 1 {{.*}} %A = or - ; AVX512: cost of 1 {{.*}} %A = or - %A = or <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = or - ; SSE42: cost of 2 {{.*}} %B = or - ; AVX: cost of 1 {{.*}} %B = or - ; AVX2: cost of 1 {{.*}} %B = or - ; AVX512: cost of 1 {{.*}} %B = or - %B = or <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = or - ; SSE42: cost of 4 {{.*}} %C = or - ; AVX: cost of 2 {{.*}} %C = or - ; AVX2: cost of 2 {{.*}} %C = or - ; AVX512: cost of 1 {{.*}} %C = or - %C = or <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = or - ; SSE42: cost of 1 {{.*}} %D = or - ; AVX: cost of 1 {{.*}} %D = or - ; AVX2: cost of 1 {{.*}} %D = or - ; AVX512: cost of 1 {{.*}} %D = or - %D = or <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = or - ; SSE42: cost of 2 {{.*}} %E = or - ; AVX: cost of 1 {{.*}} %E = or - ; AVX2: cost of 1 {{.*}} %E = or - ; AVX512: cost of 1 {{.*}} %E = or - %E = or <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = or - ; SSE42: cost of 4 {{.*}} %F = or - ; AVX: cost of 2 {{.*}} %F = or - ; AVX2: cost of 2 {{.*}} %F = or - ; AVX512: cost of 1 {{.*}} %F = or - %F = or <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = or - ; SSE42: cost of 1 {{.*}} %G = or - ; AVX: cost of 1 {{.*}} %G = or - ; AVX2: cost of 1 {{.*}} %G = or - ; AVX512: cost of 1 {{.*}} %G = or - %G = or <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = or - ; SSE42: cost of 2 {{.*}} %H = or - ; AVX: cost of 1 {{.*}} %H = or - ; AVX2: cost of 1 {{.*}} %H = or - ; AVX512: cost of 1 {{.*}} %H = or - %H = or <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = or - ; SSE42: cost of 4 {{.*}} %I = or - ; AVX: cost of 2 {{.*}} %I = or - ; AVX2: cost of 2 {{.*}} %I = or - ; AVX512F: cost of 2 {{.*}} %I = or - ; AVX512BW: cost of 1 {{.*}} %I = or - %I = or <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = or - ; SSE42: cost of 1 {{.*}} %J = or - ; AVX: cost of 1 {{.*}} %J = or - ; AVX2: cost of 1 {{.*}} %J = or - ; AVX512: cost of 1 {{.*}} %J = or - %J = or <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = or - ; SSE42: cost of 2 {{.*}} %K = or - ; AVX: cost of 1 {{.*}} %K = or - ; AVX2: cost of 1 {{.*}} %K = or - ; AVX512: cost of 1 {{.*}} %K = or - %K = or <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = or - ; SSE42: cost of 4 {{.*}} %L = or - ; AVX: cost of 2 {{.*}} %L = or - ; AVX2: cost of 2 {{.*}} %L = or - ; AVX512F: cost of 2 {{.*}} %L = or - ; AVX512BW: cost of 1 {{.*}} %L = or - %L = or <64 x i8> undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = or + ; SSE42: cost of 1 {{.*}} %V2I64 = or + ; AVX: cost of 1 {{.*}} %V2I64 = or + ; AVX2: cost of 1 {{.*}} %V2I64 = or + ; AVX512: cost of 1 {{.*}} %V2I64 = or + %V2I64 = or <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = or + ; SSE42: cost of 2 {{.*}} %V4I64 = or + ; AVX: cost of 1 {{.*}} %V4I64 = or + ; AVX2: cost of 1 {{.*}} %V4I64 = or + ; AVX512: cost of 1 {{.*}} %V4I64 = or + %V4I64 = or <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = or + ; SSE42: cost of 4 {{.*}} %V8I64 = or + ; AVX: cost of 2 {{.*}} %V8I64 = or + ; AVX2: cost of 2 {{.*}} %V8I64 = or + ; AVX512: cost of 1 {{.*}} %V8I64 = or + %V8I64 = or <8 x i64> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V4I32 = or + ; SSE42: cost of 1 {{.*}} %V4I32 = or + ; AVX: cost of 1 {{.*}} %V4I32 = or + ; AVX2: cost of 1 {{.*}} %V4I32 = or + ; AVX512: cost of 1 {{.*}} %V4I32 = or + %V4I32 = or <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = or + ; SSE42: cost of 2 {{.*}} %V8I32 = or + ; AVX: cost of 1 {{.*}} %V8I32 = or + ; AVX2: cost of 1 {{.*}} %V8I32 = or + ; AVX512: cost of 1 {{.*}} %V8I32 = or + %V8I32 = or <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = or + ; SSE42: cost of 4 {{.*}} %V16I32 = or + ; AVX: cost of 2 {{.*}} %V16I32 = or + ; AVX2: cost of 2 {{.*}} %V16I32 = or + ; AVX512: cost of 1 {{.*}} %V16I32 = or + %V16I32 = or <16 x i32> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V8I16 = or + ; SSE42: cost of 1 {{.*}} %V8I16 = or + ; AVX: cost of 1 {{.*}} %V8I16 = or + ; AVX2: cost of 1 {{.*}} %V8I16 = or + ; AVX512: cost of 1 {{.*}} %V8I16 = or + %V8I16 = or <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = or + ; SSE42: cost of 2 {{.*}} %V16I16 = or + ; AVX: cost of 1 {{.*}} %V16I16 = or + ; AVX2: cost of 1 {{.*}} %V16I16 = or + ; AVX512: cost of 1 {{.*}} %V16I16 = or + %V16I16 = or <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = or + ; SSE42: cost of 4 {{.*}} %V32I16 = or + ; AVX: cost of 2 {{.*}} %V32I16 = or + ; AVX2: cost of 2 {{.*}} %V32I16 = or + ; AVX512F: cost of 2 {{.*}} %V32I16 = or + ; AVX512BW: cost of 1 {{.*}} %V32I16 = or + %V32I16 = or <32 x i16> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V16I8 = or + ; SSE42: cost of 1 {{.*}} %V16I8 = or + ; AVX: cost of 1 {{.*}} %V16I8 = or + ; AVX2: cost of 1 {{.*}} %V16I8 = or + ; AVX512: cost of 1 {{.*}} %V16I8 = or + %V16I8 = or <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = or + ; SSE42: cost of 2 {{.*}} %V32I8 = or + ; AVX: cost of 1 {{.*}} %V32I8 = or + ; AVX2: cost of 1 {{.*}} %V32I8 = or + ; AVX512: cost of 1 {{.*}} %V32I8 = or + %V32I8 = or <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = or + ; SSE42: cost of 4 {{.*}} %V64I8 = or + ; AVX: cost of 2 {{.*}} %V64I8 = or + ; AVX2: cost of 2 {{.*}} %V64I8 = or + ; AVX512F: cost of 2 {{.*}} %V64I8 = or + ; AVX512BW: cost of 1 {{.*}} %V64I8 = or + %V64I8 = or <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'xor' define i32 @xor(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = xor - ; SSE42: cost of 1 {{.*}} %A = xor - ; AVX: cost of 1 {{.*}} %A = xor - ; AVX2: cost of 1 {{.*}} %A = xor - ; AVX512: cost of 1 {{.*}} %A = xor - %A = xor <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = xor - ; SSE42: cost of 2 {{.*}} %B = xor - ; AVX: cost of 1 {{.*}} %B = xor - ; AVX2: cost of 1 {{.*}} %B = xor - ; AVX512: cost of 1 {{.*}} %B = xor - %B = xor <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = xor - ; SSE42: cost of 4 {{.*}} %C = xor - ; AVX: cost of 2 {{.*}} %C = xor - ; AVX2: cost of 2 {{.*}} %C = xor - ; AVX512: cost of 1 {{.*}} %C = xor - %C = xor <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = xor - ; SSE42: cost of 1 {{.*}} %D = xor - ; AVX: cost of 1 {{.*}} %D = xor - ; AVX2: cost of 1 {{.*}} %D = xor - ; AVX512: cost of 1 {{.*}} %D = xor - %D = xor <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = xor - ; SSE42: cost of 2 {{.*}} %E = xor - ; AVX: cost of 1 {{.*}} %E = xor - ; AVX2: cost of 1 {{.*}} %E = xor - ; AVX512: cost of 1 {{.*}} %E = xor - %E = xor <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = xor - ; SSE42: cost of 4 {{.*}} %F = xor - ; AVX: cost of 2 {{.*}} %F = xor - ; AVX2: cost of 2 {{.*}} %F = xor - ; AVX512: cost of 1 {{.*}} %F = xor - %F = xor <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = xor - ; SSE42: cost of 1 {{.*}} %G = xor - ; AVX: cost of 1 {{.*}} %G = xor - ; AVX2: cost of 1 {{.*}} %G = xor - ; AVX512: cost of 1 {{.*}} %G = xor - %G = xor <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = xor - ; SSE42: cost of 2 {{.*}} %H = xor - ; AVX: cost of 1 {{.*}} %H = xor - ; AVX2: cost of 1 {{.*}} %H = xor - ; AVX512: cost of 1 {{.*}} %H = xor - %H = xor <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = xor - ; SSE42: cost of 4 {{.*}} %I = xor - ; AVX: cost of 2 {{.*}} %I = xor - ; AVX2: cost of 2 {{.*}} %I = xor - ; AVX512F: cost of 2 {{.*}} %I = xor - ; AVX512BW: cost of 1 {{.*}} %I = xor - %I = xor <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = xor - ; SSE42: cost of 1 {{.*}} %J = xor - ; AVX: cost of 1 {{.*}} %J = xor - ; AVX2: cost of 1 {{.*}} %J = xor - ; AVX512: cost of 1 {{.*}} %J = xor - %J = xor <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = xor - ; SSE42: cost of 2 {{.*}} %K = xor - ; AVX: cost of 1 {{.*}} %K = xor - ; AVX2: cost of 1 {{.*}} %K = xor - ; AVX512: cost of 1 {{.*}} %K = xor - %K = xor <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = xor - ; SSE42: cost of 4 {{.*}} %L = xor - ; AVX: cost of 2 {{.*}} %L = xor - ; AVX2: cost of 2 {{.*}} %L = xor - ; AVX512F: cost of 2 {{.*}} %L = xor - ; AVX512BW: cost of 1 {{.*}} %L = xor - %L = xor <64 x i8> undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = xor + ; SSE42: cost of 1 {{.*}} %V2I64 = xor + ; AVX: cost of 1 {{.*}} %V2I64 = xor + ; AVX2: cost of 1 {{.*}} %V2I64 = xor + ; AVX512: cost of 1 {{.*}} %V2I64 = xor + %V2I64 = xor <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = xor + ; SSE42: cost of 2 {{.*}} %V4I64 = xor + ; AVX: cost of 1 {{.*}} %V4I64 = xor + ; AVX2: cost of 1 {{.*}} %V4I64 = xor + ; AVX512: cost of 1 {{.*}} %V4I64 = xor + %V4I64 = xor <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = xor + ; SSE42: cost of 4 {{.*}} %V8I64 = xor + ; AVX: cost of 2 {{.*}} %V8I64 = xor + ; AVX2: cost of 2 {{.*}} %V8I64 = xor + ; AVX512: cost of 1 {{.*}} %V8I64 = xor + %V8I64 = xor <8 x i64> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V4I32 = xor + ; SSE42: cost of 1 {{.*}} %V4I32 = xor + ; AVX: cost of 1 {{.*}} %V4I32 = xor + ; AVX2: cost of 1 {{.*}} %V4I32 = xor + ; AVX512: cost of 1 {{.*}} %V4I32 = xor + %V4I32 = xor <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = xor + ; SSE42: cost of 2 {{.*}} %V8I32 = xor + ; AVX: cost of 1 {{.*}} %V8I32 = xor + ; AVX2: cost of 1 {{.*}} %V8I32 = xor + ; AVX512: cost of 1 {{.*}} %V8I32 = xor + %V8I32 = xor <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = xor + ; SSE42: cost of 4 {{.*}} %V16I32 = xor + ; AVX: cost of 2 {{.*}} %V16I32 = xor + ; AVX2: cost of 2 {{.*}} %V16I32 = xor + ; AVX512: cost of 1 {{.*}} %V16I32 = xor + %V16I32 = xor <16 x i32> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V8I16 = xor + ; SSE42: cost of 1 {{.*}} %V8I16 = xor + ; AVX: cost of 1 {{.*}} %V8I16 = xor + ; AVX2: cost of 1 {{.*}} %V8I16 = xor + ; AVX512: cost of 1 {{.*}} %V8I16 = xor + %V8I16 = xor <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = xor + ; SSE42: cost of 2 {{.*}} %V16I16 = xor + ; AVX: cost of 1 {{.*}} %V16I16 = xor + ; AVX2: cost of 1 {{.*}} %V16I16 = xor + ; AVX512: cost of 1 {{.*}} %V16I16 = xor + %V16I16 = xor <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = xor + ; SSE42: cost of 4 {{.*}} %V32I16 = xor + ; AVX: cost of 2 {{.*}} %V32I16 = xor + ; AVX2: cost of 2 {{.*}} %V32I16 = xor + ; AVX512F: cost of 2 {{.*}} %V32I16 = xor + ; AVX512BW: cost of 1 {{.*}} %V32I16 = xor + %V32I16 = xor <32 x i16> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V16I8 = xor + ; SSE42: cost of 1 {{.*}} %V16I8 = xor + ; AVX: cost of 1 {{.*}} %V16I8 = xor + ; AVX2: cost of 1 {{.*}} %V16I8 = xor + ; AVX512: cost of 1 {{.*}} %V16I8 = xor + %V16I8 = xor <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = xor + ; SSE42: cost of 2 {{.*}} %V32I8 = xor + ; AVX: cost of 1 {{.*}} %V32I8 = xor + ; AVX2: cost of 1 {{.*}} %V32I8 = xor + ; AVX512: cost of 1 {{.*}} %V32I8 = xor + %V32I8 = xor <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = xor + ; SSE42: cost of 4 {{.*}} %V64I8 = xor + ; AVX: cost of 2 {{.*}} %V64I8 = xor + ; AVX2: cost of 2 {{.*}} %V64I8 = xor + ; AVX512F: cost of 2 {{.*}} %V64I8 = xor + ; AVX512BW: cost of 1 {{.*}} %V64I8 = xor + %V64I8 = xor <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'and' define i32 @and(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = and - ; SSE42: cost of 1 {{.*}} %A = and - ; AVX: cost of 1 {{.*}} %A = and - ; AVX2: cost of 1 {{.*}} %A = and - ; AVX512: cost of 1 {{.*}} %A = and - %A = and <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = and - ; SSE42: cost of 2 {{.*}} %B = and - ; AVX: cost of 1 {{.*}} %B = and - ; AVX2: cost of 1 {{.*}} %B = and - ; AVX512: cost of 1 {{.*}} %B = and - %B = and <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = and - ; SSE42: cost of 4 {{.*}} %C = and - ; AVX: cost of 2 {{.*}} %C = and - ; AVX2: cost of 2 {{.*}} %C = and - ; AVX512: cost of 1 {{.*}} %C = and - %C = and <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = and - ; SSE42: cost of 1 {{.*}} %D = and - ; AVX: cost of 1 {{.*}} %D = and - ; AVX2: cost of 1 {{.*}} %D = and - ; AVX512: cost of 1 {{.*}} %D = and - %D = and <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = and - ; SSE42: cost of 2 {{.*}} %E = and - ; AVX: cost of 1 {{.*}} %E = and - ; AVX2: cost of 1 {{.*}} %E = and - ; AVX512: cost of 1 {{.*}} %E = and - %E = and <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = and - ; SSE42: cost of 4 {{.*}} %F = and - ; AVX: cost of 2 {{.*}} %F = and - ; AVX2: cost of 2 {{.*}} %F = and - ; AVX512: cost of 1 {{.*}} %F = and - %F = and <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = and - ; SSE42: cost of 1 {{.*}} %G = and - ; AVX: cost of 1 {{.*}} %G = and - ; AVX2: cost of 1 {{.*}} %G = and - ; AVX512: cost of 1 {{.*}} %G = and - %G = and <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = and - ; SSE42: cost of 2 {{.*}} %H = and - ; AVX: cost of 1 {{.*}} %H = and - ; AVX2: cost of 1 {{.*}} %H = and - ; AVX512: cost of 1 {{.*}} %H = and - %H = and <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = and - ; SSE42: cost of 4 {{.*}} %I = and - ; AVX: cost of 2 {{.*}} %I = and - ; AVX2: cost of 2 {{.*}} %I = and - ; AVX512F: cost of 2 {{.*}} %I = and - ; AVX512BW: cost of 1 {{.*}} %I = and - %I = and <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = and - ; SSE42: cost of 1 {{.*}} %J = and - ; AVX: cost of 1 {{.*}} %J = and - ; AVX2: cost of 1 {{.*}} %J = and - ; AVX512: cost of 1 {{.*}} %J = and - %J = and <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = and - ; SSE42: cost of 2 {{.*}} %K = and - ; AVX: cost of 1 {{.*}} %K = and - ; AVX2: cost of 1 {{.*}} %K = and - ; AVX512: cost of 1 {{.*}} %K = and - %K = and <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = and - ; SSE42: cost of 4 {{.*}} %L = and - ; AVX: cost of 2 {{.*}} %L = and - ; AVX2: cost of 2 {{.*}} %L = and - ; AVX512F: cost of 2 {{.*}} %L = and - ; AVX512BW: cost of 1 {{.*}} %L = and - %L = and <64 x i8> undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = and + ; SSE42: cost of 1 {{.*}} %V2I64 = and + ; AVX: cost of 1 {{.*}} %V2I64 = and + ; AVX2: cost of 1 {{.*}} %V2I64 = and + ; AVX512: cost of 1 {{.*}} %V2I64 = and + %V2I64 = and <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = and + ; SSE42: cost of 2 {{.*}} %V4I64 = and + ; AVX: cost of 1 {{.*}} %V4I64 = and + ; AVX2: cost of 1 {{.*}} %V4I64 = and + ; AVX512: cost of 1 {{.*}} %V4I64 = and + %V4I64 = and <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = and + ; SSE42: cost of 4 {{.*}} %V8I64 = and + ; AVX: cost of 2 {{.*}} %V8I64 = and + ; AVX2: cost of 2 {{.*}} %V8I64 = and + ; AVX512: cost of 1 {{.*}} %V8I64 = and + %V8I64 = and <8 x i64> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V4I32 = and + ; SSE42: cost of 1 {{.*}} %V4I32 = and + ; AVX: cost of 1 {{.*}} %V4I32 = and + ; AVX2: cost of 1 {{.*}} %V4I32 = and + ; AVX512: cost of 1 {{.*}} %V4I32 = and + %V4I32 = and <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = and + ; SSE42: cost of 2 {{.*}} %V8I32 = and + ; AVX: cost of 1 {{.*}} %V8I32 = and + ; AVX2: cost of 1 {{.*}} %V8I32 = and + ; AVX512: cost of 1 {{.*}} %V8I32 = and + %V8I32 = and <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = and + ; SSE42: cost of 4 {{.*}} %V16I32 = and + ; AVX: cost of 2 {{.*}} %V16I32 = and + ; AVX2: cost of 2 {{.*}} %V16I32 = and + ; AVX512: cost of 1 {{.*}} %V16I32 = and + %V16I32 = and <16 x i32> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V8I16 = and + ; SSE42: cost of 1 {{.*}} %V8I16 = and + ; AVX: cost of 1 {{.*}} %V8I16 = and + ; AVX2: cost of 1 {{.*}} %V8I16 = and + ; AVX512: cost of 1 {{.*}} %V8I16 = and + %V8I16 = and <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = and + ; SSE42: cost of 2 {{.*}} %V16I16 = and + ; AVX: cost of 1 {{.*}} %V16I16 = and + ; AVX2: cost of 1 {{.*}} %V16I16 = and + ; AVX512: cost of 1 {{.*}} %V16I16 = and + %V16I16 = and <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = and + ; SSE42: cost of 4 {{.*}} %V32I16 = and + ; AVX: cost of 2 {{.*}} %V32I16 = and + ; AVX2: cost of 2 {{.*}} %V32I16 = and + ; AVX512F: cost of 2 {{.*}} %V32I16 = and + ; AVX512BW: cost of 1 {{.*}} %V32I16 = and + %V32I16 = and <32 x i16> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V16I8 = and + ; SSE42: cost of 1 {{.*}} %V16I8 = and + ; AVX: cost of 1 {{.*}} %V16I8 = and + ; AVX2: cost of 1 {{.*}} %V16I8 = and + ; AVX512: cost of 1 {{.*}} %V16I8 = and + %V16I8 = and <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = and + ; SSE42: cost of 2 {{.*}} %V32I8 = and + ; AVX: cost of 1 {{.*}} %V32I8 = and + ; AVX2: cost of 1 {{.*}} %V32I8 = and + ; AVX512: cost of 1 {{.*}} %V32I8 = and + %V32I8 = and <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = and + ; SSE42: cost of 4 {{.*}} %V64I8 = and + ; AVX: cost of 2 {{.*}} %V64I8 = and + ; AVX2: cost of 2 {{.*}} %V64I8 = and + ; AVX512F: cost of 2 {{.*}} %V64I8 = and + ; AVX512BW: cost of 1 {{.*}} %V64I8 = and + %V64I8 = and <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'mul' define i32 @mul(i32 %arg) { - ; SSSE3: cost of 8 {{.*}} %A = mul - ; SSE42: cost of 8 {{.*}} %A = mul - ; AVX: cost of 8 {{.*}} %A = mul - ; AVX2: cost of 8 {{.*}} %A = mul - ; AVX512F: cost of 8 {{.*}} %A = mul - ; AVX512BW: cost of 8 {{.*}} %A = mul - ; AVX512DQ: cost of 1 {{.*}} %A = mul - %A = mul <2 x i64> undef, undef - ; SSSE3: cost of 16 {{.*}} %B = mul - ; SSE42: cost of 16 {{.*}} %B = mul - ; AVX: cost of 18 {{.*}} %B = mul - ; AVX2: cost of 8 {{.*}} %B = mul - ; AVX512F: cost of 8 {{.*}} %B = mul - ; AVX512BW: cost of 8 {{.*}} %B = mul - ; AVX512DQ: cost of 1 {{.*}} %B = mul - %B = mul <4 x i64> undef, undef - ; SSSE3: cost of 32 {{.*}} %C = mul - ; SSE42: cost of 32 {{.*}} %C = mul - ; AVX: cost of 36 {{.*}} %C = mul - ; AVX2: cost of 16 {{.*}} %C = mul - ; AVX512F: cost of 8 {{.*}} %C = mul - ; AVX512BW: cost of 8 {{.*}} %C = mul - ; AVX512DQ: cost of 1 {{.*}} %C = mul - %C = mul <8 x i64> undef, undef - - ; SSSE3: cost of 6 {{.*}} %D = mul - ; SSE42: cost of 1 {{.*}} %D = mul - ; AVX: cost of 1 {{.*}} %D = mul - ; AVX2: cost of 1 {{.*}} %D = mul - ; AVX512: cost of 1 {{.*}} %D = mul - %D = mul <4 x i32> undef, undef - ; SSSE3: cost of 12 {{.*}} %E = mul - ; SSE42: cost of 2 {{.*}} %E = mul - ; AVX: cost of 4 {{.*}} %E = mul - ; AVX2: cost of 1 {{.*}} %E = mul - ; AVX512: cost of 1 {{.*}} %E = mul - %E = mul <8 x i32> undef, undef - ; SSSE3: cost of 24 {{.*}} %F = mul - ; SSE42: cost of 4 {{.*}} %F = mul - ; AVX: cost of 8 {{.*}} %F = mul - ; AVX2: cost of 2 {{.*}} %F = mul - ; AVX512: cost of 1 {{.*}} %F = mul - %F = mul <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = mul - ; SSE42: cost of 1 {{.*}} %G = mul - ; AVX: cost of 1 {{.*}} %G = mul - ; AVX2: cost of 1 {{.*}} %G = mul - ; AVX512: cost of 1 {{.*}} %G = mul - %G = mul <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = mul - ; SSE42: cost of 2 {{.*}} %H = mul - ; AVX: cost of 4 {{.*}} %H = mul - ; AVX2: cost of 1 {{.*}} %H = mul - ; AVX512: cost of 1 {{.*}} %H = mul - %H = mul <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = mul - ; SSE42: cost of 4 {{.*}} %I = mul - ; AVX: cost of 8 {{.*}} %I = mul - ; AVX2: cost of 2 {{.*}} %I = mul - ; AVX512F: cost of 2 {{.*}} %I = mul - ; AVX512BW: cost of 1 {{.*}} %I = mul - %I = mul <32 x i16> undef, undef - - ; SSSE3: cost of 12 {{.*}} %J = mul - ; SSE42: cost of 12 {{.*}} %J = mul - ; AVX: cost of 12 {{.*}} %J = mul - ; AVX2: cost of 7 {{.*}} %J = mul - ; AVX512F: cost of 5 {{.*}} %J = mul - ; AVX512BW: cost of 4 {{.*}} %J = mul - %J = mul <16 x i8> undef, undef - ; SSSE3: cost of 24 {{.*}} %K = mul - ; SSE42: cost of 24 {{.*}} %K = mul - ; AVX: cost of 26 {{.*}} %K = mul - ; AVX2: cost of 17 {{.*}} %K = mul - ; AVX512F: cost of 13 {{.*}} %K = mul - ; AVX512BW: cost of 4 {{.*}} %K = mul - %K = mul <32 x i8> undef, undef - ; SSSE3: cost of 48 {{.*}} %L = mul - ; SSE42: cost of 48 {{.*}} %L = mul - ; AVX: cost of 52 {{.*}} %L = mul - ; AVX2: cost of 34 {{.*}} %L = mul - ; AVX512F: cost of 26 {{.*}} %L = mul - ; AVX512BW: cost of 11 {{.*}} %L = mul - %L = mul <64 x i8> undef, undef + ; SSSE3: cost of 8 {{.*}} %V2I64 = mul + ; SSE42: cost of 8 {{.*}} %V2I64 = mul + ; AVX: cost of 8 {{.*}} %V2I64 = mul + ; AVX2: cost of 8 {{.*}} %V2I64 = mul + ; AVX512F: cost of 8 {{.*}} %V2I64 = mul + ; AVX512BW: cost of 8 {{.*}} %V2I64 = mul + ; AVX512DQ: cost of 1 {{.*}} %V2I64 = mul + %V2I64 = mul <2 x i64> undef, undef + ; SSSE3: cost of 16 {{.*}} %V4I64 = mul + ; SSE42: cost of 16 {{.*}} %V4I64 = mul + ; AVX: cost of 18 {{.*}} %V4I64 = mul + ; AVX2: cost of 8 {{.*}} %V4I64 = mul + ; AVX512F: cost of 8 {{.*}} %V4I64 = mul + ; AVX512BW: cost of 8 {{.*}} %V4I64 = mul + ; AVX512DQ: cost of 1 {{.*}} %V4I64 = mul + %V4I64 = mul <4 x i64> undef, undef + ; SSSE3: cost of 32 {{.*}} %V8I64 = mul + ; SSE42: cost of 32 {{.*}} %V8I64 = mul + ; AVX: cost of 36 {{.*}} %V8I64 = mul + ; AVX2: cost of 16 {{.*}} %V8I64 = mul + ; AVX512F: cost of 8 {{.*}} %V8I64 = mul + ; AVX512BW: cost of 8 {{.*}} %V8I64 = mul + ; AVX512DQ: cost of 1 {{.*}} %V8I64 = mul + %V8I64 = mul <8 x i64> undef, undef + + ; SSSE3: cost of 6 {{.*}} %V4I32 = mul + ; SSE42: cost of 1 {{.*}} %V4I32 = mul + ; AVX: cost of 1 {{.*}} %V4I32 = mul + ; AVX2: cost of 1 {{.*}} %V4I32 = mul + ; AVX512: cost of 1 {{.*}} %V4I32 = mul + %V4I32 = mul <4 x i32> undef, undef + ; SSSE3: cost of 12 {{.*}} %V8I32 = mul + ; SSE42: cost of 2 {{.*}} %V8I32 = mul + ; AVX: cost of 4 {{.*}} %V8I32 = mul + ; AVX2: cost of 1 {{.*}} %V8I32 = mul + ; AVX512: cost of 1 {{.*}} %V8I32 = mul + %V8I32 = mul <8 x i32> undef, undef + ; SSSE3: cost of 24 {{.*}} %V16I32 = mul + ; SSE42: cost of 4 {{.*}} %V16I32 = mul + ; AVX: cost of 8 {{.*}} %V16I32 = mul + ; AVX2: cost of 2 {{.*}} %V16I32 = mul + ; AVX512: cost of 1 {{.*}} %V16I32 = mul + %V16I32 = mul <16 x i32> undef, undef + + ; SSSE3: cost of 1 {{.*}} %V8I16 = mul + ; SSE42: cost of 1 {{.*}} %V8I16 = mul + ; AVX: cost of 1 {{.*}} %V8I16 = mul + ; AVX2: cost of 1 {{.*}} %V8I16 = mul + ; AVX512: cost of 1 {{.*}} %V8I16 = mul + %V8I16 = mul <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = mul + ; SSE42: cost of 2 {{.*}} %V16I16 = mul + ; AVX: cost of 4 {{.*}} %V16I16 = mul + ; AVX2: cost of 1 {{.*}} %V16I16 = mul + ; AVX512: cost of 1 {{.*}} %V16I16 = mul + %V16I16 = mul <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = mul + ; SSE42: cost of 4 {{.*}} %V32I16 = mul + ; AVX: cost of 8 {{.*}} %V32I16 = mul + ; AVX2: cost of 2 {{.*}} %V32I16 = mul + ; AVX512F: cost of 2 {{.*}} %V32I16 = mul + ; AVX512BW: cost of 1 {{.*}} %V32I16 = mul + %V32I16 = mul <32 x i16> undef, undef + + ; SSSE3: cost of 12 {{.*}} %V16I8 = mul + ; SSE42: cost of 12 {{.*}} %V16I8 = mul + ; AVX: cost of 12 {{.*}} %V16I8 = mul + ; AVX2: cost of 7 {{.*}} %V16I8 = mul + ; AVX512F: cost of 5 {{.*}} %V16I8 = mul + ; AVX512BW: cost of 4 {{.*}} %V16I8 = mul + %V16I8 = mul <16 x i8> undef, undef + ; SSSE3: cost of 24 {{.*}} %V32I8 = mul + ; SSE42: cost of 24 {{.*}} %V32I8 = mul + ; AVX: cost of 26 {{.*}} %V32I8 = mul + ; AVX2: cost of 17 {{.*}} %V32I8 = mul + ; AVX512F: cost of 13 {{.*}} %V32I8 = mul + ; AVX512BW: cost of 4 {{.*}} %V32I8 = mul + %V32I8 = mul <32 x i8> undef, undef + ; SSSE3: cost of 48 {{.*}} %V64I8 = mul + ; SSE42: cost of 48 {{.*}} %V64I8 = mul + ; AVX: cost of 52 {{.*}} %V64I8 = mul + ; AVX2: cost of 34 {{.*}} %V64I8 = mul + ; AVX512F: cost of 26 {{.*}} %V64I8 = mul + ; AVX512BW: cost of 11 {{.*}} %V64I8 = mul + %V64I8 = mul <64 x i8> undef, undef ret i32 undef } -- 2.50.1