From: Michael Kuperstein Date: Tue, 21 Jun 2016 20:41:40 +0000 (+0000) Subject: [X86] Make arithmetic operations cost model test saner. NFC. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4e93d1c1e6031863aab58918a2516e2083dfd4db;p=llvm [X86] Make arithmetic operations cost model test saner. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273316 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll index 0b61d3cd421..a35db9c68ff 100644 --- a/test/Analysis/CostModel/X86/arith.ll +++ b/test/Analysis/CostModel/X86/arith.ll @@ -1,128 +1,173 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" +; CHECK-LABEL: 'add' define i32 @add(i32 %arg) { - ;CHECK: cost of 1 {{.*}} add + ; SSSE3: cost of 1 {{.*}} %A = add + ; SSE42: cost of 1 {{.*}} %A = add + ; AVX: cost of 1 {{.*}} %A = add + ; AVX2: cost of 1 {{.*}} %A = add %A = add <4 x i32> undef, undef - ;CHECK: cost of 4 {{.*}} add + ; SSSE3: cost of 2 {{.*}} %B = add + ; SSE42: cost of 2 {{.*}} %B = add + ; AVX: cost of 4 {{.*}} %B = add + ; AVX2: cost of 1 {{.*}} %B = add %B = add <8 x i32> undef, undef - ;CHECK: cost of 1 {{.*}} add + ; SSSE3: cost of 1 {{.*}} %C = add + ; SSE42: cost of 1 {{.*}} %C = add + ; AVX: cost of 1 {{.*}} %C = add + ; AVX2: cost of 1 {{.*}} %C = add %C = add <2 x i64> undef, undef - ;CHECK: cost of 4 {{.*}} add + ; SSSE3: cost of 2 {{.*}} %D = add + ; SSE42: cost of 2 {{.*}} %D = add + ; AVX: cost of 4 {{.*}} %D = add + ; AVX2: cost of 1 {{.*}} %D = add %D = add <4 x i64> undef, undef - ;CHECK: cost of 8 {{.*}} add + ; SSSE3: cost of 4 {{.*}} %E = add + ; SSE42: cost of 4 {{.*}} %E = add + ; AVX: cost of 8 {{.*}} %E = add + ; AVX2: cost of 2 {{.*}} %E = add %E = add <8 x i64> undef, undef - ;CHECK: cost of 0 {{.*}} ret ret i32 undef } - +; CHECK-LABEL: 'xor' define i32 @xor(i32 %arg) { - ;CHECK: cost of 1 {{.*}} xor + ; SSSE3: cost of 1 {{.*}} %A = xor + ; SSE42: cost of 1 {{.*}} %A = xor + ; AVX: cost of 1 {{.*}} %A = xor + ; AVX2: cost of 1 {{.*}} %A = xor %A = xor <4 x i32> undef, undef - ;CHECK: cost of 1 {{.*}} xor + ; SSSE3: cost of 2 {{.*}} %B = xor + ; SSE42: cost of 2 {{.*}} %B = xor + ; AVX: cost of 1 {{.*}} %B = xor + ; AVX2: cost of 1 {{.*}} %B = xor %B = xor <8 x i32> undef, undef - ;CHECK: cost of 1 {{.*}} xor + ; SSSE3: cost of 1 {{.*}} %C = xor + ; SSE42: cost of 1 {{.*}} %C = xor + ; AVX: cost of 1 {{.*}} %C = xor + ; AVX2: cost of 1 {{.*}} %C = xor %C = xor <2 x i64> undef, undef - ;CHECK: cost of 1 {{.*}} xor + ; SSSE3: cost of 2 {{.*}} %D = xor + ; SSE42: cost of 2 {{.*}} %D = xor + ; AVX: cost of 1 {{.*}} %D = xor + ; AVX2: cost of 1 {{.*}} %D = xor %D = xor <4 x i64> undef, undef - ;CHECK: cost of 0 {{.*}} ret ret i32 undef } -; CHECK: mul +; CHECK-LABEL: 'mul' define void @mul() { ; A <2 x i32> gets expanded to a <2 x i64> vector. ; A <2 x i64> vector multiply is implemented using ; 3 PMULUDQ and 2 PADDS and 4 shifts. - ;CHECK: cost of 9 {{.*}} mul + ; SSSE3: cost of 9 {{.*}} %A0 = mul + ; SSE42: cost of 9 {{.*}} %A0 = mul + ; AVX: cost of 9 {{.*}} %A0 = mul + ; AVX2: cost of 9 {{.*}} %A0 = mul %A0 = mul <2 x i32> undef, undef - ;CHECK: cost of 9 {{.*}} mul - %A1 = mul <2 x i64> undef, undef - ;CHECK: cost of 18 {{.*}} mul - %A2 = mul <4 x i64> undef, undef - ret void -} - -; SSE3: sse3mull -define void @sse3mull() { - ; SSE3: cost of 6 {{.*}} mul - %A0 = mul <4 x i32> undef, undef + ; SSSE3: cost of 6 {{.*}} %A1 = mul + ; SSE42: cost of 1 {{.*}} %A1 = mul + ; AVX: cost of 1 {{.*}} %A1 = mul + ; AVX2: cost of 1 {{.*}} %A1 = mul + %A1 = mul <4 x i32> undef, undef + ; SSSE3: cost of 9 {{.*}} %A2 = mul + ; SSE42: cost of 9 {{.*}} %A2 = mul + ; AVX: cost of 9 {{.*}} %A2 = mul + ; AVX2: cost of 9 {{.*}} %A2 = mul + %A2 = mul <2 x i64> undef, undef + ; SSSE3: cost of 18 {{.*}} %A3 = mul + ; SSE42: cost of 18 {{.*}} %A3 = mul + ; AVX: cost of 18 {{.*}} %A3 = mul + ; AVX2: cost of 9 {{.*}} %A3 = mul + %A3 = mul <4 x i64> undef, undef ret void - ; SSE3: avx2mull } -; AVX2: avx2mull -define void @avx2mull() { - ; AVX2: cost of 9 {{.*}} mul - %A0 = mul <4 x i64> undef, undef - ret void - ; AVX2: fmul -} - -; CHECK: fmul +; CHECK-LABEL: 'fmul' define i32 @fmul(i32 %arg) { - ;CHECK: cost of 2 {{.*}} fmul + ; SSSE3: cost of 2 {{.*}} %A = fmul + ; SSE42: cost of 2 {{.*}} %A = fmul + ; AVX: cost of 2 {{.*}} %A = fmul + ; AVX2: cost of 2 {{.*}} %A = fmul %A = fmul <4 x float> undef, undef - ;CHECK: cost of 2 {{.*}} fmul + ; SSSE3: cost of 4 {{.*}} %B = fmul + ; SSE42: cost of 4 {{.*}} %B = fmul + ; AVX: cost of 2 {{.*}} %B = fmul + ; AVX2: cost of 2 {{.*}} %B = fmul %B = fmul <8 x float> undef, undef ret i32 undef } -; AVX: shift -; AVX2: shift +; CHECK-LABEL: 'shift' define void @shift() { - ; AVX: cost of 2 {{.*}} shl - ; AVX2: cost of 1 {{.*}} shl + ; SSSE3: cost of 10 {{.*}} %A0 = shl + ; SSE42: cost of 10 {{.*}} %A0 = shl + ; AVX: cost of 10 {{.*}} %A0 = shl + ; AVX2: cost of 1 {{.*}} %A0 = shl %A0 = shl <4 x i32> undef, undef - ; AVX: cost of 2 {{.*}} shl - ; AVX2: cost of 1 {{.*}} shl + ; SSSE3: cost of 4 {{.*}} %A1 = shl + ; SSE42: cost of 4 {{.*}} %A1 = shl + ; AVX: cost of 4 {{.*}} %A1 = shl + ; AVX2: cost of 1 {{.*}} %A1 = shl %A1 = shl <2 x i64> undef, undef - - ; AVX: cost of 2 {{.*}} lshr - ; AVX2: cost of 1 {{.*}} lshr + ; SSSE3: cost of 20 {{.*}} %A2 = shl + ; SSE42: cost of 20 {{.*}} %A2 = shl + ; AVX: cost of 20 {{.*}} %A2 = shl + ; AVX2: cost of 1 {{.*}} %A2 = shl + %A2 = shl <8 x i32> undef, undef + ; SSSE3: cost of 8 {{.*}} %A3 = shl + ; SSE42: cost of 8 {{.*}} %A3 = shl + ; AVX: cost of 8 {{.*}} %A3 = shl + ; AVX2: cost of 1 {{.*}} %A3 = shl + %A3 = shl <4 x i64> undef, undef + + ; SSSE3: cost of 16 {{.*}} %B0 = lshr + ; SSE42: cost of 16 {{.*}} %B0 = lshr + ; AVX: cost of 16 {{.*}} %B0 = lshr + ; AVX2: cost of 1 {{.*}} %B0 = lshr %B0 = lshr <4 x i32> undef, undef - ; AVX: cost of 2 {{.*}} lshr - ; AVX2: cost of 1 {{.*}} lshr + ; SSSE3: cost of 4 {{.*}} %B1 = lshr + ; SSE42: cost of 4 {{.*}} %B1 = lshr + ; AVX: cost of 4 {{.*}} %B1 = lshr + ; AVX2: cost of 1 {{.*}} %B1 = lshr %B1 = lshr <2 x i64> undef, undef - - ; AVX: cost of 2 {{.*}} ashr - ; AVX2: cost of 1 {{.*}} ashr + ; SSSE3: cost of 32 {{.*}} %B2 = lshr + ; SSE42: cost of 32 {{.*}} %B2 = lshr + ; AVX: cost of 32 {{.*}} %B2 = lshr + ; AVX2: cost of 1 {{.*}} %B2 = lshr + %B2 = lshr <8 x i32> undef, undef + ; SSSE3: cost of 8 {{.*}} %B3 = lshr + ; SSE42: cost of 8 {{.*}} %B3 = lshr + ; AVX: cost of 8 {{.*}} %B3 = lshr + ; AVX2: cost of 1 {{.*}} %B3 = lshr + %B3 = lshr <4 x i64> undef, undef + + ; SSSE3: cost of 16 {{.*}} %C0 = ashr + ; SSE42: cost of 16 {{.*}} %C0 = ashr + ; AVX: cost of 16 {{.*}} %C0 = ashr + ; AVX2: cost of 1 {{.*}} %C0 = ashr %C0 = ashr <4 x i32> undef, undef - ; AVX: cost of 6 {{.*}} ashr - ; AVX2: cost of 4 {{.*}} ashr + ; SSSE3: cost of 12 {{.*}} %C1 = ashr + ; SSE42: cost of 12 {{.*}} %C1 = ashr + ; AVX: cost of 12 {{.*}} %C1 = ashr + ; AVX2: cost of 4 {{.*}} %C1 = ashr %C1 = ashr <2 x i64> undef, undef - - ret void -} - -; AVX: avx2shift -; AVX2: avx2shift -define void @avx2shift() { - ; AVX: cost of 2 {{.*}} shl - ; AVX2: cost of 1 {{.*}} shl - %A0 = shl <8 x i32> undef, undef - ; AVX: cost of 2 {{.*}} shl - ; AVX2: cost of 1 {{.*}} shl - %A1 = shl <4 x i64> undef, undef - - ; AVX: cost of 2 {{.*}} lshr - ; AVX2: cost of 1 {{.*}} lshr - %B0 = lshr <8 x i32> undef, undef - ; AVX: cost of 2 {{.*}} lshr - ; AVX2: cost of 1 {{.*}} lshr - %B1 = lshr <4 x i64> undef, undef - - ; AVX: cost of 2 {{.*}} ashr - ; AVX2: cost of 1 {{.*}} ashr - %C0 = ashr <8 x i32> undef, undef - ; AVX: cost of 12 {{.*}} ashr - ; AVX2: cost of 4 {{.*}} ashr - %C1 = ashr <4 x i64> undef, undef + ; SSSE3: cost of 32 {{.*}} %C2 = ashr + ; SSE42: cost of 32 {{.*}} %C2 = ashr + ; AVX: cost of 32 {{.*}} %C2 = ashr + ; AVX2: cost of 1 {{.*}} %C2 = ashr + %C2 = ashr <8 x i32> undef, undef + ; SSSE3: cost of 24 {{.*}} %C3 = ashr + ; SSE42: cost of 24 {{.*}} %C3 = ashr + ; AVX: cost of 24 {{.*}} %C3 = ashr + ; AVX2: cost of 4 {{.*}} %C3 = ashr + %C3 = ashr <4 x i64> undef, undef ret void }