// If the AVX2ShuffleTbl lookup for (Kind, LT.second) yields an entry, return
// that entry's Cost multiplied by LT.first.
if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
+ static const CostTblEntry XOPShuffleTbl[] = { // {shuffle kind, MVT, cost}; each cost equals the instruction count in its note
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vpermil2pd
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, 2 }, // vperm2f128 + vpermil2ps
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vpermil2pd
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, 2 }, // vperm2f128 + vpermil2ps
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vextractf128 + 2*vpperm
+ // + vinsertf128
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vextractf128 + 2*vpperm
+ // + vinsertf128
+ // Two-source permutes: a single vpperm for v8i16/v16i8; v16i16/v32i8 are costed as a 9-instruction extract/vpperm/insert sequence.
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, 9 }, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpperm
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, 9 }, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 }, // vpperm
+ };
+
+ if (ST->hasXOP()) // consult XOPShuffleTbl only when the subtarget reports XOP
+ if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost; // entry's Cost multiplied by LT.first; with no entry, control continues past this check
+
static const CostTblEntry AVX1ShuffleTbl[] = {
{ TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
{ TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+xop | FileCheck %s -check-prefix=CHECK -check-prefix=XOP
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; SSE2: cost of 1 {{.*}} %V128 = shufflevector
; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 1 {{.*}} %V128 = shufflevector
; AVX2: cost of 1 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 2 {{.*}} %V256 = shufflevector
; SSSE3: cost of 2 {{.*}} %V256 = shufflevector
; SSE42: cost of 2 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 2 {{.*}} %V256 = shufflevector
; AVX1: cost of 3 {{.*}} %V256 = shufflevector
; AVX2: cost of 1 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
; SSE2: cost of 12 {{.*}} %V512 = shufflevector
; SSSE3: cost of 12 {{.*}} %V512 = shufflevector
; SSE42: cost of 12 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 8 {{.*}} %V512 = shufflevector
; AVX1: cost of 8 {{.*}} %V512 = shufflevector
; AVX2: cost of 6 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
; SSE2: cost of 56 {{.*}} %V1024 = shufflevector
; SSSE3: cost of 56 {{.*}} %V1024 = shufflevector
; SSE42: cost of 56 {{.*}} %V1024 = shufflevector
+ ; XOP: cost of 48 {{.*}} %V1024 = shufflevector
; AVX1: cost of 48 {{.*}} %V1024 = shufflevector
; AVX2: cost of 36 {{.*}} %V1024 = shufflevector
; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
; SSE2: cost of 1 {{.*}} %V128 = shufflevector
; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 1 {{.*}} %V128 = shufflevector
; AVX2: cost of 1 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 2 {{.*}} %V256 = shufflevector
; SSSE3: cost of 2 {{.*}} %V256 = shufflevector
; SSE42: cost of 2 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 2 {{.*}} %V256 = shufflevector
; AVX1: cost of 3 {{.*}} %V256 = shufflevector
; AVX2: cost of 1 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
; SSE2: cost of 12 {{.*}} %V512 = shufflevector
; SSSE3: cost of 12 {{.*}} %V512 = shufflevector
; SSE42: cost of 12 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 8 {{.*}} %V512 = shufflevector
; AVX1: cost of 8 {{.*}} %V512 = shufflevector
; AVX2: cost of 6 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
; SSE2: cost of 1 {{.*}} %V128 = shufflevector
; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 1 {{.*}} %V128 = shufflevector
; AVX2: cost of 1 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 4 {{.*}} %V256 = shufflevector
; SSSE3: cost of 4 {{.*}} %V256 = shufflevector
; SSE42: cost of 4 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 2 {{.*}} %V256 = shufflevector
; AVX1: cost of 4 {{.*}} %V256 = shufflevector
; AVX2: cost of 1 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
; SSE2: cost of 24 {{.*}} %V512 = shufflevector
; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
; SSE42: cost of 24 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 8 {{.*}} %V512 = shufflevector
; AVX1: cost of 8 {{.*}} %V512 = shufflevector
; AVX2: cost of 6 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
; SSE2: cost of 1 {{.*}} %V128 = shufflevector
; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 1 {{.*}} %V128 = shufflevector
; AVX2: cost of 1 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 4 {{.*}} %V256 = shufflevector
; SSSE3: cost of 4 {{.*}} %V256 = shufflevector
; SSE42: cost of 4 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 2 {{.*}} %V256 = shufflevector
; AVX1: cost of 4 {{.*}} %V256 = shufflevector
; AVX2: cost of 1 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
; SSE2: cost of 24 {{.*}} %V512 = shufflevector
; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
; SSE42: cost of 24 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 8 {{.*}} %V512 = shufflevector
; AVX1: cost of 8 {{.*}} %V512 = shufflevector
; AVX2: cost of 6 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
; SSE2: cost of 112 {{.*}} %V1024 = shufflevector
; SSSE3: cost of 112 {{.*}} %V1024 = shufflevector
; SSE42: cost of 112 {{.*}} %V1024 = shufflevector
+ ; XOP: cost of 48 {{.*}} %V1024 = shufflevector
; AVX1: cost of 48 {{.*}} %V1024 = shufflevector
; AVX2: cost of 36 {{.*}} %V1024 = shufflevector
; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
; SSE2: cost of 5 {{.*}} %V128 = shufflevector
; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 1 {{.*}} %V128 = shufflevector
; AVX2: cost of 1 {{.*}} %V128 = shufflevector
; AVX512F: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 16 {{.*}} %V256 = shufflevector
; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
; SSE42: cost of 6 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 4 {{.*}} %V256 = shufflevector
; AVX1: cost of 8 {{.*}} %V256 = shufflevector
; AVX2: cost of 4 {{.*}} %V256 = shufflevector
; AVX512F: cost of 4 {{.*}} %V256 = shufflevector
; SSE2: cost of 96 {{.*}} %V512 = shufflevector
; SSSE3: cost of 36 {{.*}} %V512 = shufflevector
; SSE42: cost of 36 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 18 {{.*}} %V512 = shufflevector
; AVX1: cost of 30 {{.*}} %V512 = shufflevector
; AVX2: cost of 14 {{.*}} %V512 = shufflevector
; AVX512F: cost of 14 {{.*}} %V512 = shufflevector
; SSE2: cost of 448 {{.*}} %V1024 = shufflevector
; SSSE3: cost of 168 {{.*}} %V1024 = shufflevector
; SSE42: cost of 168 {{.*}} %V1024 = shufflevector
+ ; XOP: cost of 108 {{.*}} %V1024 = shufflevector
; AVX1: cost of 180 {{.*}} %V1024 = shufflevector
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
; AVX512F: cost of 84 {{.*}} %V1024 = shufflevector
; SSE2: cost of 10 {{.*}} %V128 = shufflevector
; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 1 {{.*}} %V128 = shufflevector
; AVX2: cost of 1 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 26 {{.*}} %V256 = shufflevector
; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
; SSE42: cost of 6 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 4 {{.*}} %V256 = shufflevector
; AVX1: cost of 8 {{.*}} %V256 = shufflevector
; AVX2: cost of 4 {{.*}} %V256 = shufflevector
; AVX512F: cost of 4 {{.*}} %V256 = shufflevector
; SSE2: cost of 156 {{.*}} %V512 = shufflevector
; SSSE3: cost of 36 {{.*}} %V512 = shufflevector
; SSE42: cost of 36 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 18 {{.*}} %V512 = shufflevector
; AVX1: cost of 30 {{.*}} %V512 = shufflevector
; AVX2: cost of 14 {{.*}} %V512 = shufflevector
; AVX512F: cost of 14 {{.*}} %V512 = shufflevector
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+xop | FileCheck %s -check-prefix=CHECK -check-prefix=XOP
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; SSE2: cost of 1 {{.*}} %V128 = shufflevector
; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 1 {{.*}} %V128 = shufflevector
; AVX2: cost of 1 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 6 {{.*}} %V256 = shufflevector
; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
; SSE42: cost of 6 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 4 {{.*}} %V256 = shufflevector
; AVX1: cost of 4 {{.*}} %V256 = shufflevector
; AVX2: cost of 3 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
; SSE2: cost of 28 {{.*}} %V512 = shufflevector
; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
; SSE42: cost of 28 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 24 {{.*}} %V512 = shufflevector
; AVX1: cost of 24 {{.*}} %V512 = shufflevector
; AVX2: cost of 18 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
+ ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
; SSE2: cost of 1 {{.*}} %V128 = shufflevector
; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 1 {{.*}} %V128 = shufflevector
; AVX2: cost of 1 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 6 {{.*}} %V256 = shufflevector
; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
; SSE42: cost of 6 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 4 {{.*}} %V256 = shufflevector
; AVX1: cost of 4 {{.*}} %V256 = shufflevector
; AVX2: cost of 3 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
; SSE2: cost of 28 {{.*}} %V512 = shufflevector
; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
; SSE42: cost of 28 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 24 {{.*}} %V512 = shufflevector
; AVX1: cost of 24 {{.*}} %V512 = shufflevector
; AVX2: cost of 18 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
+ ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
; SSE2: cost of 2 {{.*}} %V128 = shufflevector
; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
; SSE42: cost of 2 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 2 {{.*}} %V128 = shufflevector
; AVX1: cost of 2 {{.*}} %V128 = shufflevector
; AVX2: cost of 2 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 12 {{.*}} %V256 = shufflevector
; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
; SSE42: cost of 12 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 4 {{.*}} %V256 = shufflevector
; AVX1: cost of 4 {{.*}} %V256 = shufflevector
; AVX2: cost of 3 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
; SSE2: cost of 56 {{.*}} %V512 = shufflevector
; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
; SSE42: cost of 56 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 24 {{.*}} %V512 = shufflevector
; AVX1: cost of 24 {{.*}} %V512 = shufflevector
; AVX2: cost of 18 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
+ ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
; SSE2: cost of 2 {{.*}} %V128 = shufflevector
; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
; SSE42: cost of 2 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 2 {{.*}} %V128 = shufflevector
; AVX1: cost of 2 {{.*}} %V128 = shufflevector
; AVX2: cost of 2 {{.*}} %V128 = shufflevector
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
; SSE2: cost of 12 {{.*}} %V256 = shufflevector
; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
; SSE42: cost of 12 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 4 {{.*}} %V256 = shufflevector
; AVX1: cost of 4 {{.*}} %V256 = shufflevector
; AVX2: cost of 3 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
; SSE2: cost of 56 {{.*}} %V512 = shufflevector
; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
; SSE42: cost of 56 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 24 {{.*}} %V512 = shufflevector
; AVX1: cost of 24 {{.*}} %V512 = shufflevector
; AVX2: cost of 18 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
+ ; XOP: cost of 112 {{.*}} %V1024 = shufflevector
; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
; SSE2: cost of 8 {{.*}} %V128 = shufflevector
; SSSE3: cost of 3 {{.*}} %V128 = shufflevector
; SSE42: cost of 3 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 3 {{.*}} %V128 = shufflevector
; AVX2: cost of 3 {{.*}} %V128 = shufflevector
; AVX512F: cost of 3 {{.*}} %V128 = shufflevector
; SSE2: cost of 48 {{.*}} %V256 = shufflevector
; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
; SSE42: cost of 18 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 9 {{.*}} %V256 = shufflevector
; AVX1: cost of 15 {{.*}} %V256 = shufflevector
; AVX2: cost of 7 {{.*}} %V256 = shufflevector
; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
; SSE2: cost of 224 {{.*}} %V512 = shufflevector
; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
; SSE42: cost of 84 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 54 {{.*}} %V512 = shufflevector
; AVX1: cost of 90 {{.*}} %V512 = shufflevector
; AVX2: cost of 42 {{.*}} %V512 = shufflevector
; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
; SSE2: cost of 960 {{.*}} %V1024 = shufflevector
; SSSE3: cost of 360 {{.*}} %V1024 = shufflevector
; SSE42: cost of 360 {{.*}} %V1024 = shufflevector
+ ; XOP: cost of 252 {{.*}} %V1024 = shufflevector
; AVX1: cost of 420 {{.*}} %V1024 = shufflevector
; AVX2: cost of 196 {{.*}} %V1024 = shufflevector
; AVX512F: cost of 196 {{.*}} %V1024 = shufflevector
; SSE2: cost of 13 {{.*}} %V128 = shufflevector
; SSSE3: cost of 3 {{.*}} %V128 = shufflevector
; SSE42: cost of 3 {{.*}} %V128 = shufflevector
+ ; XOP: cost of 1 {{.*}} %V128 = shufflevector
; AVX1: cost of 3 {{.*}} %V128 = shufflevector
; AVX2: cost of 3 {{.*}} %V128 = shufflevector
; AVX512F: cost of 3 {{.*}} %V128 = shufflevector
; SSE2: cost of 78 {{.*}} %V256 = shufflevector
; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
; SSE42: cost of 18 {{.*}} %V256 = shufflevector
+ ; XOP: cost of 9 {{.*}} %V256 = shufflevector
; AVX1: cost of 15 {{.*}} %V256 = shufflevector
; AVX2: cost of 7 {{.*}} %V256 = shufflevector
; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
; SSE2: cost of 364 {{.*}} %V512 = shufflevector
; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
; SSE42: cost of 84 {{.*}} %V512 = shufflevector
+ ; XOP: cost of 54 {{.*}} %V512 = shufflevector
; AVX1: cost of 90 {{.*}} %V512 = shufflevector
; AVX2: cost of 42 {{.*}} %V512 = shufflevector
; AVX512F: cost of 42 {{.*}} %V512 = shufflevector