From a3d48590915f3ee13da6d10400feef659e4f63fc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 5 Dec 2016 05:50:51 +0000 Subject: [PATCH] [AVX-512] Teach fast isel to handle 512-bit vector bitcasts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288641 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 10 +- test/CodeGen/X86/fast-isel-bitcasts-avx512.ll | 244 ++++++++++++++++++ 2 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/fast-isel-bitcasts-avx512.ll diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 44d918f3786..53e6ab6e359 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -3563,8 +3563,14 @@ X86FastISel::fastSelectInstruction(const Instruction *I) { if (!SrcVT.isSimple() || !DstVT.isSimple()) return false; - if (!SrcVT.is128BitVector() && - !(Subtarget->hasAVX() && SrcVT.is256BitVector())) + MVT SVT = SrcVT.getSimpleVT(); + MVT DVT = DstVT.getSimpleVT(); + + if (!SVT.is128BitVector() && + !(Subtarget->hasAVX() && SVT.is256BitVector()) && + !(Subtarget->hasAVX512() && SVT.is512BitVector() && + (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 && + DVT.getScalarSizeInBits() >= 32)))) return false; unsigned Reg = getRegForValue(I->getOperand(0)); diff --git a/test/CodeGen/X86/fast-isel-bitcasts-avx512.ll b/test/CodeGen/X86/fast-isel-bitcasts-avx512.ll new file mode 100644 index 00000000000..7b81be3fc43 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-bitcasts-avx512.ll @@ -0,0 +1,244 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -fast-isel -fast-isel-abort=1 -asm-verbose=0 | FileCheck %s +; +; Bitcasts between 512-bit vector types are no-ops since no instruction is +; needed for the conversion. + +define <8 x i64> @v16i32_to_v8i64(<16 x i32> %a) { +;CHECK-LABEL: v16i32_to_v8i64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x i32> %a to <8 x i64> + ret <8 x i64> %1 +} + +define <8 x i64> @v32i16_to_v8i64(<32 x i16> %a) { +;CHECK-LABEL: v32i16_to_v8i64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <32 x i16> %a to <8 x i64> + ret <8 x i64> %1 +} + +define <8 x i64> @v64i8_to_v8i64(<64 x i8> %a) { +;CHECK-LABEL: v64i8_to_v8i64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <64 x i8> %a to <8 x i64> + ret <8 x i64> %1 +} + +define <8 x i64> @v8f64_to_v8i64(<8 x double> %a) { +;CHECK-LABEL: v8f64_to_v8i64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x double> %a to <8 x i64> + ret <8 x i64> %1 +} + +define <8 x i64> @v16f32_to_v8i64(<16 x float> %a) { +;CHECK-LABEL: v16f32_to_v8i64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x float> %a to <8 x i64> + ret <8 x i64> %1 +} + +define <16 x i32> @v8i64_to_v16i32(<8 x i64> %a) { +;CHECK-LABEL: v8i64_to_v16i32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x i64> %a to <16 x i32> + ret <16 x i32> %1 +} + +define <16 x i32> @v32i16_to_v16i32(<32 x i16> %a) { +;CHECK-LABEL: v32i16_to_v16i32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <32 x i16> %a to <16 x i32> + ret <16 x i32> %1 +} + +define <16 x i32> @v64i8_to_v16i32(<64 x i8> %a) { +;CHECK-LABEL: v64i8_to_v16i32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <64 x i8> %a to <16 x i32> + ret <16 x i32> %1 +} + +define <16 x i32> @v8f64_to_v16i32(<8 x double> %a) { +;CHECK-LABEL: v8f64_to_v16i32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x double> %a to <16 x i32> + ret <16 x i32> %1 +} + +define <16 x i32> @v16f32_to_v16i32(<16 x float> %a) { +;CHECK-LABEL: v16f32_to_v16i32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x float> %a to <16 x i32> + ret <16 x i32> %1 +} + +define <32 x i16> @v8i64_to_v32i16(<8 x i64> %a) { +;CHECK-LABEL: v8i64_to_v32i16: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x i64> %a to <32 x i16> + ret <32 x i16> %1 +} + +define <32 x i16> @v16i32_to_v32i16(<16 x i32> %a) { +;CHECK-LABEL: v16i32_to_v32i16: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x i32> %a to <32 x i16> + ret <32 x i16> %1 +} + +define <32 x i16> @v64i8_to_v32i16(<64 x i8> %a) { +;CHECK-LABEL: v64i8_to_v32i16: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <64 x i8> %a to <32 x i16> + ret <32 x i16> %1 +} + +define <32 x i16> @v8f64_to_v32i16(<8 x double> %a) { +;CHECK-LABEL: v8f64_to_v32i16: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x double> %a to <32 x i16> + ret <32 x i16> %1 +} + +define <32 x i16> @v16f32_to_v32i16(<16 x float> %a) { +;CHECK-LABEL: v16f32_to_v32i16: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x float> %a to <32 x i16> + ret <32 x i16> %1 +} + +define <64 x i8> @v32i16_to_v64i8(<32 x i16> %a) { +;CHECK-LABEL: v32i16_to_v64i8: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <32 x i16> %a to <64 x i8> + ret <64 x i8> %1 +} + +define <64 x i8> @v8i64_to_v64i8(<8 x i64> %a) { +;CHECK-LABEL: v8i64_to_v64i8: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x i64> %a to <64 x i8> + ret <64 x i8> %1 +} + +define <64 x i8> @v16i32_to_v64i8(<16 x i32> %a) { +;CHECK-LABEL: v16i32_to_v64i8: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x i32> %a to <64 x i8> + ret <64 x i8> %1 +} + +define <64 x i8> @v8f64_to_v64i8(<8 x double> %a) { +;CHECK-LABEL: v8f64_to_v64i8: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x double> %a to <64 x i8> + ret <64 x i8> %1 +} + +define <64 x i8> @v16f32_to_v64i8(<16 x float> %a) { +;CHECK-LABEL: v16f32_to_v64i8: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x float> %a to <64 x i8> + ret <64 x i8> %1 +} + +define <16 x float> @v64i8_to_v16f32(<64 x i8> %a) { +;CHECK-LABEL: v64i8_to_v16f32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <64 x i8> %a to <16 x float> + ret <16 x float> %1 +} + +define <16 x float> @v32i16_to_v16f32(<32 x i16> %a) { +;CHECK-LABEL: v32i16_to_v16f32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <32 x i16> %a to <16 x float> + ret <16 x float> %1 +} + +define <16 x float> @v8i64_to_v16f32(<8 x i64> %a) { +;CHECK-LABEL: v8i64_to_v16f32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x i64> %a to <16 x float> + ret <16 x float> %1 +} + +define <16 x float> @v16i32_to_v16f32(<16 x i32> %a) { +;CHECK-LABEL: v16i32_to_v16f32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x i32> %a to <16 x float> + ret <16 x float> %1 +} + +define <16 x float> @v8f64_to_v16f32(<8 x double> %a) { +;CHECK-LABEL: v8f64_to_v16f32: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x double> %a to <16 x float> + ret <16 x float> %1 +} + +define <8 x double> @v16f32_to_v8f64(<16 x float> %a) { +;CHECK-LABEL: v16f32_to_v8f64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x float> %a to <8 x double> + ret <8 x double> %1 +} + +define <8 x double> @v64i8_to_v8f64(<64 x i8> %a) { +;CHECK-LABEL: v64i8_to_v8f64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <64 x i8> %a to <8 x double> + ret <8 x double> %1 +} + +define <8 x double> @v32i16_to_v8f64(<32 x i16> %a) { +;CHECK-LABEL: v32i16_to_v8f64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <32 x i16> %a to <8 x double> + ret <8 x double> %1 +} + +define <8 x double> @v8i64_to_v8f64(<8 x i64> %a) { +;CHECK-LABEL: v8i64_to_v8f64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <8 x i64> %a to <8 x double> + ret <8 x double> %1 +} + +define <8 x double> @v16i32_to_v8f64(<16 x i32> %a) { +;CHECK-LABEL: v16i32_to_v8f64: +;CHECK-NEXT: .cfi_startproc +;CHECK-NEXT: ret + %1 = bitcast <16 x i32> %a to <8 x double> + ret <8 x double> %1 +} -- 2.50.1