From: Stanislav Mekhanoshin Date: Mon, 13 May 2019 19:30:06 +0000 (+0000) Subject: [AMDGPU] gfx1010 tests. NFC. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=16bc05331b316770ed086a20b39b4639c4ed7cbb;p=llvm [AMDGPU] gfx1010 tests. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360615 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/test/CodeGen/AMDGPU/elf-header-flags-mach.ll index 88fdf300e3d..0e7d3e66d98 100644 --- a/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ b/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -1,53 +1,53 @@ -; RUN: llc -filetype=obj -march=r600 -mcpu=r600 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,R600 %s -; RUN: llc -filetype=obj -march=r600 -mcpu=r630 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,R630 %s -; RUN: llc -filetype=obj -march=r600 -mcpu=rs880 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RS880 %s -; RUN: llc -filetype=obj -march=r600 -mcpu=rv670 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RV670 %s -; RUN: llc -filetype=obj -march=r600 -mcpu=rv710 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RV710 %s -; RUN: llc -filetype=obj -march=r600 -mcpu=rv730 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RV730 %s -; RUN: llc -filetype=obj -march=r600 -mcpu=rv770 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RV770 %s -; RUN: llc -filetype=obj -march=r600 -mcpu=cedar < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,CEDAR %s -; RUN: llc -filetype=obj -march=r600 -mcpu=cypress < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,CYPRESS %s -; RUN: llc -filetype=obj -march=r600 -mcpu=juniper < %s | llvm-readobj --file-headers - | FileCheck 
--check-prefixes=ALL,ARCH-R600,JUNIPER %s -; RUN: llc -filetype=obj -march=r600 -mcpu=redwood < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,REDWOOD %s -; RUN: llc -filetype=obj -march=r600 -mcpu=sumo < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,SUMO %s -; RUN: llc -filetype=obj -march=r600 -mcpu=barts < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,BARTS %s -; RUN: llc -filetype=obj -march=r600 -mcpu=caicos < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,CAICOS %s -; RUN: llc -filetype=obj -march=r600 -mcpu=cayman < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,CAYMAN %s -; RUN: llc -filetype=obj -march=r600 -mcpu=turks < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,TURKS %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx600 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX600 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=tahiti < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX600 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx601 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=hainan < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=oland < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=pitcairn < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=verde < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx700 < %s | llvm-readobj --file-headers - | FileCheck 
--check-prefixes=ALL,ARCH-GCN,GFX700 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=kaveri < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX700 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx701 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX701 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=hawaii < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX701 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx702 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX702 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx703 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX703 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=kabini < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX703 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=mullins < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX703 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx704 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX704 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=bonaire < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX704 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX801 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=carrizo < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX801 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX802 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=iceland < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX802 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=tonga < %s | llvm-readobj --file-headers - | FileCheck 
--check-prefixes=ALL,ARCH-GCN,GFX802 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx803 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX803 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=fiji < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX803 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=polaris10 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX803 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=polaris11 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX803 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx810 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX810 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=stoney < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX810 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx900 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX900 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX902 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx904 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX904 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s -; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s +; RUN: llc -filetype=obj -march=r600 -mcpu=r600 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,R600 %s +; RUN: llc -filetype=obj -march=r600 -mcpu=r630 < %s | llvm-readobj -file-headers - | FileCheck 
--check-prefixes=ALL,ARCH-R600,R630 %s +; RUN: llc -filetype=obj -march=r600 -mcpu=rs880 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RS880 %s +; RUN: llc -filetype=obj -march=r600 -mcpu=rv670 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RV670 %s +; RUN: llc -filetype=obj -march=r600 -mcpu=rv710 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RV710 %s +; RUN: llc -filetype=obj -march=r600 -mcpu=rv730 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RV730 %s +; RUN: llc -filetype=obj -march=r600 -mcpu=rv770 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,RV770 %s +; RUN: llc -filetype=obj -march=r600 -mcpu=cedar < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,CEDAR %s +; RUN: llc -filetype=obj -march=r600 -mcpu=cypress < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,CYPRESS %s +; RUN: llc -filetype=obj -march=r600 -mcpu=juniper < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,JUNIPER %s +; RUN: llc -filetype=obj -march=r600 -mcpu=redwood < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,REDWOOD %s +; RUN: llc -filetype=obj -march=r600 -mcpu=sumo < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,SUMO %s +; RUN: llc -filetype=obj -march=r600 -mcpu=barts < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,BARTS %s +; RUN: llc -filetype=obj -march=r600 -mcpu=caicos < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,CAICOS %s +; RUN: llc -filetype=obj -march=r600 -mcpu=cayman < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,CAYMAN %s +; RUN: llc -filetype=obj -march=r600 -mcpu=turks < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-R600,TURKS %s +; RUN: llc 
-filetype=obj -march=amdgcn -mcpu=gfx600 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX600 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=tahiti < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX600 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx601 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=hainan < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=oland < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=pitcairn < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=verde < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX601 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx700 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX700 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=kaveri < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX700 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx701 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX701 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=hawaii < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX701 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx702 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX702 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx703 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX703 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=kabini < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX703 %s +; RUN: llc -filetype=obj 
-march=amdgcn -mcpu=mullins < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX703 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx704 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX704 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=bonaire < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX704 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX801 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=carrizo < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX801 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX802 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=iceland < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX802 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=tonga < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX802 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx803 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX803 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=fiji < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX803 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=polaris10 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX803 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=polaris11 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX803 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx810 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX810 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=stoney < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX810 %s +; RUN: llc -filetype=obj -march=amdgcn 
-mcpu=gfx900 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX900 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX902 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx904 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX904 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s +; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s ; ARCH-R600: Arch: r600 ; ARCH-GCN: Arch: amdgcn diff --git a/test/CodeGen/AMDGPU/fcanonicalize.ll b/test/CodeGen/AMDGPU/fcanonicalize.ll index 72870c58dcd..8ea72ec3fdb 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -485,7 +485,7 @@ define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx8(<2 x half> a } ; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush_gfx9: -; GCN-DAG: v_pk_mul_f16 v{{[0-9]+}}, 1.0, v{{[0-9]+}} +; GCN-DAG: v_pk_mul_f16 v{{[0-9]+}}, 1.0, v{{[0-9]+}} op_sel_hi:[0,1]{{$}} define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx9(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #6 { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id diff --git a/test/CodeGen/AMDGPU/inst-select-load-flat.mir b/test/CodeGen/AMDGPU/inst-select-load-flat.mir new file mode 100644 index 00000000000..31345a6e860 --- /dev/null +++ b/test/CodeGen/AMDGPU/inst-select-load-flat.mir @@ -0,0 +1,28 @@ +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s 
-check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN + +# REQUIRES: global-isel + +--- | + define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void } +... +--- + +name: global_addrspace +legalized: true +regBankSelected: true + +# GCN: global_addrspace +# GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0, 0 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0) + $vgpr0 = COPY %1 + +... +--- diff --git a/test/CodeGen/AMDGPU/inst-select-load-smrd.mir b/test/CodeGen/AMDGPU/inst-select-load-smrd.mir new file mode 100644 index 00000000000..e1f0f5c5fc9 --- /dev/null +++ b/test/CodeGen/AMDGPU/inst-select-load-smrd.mir @@ -0,0 +1,159 @@ +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI + +# REQUIRES: global-isel + +--- | + define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void } +... 
+--- + +name: smrd_imm +legalized: true +regBankSelected: true + +# GCN: body: +# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + +# Immediate offset: +# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0 +# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0, 0 + +# Max immediate offset for SI +# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0, 0 +# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0, 0 + +# Immediate overflow for SI +# SI: [[K1024:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1024 +# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0 +# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0 + +# Max immediate offset for VI +# SI: [[K1048572:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1048572 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143 +# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572 + +# +# Immediate overflow for VI +# SIVI: [[K1048576:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1048576 +# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0 + +# Max immediate for CI +# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4294967292 +# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 3 +# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 +# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 +# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0 + +# Immediate overflow for CI +# GCN: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 +# GCN: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4 +# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], 
%subreg.sub1 +# GCN-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 +# GCN-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 +# GCN: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# GCN-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 +# GCN-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 +# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0 + +# Max 32-bit byte offset +# SIVI: [[K4294967292:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4294967292 +# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0 + +# Overflow 32-bit byte offset +# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 +# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 +# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 +# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 +# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0 + +# Pointer loads +# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 +# GCN: $sgpr0_sgpr1 = COPY [[AS0]] +# GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 +# GCN: $sgpr0_sgpr1 = COPY [[AS1]] +# GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 +# GCN: $sgpr0_sgpr1 = COPY [[AS4]] + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + %0:sgpr(p4) = COPY $sgpr0_sgpr1 + + %1:sgpr(s64) = 
G_CONSTANT i64 4 + %2:sgpr(p4) = G_GEP %0, %1 + %3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %3 + + %4:sgpr(s64) = G_CONSTANT i64 1020 + %5:sgpr(p4) = G_GEP %0, %4 + %6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %6 + + %7:sgpr(s64) = G_CONSTANT i64 1024 + %8:sgpr(p4) = G_GEP %0, %7 + %9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %9 + + %10:sgpr(s64) = G_CONSTANT i64 1048572 + %11:sgpr(p4) = G_GEP %0, %10 + %12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %12 + + %13:sgpr(s64) = G_CONSTANT i64 1048576 + %14:sgpr(p4) = G_GEP %0, %13 + %15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %15 + + %16:sgpr(s64) = G_CONSTANT i64 17179869180 + %17:sgpr(p4) = G_GEP %0, %16 + %18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %18 + + %19:sgpr(s64) = G_CONSTANT i64 17179869184 + %20:sgpr(p4) = G_GEP %0, %19 + %21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %21 + + %22:sgpr(s64) = G_CONSTANT i64 4294967292 + %23:sgpr(p4) = G_GEP %0, %22 + %24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %24 + + %25:sgpr(s64) = G_CONSTANT i64 4294967296 + %26:sgpr(p4) = G_GEP %0, %25 + %27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0, addrspace 4) + $sgpr0 = COPY %27 + + %28:sgpr(p0) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4) + $sgpr0_sgpr1 = COPY %28 + + %29:sgpr(p1) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4) + $sgpr0_sgpr1 = COPY %29 + + %30:sgpr(p4) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4) + $sgpr0_sgpr1 = COPY %30 + +... 
+--- diff --git a/test/CodeGen/AMDGPU/large-alloca-compute.ll b/test/CodeGen/AMDGPU/large-alloca-compute.ll index 0343052601f..8cb822938fd 100644 --- a/test/CodeGen/AMDGPU/large-alloca-compute.ll +++ b/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -3,6 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s ; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa -mattr=-code-object-v3 < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s ; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-code-object-v3,-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa -mattr=-code-object-v3,-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=GFX10HSA -check-prefix=ALL %s ; FIXME: align on alloca seems to be ignored for private_segment_alignment @@ -42,6 +43,10 @@ ; GCNHSA: private_segment_alignment = 4 ; GCNHSA: .end_amd_kernel_code_t +; GFX10HSA: s_add_u32 [[FLAT_SCR_LO:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}} +; GFX10HSA-DAG: s_addc_u32 [[FLAT_SCR_HI:s[0-9]+]], s{{[0-9]+}}, 0 +; GFX10HSA-DAG: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), [[FLAT_SCR_LO]] +; GFX10HSA-DAG: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), [[FLAT_SCR_HI]] ; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen ; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll index 4f8cd6f682e..1125d970ba9 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll @@ -1,5 +1,6 @@ -;RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 
| FileCheck -check-prefix=GCN -check-prefix=VI %s +;RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIPLUS -check-prefix=SIVI %s +;RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIPLUS -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}test_interrupt: ; GCN: s_mov_b32 m0, 0 @@ -51,9 +52,21 @@ body: ret void } +; GCN-LABEL: {{^}}test_gs_alloc_req: +; GCN: s_mov_b32 m0, s0 +; GCN-NOT: s_mov_b32 m0 +; VIPLUS-NEXT: s_nop 0 +; SIVI: s_sendmsg 9 +; GFX9: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) +define amdgpu_kernel void @test_gs_alloc_req(i32 inreg %a) { +body: + call void @llvm.amdgcn.s.sendmsg(i32 9, i32 %a) + ret void +} + ; GCN-LABEL: {{^}}sendmsg: ; GCN: s_mov_b32 m0, s0 -; VI-NEXT: s_nop 0 +; VIPLUS-NEXT: s_nop 0 ; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) ; GCN-NEXT: s_endpgm define amdgpu_gs void @sendmsg(i32 inreg %a) #0 { @@ -63,7 +76,7 @@ define amdgpu_gs void @sendmsg(i32 inreg %a) #0 { ; GCN-LABEL: {{^}}sendmsghalt: ; GCN: s_mov_b32 m0, s0 -; VI-NEXT: s_nop 0 +; VIPLUS-NEXT: s_nop 0 ; GCN-NEXT: s_sendmsghalt sendmsg(MSG_INTERRUPT) ; GCN-NEXT: s_endpgm define amdgpu_kernel void @sendmsghalt(i32 inreg %a) #0 { diff --git a/test/CodeGen/AMDGPU/si-scheduler.ll b/test/CodeGen/AMDGPU/si-scheduler.ll index eca97c40b19..76bfa178653 100644 --- a/test/CodeGen/AMDGPU/si-scheduler.ll +++ b/test/CodeGen/AMDGPU/si-scheduler.ll @@ -4,6 +4,7 @@ ; is to specify -mattr=si-scheduler. If we just pass --misched=si, the backend ; won't know what scheduler we are using. ; RUN: llc -march=amdgcn --misched=si -mattr=si-scheduler < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 --misched=si -mattr=si-scheduler < %s | FileCheck %s ; The test checks the "si" machine scheduler pass works correctly. 
@@ -61,3 +62,31 @@ attributes #2 = { nounwind readonly } !0 = !{!1, !1, i64 0, i32 1} !1 = !{!"const", !2} !2 = !{!"tbaa root"} + + +; CHECK-LABEL: amdgpu_ps_main: +; CHECK: s_buffer_load_dword +define amdgpu_ps void @_amdgpu_ps_main(i32 %arg) local_unnamed_addr { +.entry: + %tmp = insertelement <2 x i32> zeroinitializer, i32 %arg, i32 0 + %tmp1 = bitcast <2 x i32> %tmp to i64 + %tmp2 = inttoptr i64 %tmp1 to <4 x i32> addrspace(4)* + %tmp3 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp2, align 16 + %tmp4 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp3, i32 0, i32 0) #0 + switch i32 %tmp4, label %bb [ + i32 0, label %bb5 + i32 1, label %bb6 + ] + +bb: ; preds = %.entry + unreachable + +bb5: ; preds = %.entry + unreachable + +bb6: ; preds = %.entry + unreachable +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #1 diff --git a/test/CodeGen/AMDGPU/v_cndmask.ll b/test/CodeGen/AMDGPU/v_cndmask.ll index 53ef9d536cb..524e0f49e62 100644 --- a/test/CodeGen/AMDGPU/v_cndmask.ll +++ b/test/CodeGen/AMDGPU/v_cndmask.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s declare i32 @llvm.amdgcn.workitem.id.x() #1 @@ -23,10 +24,13 @@ define amdgpu_kernel void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, fl ; This requires slightly trickier SGPR operand 
legalization since the ; single constant bus SGPR usage is the last operand, and it should ; never be moved. +; However on GFX10 constant bus is limited to 2 scalar operands, not one. ; GCN-LABEL: {{^}}v_cnd_nan: -; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0 -; GCN: v_cndmask_b32_e32 v{{[0-9]}}, -1, v{{[0-9]}}, vcc +; SIVI: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0 +; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}}, vcc +; GFX10: v_cmp_eq_u32_e64 [[CC:s\[[0-9:]+\]]], s{{[0-9]+}}, 0 +; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, -1, s{{[0-9]+}}, [[CC]] ; GCN-DAG: v{{[0-9]}} ; All nan values are converted to 0xffffffff ; GCN: s_endpgm @@ -44,9 +48,11 @@ define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprZ_f32: ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}} -; GCN-DAG: v_cmp_nlg_f32_e64 vcc, s[[X]], 0 -; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]] -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc +; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], s[[X]], 0 +; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], s[[X]], 0 +; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]] +; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], [[CC]] +; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, s[[Z]], [[CC]] define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -59,9 +65,11 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32: ; GCN: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 -; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]] -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], vcc +; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], [[X]], 0 +; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], [[X]], 0 +; SIVI-DAG: v_mov_b32_e32 
[[VX:v[0-9]+]], [[X]] +; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], [[CC]] +; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, [[X]], [[CC]] define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -74,9 +82,11 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32: ; GCN-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}} -; GCN-DAG: v_cmp_nlg_f32_e64 vcc, s[[X]], 0 -; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]] -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc +; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], s[[X]], 0 +; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], s[[X]], 0 +; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]] +; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], [[CC]] +; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, s[[Z]], [[CC]] define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -89,9 +99,11 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32: ; GCN: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 -; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]] -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], vcc +; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], [[X]], 0 +; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], [[X]], 0 +; SIVI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]] +; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], [[CC]] +; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, [[X]], [[CC]] define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 
%tid to i64 @@ -104,7 +116,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] +; GCN-DAG: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]] ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]] define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { @@ -120,7 +132,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* } ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32: -; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] +; GCN-DAG: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]] ; GCN-DAG: s_load_dword [[X:s[0-9]+]] ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]] @@ -137,11 +149,12 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* } ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_sgprZ_f32: -; GCN-DAG: {{buffer|flat}}_load_dword [[X:v[0-9]+]] +; GCN-DAG: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] ; GCN-DAG: s_load_dword [[Z:s[0-9]+]] ; GCN-DAG: v_cmp_ngt_f32_e32 vcc, 0, [[X]] -; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc +; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] +; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc +; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, [[Z]], vcc define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -155,8 +168,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* } ; GCN-LABEL: 
{{^}}fcmp_vgprX_k0_select_k1_vgprZ_f32: -; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]] ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { @@ -174,8 +187,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* } ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i32: -; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]] ; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 { @@ -194,8 +207,8 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* % ; FIXME: Why does VI make the wrong regalloc choice? 
; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i64: -; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]{{\]}} -; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]{{\]}} +; GCN: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]{{\]}} +; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]{{\]}} ; SI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}} ; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc ; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc @@ -218,8 +231,8 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* % } ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_vgprZ_k1_v4f32: -; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dwordx4 +; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dwordx4 ; GCN: v_cmp_nge_f32_e32 vcc, 4.0, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc @@ -241,8 +254,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrs } ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_v4f32: -; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dwordx4 +; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dwordx4 ; GCN: v_cmp_ge_f32_e32 vcc, 4.0, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc @@ -267,8 +280,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrs ; multiple uses. 
; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_v4f32: -; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dwordx4 +; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dwordx4 ; GCN: v_cmp_le_f32_e32 vcc, 4.0, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc @@ -314,12 +327,13 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %ou ; Different types compared vs. selected ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selectf64_k1_vgprZ_f32: -; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3ff00000 -; GCN-DAG: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN-DAG: {{buffer|flat}}_load_dwordx2 +; SIVI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3ff00000 +; GCN-DAG: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] +; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]] -; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc +; SIVI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc +; GFX10-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0x3ff00000, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -337,8 +351,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace ; Different types compared vs. selected ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selecti64_k1_vgprZ_f32: -; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dwordx2 ; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc @@ -359,8 +373,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1) ; Different types compared vs. 
selected ; GCN-LABEL: {{^}}icmp_vgprX_k0_selectf32_k1_vgprZ_i32: -; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]] ; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc @@ -381,7 +395,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace( ; FIXME: Should be able to handle multiple uses ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2: -; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]] ; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]] ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc diff --git a/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir b/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir new file mode 100644 index 00000000000..574aea17319 --- /dev/null +++ b/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir @@ -0,0 +1,155 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s + +# The purpose of this test is to make sure we are combining relevant memory +# operations correctly with/without DLC bit. 
+ +--- | + define amdgpu_kernel void @test1(i32 addrspace(1)* %out) { + %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 + store i32 123, i32 addrspace(1)* %out.gep.1 + store i32 456, i32 addrspace(1)* %out + ret void + } + + define amdgpu_kernel void @test2(i32 addrspace(1)* %out) { + %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 + store i32 123, i32 addrspace(1)* %out.gep.1 + store i32 456, i32 addrspace(1)* %out + ret void + } + + define amdgpu_kernel void @test3(i32 addrspace(1)* %out) { + %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 + store i32 123, i32 addrspace(1)* %out.gep.1 + store i32 456, i32 addrspace(1)* %out + ret void + } + define amdgpu_kernel void @test4(i32 addrspace(1)* %out) { + %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 + store i32 123, i32 addrspace(1)* %out.gep.1 + store i32 456, i32 addrspace(1)* %out + ret void + } +... + +# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +--- +name: test1 +liveins: + - { reg: '$sgpr0_sgpr1', virtual-reg: '' } +body: | + bb.0 (%ir-block.0): + liveins: $sgpr0_sgpr1 + + $vgpr0 = V_MOV_B32_e32 123, implicit $exec + $vgpr1 = V_MOV_B32_e32 456, implicit $exec + + $sgpr2 = S_MOV_B32 -1 + $sgpr3 = S_MOV_B32 61440 + + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 + + %5:vgpr_32 = COPY $vgpr0 + %6:vgpr_32 = COPY $vgpr1 + + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + + S_ENDPGM 0 +... 
+ +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +--- +name: test2 +liveins: + - { reg: '$sgpr0_sgpr1', virtual-reg: '' } +body: | + bb.0 (%ir-block.0): + liveins: $sgpr0_sgpr1 + + $vgpr0 = V_MOV_B32_e32 123, implicit $exec + $vgpr1 = V_MOV_B32_e32 456, implicit $exec + + $sgpr2 = S_MOV_B32 -1 + $sgpr3 = S_MOV_B32 61440 + + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 + + %5:vgpr_32 = COPY $vgpr0 + %6:vgpr_32 = COPY $vgpr1 + + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + + S_ENDPGM 0 +... 
+ +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +--- +name: test3 +liveins: + - { reg: '$sgpr0_sgpr1', virtual-reg: '' } +body: | + bb.0 (%ir-block.0): + liveins: $sgpr0_sgpr1 + + $vgpr0 = V_MOV_B32_e32 123, implicit $exec + $vgpr1 = V_MOV_B32_e32 456, implicit $exec + + $sgpr2 = S_MOV_B32 -1 + $sgpr3 = S_MOV_B32 61440 + + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 + + %5:vgpr_32 = COPY $vgpr0 + %6:vgpr_32 = COPY $vgpr1 + + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + + S_ENDPGM 0 +... 
+ +# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +--- +name: test4 +liveins: + - { reg: '$sgpr0_sgpr1', virtual-reg: '' } +body: | + bb.0 (%ir-block.0): + liveins: $sgpr0_sgpr1 + + $vgpr0 = V_MOV_B32_e32 123, implicit $exec + $vgpr1 = V_MOV_B32_e32 456, implicit $exec + + $sgpr2 = S_MOV_B32 -1 + $sgpr3 = S_MOV_B32 61440 + + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 + + %5:vgpr_32 = COPY $vgpr0 + %6:vgpr_32 = COPY $vgpr1 + + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + + S_ENDPGM 0 +... diff --git a/test/MC/Disassembler/AMDGPU/gfx10-sgpr-max.txt b/test/MC/Disassembler/AMDGPU/gfx10-sgpr-max.txt new file mode 100644 index 00000000000..4e8efd1120a --- /dev/null +++ b/test/MC/Disassembler/AMDGPU/gfx10-sgpr-max.txt @@ -0,0 +1,10 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX10 %s + +# GFX10: v_mov_b32_e32 v0, s105 ; encoding: [0x69,0x02,0x00,0x7e] +0x69,0x02,0x00,0x7e + +# GFX10: v_mov_b32_sdwa v0, s105 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD +0xf9,0x02,0x00,0x7e,0x69,0x16,0x86,0x00 + +# GFX10: s_mov_b32 s105, s104 ; encoding: [0x68,0x03,0xe9,0xbe] +0x68,0x03,0xe9,0xbe diff --git a/test/Object/AMDGPU/elf-header-flags-mach.yaml b/test/Object/AMDGPU/elf-header-flags-mach.yaml index 501c276c892..8a31a2129b0 100644 --- a/test/Object/AMDGPU/elf-header-flags-mach.yaml +++ b/test/Object/AMDGPU/elf-header-flags-mach.yaml @@ -94,7 +94,9 @@ # RUN: yaml2obj -docnum=32 %s > %t.o.32 # RUN: llvm-readobj -S --file-headers 
%t.o.32 | FileCheck --check-prefixes=ELF-ALL,ELF-GFX909 %s # RUN: obj2yaml %t.o.32 | FileCheck --check-prefixes=YAML-GFX909 %s - +# RUN: yaml2obj -docnum=33 %s > %t.o.33 +# RUN: llvm-readobj -S --file-headers %t.o.33 | FileCheck --check-prefixes=ELF-ALL,ELF-GFX1010 %s +# RUN: obj2yaml %t.o.33 | FileCheck --check-prefixes=YAML-GFX1010 %s # ELF-ALL: Flags [ # ELF-R600: EF_AMDGPU_MACH_R600_R600 (0x1) @@ -129,6 +131,7 @@ # ELF-GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E) # ELF-GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) # ELF-GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31) +# ELF-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33) # ELF-ALL: ] # YAML-R600: Flags: [ EF_AMDGPU_MACH_R600_R600 ] @@ -163,6 +166,7 @@ # YAML-GFX904: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX904 ] # YAML-GFX906: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX906 ] # YAML-GFX909: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX909 ] +# YAML-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ] # Doc1 --- !ELF @@ -515,3 +519,14 @@ FileHeader: Machine: EM_AMDGPU Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX909 ] ... + +# Doc33 +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_NONE + Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ] +...