From 8daa536902752068708f1e418b1db9067544f349 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <Matthew.Arsenault@amd.com> Date: Tue, 16 Jul 2019 18:21:25 +0000 Subject: [PATCH] AMDGPU: Replace store PatFrags Convert the easy cases to formats understood by GlobalISel. The store_*, truncstorei8_* and truncstorei16_* fragments previously defined through PrivateStore/GlobalStore/LocalStore/FlatStore are removed and redefined, together with a new truncstore_* fragment, as plain PatFrags inside the per-address-space foreach, using the IsStore/IsTruncStore/MemoryVT fields. The hi16 and the existing atomic store fragments are left as they were. The FLAT atomic store patterns now select on atomic_store_flat_32 and atomic_store_flat_64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366240 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUInstructions.td | 44 ++++++++++++++++++------- lib/Target/AMDGPU/FLATInstructions.td | 4 +-- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index d470b3cd514..61bc415c839 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -467,25 +467,48 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> { let MemoryVT = i64; } +def store_#as : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} + +// truncstore fragments. +def truncstore_#as : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 1; +} + +// TODO: We don't really need the truncstore here. We can use +// unindexedstore with MemoryVT directly, which will save an +// unnecessary check that the memory size is less than the value type +// in the generated matcher table. +def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} + +def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} + +defm atomic_store_#as : binary_atomic_op<atomic_store>; + } // End let AddressSpaces = ... 
} // End foreach AddrSpace -def store_private : PrivateStore <store>; -def truncstorei8_private : PrivateStore<truncstorei8>; -def truncstorei16_private : PrivateStore <truncstorei16>; + def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress; def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress; -def store_global : GlobalStore <store>; -def truncstorei8_global : GlobalStore <truncstorei8>; -def truncstorei16_global : GlobalStore <truncstorei16>; def store_atomic_global : GlobalStore<atomic_store>; def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress; def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress; -def store_local : LocalStore <store>; -def truncstorei8_local : LocalStore <truncstorei8>; -def truncstorei16_local : LocalStore <truncstorei16>; def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress; def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress; def atomic_store_local : LocalStore <atomic_store>; @@ -506,9 +529,6 @@ def store_align16_local : Aligned16Bytes < (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) >; -def store_flat : FlatStore <store>; -def truncstorei8_flat : FlatStore <truncstorei8>; -def truncstorei16_flat : FlatStore <truncstorei16>; def atomic_store_flat : FlatStore <atomic_store>; def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress; def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress; diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 9d541560613..8ddf4e2aa2b 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -792,8 +792,8 @@ def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32>; def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>; def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32>; -def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat, i32>; -def : FlatStoreAtomicPat 
<FLAT_STORE_DWORDX2, atomic_store_flat, i64>; +def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>; +def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>; def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>; -- 2.40.0