From 484f483ab45aee4075780f256026f4857e94d050 Mon Sep 17 00:00:00 2001 From: Nirav Dave Date: Sun, 23 Jul 2017 02:06:28 +0000 Subject: [PATCH] [DAG] Fix typo preventing some store merges to truncated stores. Check the actual memory type stored and not the extended value size when considering whether a truncated store merge is worthwhile. Reviewers: efriedma, RKSimon, spatel, jyknight Reviewed By: efriedma Subscribers: llvm-commits, nhaehnle Differential Revision: https://reviews.llvm.org/D35623 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308833 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++---- test/CodeGen/AMDGPU/merge-stores.ll | 6 ++---- test/CodeGen/BPF/undef.ll | 7 +++---- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 677690b954f..411d5c054b5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12866,8 +12866,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFast) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastIntegerTrunc = true; LastLegalType = i + 1; @@ -13098,8 +13098,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && 
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll index 6b0ec483247..59aa89f69ed 100644 --- a/test/CodeGen/AMDGPU/merge-stores.ll +++ b/test/CodeGen/AMDGPU/merge-stores.ll @@ -10,8 +10,7 @@ ; GCN-LABEL: {{^}}merge_global_store_2_constants_i8: -; GCN: buffer_store_byte -; GCN: buffer_store_byte +; GCN: buffer_store_short ; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 @@ -489,8 +488,7 @@ define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(i32 addr } ; GCN-LABEL: {{^}}merge_local_store_2_constants_i8: -; GCN: ds_write_b8 -; GCN: ds_write_b8 +; GCN: ds_write_b16 ; GCN: s_endpgm define amdgpu_kernel void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1 diff --git a/test/CodeGen/BPF/undef.ll b/test/CodeGen/BPF/undef.ll index 8d8a5f42951..205d97c80ef 100644 --- a/test/CodeGen/BPF/undef.ll +++ b/test/CodeGen/BPF/undef.ll @@ -23,10 +23,9 @@ define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 s ; EL: r1 = 134678021 ; EB: r1 = 84281096 ; CHECK: *(u32 *)(r10 - 8) = r1 -; CHECK: r1 = 9 -; CHECK: *(u8 *)(r10 - 4) = r1 -; CHECK: r1 = 10 -; CHECK: *(u8 *)(r10 - 3) = r1 +; EL: r1 = 2569 +; EB: r1 = 2314 +; CHECK: *(u16 *)(r10 - 4) = r1 ; CHECK: *(u16 *)(r10 + 24) = r2 ; CHECK: *(u16 *)(r10 + 22) = r2 ; CHECK: *(u16 *)(r10 + 20) = r2 -- 2.40.0