From f031b0c0443e85b59ec926fed6b8a1027b65d13d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 8 Mar 2019 20:30:50 +0000 Subject: [PATCH] AMDGPU: Correct DS implementation of areLoadsFromSameBasePtr This was checking the wrong operands for the base register and the offsets. The indexes are shifted by the number of output registers from the machine instruction definition, and the chain is moved to the end. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355722 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIInstrInfo.cpp | 8 ++++---- test/CodeGen/AMDGPU/ds-combine-with-dependence.ll | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 8abf91b8ae2..db622f39933 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -167,7 +167,7 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, return false; // Check base reg. - if (Load0->getOperand(1) != Load1->getOperand(1)) + if (Load0->getOperand(0) != Load1->getOperand(0)) return false; // Check chain. @@ -181,8 +181,8 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1) return false; - Offset0 = cast(Load0->getOperand(2))->getZExtValue(); - Offset1 = cast(Load1->getOperand(2))->getZExtValue(); + Offset0 = cast(Load0->getOperand(1))->getZExtValue(); + Offset1 = cast(Load1->getOperand(1))->getZExtValue(); return true; } @@ -232,7 +232,7 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, return false; // getNamedOperandIdx returns the index for MachineInstrs. Since they - // inlcude the output in the operand list, but SDNodes don't, we need to + // include the output in the operand list, but SDNodes don't, we need to // subtract the index by one. --OffIdx0; --OffIdx1; diff --git a/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll b/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll index 06fa048124a..ba7c0f3983c 100644 --- a/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll +++ b/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll @@ -6,8 +6,8 @@ ; GCN-LABEL: {{^}}ds_combine_nodep -; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27 -; GCN-NEXT: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:8 +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:8 +; GCN-NEXT: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27 define amdgpu_kernel void @ds_combine_nodep(float addrspace(1)* %out, float addrspace(3)* %inptr) { %base = bitcast float addrspace(3)* %inptr to i8 addrspace(3)* -- 2.50.1