From: Matt Arsenault Date: Sat, 8 Apr 2017 21:28:38 +0000 (+0000) Subject: AMDGPU: Actually write nops for writeNopData X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1ad9b2d946fc914d736278a3ea861afe829643bf;p=llvm AMDGPU: Actually write nops for writeNopData Before this was just writing 0s, which ends up looking like a v_cndmask_b32 v0, s0, v0, vcc. Write out an encoded s_nop instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299816 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 5a7790fb9f5..f3266fe8295 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -164,7 +164,20 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( } bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - OW->WriteZeros(Count); + // If the count is not 4-byte aligned, we must be writing data into the text + // section (otherwise we have unaligned instructions, and thus have far + // bigger problems), so just write zeros instead. + OW->WriteZeros(Count % 4); + + // We are properly aligned, so write NOPs as requested. + Count /= 4; + + // FIXME: R600 support. + // s_nop 0 + const uint32_t Encoded_S_NOP_0 = 0xbf800000; + + for (uint64_t I = 0; I != Count; ++I) + OW->write32(Encoded_S_NOP_0); return true; } diff --git a/test/CodeGen/AMDGPU/nop-data.ll b/test/CodeGen/AMDGPU/nop-data.ll new file mode 100644 index 00000000000..b68f343097e --- /dev/null +++ b/test/CodeGen/AMDGPU/nop-data.ll @@ -0,0 +1,87 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - -mcpu=fiji | FileCheck %s + +; CHECK: kernel0: +; CHECK-NEXT: s_endpgm +define amdgpu_kernel void @kernel0() align 256 { +entry: + ret void +} + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 + +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_nop 0 // 0000000001FC: BF800000 + +; CHECK-NEXT: {{^$}} +; CHECK-NEXT: kernel1: +; CHECK-NEXT: s_endpgm +define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(2)* %ptr.out) align 256 { +entry: + ret void +}