From: Thomas Lively Date: Sat, 9 Mar 2019 04:31:37 +0000 (+0000) Subject: [WebAssembly] Use named operands to identify loads and stores X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=aca9dda712959d055e6fd6a6597815ad42d807ca;p=llvm [WebAssembly] Use named operands to identify loads and stores Summary: Uses the named operands tablegen feature to look up the indices of offset, address, and p2align operands for all load and store instructions. This replaces brittle, incorrect logic for identifying loads and store when eliminating frame indices, which previously crashed on bulk-memory ops. It also cleans up the SetP2Alignment pass. Reviewers: aheejin, dschuff Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59007 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355770 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index faa8c69baa4..5db72910315 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -331,14 +331,6 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { } } -/// The operand number of the load or store address in load/store instructions. -static const unsigned LoadAddressOperandNo = 3; -static const unsigned StoreAddressOperandNo = 2; - -/// The operand number of the load or store p2align in load/store instructions. -static const unsigned LoadP2AlignOperandNo = 1; -static const unsigned StoreP2AlignOperandNo = 0; - /// This is used to indicate block signatures. enum class ExprType : unsigned { Void = 0x40, diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index 89ea8def27f..f0cb0c284bf 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -11,6 +11,7 @@ /// //===----------------------------------------------------------------------===// +let UseNamedOperandTable = 1 in multiclass ATOMIC_I pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> atomic_op = -1> { @@ -810,9 +811,9 @@ multiclass WebAssemblyTerRMW; } diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 995dc88bb33..a7c120963e0 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -27,6 +27,10 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "WebAssemblyGenInstrInfo.inc" +// defines WebAssembly::getNamedOperandIdx +#define GET_INSTRINFO_NAMED_OPS +#include "WebAssemblyGenInstrInfo.inc" + WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, WebAssembly::ADJCALLSTACKUP, diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index 20eec684b77..df1051b4f42 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -21,8 +21,17 @@ #define GET_INSTRINFO_HEADER #include "WebAssemblyGenInstrInfo.inc" +#define GET_INSTRINFO_OPERAND_ENUM +#include "WebAssemblyGenInstrInfo.inc" + namespace llvm { +namespace WebAssembly { + +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); + +} + class WebAssemblySubtarget; class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 2070f8ad822..0d6d12f49ca 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -52,7 +52,7 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off), // Defines atomic and non-atomic loads, regular and extending. multiclass WebAssemblyLoad { - let mayLoad = 1 in + let mayLoad = 1, UseNamedOperandTable = 1 in defm "": I<(outs rc:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), @@ -294,7 +294,7 @@ def : LoadPatExternSymOffOnly; // Defines atomic and non-atomic stores, regular and truncating multiclass WebAssemblyStore { - let mayStore = 1 in + let mayStore = 1, UseNamedOperandTable = 1 in defm "" : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val), (outs), diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 8169e6a6233..0d0948c00e0 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -41,12 +41,12 @@ def LaneIdx#SIZE : ImmLeaf; // Load: v128.load multiclass SIMDLoad { - let mayLoad = 1 in + let mayLoad = 1, UseNamedOperandTable = 1 in defm LOAD_#vec_t : - SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr), - (outs), (ins P2Align:$align, offset32_op:$off), [], - "v128.load\t$dst, ${off}(${addr})$align", - "v128.load\t$off$align", 0>; + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.load\t$dst, ${off}(${addr})$p2align", + "v128.load\t$off$p2align", 0>; } foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { @@ -65,12 +65,12 @@ def : LoadPatExternSymOffOnly("LOAD_"#vec_t)>; // Store: v128.store multiclass SIMDStore { - let mayStore = 1 in + let mayStore = 1, UseNamedOperandTable = 1 in defm STORE_#vec_t : - SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec), - (outs), (ins P2Align:$align, offset32_op:$off), [], - "v128.store\t${off}(${addr})$align, $vec", - "v128.store\t$off$align", 1>; + SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.store\t${off}(${addr})$p2align, $vec", + "v128.store\t$off$p2align", 1>; } foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index ae808dd6597..d0002d9e10f 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -70,13 +70,16 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( // If this is the address operand of a load or store, make it relative to SP // and fold the frame offset directly in. - if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) || - (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) { - assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0); - int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset; + unsigned AddrOperandNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::addr); + if (AddrOperandNum == FIOperandNum) { + unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::off); + assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0); + int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset; if (static_cast(Offset) <= std::numeric_limits::max()) { - MI.getOperand(FIOperandNum - 1).setImm(Offset); + MI.getOperand(OffsetOperandNum).setImm(Offset); MI.getOperand(FIOperandNum) .ChangeToRegister(FrameRegister, /*IsDef=*/false); return; diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index 68ab925e433..a249ccf1763 100644 --- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" +#include "WebAssemblyInstrInfo.h" #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -83,114 +84,11 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { for (auto &MBB : MF) { for (auto &MI : MBB) { - switch (MI.getOpcode()) { - case WebAssembly::LOAD_I32: - case WebAssembly::LOAD_I64: - case WebAssembly::LOAD_F32: - case WebAssembly::LOAD_F64: - case WebAssembly::LOAD_v16i8: - case WebAssembly::LOAD_v8i16: - case WebAssembly::LOAD_v4i32: - case WebAssembly::LOAD_v2i64: - case WebAssembly::LOAD_v4f32: - case WebAssembly::LOAD_v2f64: - case WebAssembly::LOAD8_S_I32: - case WebAssembly::LOAD8_U_I32: - case WebAssembly::LOAD16_S_I32: - case WebAssembly::LOAD16_U_I32: - case WebAssembly::LOAD8_S_I64: - case WebAssembly::LOAD8_U_I64: - case WebAssembly::LOAD16_S_I64: - case WebAssembly::LOAD16_U_I64: - case WebAssembly::LOAD32_S_I64: - case WebAssembly::LOAD32_U_I64: - case WebAssembly::ATOMIC_LOAD_I32: - case WebAssembly::ATOMIC_LOAD8_U_I32: - case WebAssembly::ATOMIC_LOAD16_U_I32: - case WebAssembly::ATOMIC_LOAD_I64: - case WebAssembly::ATOMIC_LOAD8_U_I64: - case WebAssembly::ATOMIC_LOAD16_U_I64: - case WebAssembly::ATOMIC_LOAD32_U_I64: - case WebAssembly::ATOMIC_RMW8_U_ADD_I32: - case WebAssembly::ATOMIC_RMW8_U_ADD_I64: - case WebAssembly::ATOMIC_RMW8_U_SUB_I32: - case WebAssembly::ATOMIC_RMW8_U_SUB_I64: - case WebAssembly::ATOMIC_RMW8_U_AND_I32: - case WebAssembly::ATOMIC_RMW8_U_AND_I64: - case WebAssembly::ATOMIC_RMW8_U_OR_I32: - case WebAssembly::ATOMIC_RMW8_U_OR_I64: - case WebAssembly::ATOMIC_RMW8_U_XOR_I32: - case WebAssembly::ATOMIC_RMW8_U_XOR_I64: - case WebAssembly::ATOMIC_RMW8_U_XCHG_I32: - case WebAssembly::ATOMIC_RMW8_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW16_U_ADD_I32: - case WebAssembly::ATOMIC_RMW16_U_ADD_I64: - case WebAssembly::ATOMIC_RMW16_U_SUB_I32: - case WebAssembly::ATOMIC_RMW16_U_SUB_I64: - case WebAssembly::ATOMIC_RMW16_U_AND_I32: - case WebAssembly::ATOMIC_RMW16_U_AND_I64: - case WebAssembly::ATOMIC_RMW16_U_OR_I32: - case WebAssembly::ATOMIC_RMW16_U_OR_I64: - case WebAssembly::ATOMIC_RMW16_U_XOR_I32: - case WebAssembly::ATOMIC_RMW16_U_XOR_I64: - case WebAssembly::ATOMIC_RMW16_U_XCHG_I32: - case WebAssembly::ATOMIC_RMW16_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW_ADD_I32: - case WebAssembly::ATOMIC_RMW32_U_ADD_I64: - case WebAssembly::ATOMIC_RMW_SUB_I32: - case WebAssembly::ATOMIC_RMW32_U_SUB_I64: - case WebAssembly::ATOMIC_RMW_AND_I32: - case WebAssembly::ATOMIC_RMW32_U_AND_I64: - case WebAssembly::ATOMIC_RMW_OR_I32: - case WebAssembly::ATOMIC_RMW32_U_OR_I64: - case WebAssembly::ATOMIC_RMW_XOR_I32: - case WebAssembly::ATOMIC_RMW32_U_XOR_I64: - case WebAssembly::ATOMIC_RMW_XCHG_I32: - case WebAssembly::ATOMIC_RMW32_U_XCHG_I64: - case WebAssembly::ATOMIC_RMW_CMPXCHG_I32: - case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64: - case WebAssembly::ATOMIC_RMW_ADD_I64: - case WebAssembly::ATOMIC_RMW_SUB_I64: - case WebAssembly::ATOMIC_RMW_AND_I64: - case WebAssembly::ATOMIC_RMW_OR_I64: - case WebAssembly::ATOMIC_RMW_XOR_I64: - case WebAssembly::ATOMIC_RMW_XCHG_I64: - case WebAssembly::ATOMIC_RMW_CMPXCHG_I64: - case WebAssembly::ATOMIC_NOTIFY: - case WebAssembly::ATOMIC_WAIT_I32: - case WebAssembly::ATOMIC_WAIT_I64: - rewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); - break; - case WebAssembly::STORE_I32: - case WebAssembly::STORE_I64: - case WebAssembly::STORE_F32: - case WebAssembly::STORE_F64: - case WebAssembly::STORE_v16i8: - case WebAssembly::STORE_v8i16: - case WebAssembly::STORE_v4i32: - case WebAssembly::STORE_v2i64: - case WebAssembly::STORE_v4f32: - case WebAssembly::STORE_v2f64: - case WebAssembly::STORE8_I32: - case WebAssembly::STORE16_I32: - case WebAssembly::STORE8_I64: - case WebAssembly::STORE16_I64: - case WebAssembly::STORE32_I64: - case WebAssembly::ATOMIC_STORE_I32: - case WebAssembly::ATOMIC_STORE8_I32: - case WebAssembly::ATOMIC_STORE16_I32: - case WebAssembly::ATOMIC_STORE_I64: - case WebAssembly::ATOMIC_STORE8_I64: - case WebAssembly::ATOMIC_STORE16_I64: - case WebAssembly::ATOMIC_STORE32_I64: - rewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo); - break; - default: - break; + int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::p2align); + if (P2AlignOpNum != -1) { + rewriteP2Align(MI, P2AlignOpNum); + Changed = true; } } } diff --git a/test/CodeGen/WebAssembly/bulk-memory.ll b/test/CodeGen/WebAssembly/bulk-memory.ll index b7b34bdfdcd..6574511089b 100644 --- a/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/test/CodeGen/WebAssembly/bulk-memory.ll @@ -140,3 +140,68 @@ define void @memset_1024(i8* %dest, i8 %val) { call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0) ret void } + +; The following tests check that frame index elimination works for +; bulk memory instructions. The stack pointer is bumped by 16 instead +; of 10 because the stack pointer in WebAssembly is currently always +; 16-byte aligned, even in leaf functions, although it is not written +; back to the global in this case. + +; TODO: Change TransientStackAlignment to 1 to avoid this extra +; arithmetic. This will require forcing the use of StackAlignment in +; PrologEpilogEmitter.cpp when +; WebAssemblyFrameLowering::needsSPWriteback would be true. + +; CHECK-LABEL: memcpy_alloca_src: +; NO-BULK-MEM-NOT: memory.copy +; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> () +; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer +; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]] +; BULK-MEM-NEXT: return +define void @memcpy_alloca_src(i8* %dst) { + %a = alloca [10 x i8] + %p = bitcast [10 x i8]* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 10, i1 false) + ret void +} + +; CHECK-LABEL: memcpy_alloca_dst: +; NO-BULK-MEM-NOT: memory.copy +; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> () +; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer +; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10 +; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]] +; BULK-MEM-NEXT: return +define void @memcpy_alloca_dst(i8* %src) { + %a = alloca [10 x i8] + %p = bitcast [10 x i8]* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 10, i1 false) + ret void +} + +; CHECK-LABEL: memset_alloca: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_alloca (i32) -> () +; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer +; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] +; BULK-MEM-NEXT: return +define void @memset_alloca(i8 %val) { + %a = alloca [10 x i8] + %p = bitcast [10 x i8]* %a to i8* + call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 10, i1 false) + ret void +}