}
}
-/// The operand number of the load or store address in load/store instructions.
-static const unsigned LoadAddressOperandNo = 3;
-static const unsigned StoreAddressOperandNo = 2;
-
-/// The operand number of the load or store p2align in load/store instructions.
-static const unsigned LoadP2AlignOperandNo = 1;
-static const unsigned StoreP2AlignOperandNo = 0;
-
/// This is used to indicate block signatures.
enum class ExprType : unsigned {
Void = 0x40,
///
//===----------------------------------------------------------------------===//
+let UseNamedOperandTable = 1 in
multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "",
string asmstr_s = "", bits<32> atomic_op = -1> {
defm "" :
ATOMIC_I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
- rc:$new),
+ rc:$new_),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
- !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"),
+ !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
!strconcat(name, "\t${off}${p2align}"), atomic_op>;
}
#define GET_INSTRINFO_CTOR_DTOR
#include "WebAssemblyGenInstrInfo.inc"
+// Defines WebAssembly::getNamedOperandIdx().
+#define GET_INSTRINFO_NAMED_OPS
+#include "WebAssemblyGenInstrInfo.inc"
+
WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
: WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN,
WebAssembly::ADJCALLSTACKUP,
#define GET_INSTRINFO_HEADER
#include "WebAssemblyGenInstrInfo.inc"
+#define GET_INSTRINFO_OPERAND_ENUM
+#include "WebAssemblyGenInstrInfo.inc"
+
namespace llvm {
+namespace WebAssembly {
+/// Returns the index of operand \p NamedIndex in \p Opcode, or -1 if absent.
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
+
+} // end namespace WebAssembly
+
class WebAssemblySubtarget;
class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo {
// Defines atomic and non-atomic loads, regular and extending.
multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
- let mayLoad = 1 in
+ let mayLoad = 1, UseNamedOperandTable = 1 in
defm "": I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off),
// Defines atomic and non-atomic stores, regular and truncating
multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
- let mayStore = 1 in
+ let mayStore = 1, UseNamedOperandTable = 1 in
defm "" : I<(outs),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
(outs),
// Load: v128.load
multiclass SIMDLoad<ValueType vec_t> {
- let mayLoad = 1 in
+ let mayLoad = 1, UseNamedOperandTable = 1 in
defm LOAD_#vec_t :
- SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
- (outs), (ins P2Align:$align, offset32_op:$off), [],
- "v128.load\t$dst, ${off}(${addr})$align",
- "v128.load\t$off$align", 0>;
+ SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "v128.load\t$dst, ${off}(${addr})$p2align",
+ "v128.load\t$off$p2align", 0>;
}
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
// Store: v128.store
multiclass SIMDStore<ValueType vec_t> {
- let mayStore = 1 in
+ let mayStore = 1, UseNamedOperandTable = 1 in
defm STORE_#vec_t :
- SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
- (outs), (ins P2Align:$align, offset32_op:$off), [],
- "v128.store\t${off}(${addr})$align, $vec",
- "v128.store\t$off$align", 1>;
+ SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "v128.store\t${off}(${addr})$p2align, $vec",
+ "v128.store\t$off$p2align", 1>;
}
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
// If this is the address operand of a load or store, make it relative to SP
// and fold the frame offset directly in.
- if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) ||
- (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) {
- assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0);
- int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset;
+ unsigned AddrOperandNum = WebAssembly::getNamedOperandIdx(
+ MI.getOpcode(), WebAssembly::OpName::addr);
+ if (AddrOperandNum == FIOperandNum) {
+ unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx(
+ MI.getOpcode(), WebAssembly::OpName::off);
+ assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0);
+ int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset;
if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
- MI.getOperand(FIOperandNum - 1).setImm(Offset);
+ MI.getOperand(OffsetOperandNum).setImm(Offset);
MI.getOperand(FIOperandNum)
.ChangeToRegister(FrameRegister, /*IsDef=*/false);
return;
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyInstrInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
for (auto &MBB : MF) {
for (auto &MI : MBB) {
- switch (MI.getOpcode()) {
- case WebAssembly::LOAD_I32:
- case WebAssembly::LOAD_I64:
- case WebAssembly::LOAD_F32:
- case WebAssembly::LOAD_F64:
- case WebAssembly::LOAD_v16i8:
- case WebAssembly::LOAD_v8i16:
- case WebAssembly::LOAD_v4i32:
- case WebAssembly::LOAD_v2i64:
- case WebAssembly::LOAD_v4f32:
- case WebAssembly::LOAD_v2f64:
- case WebAssembly::LOAD8_S_I32:
- case WebAssembly::LOAD8_U_I32:
- case WebAssembly::LOAD16_S_I32:
- case WebAssembly::LOAD16_U_I32:
- case WebAssembly::LOAD8_S_I64:
- case WebAssembly::LOAD8_U_I64:
- case WebAssembly::LOAD16_S_I64:
- case WebAssembly::LOAD16_U_I64:
- case WebAssembly::LOAD32_S_I64:
- case WebAssembly::LOAD32_U_I64:
- case WebAssembly::ATOMIC_LOAD_I32:
- case WebAssembly::ATOMIC_LOAD8_U_I32:
- case WebAssembly::ATOMIC_LOAD16_U_I32:
- case WebAssembly::ATOMIC_LOAD_I64:
- case WebAssembly::ATOMIC_LOAD8_U_I64:
- case WebAssembly::ATOMIC_LOAD16_U_I64:
- case WebAssembly::ATOMIC_LOAD32_U_I64:
- case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
- case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
- case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW8_U_AND_I32:
- case WebAssembly::ATOMIC_RMW8_U_AND_I64:
- case WebAssembly::ATOMIC_RMW8_U_OR_I32:
- case WebAssembly::ATOMIC_RMW8_U_OR_I64:
- case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
- case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
- case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
- case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
- case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW16_U_AND_I32:
- case WebAssembly::ATOMIC_RMW16_U_AND_I64:
- case WebAssembly::ATOMIC_RMW16_U_OR_I32:
- case WebAssembly::ATOMIC_RMW16_U_OR_I64:
- case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
- case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
- case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW_ADD_I32:
- case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW_SUB_I32:
- case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW_AND_I32:
- case WebAssembly::ATOMIC_RMW32_U_AND_I64:
- case WebAssembly::ATOMIC_RMW_OR_I32:
- case WebAssembly::ATOMIC_RMW32_U_OR_I64:
- case WebAssembly::ATOMIC_RMW_XOR_I32:
- case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW_XCHG_I32:
- case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW_ADD_I64:
- case WebAssembly::ATOMIC_RMW_SUB_I64:
- case WebAssembly::ATOMIC_RMW_AND_I64:
- case WebAssembly::ATOMIC_RMW_OR_I64:
- case WebAssembly::ATOMIC_RMW_XOR_I64:
- case WebAssembly::ATOMIC_RMW_XCHG_I64:
- case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
- case WebAssembly::ATOMIC_NOTIFY:
- case WebAssembly::ATOMIC_WAIT_I32:
- case WebAssembly::ATOMIC_WAIT_I64:
- rewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
- break;
- case WebAssembly::STORE_I32:
- case WebAssembly::STORE_I64:
- case WebAssembly::STORE_F32:
- case WebAssembly::STORE_F64:
- case WebAssembly::STORE_v16i8:
- case WebAssembly::STORE_v8i16:
- case WebAssembly::STORE_v4i32:
- case WebAssembly::STORE_v2i64:
- case WebAssembly::STORE_v4f32:
- case WebAssembly::STORE_v2f64:
- case WebAssembly::STORE8_I32:
- case WebAssembly::STORE16_I32:
- case WebAssembly::STORE8_I64:
- case WebAssembly::STORE16_I64:
- case WebAssembly::STORE32_I64:
- case WebAssembly::ATOMIC_STORE_I32:
- case WebAssembly::ATOMIC_STORE8_I32:
- case WebAssembly::ATOMIC_STORE16_I32:
- case WebAssembly::ATOMIC_STORE_I64:
- case WebAssembly::ATOMIC_STORE8_I64:
- case WebAssembly::ATOMIC_STORE16_I64:
- case WebAssembly::ATOMIC_STORE32_I64:
- rewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo);
- break;
- default:
- break;
+ int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx(
+ MI.getOpcode(), WebAssembly::OpName::p2align);
+ if (P2AlignOpNum != -1) {
+ rewriteP2Align(MI, P2AlignOpNum);
+ Changed = true;
}
}
}
call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0)
ret void
}
+
+; The following tests check that frame index elimination works for
+; bulk memory instructions. The stack pointer is decremented by 16
+; instead of 10 because the stack pointer in WebAssembly is currently
+; always 16-byte aligned, even in leaf functions, although it is not
+; written back to the global in this case. The 10-byte alloca therefore
+; sits at offset 6 from the decremented pointer, hence the extra i32.add.
+
+; TODO: Change TransientStackAlignment to 1 to avoid this extra
+; arithmetic. This will require forcing the use of StackAlignment in
+; PrologEpilogInserter.cpp when
+; WebAssemblyFrameLowering::needsSPWriteback would be true.
+
+; CHECK-LABEL: memcpy_alloca_src:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
+; BULK-MEM-NEXT: return
+define void @memcpy_alloca_src(i8* %dst) {
+ %a = alloca [10 x i8] ; 10-byte stack buffer used as the copy source
+ %p = bitcast [10 x i8]* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 10, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: memcpy_alloca_dst:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
+; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
+; BULK-MEM-NEXT: return
+define void @memcpy_alloca_dst(i8* %src) {
+ %a = alloca [10 x i8] ; 10-byte stack buffer used as the copy destination
+ %p = bitcast [10 x i8]* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 10, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: memset_alloca:
+; NO-BULK-MEM-NOT: memory.fill
+; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
+; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
+; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
+; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
+; BULK-MEM-NEXT: return
+define void @memset_alloca(i8 %val) {
+ %a = alloca [10 x i8] ; 10-byte stack buffer used as the fill destination
+ %p = bitcast [10 x i8]* %a to i8*
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 10, i1 false)
+ ret void
+}