From: Matt Arsenault Date: Wed, 17 Dec 2014 21:04:08 +0000 (+0000) Subject: R600/SI: Fix f64 inline immediates X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=aa14ffddcfae4502978d22bbb9fe21c551d254b9;p=llvm R600/SI: Fix f64 inline immediates git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224458 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index 2dcebf5f7e1..84ee7842a72 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -70,9 +70,14 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { llvm_unreachable("unknown operand type"); case MachineOperand::MO_FPImmediate: { const APFloat &FloatValue = MO.getFPImm()->getValueAPF(); - assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle && - "Only floating point immediates are supported at the moment."); - MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat()); + + if (&FloatValue.getSemantics() == &APFloat::IEEEsingle) + MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat()); + else if (&FloatValue.getSemantics() == &APFloat::IEEEdouble) + MCOp = MCOperand::CreateFPImm(FloatValue.convertToDouble()); + else + llvm_unreachable("Unhandled floating point type"); + break; } case MachineOperand::MO_Immediate: diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 12d077c0464..cb44c3e409f 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/MathExtras.h" @@ -208,7 +209,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { O << Type << '[' << RegIdx << ':' << (RegIdx + 
NumRegs - 1) << ']'; } -void AMDGPUInstPrinter::printImmediate(uint32_t Imm, raw_ostream &O) { +void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) { int32_t SImm = static_cast(Imm); if (SImm >= -16 && SImm <= 64) { O << SImm; @@ -233,9 +234,37 @@ void AMDGPUInstPrinter::printImmediate(uint32_t Imm, raw_ostream &O) { O << "4.0"; else if (Imm == FloatToBits(-4.0f)) O << "-4.0"; - else { + else O << formatHex(static_cast(Imm)); +} + +void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) { + int64_t SImm = static_cast(Imm); + if (SImm >= -16 && SImm <= 64) { + O << SImm; + return; } + + if (Imm == DoubleToBits(0.0)) + O << "0.0"; + else if (Imm == DoubleToBits(1.0)) + O << "1.0"; + else if (Imm == DoubleToBits(-1.0)) + O << "-1.0"; + else if (Imm == DoubleToBits(0.5)) + O << "0.5"; + else if (Imm == DoubleToBits(-0.5)) + O << "-0.5"; + else if (Imm == DoubleToBits(2.0)) + O << "2.0"; + else if (Imm == DoubleToBits(-2.0)) + O << "-2.0"; + else if (Imm == DoubleToBits(4.0)) + O << "4.0"; + else if (Imm == DoubleToBits(-4.0)) + O << "-4.0"; + else + llvm_unreachable("64-bit literal constants not supported"); } void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, @@ -253,14 +282,37 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, break; } } else if (Op.isImm()) { - printImmediate(Op.getImm(), O); + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + int RCID = Desc.OpInfo[OpNo].RegClass; + if (RCID != -1) { + const MCRegisterClass &ImmRC = MRI.getRegClass(RCID); + if (ImmRC.getSize() == 4) + printImmediate32(Op.getImm(), O); + else if (ImmRC.getSize() == 8) + printImmediate64(Op.getImm(), O); + else + llvm_unreachable("Invalid register class size"); + } else { + // We hit this for the immediate instruction bits that don't yet have a + // custom printer. + // TODO: Eventually this should be unnecessary. 
+ O << formatDec(Op.getImm()); + } } else if (Op.isFPImm()) { - // We special case 0.0 because otherwise it will be printed as an integer. if (Op.getFPImm() == 0.0) O << "0.0"; - else - printImmediate(FloatToBits(Op.getFPImm()), O); + else { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + const MCRegisterClass &ImmRC = MRI.getRegClass(Desc.OpInfo[OpNo].RegClass); + + if (ImmRC.getSize() == 4) + printImmediate32(FloatToBits(Op.getFPImm()), O); + else if (ImmRC.getSize() == 8) + printImmediate64(DoubleToBits(Op.getFPImm()), O); + else + llvm_unreachable("Invalid register class size"); + } } else if (Op.isExpr()) { const MCExpr *Exp = Op.getExpr(); Exp->print(O); diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 4c06ac0b36b..1d43c7acbe7 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -48,7 +48,8 @@ private: void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printRegOperand(unsigned RegNo, raw_ostream &O); - void printImmediate(uint32_t Imm, raw_ostream &O); + void printImmediate32(uint32_t I, raw_ostream &O); + void printImmediate64(uint64_t I, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index e41cad23de2..5702ee67123 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -1578,31 +1578,30 @@ static bool isSSrc(unsigned RegClass) { /// and the immediate value if it's a literal immediate int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { - union { - int32_t I; - float F; - } Imm; + const SIInstrInfo *TII = static_cast( 
+ getTargetMachine().getSubtargetImpl()->getInstrInfo()); if (const ConstantSDNode *Node = dyn_cast(N)) { - if (Node->getZExtValue() >> 32) { - return -1; - } - Imm.I = Node->getSExtValue(); - } else if (const ConstantFPSDNode *Node = dyn_cast(N)) { - if (N->getValueType(0) != MVT::f32) + if (Node->getZExtValue() >> 32) return -1; - Imm.F = Node->getValueAPF().convertToFloat(); - } else - return -1; // It isn't an immediate - - if ((Imm.I >= -16 && Imm.I <= 64) || - Imm.F == 0.5f || Imm.F == -0.5f || - Imm.F == 1.0f || Imm.F == -1.0f || - Imm.F == 2.0f || Imm.F == -2.0f || - Imm.F == 4.0f || Imm.F == -4.0f) - return 0; // It's an inline immediate - - return Imm.I; // It's a literal immediate + + if (TII->isInlineConstant(Node->getAPIntValue())) + return 0; + + return Node->getZExtValue(); + } + + if (const ConstantFPSDNode *Node = dyn_cast(N)) { + if (TII->isInlineConstant(Node->getValueAPF().bitcastToAPInt())) + return 0; + + if (Node->getValueType(0) == MVT::f32) + return FloatToBits(Node->getValueAPF().convertToFloat()); + + return -1; + } + + return -1; } /// \brief Try to fold an immediate directly into an instruction diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 9328c2c2c23..08bfc5e1422 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -896,10 +896,23 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, } bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { - int32_t Val = Imm.getSExtValue(); - if (Val >= -16 && Val <= 64) + int64_t SVal = Imm.getSExtValue(); + if (SVal >= -16 && SVal <= 64) return true; + if (Imm.getBitWidth() == 64) { + uint64_t Val = Imm.getZExtValue(); + return (DoubleToBits(0.0) == Val) || + (DoubleToBits(1.0) == Val) || + (DoubleToBits(-1.0) == Val) || + (DoubleToBits(0.5) == Val) || + (DoubleToBits(-0.5) == Val) || + (DoubleToBits(2.0) == Val) || + (DoubleToBits(-2.0) == Val) || + (DoubleToBits(4.0) == Val) || + (DoubleToBits(-4.0) == 
Val); + } + // The actual type of the operand does not seem to matter as long // as the bits match one of the inline immediate values. For example: // @@ -908,16 +921,17 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { // // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in // floating-point, so it is a legal inline immediate. - - return (APInt::floatToBits(0.0f) == Imm) || - (APInt::floatToBits(1.0f) == Imm) || - (APInt::floatToBits(-1.0f) == Imm) || - (APInt::floatToBits(0.5f) == Imm) || - (APInt::floatToBits(-0.5f) == Imm) || - (APInt::floatToBits(2.0f) == Imm) || - (APInt::floatToBits(-2.0f) == Imm) || - (APInt::floatToBits(4.0f) == Imm) || - (APInt::floatToBits(-4.0f) == Imm); + uint32_t Val = Imm.getZExtValue(); + + return (FloatToBits(0.0f) == Val) || + (FloatToBits(1.0f) == Val) || + (FloatToBits(-1.0f) == Val) || + (FloatToBits(0.5f) == Val) || + (FloatToBits(-0.5f) == Val) || + (FloatToBits(2.0f) == Val) || + (FloatToBits(-2.0f) == Val) || + (FloatToBits(4.0f) == Val) || + (FloatToBits(-4.0f) == Val); } bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 6323591359d..8122ccb65cc 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -188,6 +188,10 @@ class InlineImm : PatLeaf <(vt imm), [{ return isInlineImmediate(N); }]>; +class InlineFPImm : PatLeaf <(vt fpimm), [{ + return isInlineImmediate(N); +}]>; + class SGPRImm : PatLeaf().getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 8c3f1403ca2..463287eaa97 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -2572,6 +2572,11 @@ def : Pat < (S_MOV_B64 (i64 (as_i64imm $imm))) >; +def : Pat < + (f64 InlineFPImm:$imm), + (S_MOV_B64 InlineFPImm:$imm) +>; + /********** ===================== **********/ /********** 
Interpolation Paterns **********/ /********** ===================== **********/ diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 45c2b414a3e..a79da004def 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -184,9 +184,9 @@ def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI) >; -def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>; +def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>; -def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, +def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64, (add SGPR_64, VCCReg, EXECReg, FLAT_SCR) >; diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll index 1fcaf292d7a..79f36b6b68c 100644 --- a/test/CodeGen/R600/imm.ll +++ b/test/CodeGen/R600/imm.ll @@ -30,6 +30,14 @@ define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) { ret void } +; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32 +; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) { + store float -0.0, float addrspace(1)* %out + ret void +} + ; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32 ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}} ; CHECK-NEXT: buffer_store_dword [[REG]] @@ -213,3 +221,267 @@ define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1 store float %y, float addrspace(1)* %out ret void } + +; CHECK-LABEL: {{^}}add_inline_imm_1_f32 +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}} +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x36a0000000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_2_f32 +; CHECK: 
s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}} +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x36b0000000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_16_f32 +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]] +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x36e0000000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32 +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]] +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0xffffffffe0000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32 +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]] +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0xffffffffc0000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32 +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]] +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0xfffffffe00000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_63_f32 +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]] +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 
0x36ff800000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_64_f32 +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]] +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x3700000000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.0, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0.5 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, -0.5 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 1.0 + store double %y, double addrspace(1)* 
%out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, -1.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 2.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, -2.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 4.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, -4.0 + store double %y, double 
addrspace(1)* %out + ret void +} + + +; CHECK-LABEL: {{^}}add_inline_imm_1_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x0000000000000001 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_2_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x0000000000000002 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_16_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x0000000000000010 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0xffffffffffffffff + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 
0xfffffffffffffffe + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0xfffffffffffffff0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_63_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x000000000000003F + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_64_f64 +; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]] +; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x0000000000000040 + store double %y, double addrspace(1)* %out + ret void +} + + +; FIXME: These shouldn't bother materializing in SGPRs + +; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64 +; CHECK: s_mov_b64 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) { + store double 0.0, double addrspace(1)* %out + ret void +}