return false;
}
+ if (MI.isInlineAsm()) {
+ // Verify register classes for inline asm constraints.
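+ // Operands before InlineAsm::MIOp_FirstOperand are the asm string and
+ // the extra-info flags, not constraint operands, so skip past them.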
+ for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
+ I != E; ++I) {
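+ // getRegClassConstraint decodes the inline asm flag words and returns
+ // the register class required by the constraint, or null if the
+ // operand is unconstrained.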
+ const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
+ if (!RC)
+ continue;
+
+ const MachineOperand &Op = MI.getOperand(I);
+ if (!Op.isReg())
+ continue;
+
+ unsigned Reg = Op.getReg();
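+ // A virtual register's class is not final until register allocation,
+ // so only physical register operands are checked against the
+ // constraint's class here.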
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
+ ErrInfo = "inline asm operand has incorrect register class.";
+ return false;
+ }
+ }
+
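+ // The MCInstrDesc-based operand checks below do not apply to the
+ // variable operand list of an inline asm, so stop here.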
+ return true;
+ }
+
// Make sure the register classes are correct.
for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
if (MI.getOperand(i).isFPImm()) {
return isInlineImmediate(N);
}]>;
-class SGPRImm <dag frag> : PatLeaf<frag, [{
+class VGPRImm <dag frag> : PatLeaf<frag, [{
if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) {
return false;
}
const SIRegisterInfo *SIRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
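+ // Inspect at most 10 uses; if the immediate is used more widely than
+ // that, the predicate fails and the immediate is materialized with a
+ // scalar mov instead.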
+ unsigned Limit = 0;
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- U != E; ++U) {
+ Limit < 10 && U != E; ++U, ++Limit) {
const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
- if (RC && SIRI->isSGPRClass(RC))
- return true;
+
+ // If the register class is unknown, the use may still require an
+ // SGPR (e.g. an inline asm constraint), so treat an unknown class
+ // the same as an SGPR class.
+ if (!RC || SIRI->isSGPRClass(RC))
+ return false;
}
- return false;
+
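+ // True only if the use list was exhausted (fewer than 10 uses) and
+ // every scanned use takes a VGPR.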
+ return Limit < 10;
}]>;
//===----------------------------------------------------------------------===//
/********** ================== **********/
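// The VGPRImm patterns match an immediate only when every use of it
// wants a VGPR; a plain immediate otherwise defaults to a scalar mov,
// which VALU instructions can still read directly.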
def : Pat <
- (SGPRImm<(i32 imm)>:$imm),
- (S_MOV_B32 imm:$imm)
+ (VGPRImm<(i32 imm)>:$imm),
+ (V_MOV_B32_e32 imm:$imm)
>;
def : Pat <
- (SGPRImm<(f32 fpimm)>:$imm),
- (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
+ (VGPRImm<(f32 fpimm)>:$imm),
+ (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;
def : Pat <
(i32 imm:$imm),
- (V_MOV_B32_e32 imm:$imm)
+ (S_MOV_B32 imm:$imm)
>;
def : Pat <
(f32 fpimm:$imm),
- (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
+ (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;
def : Pat <
; GCN: s_mov_b32 m0, -1
; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
-; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
+; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
; GCN: s_mov_b32 m0, -1
; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
-; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
+; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
+; Spill load
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 ; 4-byte Folded Spill
+
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 ; 8-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:12 ; 8-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:4 ; 8-byte Folded Spill
-
-; Spill load
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
-; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
+; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload
; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN: s_and_b64 vcc, exec, vcc
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 ; 8-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:12 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:4 ; 8-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
; GCN: s_mov_b32 m0, -1
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, m0
; GCN: ; use m0
-define void @inline_sreg_constraint_m0(i32 addrspace(1)* %ptr) {
+define void @inline_sreg_constraint_m0() {
%m0 = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
tail call void asm sideeffect "; use $0", "s"(i32 %m0)
ret void
}
+
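+; An immediate tied to an "s" constraint has to be materialized into an
+; SGPR, which the following tests check for various immediate types.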
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i32:
+; GCN: s_mov_b32 [[REG:s[0-9]+]], 32
+; GCN: ; use [[REG]]
+define void @inline_sreg_constraint_imm_i32() {
+ tail call void asm sideeffect "; use $0", "s"(i32 32)
+ ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f32:
+; GCN: s_mov_b32 [[REG:s[0-9]+]], 1.0
+; GCN: ; use [[REG]]
+define void @inline_sreg_constraint_imm_f32() {
+ tail call void asm sideeffect "; use $0", "s"(float 1.0)
+ ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define void @inline_sreg_constraint_imm_i64() {
+ tail call void asm sideeffect "; use $0", "s"(i64 -4)
+ ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f64:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0x3ff00000{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define void @inline_sreg_constraint_imm_f64() {
+ tail call void asm sideeffect "; use $0", "s"(double 1.0)
+ ret void
+}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 0x40a00000
+; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {