setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
+ setTargetDAGCombine(ISD::FABS);
}
//===----------------------------------------------------------------------===//
SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
}
+ case ISD::FP16_TO_FP: {
+ // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
+ // f16, but legalization of f16 fneg ends up pulling it out of the source.
+ // Put the fneg back as a legal source operation that can be matched later.
+ SDLoc SL(N);
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
+ SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
+ DAG.getConstant(0x8000, SL, SrcVT));
+ return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
+ }
+ default:
+ return SDValue();
+ }
+}
+
+SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ switch (N0.getOpcode()) {
+ case ISD::FP16_TO_FP: {
+ assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
+ SDLoc SL(N);
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
+ SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
+ DAG.getConstant(0x7fff, SL, SrcVT));
+ return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
+ }
default:
return SDValue();
}
return performSelectCombine(N, DCI);
case ISD::FNEG:
return performFNegCombine(N, DCI);
+ case ISD::FABS:
+ return performFAbsCombine(N, DCI);
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
def umin_oneuse : HasOneUseBinOp<umin>;
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+def and_oneuse : HasOneUseBinOp<and>;
+def or_oneuse : HasOneUseBinOp<or>;
+def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]
def sub_oneuse : HasOneUseBinOp<sub>;
case 8:
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
case 2:
+ // FIXME Is this correct? What do inline immediates do on SI for f16 src
+ // which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
default:
llvm_unreachable("invalid operand size");
return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
case 16:
- return AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
}
case 16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
+ // A few special case instructions have 16-bit operands on subtargets
+ // where 16-bit instructions are not legal.
+ // TODO: Do the 32-bit immediates work? We shouldn't really need to handle
+ // constants in these cases
int16_t Trunc = static_cast<int16_t>(Imm);
- return AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
}
return false;
def SRCMODS {
int NONE = 0;
int NEG = 1;
+ int ABS = 2;
+ int NEG_ABS = 3;
}
def DSTCLAMP {
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
+def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
+def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
} // End Predicates = [UnsafeFPMath]
+
+// f16_to_fp patterns
+def : Pat <
+ (f32 (f16_to_fp i32:$src0)),
+ (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : Pat <
+ (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))),
+ (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : Pat <
+ (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))),
+ (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
def : Pat <
- (f32 (fpextend f16:$src)),
- (V_CVT_F32_F16_e32 $src)
+ (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))),
+ (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
(V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
>;
+// fp_to_fp16 patterns
def : Pat <
- (f16 (fpround f32:$src)),
- (V_CVT_F16_F32_e32 $src)
+ (i32 (fp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))),
+ (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod)
>;
def : Pat <
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
-defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
-defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
+defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
+defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
; GCN-LABEL: {{^}}fabs_f16:
; CI: flat_load_ushort [[VAL:v[0-9]+]],
-; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
-; CI: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], |[[CVT0]]|
+; CI: v_and_b32_e32 [[CVT0:v[0-9]+]], 0x7fff, [[VAL]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fabs_f16(half addrspace(1)* %out, half %in) {
%fabs = call half @llvm.fabs.f16(half %in)
; FIXME: Should be able to use single and
; GCN-LABEL: {{^}}fabs_v2f16:
-; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
-; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+
+; CI: s_movk_i32 [[MASK:s[0-9]+]], 0x7fff
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI: flat_load_ushort [[LO:v[0-9]+]]
; VI: flat_load_ushort [[HI:v[0-9]+]]
}
; GCN-LABEL: {{^}}fabs_v4f16:
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+; CI: s_movk_i32 [[MASK:s[0-9]+]], 0x7fff
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; GCN-LABEL: {{^}}fabs_fold_f16:
; GCN: flat_load_ushort [[IN0:v[0-9]+]]
; GCN: flat_load_ushort [[IN1:v[0-9]+]]
+
; CI-DAG: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[IN0]]
-; CI-DAG: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], [[IN1]]
-; CI: v_mul_f32_e64 [[RESULT:v[0-9]+]], |[[CVT1]]|, [[CVT0]]
+; CI-DAG: v_cvt_f32_f16_e64 [[ABS_CVT1:v[0-9]+]], |[[IN1]]|
+; CI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[CVT0]], [[ABS_CVT1]]
; CI: v_cvt_f16_f32_e32 [[CVTRESULT:v[0-9]+]], [[RESULT]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVTRESULT]]
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
-; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
-; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
+; SI: v_cvt_f32_f16_e64 v[[A_F32:[0-9]+]], |v[[A_F16]]|
+; SI: v_cvt_f32_f16_e64 v[[B_F32:[0-9]+]], |v[[B_F16]]|
-; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F32]]|, |v[[B_F32]]|
+; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN-LABEL: {{^}}fneg_fabs_fadd_f16:
; CI: v_cvt_f32_f16_e32
-; CI: v_cvt_f32_f16_e32
-; CI: v_sub_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|
+; CI: v_cvt_f32_f16_e64 [[CVT_ABS_X:v[0-9]+]], |v{{[0-9]+}}|
+; CI: v_subrev_f32_e32 v{{[0-9]+}}, [[CVT_ABS_X]], v{{[0-9]+}}
; VI-NOT: and
; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|
}
; GCN-LABEL: {{^}}fneg_fabs_fmul_f16:
-; CI: v_cvt_f32_f16_e32
-; CI: v_cvt_f32_f16_e32
-; CI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
+; CI-DAG: v_cvt_f32_f16_e32
+; CI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG_ABS_X:v[0-9]+]], -|{{v[0-9]+}}|
+; CI: v_mul_f32_e32 {{v[0-9]+}}, [[CVT_NEG_ABS_X]], {{v[0-9]+}}
; CI: v_cvt_f16_f32_e32
; VI-NOT: and
-; VI: v_mul_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
-; VI-NOT: and
+; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}|
+; VI-NOT: [[MUL]]
+; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
%fabs = call half @llvm.fabs.f16(half %x)
%fsub = fsub half -0.000000e+00, %fabs
; FIXME: Should use or
; GCN-LABEL: {{^}}fneg_fabs_f16:
-; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-
-; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
%fabs = call half @llvm.fabs.f16(half %in)
%fsub = fsub half -0.000000e+00, %fabs
}
; GCN-LABEL: {{^}}v_fneg_fabs_f16:
-; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-
-; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
%val = load half, half addrspace(1)* %in, align 2
%fabs = call half @llvm.fabs.f16(half %val)
; FIXME: single bit op
; GCN-LABEL: {{^}}fneg_fabs_v2f16:
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-
-; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: flat_store_dword
+; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: store_dword
define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fsub = fsub <2 x half> <half -0.000000e+00, half -0.000000e+00>, %fabs
}
; GCN-LABEL: {{^}}fneg_fabs_v4f16:
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-
-; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: flat_store_dwordx2
+; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: store_dwordx2
define void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
%fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
%fsub = fsub <4 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %fabs
; FUNC-LABEL: {{^}}v_fneg_f16:
; GCN: flat_load_ushort [[VAL:v[0-9]+]],
-
-; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
-; CI: v_cvt_f16_f32_e64 [[CVT1:v[0-9]+]], -[[CVT0]]
-; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]
-
-; VI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
+; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
+; SI: buffer_store_short [[XOR]]
define void @v_fneg_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
%val = load half, half addrspace(1)* %in, align 2
%fneg = fsub half -0.000000e+00, %val
; FUNC-LABEL: {{^}}v_fneg_fold_f16:
; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]]
-; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[CVT0]]
-; CI: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[CVT0]], [[CVT0]]
+; CI-DAG: v_cvt_f32_f16_e32 [[CVT_VAL:v[0-9]+]], [[NEG_VALUE]]
+; CI-DAG: v_cvt_f32_f16_e64 [[NEG_CVT0:v[0-9]+]], -[[NEG_VALUE]]
+; CI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[CVT_VAL]], [[NEG_CVT0]]
; CI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[MUL]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]
store <2 x double> %r.val, <2 x double> addrspace(1)* %r
ret void
}
+
+; GCN-LABEL: {{^}}s_fneg_fpext_f16_to_f32:
+; GCN: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+define void @s_fneg_fpext_f16_to_f32(float addrspace(1)* %r, i32 %a) {
+entry:
+ %a.trunc = trunc i32 %a to i16
+ %a.val = bitcast i16 %a.trunc to half
+ %r.val = fpext half %a.val to float
+ store float %r.val, float addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}fneg_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -[[A]]
+define void @fneg_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.neg = fsub half -0.0, %a.val
+ %r.val = fpext half %a.neg to float
+ store float %r.val, float addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, |[[A]]|
+define void @fabs_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.fabs = call half @llvm.fabs.f16(half %a.val)
+ %r.val = fpext half %a.fabs to float
+ store float %r.val, float addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}fneg_fabs_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|[[A]]|
+define void @fneg_fabs_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.fabs = call half @llvm.fabs.f16(half %a.val)
+ %a.fneg.fabs = fsub half -0.0, %a.fabs
+ %r.val = fpext half %a.fneg.fabs to float
+ store float %r.val, float addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}fneg_multi_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN-DAG: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[A]]
+
+; FIXME: Using the source modifier here only wastes code size
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
+; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
+
+; GCN: store_dword [[CVT]]
+; GCN: store_short [[XOR]]
+define void @fneg_multi_use_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.neg = fsub half -0.0, %a.val
+ %r.val = fpext half %a.neg to float
+ store volatile float %r.val, float addrspace(1)* %r
+ store volatile half %a.neg, half addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}fneg_multi_foldable_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN-DAG: v_cvt_f32_f16_e64 [[CVTA_NEG:v[0-9]+]], -[[A]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
+; SI: v_mul_f32_e32 [[MUL_F32:v[0-9]+]], [[CVTA]], [[CVTA_NEG]]
+; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
+
+; VI-DAG: v_cvt_f32_f16_e64 [[CVT_NEGA:v[0-9]+]], -[[A]]
+; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], -[[A]], [[A]]
+
+; GCN: buffer_store_dword [[CVTA_NEG]]
+; GCN: buffer_store_short [[MUL]]
+define void @fneg_multi_foldable_use_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.neg = fsub half -0.0, %a.val
+ %r.val = fpext half %a.neg to float
+ %mul = fmul half %a.neg, %a.val
+ store volatile float %r.val, float addrspace(1)* %r
+ store volatile half %mul, half addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}fabs_multi_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN-DAG: v_and_b32_e32 [[XOR:v[0-9]+]], 0x7fff, [[A]]
+
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
+; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]|
+
+; GCN: store_dword [[CVT]]
+; GCN: store_short [[XOR]]
+define void @fabs_multi_use_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.fabs = call half @llvm.fabs.f16(half %a.val)
+ %r.val = fpext half %a.fabs to float
+ store volatile float %r.val, float addrspace(1)* %r
+ store volatile half %a.fabs, half addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}fabs_multi_foldable_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
+; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], |[[CVTA]]|, [[CVTA]]
+; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
+; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[CVTA]]
+
+; VI-DAG: v_cvt_f32_f16_e64 [[ABS_A:v[0-9]+]], |[[A]]|
+; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], |[[A]]|, [[A]]
+
+; GCN: buffer_store_dword [[ABS_A]]
+; GCN: buffer_store_short [[MUL]]
+define void @fabs_multi_foldable_use_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.fabs = call half @llvm.fabs.f16(half %a.val)
+ %r.val = fpext half %a.fabs to float
+ %mul = fmul half %a.fabs, %a.val
+ store volatile float %r.val, float addrspace(1)* %r
+ store volatile half %mul, half addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fneg_multi_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], 0x8000, [[A]]
+
+; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[OR]]
+; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]|
+
+; GCN: buffer_store_dword [[CVT]]
+; GCN: buffer_store_short [[OR]]
+define void @fabs_fneg_multi_use_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.fabs = call half @llvm.fabs.f16(half %a.val)
+ %a.fneg.fabs = fsub half -0.0, %a.fabs
+ %r.val = fpext half %a.fneg.fabs to float
+ store volatile float %r.val, float addrspace(1)* %r
+ store volatile half %a.fneg.fabs, half addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fneg_multi_foldable_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
+; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], -|[[CVTA]]|, [[CVTA]]
+; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
+; SI: v_or_b32_e32 [[FABS_FNEG:v[0-9]+]], 0x80000000, [[CVTA]]
+
+; VI-DAG: v_cvt_f32_f16_e64 [[FABS_FNEG:v[0-9]+]], -|[[A]]|
+; VI-DAG: v_mul_f16_e64 [[MUL:v[0-9]+]], -|[[A]]|, [[A]]
+
+; GCN: buffer_store_dword [[FABS_FNEG]]
+; GCN: buffer_store_short [[MUL]]
+define void @fabs_fneg_multi_foldable_use_fpext_f16_to_f32(
+ float addrspace(1)* %r,
+ half addrspace(1)* %a) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %a.fabs = call half @llvm.fabs.f16(half %a.val)
+ %a.fneg.fabs = fsub half -0.0, %a.fabs
+ %r.val = fpext half %a.fneg.fabs to float
+ %mul = fmul half %a.fneg.fabs, %a.val
+ store volatile float %r.val, float addrspace(1)* %r
+ store volatile half %mul, half addrspace(1)* undef
+ ret void
+}
+
+declare half @llvm.fabs.f16(half) #1
+
+attributes #1 = { nounwind readnone }
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; GCN-LABEL: {{^}}fptrunc_f32_to_f16
+; GCN-LABEL: {{^}}fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
ret void
}
-; GCN-LABEL: {{^}}fptrunc_f64_to_f16
+; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}}
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
ret void
}
-; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16
+; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
ret void
}
-; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16
+; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16:
; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
ret void
}
+
+; GCN-LABEL: {{^}}fneg_fptrunc_f32_to_f16:
+; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
+; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -v[[A_F32]]
+; GCN: buffer_store_short v[[R_F16]]
+; GCN: s_endpgm
+define void @fneg_fptrunc_f32_to_f16(
+ half addrspace(1)* %r,
+ float addrspace(1)* %a) {
+entry:
+ %a.val = load float, float addrspace(1)* %a
+ %a.fneg = fsub float -0.0, %a.val
+ %r.val = fptrunc float %a.fneg to half
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fptrunc_f32_to_f16:
+; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
+; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
+; GCN: buffer_store_short v[[R_F16]]
+; GCN: s_endpgm
+define void @fabs_fptrunc_f32_to_f16(
+ half addrspace(1)* %r,
+ float addrspace(1)* %a) {
+entry:
+ %a.val = load float, float addrspace(1)* %a
+ %a.fabs = call float @llvm.fabs.f32(float %a.val)
+ %r.val = fptrunc float %a.fabs to half
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; GCN-LABEL: {{^}}fneg_fabs_fptrunc_f32_to_f16:
+; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
+; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -|v[[A_F32]]|
+; GCN: buffer_store_short v[[R_F16]]
+; GCN: s_endpgm
+define void @fneg_fabs_fptrunc_f32_to_f16(
+ half addrspace(1)* %r,
+ float addrspace(1)* %a) {
+entry:
+ %a.val = load float, float addrspace(1)* %a
+ %a.fabs = call float @llvm.fabs.f32(float %a.val)
+ %a.fneg.fabs = fsub float -0.0, %a.fabs
+ %r.val = fptrunc float %a.fneg.fabs to half
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+declare float @llvm.fabs.f32(float) #1
+
+attributes #1 = { nounwind readnone }
ret void
}
-; GCN-LABEL: {{^}}mac_f16_same_add
+; GCN-LABEL: {{^}}mac_f16_same_add:
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; SI: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
+
; VI: v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; VI: v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
ret void
}
-; GCN-LABEL: {{^}}mac_f16_neg_a
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_f16_neg_a:
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
ret void
}
-; GCN-LABEL: {{^}}mac_f16_neg_b
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_f16_neg_b:
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
ret void
}
-; GCN-LABEL: {{^}}mac_f16_neg_c
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_f16_neg_c:
+; SI: v_cvt_f32_f16_e32
+; SI-DAG: v_cvt_f32_f16_e32
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI: v_mac_f32_e32 [[CVT_NEG]], v{{[0-9]+}}, v{{[0-9]+}}
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: s_endpgm
ret void
}
-; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
+; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math:
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN: s_endpgm
ret void
}
-; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
+; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math:
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN: s_endpgm
ret void
}
-; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
+; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math:
+; SI: v_cvt_f32_f16_e32
+; SI: v_cvt_f32_f16_e32
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI: v_mac_f32_e32 [[CVT_NEG]], v{{[0-9]+}}, v{{[0-9]+}}
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
; GCN: s_endpgm
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16
+; GCN-LABEL: {{^}}mac_v2f16:
; GCN: {{buffer|flat}}_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]]
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16_same_add
+; GCN-LABEL: {{^}}mac_v2f16_same_add:
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
; SI: v_mac_f32_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16_neg_a
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_a:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
}
; GCN-LABEL: {{^}}mac_v2f16_neg_b
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
+
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16_neg_c
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_c:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 [[CVT_NEG0]], v{{[0-9]+}}, v{{[0-9]+}}
+; SI-DAG: v_mac_f32_e32 [[CVT_NEG1]], v{{[0-9]+}}, v{{[0-9]+}}
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math
+; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math
+; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_mac_f32_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}}
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
ret void
}
-; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 [[CVT_NEG0]], v{{[0-9]+}}, v{{[0-9]+}}
+; SI-DAG: v_mac_f32_e32 [[CVT_NEG1]], v{{[0-9]+}}, v{{[0-9]+}}
+
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
v_cvt_f32_f16_e64 v0, 0
// CHECK: [0x00,0x00,0x4b,0xd1,0x80,0x00,0x00,0x00]
+// FIXME: Parsing source modifiers
v_cvt_f32_f16_e64 v0, -1
-// CHECK: [0x00,0x00,0x4b,0xd1,0xc1,0x00,0x00,0x00]
+// CHECK: [0x00,0x00,0x4b,0xd1,0x81,0x00,0x00,0x20]
v_cvt_f32_f16_e64 v0, 0.5
// CHECK: [0x00,0x00,0x4b,0xd1,0xf0,0x00,0x00,0x00]
+// FIXME: Parsing source modifiers
v_cvt_f32_f16_e64 v0, -4.0
-// CHECK: [0x00,0x00,0x4b,0xd1,0xf7,0x00,0x00,0x00]
+// CHECK: [0x00,0x00,0x4b,0xd1,0xf6,0x00,0x00,0x20]
v_cvt_f32_f16_e64 v0, v0
// CHECK: [0x00,0x00,0x4b,0xd1,0x00,0x01,0x00,0x00]