}
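+// These only require AVX-512: expandPostRAPseudo uses a VEX-encoded XOR for
+// registers 0-15 when VLX is unavailable, and falls back to a 512-bit VPXORD
+// on the containing ZMM register for XMM16-XMM31/YMM16-YMM31.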
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
[(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasVLX, HasDQI] in {
+ isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
// Move scalar to XMM zero-extended, zeroing a VR128X then do a
// MOVS{S,D} to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
- (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)), FR32X:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
- (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
- (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
- (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)), FR64X:$src)>;
}
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4f32 (V_SET0)),
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4i32 (V_SET0)),
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4f32 (V_SET0)),
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4i32 (V_SET0)),
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
let AddedComplexity = 20 in {
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
FR32X:$src)), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
+ (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
FR64X:$src)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDZrr (v2f64 (V_SET0)),
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDZrr (v2f64 (V_SET0)),
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
assert(HasAVX && "AVX not supported");
return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
case X86::AVX512_128_SET0:
- return Expand2AddrUndef(MIB, get(X86::VPXORDZ128rr));
- case X86::AVX512_256_SET0:
- return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
+ case X86::AVX512_FsFLD0SS:
+ case X86::AVX512_FsFLD0SD: {
+ bool HasVLX = Subtarget.hasVLX();
+ unsigned SrcReg = MIB->getOperand(0).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
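+ // A VEX-encoded XOR can only address XMM0-XMM15; the EVEX-encoded 128-bit
+ // VPXORD additionally requires VLX.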
+ if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
+ return Expand2AddrUndef(MIB,
+ get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
+ // Extended register without VLX. Use a larger XOR.
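+ // Zeroing the containing ZMM register also zeroes its low 128 bits.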
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
+ MIB->getOperand(0).setReg(SrcReg);
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
+ }
+ case X86::AVX512_256_SET0: {
+ bool HasVLX = Subtarget.hasVLX();
+ unsigned SrcReg = MIB->getOperand(0).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
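+ // Same constraint as the 128-bit case: VEX only reaches YMM0-YMM15, and the
+ // 256-bit EVEX form requires VLX.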
+ if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
+ return Expand2AddrUndef(MIB,
+ get(HasVLX ? X86::VPXORDZ256rr : X86::VXORPSYrr));
+ // Extended register without VLX. Use a larger XOR.
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
+ MIB->getOperand(0).setReg(SrcReg);
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
+ }
case X86::AVX512_512_SET0:
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
- case X86::AVX512_FsFLD0SS:
- case X86::AVX512_FsFLD0SD:
- return Expand2AddrUndef(MIB, get(X86::VXORPSZ128rr));
case X86::V_SETALLONES:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoVLX_Or_NoDQI]>;
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoVLX_Or_NoDQI]>;
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
}
//===----------------------------------------------------------------------===//
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [NoVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
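+// With AVX-512, the v4i32 all-zeros vector is selected as AVX512_128_SET0
+// instead, so this pattern only applies to targets without AVX-512.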
-let Predicates = [NoVLX] in
+let Predicates = [NoAVX512] in
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX, NoVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllZerosV))]>;
}