RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
- dag ImmAllZerosV = (VT immAllZerosV);
+ // A vector type of the same width with element type i32. This is used to
+ // create the canonical constant zero node ImmAllZerosV.
+ ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
+ dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
string ZSuffix = !if (!eq (Size, 128), "Z128",
!if (!eq (Size, 256), "Z256", "Z"));
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
(ins VK8WM:$mask), "",
[(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
- (v8i64 immAllOnesV),
- (v8i64 immAllZerosV)))]>;
+ (bc_v8i64 (v16i32 immAllOnesV)),
+ (bc_v8i64 (v16i32 immAllZerosV))))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- (v16f32 immAllZerosV)),
+ (bc_v16f32 (v16i32 immAllZerosV))),
(VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
- (v8f64 immAllZerosV)),
+ (bc_v8f64 (v16i32 immAllZerosV))),
(VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
- (v8i64 immAllZerosV)),
+ (bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
(bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- (v8f32 immAllZerosV)),
+ (bc_v8f32 (v8i32 immAllZerosV))),
(VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- (v4f64 immAllZerosV)),
+ (bc_v4f64 (v8i32 immAllZerosV))),
(VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- (v4i64 immAllZerosV)),
+ (bc_v4i64 (v8i32 immAllZerosV))),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
- (v16f32 immAllZerosV)),
+ (bc_v16f32 (v16i32 immAllZerosV))),
(VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- (v8f64 immAllZerosV)),
+ (bc_v8f64 (v16i32 immAllZerosV))),
(VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- (v8i64 immAllZerosV)),
+ (bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
"", []>, Sched<[WriteFStoreY]>;
}
-def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
+def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
- (v8i64 immAllZerosV),
+ (bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
- _.info512.ImmAllZerosV)),
+ (_.info512.VT (bitconvert
+ (v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
- _.info512.ImmAllZerosV)),
+ (_.info512.VT (bitconvert
+ (v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask512,
- _.info512.ImmAllZerosV)),
+ (_.info512.VT (bitconvert
+ (v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// AVX512Vl patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
- _.info128.ImmAllZerosV)),
+ (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
- immAllZerosV),
+ (bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
- immAllZerosV),
+ (bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
- immAllZerosV),
+ (bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
- def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
+ def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
- def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
+ def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
- def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
+ def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
- def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
+ def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
let Predicates = [HasAVX512, NoVLX] in {
- def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
+ def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
+ def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
+ def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
+ def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
+ def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
- def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
+ def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
- def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
+ def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
- def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
+ def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
}
let Predicates = [HasVLX] in {
- def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
+ def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
+ def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
+ def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
+ def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
+ def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
- def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
+ def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
- def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
+ def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
- def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
+ def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}