(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32X:$src, VR128X))>;
- // extracted scalar math op with insert via blend
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
- FR32X:$src))), (i8 1))),
- (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
- (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
-
// vector math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
(Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
- // vector math op with insert via blend
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst),
- (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))),
- (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
-
// extracted masked scalar math op with insert via movss
def : Pat<(X86Movss (v4f32 VR128X:$src1),
(scalar_to_vector
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
- // extracted scalar math op with insert via blend
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
- FR64X:$src))), (i8 1))),
- (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
- (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
-
// vector math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
(Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
- // vector math op with insert via blend
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst),
- (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))),
- (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
-
  // extracted masked scalar math op with insert via movsd
def : Pat<(X86Movsd (v2f64 VR128X:$src1),
(scalar_to_vector
@@ ... @@
      (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
}
-  // With SSE 4.1, blendi is preferred to movss, so match that too.
- let Predicates = [UseSSE41] in {
- // extracted scalar math op with insert via blend
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
- (COPY_TO_REGCLASS FR32:$src, VR128))>;
-
- // vector math op with insert via blend
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
-      (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
-
- }
-
// Repeat everything for AVX.
let Predicates = [UseAVX] in {
// extracted scalar math op with insert via movss
(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;
- // extracted scalar math op with insert via blend
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
- (COPY_TO_REGCLASS FR32:$src, VR128))>;
-
// vector math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
-
- // vector math op with insert via blend
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
}
}
@@ ... @@
      (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
}
- // With SSE 4.1, blendi is preferred to movsd, so match those too.
- let Predicates = [UseSSE41] in {
- // extracted scalar math op with insert via blend
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
- (COPY_TO_REGCLASS FR64:$src, VR128))>;
-
- // vector math op with insert via blend
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
- }
-
// Repeat everything for AVX.
let Predicates = [UseAVX] in {
// extracted scalar math op with insert via movsd
(!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
- // extracted scalar math op with insert via blend
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
- (COPY_TO_REGCLASS FR64:$src, VR128))>;
-
// vector math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
(Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
(!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
-
- // vector math op with insert via blend
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
}
}
@@ ... @@
      (!cast<I>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
}
- // With SSE 4.1, blendi is preferred to movs*, so match that too.
- let Predicates = [UseSSE41] in {
- def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))),
- (!cast<I>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
- }
-
// Repeat for AVX versions of the instructions.
let Predicates = [HasAVX] in {
def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
(!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
-
- def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))),
- (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
}
}
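
For reference, the removed X86Blendi patterns and the surviving X86Movss/X86Movsd
patterns match the same source-level idiom: perform a scalar FP op on element 0
of a vector and reinsert the result, leaving the upper elements intact. A minimal
C sketch of the two forms, assuming SSE 4.1 and using illustrative function names
that are not part of this patch:

  #include <immintrin.h>

  // "extracted scalar math op with insert via blend":
  // (X86Blendi $dst, (scalar_to_vector (fadd (extractelt $dst, 0), $src)), 1)
  __m128 add_low_blend(__m128 v, float s) {
    __m128 t = _mm_set_ss(_mm_cvtss_f32(v) + s); // extractelt, fadd, scalar_to_vector
    return _mm_blend_ps(v, t, 0x1);              // single-lane blendps (SSE4.1)
  }

  // "extracted scalar math op with insert via movss":
  // (X86Movss $dst, (scalar_to_vector (fadd (extractelt $dst, 0), $src)))
  __m128 add_low_movss(__m128 v, float s) {
    __m128 t = _mm_set_ss(_mm_cvtss_f32(v) + s);
    return _mm_move_ss(v, t);                    // movss-style insert (SSE1)
  }

Both functions replace element 0 and preserve elements 1-3, and either should
select to a single addss once a pattern fires. Deleting only the X86Blendi
variants suggests that a single-lane blendi is now canonicalized to
X86Movss/X86Movsd before instruction selection, so the movss/movsd patterns
alone suffice.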