X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
- RegisterClass MaskRC = _.KRCWM> {
+ RegisterClass MaskRC = _.KRCWM,
+ dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src),
(ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
(ins MaskRC:$mask, MemOp:$src),
OpcodeStr#Alias, "$src", "$src",
- (_.VT (OpNode (_Src.VT
- (_Src.LdFrag addr:$src)))),
+ LdDAG,
(vselect MaskRC:$mask,
(_.VT (OpNode (_Src.VT
(_Src.LdFrag addr:$src)))),
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
- RegisterClass MaskRC = _.KRCWM,
- PatFrag LdFrag = !cast<PatFrag>("extload"#_Src.VTName)> {
-
- defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _Src.RC:$src),
- (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
- (ins MaskRC:$mask, _Src.RC:$src),
- OpcodeStr, "$src", "$src",
- (_.VT (OpNode (_Src.VT _Src.RC:$src))),
- (vselect MaskRC:$mask,
- (_.VT (OpNode (_Src.VT _Src.RC:$src))),
- _.RC:$src0),
- vselect, "$src0 = $dst">,
- EVEX, Sched<[sched]>;
-
- defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins MemOp:$src),
- (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
- (ins MaskRC:$mask, MemOp:$src),
- OpcodeStr#Alias, "$src", "$src",
- (_.VT (LdFrag addr:$src)),
- (vselect MaskRC:$mask,
- (_.VT (OpNode (_Src.VT
- (_Src.LdFrag addr:$src)))),
- _.RC:$src0),
- vselect, "$src0 = $dst">,
- EVEX, Sched<[sched.Folded]>;
-
- defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.ScalarMemOp:$src),
- (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
- (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
- OpcodeStr,
- "${src}"##Broadcast, "${src}"##Broadcast,
- (_.VT (OpNode (_Src.VT
- (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
- )),
- (vselect MaskRC:$mask,
- (_.VT
- (OpNode
- (_Src.VT
- (X86VBroadcast
- (_Src.ScalarLdFrag addr:$src))))),
- _.RC:$src0),
- vselect, "$src0 = $dst">,
- EVEX, EVEX_B, Sched<[sched.Folded]>;
-}
+ RegisterClass MaskRC = _.KRCWM>
+ : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
+ MemOp, MaskRC,
+ (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
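For reference, the subclass above only swaps the default LdDAG for an extending load. A minimal IR sketch of the kind of input its unmasked memory form is intended to match, assuming a CVTPS2PD-style instantiation (the function name is hypothetical, not taken from the patch):

define <2 x double> @fpext_two_floats_from_mem(<2 x float>* %p) {
  %v = load <2 x float>, <2 x float>* %p       ; 64-bit load of two floats
  %e = fpext <2 x float> %v to <2 x double>    ; should fold as an extending load, e.g. cvtps2pd (%rdi), %xmm0
  ret <2 x double> %e
}
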
// Convert Signed/Unsigned Doubleword to Double
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
- OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
+ OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
+ (v2f64 (OpNode128 (bc_v4i32
+ (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
sched.YMM>, EVEX_V256;
}
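The explicit LdDAG passed to the Z128 variant describes the DAG that type legalization produces when only 64 bits of the integer source are loaded. A sketch of IR that should reduce to that form (function name hypothetical):

define <2 x double> @sitofp_2i32_from_mem(<2 x i32>* %p) {
  %v = load <2 x i32>, <2 x i32>* %p           ; widened by type legalization into a 64-bit scalar load
  %cvt = sitofp <2 x i32> %v to <2 x double>   ; should match the bc_v4i32 (scalar_to_vector (loadi64)) DAG
  ret <2 x double> %cvt
}
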
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+ (v2f64 (X86VSintToFP
+ (bc_v4i32
+ (v2i64 (scalar_to_vector
+ (loadi64 addr:$src)))))))]>,
VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+ (v2f64 (X86VSintToFP
+ (bc_v4i32
+ (v2i64 (scalar_to_vector
+ (loadi64 addr:$src)))))))]>,
Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
// AVX register conversion intrinsics
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (VCVTDQ2PDrm addr:$src)>;
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
(VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]
// SSE2 register conversion intrinsics
let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (CVTDQ2PDrm addr:$src)>;
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
(CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]
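The two standalone patterns removed here are now covered by the rm instruction patterns above. Note that the fold relies on DAG combine narrowing the vector load to a 64-bit scalar load, which is only done for non-volatile loads; that is why the volatile tests further down stop folding. A non-volatile counterpart for comparison (a sketch; the function name is an assumption):

define <2 x double> @sitofp_load_4i32_to_2f64(<4 x i32>* %a) {
  %ld = load <4 x i32>, <4 x i32>* %a          ; non-volatile, so the load can be narrowed to 64 bits
  %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i32> %b to <2 x double>   ; still expected to select cvtdq2pd (%rdi), %xmm0
  ret <2 x double> %cvt
}
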
; VLDQ-LABEL: test_sito2f64_mask_load:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vpmovq2m %xmm0, %k1
-; VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: test_sito2f64_mask_load:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
; VLNODQ-NEXT: retq
;
; DQNOVL-LABEL: test_sito2f64_mask_load:
; VLDQ-LABEL: test_uito2f64_mask_load:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vpmovq2m %xmm0, %k1
-; VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: test_uito2f64_mask_load:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
; VLNODQ-NEXT: retq
;
; DQNOVL-LABEL: test_uito2f64_mask_load:
; VLDQ-LABEL: test_sito2f64_mask_load:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vpmovq2m %xmm0, %k1
-; VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: test_sito2f64_mask_load:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
; VLNODQ-NEXT: retq
;
; DQNOVL-LABEL: test_sito2f64_mask_load:
; VLDQ-LABEL: test_uito2f64_mask_load:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vpmovq2m %xmm0, %k1
-; VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: test_uito2f64_mask_load:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
; VLNODQ-NEXT: retq
;
; DQNOVL-LABEL: test_uito2f64_mask_load:
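For context, the check lines above correspond to masked conversions of a <2 x i32> value loaded from memory; the memory operand now folds directly into the masked convert. A rough sketch of the shape of such a test (names and the exact mask construction are assumptions, not copied from the test file):

define <2 x double> @test_sito2f64_mask_load(<2 x i32>* %a, <2 x i64> %c) {
  %mask = icmp slt <2 x i64> %c, zeroinitializer   ; sign-bit test, lowered to vpmovq2m or vpcmpgtq
  %ld = load <2 x i32>, <2 x i32>* %a
  %cvt = sitofp <2 x i32> %ld to <2 x double>
  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
  ret <2 x double> %sel
}
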
define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
; SSE: # %bb.0:
-; SSE-NEXT: cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT: movaps (%rdi), %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT: vmovaps (%rdi), %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load volatile <4 x i32>, <4 x i32> *%a
%b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
; SSE: # %bb.0:
-; SSE-NEXT: cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT: movaps (%rdi), %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT: vmovaps (%rdi), %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load volatile <4 x i32>, <4 x i32> *%a
%b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>