// Match a v2i64 add whose first operand is X86pmuldq of two v4i32 registers and
// select a single VPMACSDQLrr, forwarding the v2i64 addend as the third operand.
def : Pat<(v2i64 (add (X86pmuldq (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
(v2i64 VR128:$src3))),
(VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
+ (v4i32 VR128:$src3))), // $src3 is the v4i32 addend applied to the X86vpmaddwd result
+ (VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>; // select one VPMADCSWDrr in place of the separate multiply-add and add
}
// Instruction where second source can be memory, third must be imm8
define <4 x i32> @test_pmaddwd_v8i16_add_v4i32(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; XOP-LABEL: test_pmaddwd_v8i16_add_v4i32:
; XOP: # BB#0:
-; XOP-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
-; XOP-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
%2 = add <4 x i32> %1, %a2