}
//Bitwise Ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx512_mask_pand_d_128 : GCCBuiltin<"__builtin_ia32_pandd128_mask">, // Masked AND/OR/XOR defs below: three vector operands of the result type plus an integer mask (i8 for up to 8 lanes, i16 for the 16-lane forms).
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pand_d_256 : GCCBuiltin<"__builtin_ia32_pandd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pand_q_128 : GCCBuiltin<"__builtin_ia32_pandq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pand_q_256 : GCCBuiltin<"__builtin_ia32_pandq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pand_q_512 : GCCBuiltin<"__builtin_ia32_pandq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_por_d_128 : GCCBuiltin<"__builtin_ia32_pord128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_por_d_256 : GCCBuiltin<"__builtin_ia32_pord256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_por_d_512 : GCCBuiltin<"__builtin_ia32_pord512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_por_q_128 : GCCBuiltin<"__builtin_ia32_porq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_por_q_256 : GCCBuiltin<"__builtin_ia32_porq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_por_q_512 : GCCBuiltin<"__builtin_ia32_porq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pxor_d_128 : GCCBuiltin<"__builtin_ia32_pxord128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pxor_d_256 : GCCBuiltin<"__builtin_ia32_pxord256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pxor_d_512 : GCCBuiltin<"__builtin_ia32_pxord512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pxor_q_128 : GCCBuiltin<"__builtin_ia32_pxorq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pxor_q_256 : GCCBuiltin<"__builtin_ia32_pxorq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pxor_q_512 : GCCBuiltin<"__builtin_ia32_pxorq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
}
-
+// FP logical ops
+let TargetPrefix = "x86" in { // Masked and/andn/or/xor on packed FP vectors; each def takes three vectors of the result type plus an integer mask (i8, or i16 for the v16f32 forms).
+ def int_x86_avx512_mask_and_pd_128 : GCCBuiltin<"__builtin_ia32_andpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_and_pd_256 : GCCBuiltin<"__builtin_ia32_andpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_and_pd_512 : GCCBuiltin<"__builtin_ia32_andpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_and_ps_128 : GCCBuiltin<"__builtin_ia32_andps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_and_ps_256 : GCCBuiltin<"__builtin_ia32_andps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_and_ps_512 : GCCBuiltin<"__builtin_ia32_andps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_andn_pd_128 : GCCBuiltin<"__builtin_ia32_andnpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_andn_pd_256 : GCCBuiltin<"__builtin_ia32_andnpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_andn_pd_512 : GCCBuiltin<"__builtin_ia32_andnpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_andn_ps_128 : GCCBuiltin<"__builtin_ia32_andnps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_andn_ps_256 : GCCBuiltin<"__builtin_ia32_andnps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_andn_ps_512 : GCCBuiltin<"__builtin_ia32_andnps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_or_pd_128 : GCCBuiltin<"__builtin_ia32_orpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_or_pd_256 : GCCBuiltin<"__builtin_ia32_orpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_or_pd_512 : GCCBuiltin<"__builtin_ia32_orpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_or_ps_128 : GCCBuiltin<"__builtin_ia32_orps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_or_ps_256 : GCCBuiltin<"__builtin_ia32_orps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_or_ps_512 : GCCBuiltin<"__builtin_ia32_orps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_xor_pd_128 : GCCBuiltin<"__builtin_ia32_xorpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_xor_pd_256 : GCCBuiltin<"__builtin_ia32_xorpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_xor_pd_512 : GCCBuiltin<"__builtin_ia32_xorpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_xor_ps_128 : GCCBuiltin<"__builtin_ia32_xorps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_xor_ps_256 : GCCBuiltin<"__builtin_ia32_xorps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_xor_ps_512 : GCCBuiltin<"__builtin_ia32_xorps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+}
// Integer arithmetic ops
let TargetPrefix = "x86" in {
+ def int_x86_avx512_mask_padd_b_128 : GCCBuiltin<"__builtin_ia32_paddb128_mask">, // In this block the mask operand width follows the result lane count: i8 (up to 8 lanes), i16 (16), i32 (32), i64 (64).
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_b_256 : GCCBuiltin<"__builtin_ia32_paddb256_mask">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_b_512 : GCCBuiltin<"__builtin_ia32_paddb512_mask">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+ llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_w_128 : GCCBuiltin<"__builtin_ia32_paddw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_w_256 : GCCBuiltin<"__builtin_ia32_paddw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_w_512 : GCCBuiltin<"__builtin_ia32_paddw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_d_128 : GCCBuiltin<"__builtin_ia32_paddd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_d_256 : GCCBuiltin<"__builtin_ia32_paddd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_d_512 : GCCBuiltin<"__builtin_ia32_paddd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_q_128 : GCCBuiltin<"__builtin_ia32_paddq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_q_256 : GCCBuiltin<"__builtin_ia32_paddq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_q_512 : GCCBuiltin<"__builtin_ia32_paddq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_b_128 : GCCBuiltin<"__builtin_ia32_psubb128_mask">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_b_256 : GCCBuiltin<"__builtin_ia32_psubb256_mask">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_b_512 : GCCBuiltin<"__builtin_ia32_psubb512_mask">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+ llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_w_128 : GCCBuiltin<"__builtin_ia32_psubw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_w_256 : GCCBuiltin<"__builtin_ia32_psubw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_w_512 : GCCBuiltin<"__builtin_ia32_psubw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_d_128 : GCCBuiltin<"__builtin_ia32_psubd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_d_256 : GCCBuiltin<"__builtin_ia32_psubd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_d_512 : GCCBuiltin<"__builtin_ia32_psubd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_q_128 : GCCBuiltin<"__builtin_ia32_psubq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_q_256 : GCCBuiltin<"__builtin_ia32_psubq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_q_512 : GCCBuiltin<"__builtin_ia32_psubq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmulu_dq_128 : GCCBuiltin<"__builtin_ia32_pmuludq128_mask">, // pmul_dq/pmulu_dq: the two sources are i32 vectors; the result and third operand are i64 vectors with half as many lanes.
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmul_dq_128 : GCCBuiltin<"__builtin_ia32_pmuldq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmulu_dq_256 : GCCBuiltin<"__builtin_ia32_pmuludq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmul_dq_256 : GCCBuiltin<"__builtin_ia32_pmuldq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_w_128 : GCCBuiltin<"__builtin_ia32_pmullw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_w_256 : GCCBuiltin<"__builtin_ia32_pmullw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_w_512 : GCCBuiltin<"__builtin_ia32_pmullw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_d_128 : GCCBuiltin<"__builtin_ia32_pmulld128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_d_256 : GCCBuiltin<"__builtin_ia32_pmulld256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_d_512 : GCCBuiltin<"__builtin_ia32_pmulld512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_q_128 : GCCBuiltin<"__builtin_ia32_pmullq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_q_256 : GCCBuiltin<"__builtin_ia32_pmullq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmull_q_512 : GCCBuiltin<"__builtin_ia32_pmullq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
+
// Gather and Scatter ops
let TargetPrefix = "x86" in {
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
}
-
+ if (Subtarget->hasDQI()) {
+   // With DQI a 64-bit element multiply is directly selectable on 512-bit
+   // vectors, so mark it Legal.
+   setOperationAction(ISD::MUL, MVT::v8i64, Legal);
+   // The 128- and 256-bit forms are only defined under HasVLX (the VPMULLQ
+   // defm uses a vector-length multiclass), so declaring them Legal on DQI
+   // alone would leave a node with no matching instruction. Only override
+   // the previously configured action when VLX is also present.
+   if (Subtarget->hasVLX()) {
+     setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+     setOperationAction(ISD::MUL, MVT::v4i64, Legal);
+   }
+ }
// Custom lower several nodes.
for (MVT VT : MVT::vector_valuetypes()) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
OpcodeStr,
"${src2}"##_Dst.BroadcastStr##", $src1",
"$src1, ${src2}"##_Dst.BroadcastStr,
- (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Dst.VT (X86VBroadcast
(_Dst.ScalarLdFrag addr:$src2)))))),
"", itins.rm>,
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
- X86pmuldq, v16i32_info, v8i64_info, 1>,
- T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
+
+multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
+ SDNode OpNode, bit IsCommutable = 0> { // Instantiates avx512_binop_rm2 three times: NAME#Z (v16i32 -> v8i64), NAME#Z256 (v8i32 -> v4i64) and NAME#Z128 (v4i32 -> v2i64).
-defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
- X86pmuludq, v16i32_info, v8i64_info, 1>,
- EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
+ defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
+ v16i32_info, v8i64_info, IsCommutable>,
+ EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
+ let Predicates = [HasVLX] in { // The 256- and 128-bit variants are additionally gated on HasVLX.
+ defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
+ v8i32x_info, v4i64x_info, IsCommutable>,
+ EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
+ defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
+ v4i32x_info, v2i64x_info, IsCommutable>,
+ EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
+ }
+}
+
+defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, // NOTE(review): VPMULUDQ below uses SSE_INTMUL_ITINS_P; confirm the ALU itinerary is intended for this multiply.
+ X86pmuldq, 1>,T8PD;
+defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
+ X86pmuludq, 1>; // Both are instantiated with IsCommutable = 1; only VPMULDQ carries T8PD.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>;
-
+let Predicates = [HasDQI] in { // NOTE(review): confirm this outer Predicates list does not replace any per-vector-length predicates set inside avx512_fp_binop_p.
+ defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, 1>;
+ defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, 0>; // Only VANDN passes 0 as the last argument (presumably the commutable flag; confirm against the multiclass).
+ defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, 1>;
+ defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
+}
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
(i16 -1), FROUND_CURRENT)),
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
- X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
- X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
- X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
- X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
- X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
- X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
- X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
- X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
- X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
- X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
- X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
- X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
- X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
- X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
+ X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
- X86ISD::FADD_RND),
+ X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
- X86ISD::FADD_RND),
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_and_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_512, BLEND, X86ISD::SELECT, 0),
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512_mask_or_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_b_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_b_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_b_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_d_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_d_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_q_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_q_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_w_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_w_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_w_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_q_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_q_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_q_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_q_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_q_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_w_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_w_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_w_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK,
X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_d_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_d_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_b_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_b_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_d_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_d_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_q_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_q_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_w_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_w_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_w_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
}
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
+
+; Unmasked register form: the mask operand is all-ones (i16 -1), so the
+; expected vpmulld has register operands and no {%k1} write-mask.
+define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
+ ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %prod
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpmulld is predicated with {%k1}.
+define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
+ ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret <16 x i32> %prod
+}
+
+; Zero-masked register form: the pass-through operand is zeroinitializer, so
+; the expected vpmulld carries {%k1} {z}.
+define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
+ ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %prod
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpmulld takes it directly as a (%rdi) memory operand.
+define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
+ ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
+ %rhs = load <16 x i32>, <16 x i32>* %ptr_b
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %rhs, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %prod
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpmulld reads (%rdi) and is predicated with {%k1}.
+define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
+ ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
+ %rhs = load <16 x i32>, <16 x i32>* %ptr_b
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %rhs, <16 x i32> %passThru, i16 %mask)
+ ret <16 x i32> %prod
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpmulld reads (%rdi) with {%k1} {z}.
+define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
+ ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
+ %rhs = load <16 x i32>, <16 x i32>* %ptr_b
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %rhs, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %prod
+}
+
+; Unmasked broadcast form: one i32 is loaded and splatted to every lane
+; (insertelement + zero-index shufflevector); the expected vpmulld uses a
+; (%rdi){1to16} operand.
+define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
+ ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <16 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %splat, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %prod
+}
+
+; Masked broadcast form: splatted scalar load plus %passThru/%mask; the
+; expected vpmulld uses (%rdi){1to16} and is predicated with {%k1}.
+define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
+ ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <16 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %splat, <16 x i32> %passThru, i16 %mask)
+ ret <16 x i32> %prod
+}
+
+; Zero-masked broadcast form: splatted scalar load with a zeroinitializer
+; pass-through; the expected vpmulld uses (%rdi){1to16} with {%k1} {z}.
+define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
+ ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <16 x i32> undef, i32 %elt, i32 0
+ %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
+ %prod = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %splat, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %prod
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
ret <4 x double> %res
}
+; Unmasked register form: all-ones mask (i8 -1), so the expected vpaddw has
+; register operands and no {%k1} write-mask.
+define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rr_128
+ ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
+ %sum = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %sum
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpaddw is predicated with {%k1}.
+define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrk_128
+ ;CHECK: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
+ %sum = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %sum
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpaddw carries {%k1} {z}.
+define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrkz_128
+ ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
+ %sum = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %sum
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpaddw takes it directly as a (%rdi) memory operand.
+define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rm_128
+ ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %sum = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %sum
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpaddw reads (%rdi) and is predicated with {%k1}.
+define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmk_128
+ ;CHECK: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %sum = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %sum
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpaddw reads (%rdi) with {%k1} {z}.
+define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmkz_128
+ ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %sum = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %sum
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; Unmasked register form: all-ones mask (i16 -1), so the expected vpaddw has
+; register operands and no {%k1} write-mask.
+define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rr_256
+ ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
+ %sum = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %sum
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpaddw is predicated with {%k1}.
+define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrk_256
+ ;CHECK: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
+ %sum = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %sum
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpaddw carries {%k1} {z}.
+define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrkz_256
+ ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
+ %sum = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %sum
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpaddw takes it directly as a (%rdi) memory operand.
+define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rm_256
+ ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %sum = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %sum
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpaddw reads (%rdi) and is predicated with {%k1}.
+define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmk_256
+ ;CHECK: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %sum = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %sum
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpaddw reads (%rdi) with {%k1} {z}.
+define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmkz_256
+ ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %sum = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %sum
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; Unmasked register form: all-ones mask (i8 -1), so the expected vpsubw has
+; register operands and no {%k1} write-mask.
+define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rr_128
+ ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
+ %diff = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %diff
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpsubw is predicated with {%k1}.
+define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrk_128
+ ;CHECK: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
+ %diff = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %diff
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpsubw carries {%k1} {z}.
+define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrkz_128
+ ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
+ %diff = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %diff
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpsubw takes it directly as a (%rdi) memory operand.
+define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rm_128
+ ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %diff = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %diff
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpsubw reads (%rdi) and is predicated with {%k1}.
+define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmk_128
+ ;CHECK: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %diff = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %diff
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpsubw reads (%rdi) with {%k1} {z}.
+define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmkz_128
+ ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %diff = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %diff
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; Unmasked register form: all-ones mask (i16 -1), so the expected vpsubw has
+; register operands and no {%k1} write-mask.
+define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rr_256
+ ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
+ %diff = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %diff
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpsubw is predicated with {%k1}.
+define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrk_256
+ ;CHECK: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
+ %diff = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %diff
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpsubw carries {%k1} {z}.
+define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrkz_256
+ ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
+ %diff = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %diff
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpsubw takes it directly as a (%rdi) memory operand.
+define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rm_256
+ ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %diff = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %diff
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpsubw reads (%rdi) and is predicated with {%k1}.
+define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmk_256
+ ;CHECK: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %diff = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %diff
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpsubw reads (%rdi) with {%k1} {z}.
+define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmkz_256
+ ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %diff = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %diff
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; Unmasked register form: all-ones mask (i32 -1), so the expected vpaddw has
+; register operands and no {%k1} write-mask.
+define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rr_512
+ ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
+ %sum = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %sum
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpaddw is predicated with {%k1}.
+define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrk_512
+ ;CHECK: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
+ %sum = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %sum
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpaddw carries {%k1} {z}.
+define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrkz_512
+ ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
+ %sum = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %sum
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpaddw takes it directly as a (%rdi) memory operand.
+define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rm_512
+ ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %sum = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %sum
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpaddw reads (%rdi) and is predicated with {%k1}.
+define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmk_512
+ ;CHECK: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %sum = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %sum
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpaddw reads (%rdi) with {%k1} {z}.
+define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmkz_512
+ ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %sum = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %sum
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; Unmasked register form: all-ones mask (i32 -1), so the expected vpsubw has
+; register operands and no {%k1} write-mask.
+define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rr_512
+ ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
+ %diff = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %diff
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpsubw is predicated with {%k1}.
+define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrk_512
+ ;CHECK: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
+ %diff = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %diff
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpsubw carries {%k1} {z}.
+define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrkz_512
+ ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
+ %diff = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %diff
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpsubw takes it directly as a (%rdi) memory operand.
+define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rm_512
+ ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %diff = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %diff
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpsubw reads (%rdi) and is predicated with {%k1}.
+define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmk_512
+ ;CHECK: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %diff = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %diff
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpsubw reads (%rdi) with {%k1} {z}.
+define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmkz_512
+ ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %diff = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %diff
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; Unmasked register form: all-ones mask (i32 -1), so the expected vpmullw
+; has register operands and no {%k1} write-mask.
+define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rr_512
+ ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
+ %prod = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %prod
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpmullw is predicated with {%k1}.
+define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrk_512
+ ;CHECK: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
+ %prod = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %prod
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpmullw carries {%k1} {z}.
+define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_512
+ ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
+ %prod = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %prod
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpmullw takes it directly as a (%rdi) memory operand.
+define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rm_512
+ ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %prod = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %prod
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpmullw reads (%rdi) and is predicated with {%k1}.
+define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmk_512
+ ;CHECK: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %prod = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %prod
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpmullw reads (%rdi) with {%k1} {z}.
+define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_512
+ ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
+ %rhs = load <32 x i16>, <32 x i16>* %ptr_b
+ %prod = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %prod
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; Unmasked register form: all-ones mask (i8 -1), so the expected vpmullw has
+; register operands and no {%k1} write-mask.
+define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rr_128
+ ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1]
+ %prod = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %prod
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpmullw is predicated with {%k1}.
+define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrk_128
+ ;CHECK: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
+ %prod = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %prod
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpmullw carries {%k1} {z}.
+define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_128
+ ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
+ %prod = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %prod
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpmullw takes it directly as a (%rdi) memory operand.
+define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rm_128
+ ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %prod = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %prod
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpmullw reads (%rdi) and is predicated with {%k1}.
+define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmk_128
+ ;CHECK: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %prod = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %prod
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpmullw reads (%rdi) with {%k1} {z}.
+define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_128
+ ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
+ %rhs = load <8 x i16>, <8 x i16>* %ptr_b
+ %prod = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %prod
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; Unmasked register form: all-ones mask (i16 -1), so the expected vpmullw
+; has register operands and no {%k1} write-mask.
+define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rr_256
+ ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1]
+ %prod = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %prod
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpmullw is predicated with {%k1}.
+define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrk_256
+ ;CHECK: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
+ %prod = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %prod
+}
+
+; Zero-masked register form: zeroinitializer pass-through, so the expected
+; vpmullw carries {%k1} {z}.
+define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_256
+ ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
+ %prod = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %prod
+}
+
+; Unmasked memory form: the second operand is loaded from %ptr_b and the
+; expected vpmullw takes it directly as a (%rdi) memory operand.
+define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rm_256
+ ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %prod = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %prod
+}
+
+; Masked memory form: loaded second operand plus %passThru/%mask; the
+; expected vpmullw reads (%rdi) and is predicated with {%k1}.
+define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmk_256
+ ;CHECK: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %prod = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %prod
+}
+
+; Zero-masked memory form: loaded second operand with a zeroinitializer
+; pass-through; the expected vpmullw reads (%rdi) with {%k1} {z}.
+define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_256
+ ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
+ %rhs = load <16 x i16>, <16 x i16>* %ptr_b
+ %prod = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %prod
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64>, <2 x i64>, i8) nounwind readonly
+
+
+; Unmasked register form: <4 x i32> inputs, <2 x i64> result, all-ones mask
+; (i8 -1); the expected vpmuldq has register operands and no {%k1}.
+define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rr_128
+ ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1]
+ %prod = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %prod
+}
+
+; Masked register form: %passThru and %mask are forwarded to the intrinsic;
+; the expected vpmuldq is predicated with {%k1}.
+define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrk_128
+ ;CHECK: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
+ %prod = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %prod
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { ; register operands, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rrkz_128
+ ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { ; operand loaded from %ptr_b, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rm_128
+ ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07]
+ %rhs = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { ; operand loaded from %ptr_b, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmk_128
+ ;CHECK: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
+ %rhs = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { ; operand loaded from %ptr_b, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmkz_128
+ ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
+ %rhs = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { ; i64 scalar from %ptr_b splatted and bitcast, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmb_128
+ ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 2 x i64> %lane0, < 2 x i64> undef, <2 x i32> zeroinitializer
+ %rhs = bitcast < 2 x i64> %splat to < 4 x i32>
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { ; i64 scalar from %ptr_b splatted and bitcast, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbk_128
+ ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 2 x i64> %lane0, < 2 x i64> undef, <2 x i32> zeroinitializer
+ %rhs = bitcast < 2 x i64> %splat to < 4 x i32>
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { ; i64 scalar from %ptr_b splatted and bitcast, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_128
+ ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 2 x i64> %lane0, < 2 x i64> undef, < 2 x i32> zeroinitializer
+ %rhs = bitcast < 2 x i64> %splat to < 4 x i32>
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
+
+define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { ; register operands, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rr_256
+ ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1]
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { ; register operands, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rrk_256
+ ;CHECK: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { ; register operands, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rrkz_256
+ ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { ; operand loaded from %ptr_b, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rm_256
+ ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07]
+ %rhs = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { ; operand loaded from %ptr_b, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmk_256
+ ;CHECK: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
+ %rhs = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { ; operand loaded from %ptr_b, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmkz_256
+ ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
+ %rhs = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { ; i64 scalar from %ptr_b splatted and bitcast, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmb_256
+ ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 4 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 4 x i64> %lane0, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %rhs = bitcast < 4 x i64> %splat to < 8 x i32>
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { ; i64 scalar from %ptr_b splatted and bitcast, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbk_256
+ ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 4 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 4 x i64> %lane0, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %rhs = bitcast < 4 x i64> %splat to < 8 x i32>
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { ; i64 scalar from %ptr_b splatted and bitcast, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_256
+ ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 4 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 4 x i64> %lane0, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %rhs = bitcast < 4 x i64> %splat to < 8 x i32>
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
+
+define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { ; register operands, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rr_128
+ ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { ; register operands, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rrk_128
+ ;CHECK: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { ; register operands, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rrkz_128
+ ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { ; operand loaded from %ptr_b, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rm_128
+ ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0x07]
+ %rhs = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { ; operand loaded from %ptr_b, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmk_128
+ ;CHECK: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
+ %rhs = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { ; operand loaded from %ptr_b, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmkz_128
+ ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
+ %rhs = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { ; i64 scalar from %ptr_b splatted and bitcast, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmb_128
+ ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 2 x i64> %lane0, < 2 x i64> undef, <2 x i32> zeroinitializer
+ %rhs = bitcast < 2 x i64> %splat to < 4 x i32>
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { ; i64 scalar from %ptr_b splatted and bitcast, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbk_128
+ ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 2 x i64> %lane0, < 2 x i64> undef, <2 x i32> zeroinitializer
+ %rhs = bitcast < 2 x i64> %splat to < 4 x i32>
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { ; i64 scalar from %ptr_b splatted and bitcast, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_128
+ ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 2 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 2 x i64> %lane0, < 2 x i64> undef, < 2 x i32> zeroinitializer
+ %rhs = bitcast < 2 x i64> %splat to < 4 x i32>
+ %out = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %rhs, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %out
+}
+
+declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
+
+define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { ; register operands, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rr_256
+ ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0xc1]
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { ; register operands, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rrk_256
+ ;CHECK: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { ; register operands, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rrkz_256
+ ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { ; operand loaded from %ptr_b, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rm_256
+ ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0x07]
+ %rhs = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { ; operand loaded from %ptr_b, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmk_256
+ ;CHECK: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
+ %rhs = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { ; operand loaded from %ptr_b, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmkz_256
+ ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
+ %rhs = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { ; i64 scalar from %ptr_b splatted and bitcast, all-ones mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmb_256
+ ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 4 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 4 x i64> %lane0, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %rhs = bitcast < 4 x i64> %splat to < 8 x i32>
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { ; i64 scalar from %ptr_b splatted and bitcast, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbk_256
+ ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 4 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 4 x i64> %lane0, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %rhs = bitcast < 4 x i64> %splat to < 8 x i32>
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { ; i64 scalar from %ptr_b splatted and bitcast, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_256
+ ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
+ %elt = load i64, i64* %ptr_b
+ %lane0 = insertelement < 4 x i64> undef, i64 %elt, i32 0
+ %splat = shufflevector < 4 x i64> %lane0, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %rhs = bitcast < 4 x i64> %splat to < 8 x i32>
+ %out = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %rhs, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %out
+}
+
+declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
+
+define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; register operands, all-ones mask
+ ;CHECK-LABEL: test_mask_add_epi32_rr_128
+ ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { ; register operands, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rrk_128
+ ;CHECK: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; register operands, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rrkz_128
+ ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; operand loaded from %ptr_b, all-ones mask
+ ;CHECK-LABEL: test_mask_add_epi32_rm_128
+ ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07]
+ %rhs = load <4 x i32>, <4 x i32>* %ptr_b
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; operand loaded from %ptr_b, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmk_128
+ ;CHECK: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
+ %rhs = load <4 x i32>, <4 x i32>* %ptr_b
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; operand loaded from %ptr_b, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmkz_128
+ ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
+ %rhs = load <4 x i32>, <4 x i32>* %ptr_b
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { ; i32 scalar from %ptr_b splatted, all-ones mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmb_128
+ ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; i32 scalar from %ptr_b splatted, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmbk_128
+ ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; i32 scalar from %ptr_b splatted, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmbkz_128
+ ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %out = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %out
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; register operands, all-ones mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rr_128
+ ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1]
+ %out = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { ; register operands, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rrk_128
+ ;CHECK: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
+ %out = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; register operands, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rrkz_128
+ ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
+ %out = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; operand loaded from %ptr_b, all-ones mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rm_128
+ ;CHECK: vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; the expected asm takes this operand straight from (%rdi)
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; operand loaded from %ptr_b, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmk_128
+ ;CHECK: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
+ %rhs = load <4 x i32>, <4 x i32>* %ptr_b
+ %out = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; operand loaded from %ptr_b, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmkz_128
+ ;CHECK: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
+ %rhs = load <4 x i32>, <4 x i32>* %ptr_b
+ %out = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { ; i32 scalar from %ptr_b splatted, all-ones mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmb_128
+ ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %out = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; i32 scalar from %ptr_b splatted, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbk_128
+ ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %out = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; i32 scalar from %ptr_b splatted, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_128
+ ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
+ %out = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %rhs, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %out
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; register operands, all-ones mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rr_256
+ ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1]
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { ; register operands, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rrk_256
+ ;CHECK: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; register operands, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rrkz_256
+ ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; operand loaded from %ptr_b, all-ones mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rm_256
+ ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07]
+ %rhs = load <8 x i32>, <8 x i32>* %ptr_b
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; operand loaded from %ptr_b, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmk_256
+ ;CHECK: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
+ %rhs = load <8 x i32>, <8 x i32>* %ptr_b
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { ; operand loaded from %ptr_b, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmkz_256
+ ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
+ %rhs = load <8 x i32>, <8 x i32>* %ptr_b
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { ; i32 scalar from %ptr_b splatted, all-ones mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmb_256
+ ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <8 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; i32 scalar from %ptr_b splatted, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbk_256
+ ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <8 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { ; i32 scalar from %ptr_b splatted, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_256
+ ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <8 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer
+ %out = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %out
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; register operands, all-ones mask
+ ;CHECK-LABEL: test_mask_add_epi32_rr_256
+ ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
+ %out = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { ; register operands, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rrk_256
+ ;CHECK: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
+ %out = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; register operands, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rrkz_256
+ ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
+ %out = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; operand loaded from %ptr_b, all-ones mask
+ ;CHECK-LABEL: test_mask_add_epi32_rm_256
+ ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07]
+ %rhs = load <8 x i32>, <8 x i32>* %ptr_b
+ %out = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; operand loaded from %ptr_b, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmk_256
+ ;CHECK: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
+ %rhs = load <8 x i32>, <8 x i32>* %ptr_b
+ %out = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { ; operand loaded from %ptr_b, zero pass-through with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmkz_256
+ ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
+ %rhs = load <8 x i32>, <8 x i32>* %ptr_b
+ %out = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { ; i32 scalar from %ptr_b splatted, all-ones mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmb_256
+ ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <8 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer
+ %out = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; i32 scalar from %ptr_b splatted, %passThru with variable %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmbk_256
+ ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
+ %elt = load i32, i32* %ptr_b
+ %lane0 = insertelement <8 x i32> undef, i32 %elt, i32 0
+ %rhs = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer
+ %out = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %rhs, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %out
+}
+
+define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { ; padd.d.256, rmbkz: splatted i32 operand, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_add_epi32_rmbkz_256
+ ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; pand.d.128, rr: both operands are register arguments, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_and_epi32_rr_128
+ ;CHECK: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { ; pand.d.128, rrk: register operands with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rrk_128
+ ;CHECK: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm2
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; pand.d.128, rrkz: register operands, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rrkz_128
+ ;CHECK: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; pand.d.128, rm: second operand is loaded from %ptr_b, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_and_epi32_rm_128
+ ;CHECK: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; pand.d.128, rmk: second operand is loaded from %ptr_b, with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rmk_128
+ ;CHECK: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm1
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; pand.d.128, rmkz: second operand is loaded from %ptr_b, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rmkz_128
+ ;CHECK: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { ; pand.d.128, rmb: second operand is one i32 from %ptr_b splatted to all lanes, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_and_epi32_rmb_128
+ ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; pand.d.128, rmbk: splatted i32 operand with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rmbk_128
+ ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm1
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; pand.d.128, rmbkz: splatted i32 operand, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rmbkz_128
+ ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; pand.d.256, rr: both operands are register arguments, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_and_epi32_rr_256
+ ;CHECK: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { ; pand.d.256, rrk: register operands with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rrk_256
+ ;CHECK: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm2
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; pand.d.256, rrkz: register operands, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rrkz_256
+ ;CHECK: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; pand.d.256, rm: second operand is loaded from %ptr_b, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_and_epi32_rm_256
+ ;CHECK: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; pand.d.256, rmk: second operand is loaded from %ptr_b, with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rmk_256
+ ;CHECK: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm1
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { ; pand.d.256, rmkz: second operand is loaded from %ptr_b, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rmkz_256
+ ;CHECK: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { ; pand.d.256, rmb: second operand is one i32 from %ptr_b splatted to all lanes, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_and_epi32_rmb_256
+ ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; pand.d.256, rmbk: splatted i32 operand with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rmbk_256
+ ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm1
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { ; pand.d.256, rmbkz: splatted i32 operand, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_and_epi32_rmbkz_256
+ ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; por.d.128, rr: both operands are register arguments, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_or_epi32_rr_128
+ ;CHECK: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { ; por.d.128, rrk: register operands with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rrk_128
+ ;CHECK: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm2
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; por.d.128, rrkz: register operands, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rrkz_128
+ ;CHECK: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; por.d.128, rm: second operand is loaded from %ptr_b, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_or_epi32_rm_128
+ ;CHECK: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; por.d.128, rmk: second operand is loaded from %ptr_b, with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rmk_128
+ ;CHECK: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm1
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; por.d.128, rmkz: second operand is loaded from %ptr_b, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rmkz_128
+ ;CHECK: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { ; por.d.128, rmb: second operand is one i32 from %ptr_b splatted to all lanes, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_or_epi32_rmb_128
+ ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; por.d.128, rmbk: splatted i32 operand with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rmbk_128
+ ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm1
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; por.d.128, rmbkz: splatted i32 operand, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rmbkz_128
+ ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; por.d.256, rr: both operands are register arguments, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_or_epi32_rr_256
+ ;CHECK: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { ; por.d.256, rrk: register operands with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rrk_256
+ ;CHECK: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm2
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; por.d.256, rrkz: register operands, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rrkz_256
+ ;CHECK: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; por.d.256, rm: second operand is loaded from %ptr_b, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_or_epi32_rm_256
+ ;CHECK: vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; por.d.256, rmk: second operand is loaded from %ptr_b, with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rmk_256
+ ;CHECK: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm1
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { ; por.d.256, rmkz: second operand is loaded from %ptr_b, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rmkz_256
+ ;CHECK: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { ; por.d.256, rmb: second operand is one i32 from %ptr_b splatted to all lanes, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_or_epi32_rmb_256
+ ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; por.d.256, rmbk: splatted i32 operand with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rmbk_256
+ ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm1
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { ; por.d.256, rmbkz: splatted i32 operand, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_or_epi32_rmbkz_256
+ ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; pxor.d.128, rr: both operands are register arguments, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_xor_epi32_rr_128
+ ;CHECK: vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { ; pxor.d.128, rrk: register operands with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rrk_128
+ ;CHECK: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm2
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; pxor.d.128, rrkz: register operands, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rrkz_128
+ ;CHECK: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; pxor.d.128, rm: second operand is loaded from %ptr_b, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_xor_epi32_rm_128
+ ;CHECK: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; pxor.d.128, rmk: second operand is loaded from %ptr_b, with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rmk_128
+ ;CHECK: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm1
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; pxor.d.128, rmkz: second operand is loaded from %ptr_b, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rmkz_128
+ ;CHECK: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { ; pxor.d.128, rmb: second operand is one i32 from %ptr_b splatted to all lanes, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_xor_epi32_rmb_128
+ ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { ; pxor.d.128, rmbk: splatted i32 operand with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rmbk_128
+ ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %xmm1
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; pxor.d.128, rmbkz: splatted i32 operand, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128
+ ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to4}
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; pxor.d.256, rr: both operands are register arguments, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_xor_epi32_rr_256
+ ;CHECK: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { ; pxor.d.256, rrk: register operands with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rrk_256
+ ;CHECK: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm2
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; pxor.d.256, rrkz: register operands, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rrkz_256
+ ;CHECK: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; pxor.d.256, rm: second operand is loaded from %ptr_b, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_xor_epi32_rm_256
+ ;CHECK: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; pxor.d.256, rmk: second operand is loaded from %ptr_b, with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rmk_256
+ ;CHECK: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm1
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { ; pxor.d.256, rmkz: second operand is loaded from %ptr_b, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rmkz_256
+ ;CHECK: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b ; full-vector load; expected output uses (%rdi) as the operand
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { ; pxor.d.256, rmb: second operand is one i32 from %ptr_b splatted to all lanes, mask is the constant i8 -1
+ ;CHECK-LABEL: test_mask_xor_epi32_rmb_256
+ ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { ; pxor.d.256, rmbk: splatted i32 operand with caller-supplied %passThru and %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rmbk_256
+ ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ; expected output is the {%k1} form writing %ymm1
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { ; pxor.d.256, rmbkz: splatted i32 operand, zeroinitializer third operand and runtime %mask
+ ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_256
+ ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 ; scalar goes into lane 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0; expected operand is (%rdi){1to8}
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) ; expected output is the {%k1} {z} form
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
\ No newline at end of file
// CHECK: kmovb %k3, %r13d
// CHECK: encoding: [0xc5,0x79,0x93,0xeb]
 kmovb %k3, %r13d // encoding vector: mask register %k3 to general register %r13d, four bytes expected
+
+// CHECK: vandpd %zmm27, %zmm28, %zmm19
+// CHECK: encoding: [0x62,0x81,0x9d,0x40,0x54,0xdb]
+ vandpd %zmm27, %zmm28, %zmm19 // vandpd zmm encoding vectors start here: all-register operands, no mask
+
+// CHECK: vandpd %zmm27, %zmm28, %zmm19 {%k5}
+// CHECK: encoding: [0x62,0x81,0x9d,0x45,0x54,0xdb]
+ vandpd %zmm27, %zmm28, %zmm19 {%k5} // write mask %k5: fourth encoding byte 0x40 -> 0x45
+
+// CHECK: vandpd %zmm27, %zmm28, %zmm19 {%k5} {z}
+// CHECK: encoding: [0x62,0x81,0x9d,0xc5,0x54,0xdb]
+ vandpd %zmm27, %zmm28, %zmm19 {%k5} {z} // {z} added: fourth encoding byte 0x45 -> 0xc5
+
+// CHECK: vandpd (%rcx), %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x19]
+ vandpd (%rcx), %zmm28, %zmm19 // plain base-register memory operand, no displacement bytes
+
+// CHECK: vandpd 291(%rax,%r14,8), %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x54,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vandpd 291(%rax,%r14,8), %zmm28, %zmm19 // base + index*8 + 291: displacement emitted as the four bytes 0x23,0x01,0x00,0x00
+
+// CHECK: vandpd (%rcx){1to8}, %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x19]
+ vandpd (%rcx){1to8}, %zmm28, %zmm19 // {1to8} memory operand: fourth encoding byte is 0x50 instead of 0x40
+
+// CHECK: vandpd 8128(%rdx), %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x5a,0x7f]
+ vandpd 8128(%rdx), %zmm28, %zmm19 // 8128 = 127 * 64: displacement fits the single byte 0x7f
+
+// CHECK: vandpd 8192(%rdx), %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x9a,0x00,0x20,0x00,0x00]
+ vandpd 8192(%rdx), %zmm28, %zmm19 // 8192 = 128 * 64: one step past the single-byte range, four displacement bytes (0x00002000)
+
+// CHECK: vandpd -8192(%rdx), %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x5a,0x80]
+ vandpd -8192(%rdx), %zmm28, %zmm19 // -8192 = -128 * 64: displacement fits the single byte 0x80
+
+// CHECK: vandpd -8256(%rdx), %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x9a,0xc0,0xdf,0xff,0xff]
+ vandpd -8256(%rdx), %zmm28, %zmm19 // -8256 = -129 * 64: four displacement bytes (0xffffdfc0)
+
+// CHECK: vandpd 1016(%rdx){1to8}, %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x5a,0x7f]
+ vandpd 1016(%rdx){1to8}, %zmm28, %zmm19 // {1to8}: 1016 = 127 * 8, single displacement byte 0x7f
+
+// CHECK: vandpd 1024(%rdx){1to8}, %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x9a,0x00,0x04,0x00,0x00]
+ vandpd 1024(%rdx){1to8}, %zmm28, %zmm19 // {1to8}: 1024 = 128 * 8, four displacement bytes (0x00000400)
+
+// CHECK: vandpd -1024(%rdx){1to8}, %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x5a,0x80]
+ vandpd -1024(%rdx){1to8}, %zmm28, %zmm19 // {1to8}: -1024 = -128 * 8, single displacement byte 0x80
+
+// CHECK: vandpd -1032(%rdx){1to8}, %zmm28, %zmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x9a,0xf8,0xfb,0xff,0xff]
+ vandpd -1032(%rdx){1to8}, %zmm28, %zmm19 // {1to8}: -1032 = -129 * 8, four displacement bytes (0xfffffbf8)
+
+// CHECK: vandps %zmm25, %zmm22, %zmm17
+// CHECK: encoding: [0x62,0x81,0x4c,0x40,0x54,0xc9]
+ vandps %zmm25, %zmm22, %zmm17 // vandps zmm encoding vectors start here: all-register operands, no mask
+
+// CHECK: vandps %zmm25, %zmm22, %zmm17 {%k4}
+// CHECK: encoding: [0x62,0x81,0x4c,0x44,0x54,0xc9]
+ vandps %zmm25, %zmm22, %zmm17 {%k4} // write mask %k4: fourth encoding byte 0x40 -> 0x44
+
+// CHECK: vandps %zmm25, %zmm22, %zmm17 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0x4c,0xc4,0x54,0xc9]
+ vandps %zmm25, %zmm22, %zmm17 {%k4} {z} // {z} added: fourth encoding byte 0x44 -> 0xc4
+
+// CHECK: vandps (%rcx), %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x09]
+ vandps (%rcx), %zmm22, %zmm17 // plain base-register memory operand, no displacement bytes
+
+// CHECK: vandps 291(%rax,%r14,8), %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xa1,0x4c,0x40,0x54,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vandps 291(%rax,%r14,8), %zmm22, %zmm17 // base + index*8 + 291: displacement emitted as the four bytes 0x23,0x01,0x00,0x00
+
+// CHECK: vandps (%rcx){1to16}, %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x09]
+ vandps (%rcx){1to16}, %zmm22, %zmm17 // {1to16} memory operand: fourth encoding byte is 0x50 instead of 0x40
+
+// CHECK: vandps 8128(%rdx), %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x4a,0x7f]
+ vandps 8128(%rdx), %zmm22, %zmm17 // 8128 = 127 * 64: displacement fits the single byte 0x7f
+
+// CHECK: vandps 8192(%rdx), %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x8a,0x00,0x20,0x00,0x00]
+ vandps 8192(%rdx), %zmm22, %zmm17 // 8192 = 128 * 64: one step past the single-byte range, four displacement bytes (0x00002000)
+
+// CHECK: vandps -8192(%rdx), %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x4a,0x80]
+ vandps -8192(%rdx), %zmm22, %zmm17 // -8192 = -128 * 64: displacement fits the single byte 0x80
+
+// CHECK: vandps -8256(%rdx), %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x8a,0xc0,0xdf,0xff,0xff]
+ vandps -8256(%rdx), %zmm22, %zmm17 // -8256 = -129 * 64: four displacement bytes (0xffffdfc0)
+
+// CHECK: vandps 508(%rdx){1to16}, %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x4a,0x7f]
+ vandps 508(%rdx){1to16}, %zmm22, %zmm17 // {1to16}: 508 = 127 * 4, single displacement byte 0x7f
+
+// CHECK: vandps 512(%rdx){1to16}, %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x8a,0x00,0x02,0x00,0x00]
+ vandps 512(%rdx){1to16}, %zmm22, %zmm17 // {1to16}: 512 = 128 * 4, four displacement bytes (0x00000200)
+
+// CHECK: vandps -512(%rdx){1to16}, %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x4a,0x80]
+ vandps -512(%rdx){1to16}, %zmm22, %zmm17 // {1to16}: -512 = -128 * 4, single displacement byte 0x80
+
+// CHECK: vandps -516(%rdx){1to16}, %zmm22, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x8a,0xfc,0xfd,0xff,0xff]
+ vandps -516(%rdx){1to16}, %zmm22, %zmm17 // {1to16}: -516 = -129 * 4, four displacement bytes (0xfffffdfc)
+
+// CHECK: vandnpd %zmm22, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xa1,0xf5,0x40,0x55,0xe6]
+ vandnpd %zmm22, %zmm17, %zmm20 // vandnpd zmm encoding vectors start here: all-register operands, no mask
+
+// CHECK: vandnpd %zmm22, %zmm17, %zmm20 {%k1}
+// CHECK: encoding: [0x62,0xa1,0xf5,0x41,0x55,0xe6]
+ vandnpd %zmm22, %zmm17, %zmm20 {%k1} // write mask %k1: fourth encoding byte 0x40 -> 0x41
+
+// CHECK: vandnpd %zmm22, %zmm17, %zmm20 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0xf5,0xc1,0x55,0xe6]
+ vandnpd %zmm22, %zmm17, %zmm20 {%k1} {z} // {z} added: fourth encoding byte 0x41 -> 0xc1
+
+// CHECK: vandnpd (%rcx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0x21]
+ vandnpd (%rcx), %zmm17, %zmm20 // plain base-register memory operand, no displacement bytes
+
+// CHECK: vandnpd 291(%rax,%r14,8), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xa1,0xf5,0x40,0x55,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vandnpd 291(%rax,%r14,8), %zmm17, %zmm20 // base + index*8 + 291: displacement emitted as the four bytes 0x23,0x01,0x00,0x00
+
+// CHECK: vandnpd (%rcx){1to8}, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x21]
+ vandnpd (%rcx){1to8}, %zmm17, %zmm20 // {1to8} memory operand: fourth encoding byte is 0x50 instead of 0x40
+
+// CHECK: vandnpd 8128(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0x62,0x7f]
+ vandnpd 8128(%rdx), %zmm17, %zmm20 // 8128 = 127 * 64: displacement fits the single byte 0x7f
+
+// CHECK: vandnpd 8192(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0xa2,0x00,0x20,0x00,0x00]
+ vandnpd 8192(%rdx), %zmm17, %zmm20 // 8192 = 128 * 64: one step past the single-byte range, four displacement bytes (0x00002000)
+
+// CHECK: vandnpd -8192(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0x62,0x80]
+ vandnpd -8192(%rdx), %zmm17, %zmm20 // -8192 = -128 * 64: displacement fits the single byte 0x80
+
+// CHECK: vandnpd -8256(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0xa2,0xc0,0xdf,0xff,0xff]
+ vandnpd -8256(%rdx), %zmm17, %zmm20 // -8256 = -129 * 64: four displacement bytes (0xffffdfc0)
+
+// CHECK: vandnpd 1016(%rdx){1to8}, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x62,0x7f]
+ vandnpd 1016(%rdx){1to8}, %zmm17, %zmm20 // {1to8}: 1016 = 127 * 8, single displacement byte 0x7f
+
+// CHECK: vandnpd 1024(%rdx){1to8}, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0xa2,0x00,0x04,0x00,0x00]
+ vandnpd 1024(%rdx){1to8}, %zmm17, %zmm20 // {1to8}: 1024 = 128 * 8, four displacement bytes (0x00000400)
+
+// CHECK: vandnpd -1024(%rdx){1to8}, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x62,0x80]
+ vandnpd -1024(%rdx){1to8}, %zmm17, %zmm20 // {1to8}: -1024 = -128 * 8, single displacement byte 0x80
+
+// CHECK: vandnpd -1032(%rdx){1to8}, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0xa2,0xf8,0xfb,0xff,0xff]
+ vandnpd -1032(%rdx){1to8}, %zmm17, %zmm20 // {1to8}: -1032 = -129 * 8, four displacement bytes (0xfffffbf8)
+
+// CHECK: vandnps %zmm19, %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xa1,0x74,0x40,0x55,0xf3]
+ vandnps %zmm19, %zmm17, %zmm22 // vandnps zmm encoding vectors start here: all-register operands, no mask
+
+// CHECK: vandnps %zmm19, %zmm17, %zmm22 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x74,0x42,0x55,0xf3]
+ vandnps %zmm19, %zmm17, %zmm22 {%k2} // write mask %k2: fourth encoding byte 0x40 -> 0x42
+
+// CHECK: vandnps %zmm19, %zmm17, %zmm22 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x74,0xc2,0x55,0xf3]
+ vandnps %zmm19, %zmm17, %zmm22 {%k2} {z} // {z} added: fourth encoding byte 0x42 -> 0xc2
+
+// CHECK: vandnps (%rcx), %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x31]
+ vandnps (%rcx), %zmm17, %zmm22 // plain base-register memory operand, no displacement bytes
+
+// CHECK: vandnps 291(%rax,%r14,8), %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xa1,0x74,0x40,0x55,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vandnps 291(%rax,%r14,8), %zmm17, %zmm22 // base + index*8 + 291: displacement emitted as the four bytes 0x23,0x01,0x00,0x00
+
+// CHECK: vandnps (%rcx){1to16}, %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x31]
+ vandnps (%rcx){1to16}, %zmm17, %zmm22 // {1to16} memory operand: fourth encoding byte is 0x50 instead of 0x40
+
+// CHECK: vandnps 8128(%rdx), %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x72,0x7f]
+ vandnps 8128(%rdx), %zmm17, %zmm22 // 8128 = 127 * 64: displacement fits the single byte 0x7f
+
+// CHECK: vandnps 8192(%rdx), %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0xb2,0x00,0x20,0x00,0x00]
+ vandnps 8192(%rdx), %zmm17, %zmm22 // 8192 = 128 * 64: one step past the single-byte range, four displacement bytes (0x00002000)
+
+// CHECK: vandnps -8192(%rdx), %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x72,0x80]
+ vandnps -8192(%rdx), %zmm17, %zmm22 // -8192 = -128 * 64: displacement fits the single byte 0x80
+
+// CHECK: vandnps -8256(%rdx), %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0xb2,0xc0,0xdf,0xff,0xff]
+ vandnps -8256(%rdx), %zmm17, %zmm22 // -8256 = -129 * 64: four displacement bytes (0xffffdfc0)
+
+// CHECK: vandnps 508(%rdx){1to16}, %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x72,0x7f]
+ vandnps 508(%rdx){1to16}, %zmm17, %zmm22 // {1to16}: 508 = 127 * 4, single displacement byte 0x7f
+
+// CHECK: vandnps 512(%rdx){1to16}, %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0xb2,0x00,0x02,0x00,0x00]
+ vandnps 512(%rdx){1to16}, %zmm17, %zmm22 // {1to16}: 512 = 128 * 4, four displacement bytes (0x00000200)
+
+// CHECK: vandnps -512(%rdx){1to16}, %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x72,0x80]
+ vandnps -512(%rdx){1to16}, %zmm17, %zmm22 // {1to16}: -512 = -128 * 4, single displacement byte 0x80
+
+// CHECK: vandnps -516(%rdx){1to16}, %zmm17, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0xb2,0xfc,0xfd,0xff,0xff]
+ vandnps -516(%rdx){1to16}, %zmm17, %zmm22 // {1to16}: -516 = -129 * 4, four displacement bytes (0xfffffdfc)
+
+// CHECK: vorpd %zmm21, %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x56,0xf5]
+ vorpd %zmm21, %zmm22, %zmm30
+
+// CHECK: vorpd %zmm21, %zmm22, %zmm30 {%k6}
+// CHECK: encoding: [0x62,0x21,0xcd,0x46,0x56,0xf5]
+ vorpd %zmm21, %zmm22, %zmm30 {%k6}
+
+// CHECK: vorpd %zmm21, %zmm22, %zmm30 {%k6} {z}
+// CHECK: encoding: [0x62,0x21,0xcd,0xc6,0x56,0xf5]
+ vorpd %zmm21, %zmm22, %zmm30 {%k6} {z}
+
+// CHECK: vorpd (%rcx), %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x31]
+ vorpd (%rcx), %zmm22, %zmm30
+
+// CHECK: vorpd 291(%rax,%r14,8), %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x56,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vorpd 291(%rax,%r14,8), %zmm22, %zmm30
+
+// CHECK: vorpd (%rcx){1to8}, %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x31]
+ vorpd (%rcx){1to8}, %zmm22, %zmm30
+
+// CHECK: vorpd 8128(%rdx), %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x72,0x7f]
+ vorpd 8128(%rdx), %zmm22, %zmm30
+
+// CHECK: vorpd 8192(%rdx), %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0xb2,0x00,0x20,0x00,0x00]
+ vorpd 8192(%rdx), %zmm22, %zmm30
+
+// CHECK: vorpd -8192(%rdx), %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x72,0x80]
+ vorpd -8192(%rdx), %zmm22, %zmm30
+
+// CHECK: vorpd -8256(%rdx), %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0xb2,0xc0,0xdf,0xff,0xff]
+ vorpd -8256(%rdx), %zmm22, %zmm30
+
+// CHECK: vorpd 1016(%rdx){1to8}, %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x72,0x7f]
+ vorpd 1016(%rdx){1to8}, %zmm22, %zmm30
+
+// CHECK: vorpd 1024(%rdx){1to8}, %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0xb2,0x00,0x04,0x00,0x00]
+ vorpd 1024(%rdx){1to8}, %zmm22, %zmm30
+
+// CHECK: vorpd -1024(%rdx){1to8}, %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x72,0x80]
+ vorpd -1024(%rdx){1to8}, %zmm22, %zmm30
+
+// CHECK: vorpd -1032(%rdx){1to8}, %zmm22, %zmm30
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0xb2,0xf8,0xfb,0xff,0xff]
+ vorpd -1032(%rdx){1to8}, %zmm22, %zmm30
+
+// CHECK: vorps %zmm26, %zmm21, %zmm22
+// CHECK: encoding: [0x62,0x81,0x54,0x40,0x56,0xf2]
+ vorps %zmm26, %zmm21, %zmm22
+
+// CHECK: vorps %zmm26, %zmm21, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0x54,0x47,0x56,0xf2]
+ vorps %zmm26, %zmm21, %zmm22 {%k7}
+
+// CHECK: vorps %zmm26, %zmm21, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0x54,0xc7,0x56,0xf2]
+ vorps %zmm26, %zmm21, %zmm22 {%k7} {z}
+
+// CHECK: vorps (%rcx), %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x31]
+ vorps (%rcx), %zmm21, %zmm22
+
+// CHECK: vorps 291(%rax,%r14,8), %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x56,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vorps 291(%rax,%r14,8), %zmm21, %zmm22
+
+// CHECK: vorps (%rcx){1to16}, %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x31]
+ vorps (%rcx){1to16}, %zmm21, %zmm22
+
+// CHECK: vorps 8128(%rdx), %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x72,0x7f]
+ vorps 8128(%rdx), %zmm21, %zmm22
+
+// CHECK: vorps 8192(%rdx), %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0xb2,0x00,0x20,0x00,0x00]
+ vorps 8192(%rdx), %zmm21, %zmm22
+
+// CHECK: vorps -8192(%rdx), %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x72,0x80]
+ vorps -8192(%rdx), %zmm21, %zmm22
+
+// CHECK: vorps -8256(%rdx), %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0xb2,0xc0,0xdf,0xff,0xff]
+ vorps -8256(%rdx), %zmm21, %zmm22
+
+// CHECK: vorps 508(%rdx){1to16}, %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x72,0x7f]
+ vorps 508(%rdx){1to16}, %zmm21, %zmm22
+
+// CHECK: vorps 512(%rdx){1to16}, %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0xb2,0x00,0x02,0x00,0x00]
+ vorps 512(%rdx){1to16}, %zmm21, %zmm22
+
+// CHECK: vorps -512(%rdx){1to16}, %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x72,0x80]
+ vorps -512(%rdx){1to16}, %zmm21, %zmm22
+
+// CHECK: vorps -516(%rdx){1to16}, %zmm21, %zmm22
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0xb2,0xfc,0xfd,0xff,0xff]
+ vorps -516(%rdx){1to16}, %zmm21, %zmm22
+
+// CHECK: vxorpd %zmm24, %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x01,0xbd,0x40,0x57,0xd8]
+ vxorpd %zmm24, %zmm24, %zmm27
+
+// CHECK: vxorpd %zmm24, %zmm24, %zmm27 {%k5}
+// CHECK: encoding: [0x62,0x01,0xbd,0x45,0x57,0xd8]
+ vxorpd %zmm24, %zmm24, %zmm27 {%k5}
+
+// CHECK: vxorpd %zmm24, %zmm24, %zmm27 {%k5} {z}
+// CHECK: encoding: [0x62,0x01,0xbd,0xc5,0x57,0xd8]
+ vxorpd %zmm24, %zmm24, %zmm27 {%k5} {z}
+
+// CHECK: vxorpd (%rcx), %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x19]
+ vxorpd (%rcx), %zmm24, %zmm27
+
+// CHECK: vxorpd 291(%rax,%r14,8), %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x21,0xbd,0x40,0x57,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vxorpd 291(%rax,%r14,8), %zmm24, %zmm27
+
+// CHECK: vxorpd (%rcx){1to8}, %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x19]
+ vxorpd (%rcx){1to8}, %zmm24, %zmm27
+
+// CHECK: vxorpd 8128(%rdx), %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x5a,0x7f]
+ vxorpd 8128(%rdx), %zmm24, %zmm27
+
+// CHECK: vxorpd 8192(%rdx), %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x9a,0x00,0x20,0x00,0x00]
+ vxorpd 8192(%rdx), %zmm24, %zmm27
+
+// CHECK: vxorpd -8192(%rdx), %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x5a,0x80]
+ vxorpd -8192(%rdx), %zmm24, %zmm27
+
+// CHECK: vxorpd -8256(%rdx), %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x9a,0xc0,0xdf,0xff,0xff]
+ vxorpd -8256(%rdx), %zmm24, %zmm27
+
+// CHECK: vxorpd 1016(%rdx){1to8}, %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x5a,0x7f]
+ vxorpd 1016(%rdx){1to8}, %zmm24, %zmm27
+
+// CHECK: vxorpd 1024(%rdx){1to8}, %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x9a,0x00,0x04,0x00,0x00]
+ vxorpd 1024(%rdx){1to8}, %zmm24, %zmm27
+
+// CHECK: vxorpd -1024(%rdx){1to8}, %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x5a,0x80]
+ vxorpd -1024(%rdx){1to8}, %zmm24, %zmm27
+
+// CHECK: vxorpd -1032(%rdx){1to8}, %zmm24, %zmm27
+// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x9a,0xf8,0xfb,0xff,0xff]
+ vxorpd -1032(%rdx){1to8}, %zmm24, %zmm27
+
+// CHECK: vxorps %zmm19, %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xa1,0x6c,0x40,0x57,0xd3]
+ vxorps %zmm19, %zmm18, %zmm18
+
+// CHECK: vxorps %zmm19, %zmm18, %zmm18 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x6c,0x42,0x57,0xd3]
+ vxorps %zmm19, %zmm18, %zmm18 {%k2}
+
+// CHECK: vxorps %zmm19, %zmm18, %zmm18 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x6c,0xc2,0x57,0xd3]
+ vxorps %zmm19, %zmm18, %zmm18 {%k2} {z}
+
+// CHECK: vxorps (%rcx), %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x11]
+ vxorps (%rcx), %zmm18, %zmm18
+
+// CHECK: vxorps 291(%rax,%r14,8), %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xa1,0x6c,0x40,0x57,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vxorps 291(%rax,%r14,8), %zmm18, %zmm18
+
+// CHECK: vxorps (%rcx){1to16}, %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x11]
+ vxorps (%rcx){1to16}, %zmm18, %zmm18
+
+// CHECK: vxorps 8128(%rdx), %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x52,0x7f]
+ vxorps 8128(%rdx), %zmm18, %zmm18
+
+// CHECK: vxorps 8192(%rdx), %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x92,0x00,0x20,0x00,0x00]
+ vxorps 8192(%rdx), %zmm18, %zmm18
+
+// CHECK: vxorps -8192(%rdx), %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x52,0x80]
+ vxorps -8192(%rdx), %zmm18, %zmm18
+
+// CHECK: vxorps -8256(%rdx), %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x92,0xc0,0xdf,0xff,0xff]
+ vxorps -8256(%rdx), %zmm18, %zmm18
+
+// CHECK: vxorps 508(%rdx){1to16}, %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x52,0x7f]
+ vxorps 508(%rdx){1to16}, %zmm18, %zmm18
+
+// CHECK: vxorps 512(%rdx){1to16}, %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x92,0x00,0x02,0x00,0x00]
+ vxorps 512(%rdx){1to16}, %zmm18, %zmm18
+
+// CHECK: vxorps -512(%rdx){1to16}, %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x52,0x80]
+ vxorps -512(%rdx){1to16}, %zmm18, %zmm18
+
+// CHECK: vxorps -516(%rdx){1to16}, %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x92,0xfc,0xfd,0xff,0xff]
+ vxorps -516(%rdx){1to16}, %zmm18, %zmm18
+// CHECK: vandpd %zmm22, %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x54,0xc6]
+ vandpd %zmm22, %zmm22, %zmm24
+
+// CHECK: vandpd %zmm22, %zmm22, %zmm24 {%k4}
+// CHECK: encoding: [0x62,0x21,0xcd,0x44,0x54,0xc6]
+ vandpd %zmm22, %zmm22, %zmm24 {%k4}
+
+// CHECK: vandpd %zmm22, %zmm22, %zmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0xcd,0xc4,0x54,0xc6]
+ vandpd %zmm22, %zmm22, %zmm24 {%k4} {z}
+
+// CHECK: vandpd (%rcx), %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x01]
+ vandpd (%rcx), %zmm22, %zmm24
+
+// CHECK: vandpd 4660(%rax,%r14,8), %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x54,0x84,0xf0,0x34,0x12,0x00,0x00]
+ vandpd 4660(%rax,%r14,8), %zmm22, %zmm24
+
+// CHECK: vandpd (%rcx){1to8}, %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x01]
+ vandpd (%rcx){1to8}, %zmm22, %zmm24
+
+// CHECK: vandpd 8128(%rdx), %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x42,0x7f]
+ vandpd 8128(%rdx), %zmm22, %zmm24
+
+// CHECK: vandpd 8192(%rdx), %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x82,0x00,0x20,0x00,0x00]
+ vandpd 8192(%rdx), %zmm22, %zmm24
+
+// CHECK: vandpd -8192(%rdx), %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x42,0x80]
+ vandpd -8192(%rdx), %zmm22, %zmm24
+
+// CHECK: vandpd -8256(%rdx), %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x82,0xc0,0xdf,0xff,0xff]
+ vandpd -8256(%rdx), %zmm22, %zmm24
+
+// CHECK: vandpd 1016(%rdx){1to8}, %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x42,0x7f]
+ vandpd 1016(%rdx){1to8}, %zmm22, %zmm24
+
+// CHECK: vandpd 1024(%rdx){1to8}, %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x82,0x00,0x04,0x00,0x00]
+ vandpd 1024(%rdx){1to8}, %zmm22, %zmm24
+
+// CHECK: vandpd -1024(%rdx){1to8}, %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x42,0x80]
+ vandpd -1024(%rdx){1to8}, %zmm22, %zmm24
+
+// CHECK: vandpd -1032(%rdx){1to8}, %zmm22, %zmm24
+// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x82,0xf8,0xfb,0xff,0xff]
+ vandpd -1032(%rdx){1to8}, %zmm22, %zmm24
+
+// CHECK: vandps %zmm23, %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x21,0x44,0x40,0x54,0xf7]
+ vandps %zmm23, %zmm23, %zmm30
+
+// CHECK: vandps %zmm23, %zmm23, %zmm30 {%k5}
+// CHECK: encoding: [0x62,0x21,0x44,0x45,0x54,0xf7]
+ vandps %zmm23, %zmm23, %zmm30 {%k5}
+
+// CHECK: vandps %zmm23, %zmm23, %zmm30 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0x44,0xc5,0x54,0xf7]
+ vandps %zmm23, %zmm23, %zmm30 {%k5} {z}
+
+// CHECK: vandps (%rcx), %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x31]
+ vandps (%rcx), %zmm23, %zmm30
+
+// CHECK: vandps 4660(%rax,%r14,8), %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x21,0x44,0x40,0x54,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vandps 4660(%rax,%r14,8), %zmm23, %zmm30
+
+// CHECK: vandps (%rcx){1to16}, %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x31]
+ vandps (%rcx){1to16}, %zmm23, %zmm30
+
+// CHECK: vandps 8128(%rdx), %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x72,0x7f]
+ vandps 8128(%rdx), %zmm23, %zmm30
+
+// CHECK: vandps 8192(%rdx), %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0xb2,0x00,0x20,0x00,0x00]
+ vandps 8192(%rdx), %zmm23, %zmm30
+
+// CHECK: vandps -8192(%rdx), %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x72,0x80]
+ vandps -8192(%rdx), %zmm23, %zmm30
+
+// CHECK: vandps -8256(%rdx), %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0xb2,0xc0,0xdf,0xff,0xff]
+ vandps -8256(%rdx), %zmm23, %zmm30
+
+// CHECK: vandps 508(%rdx){1to16}, %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x72,0x7f]
+ vandps 508(%rdx){1to16}, %zmm23, %zmm30
+
+// CHECK: vandps 512(%rdx){1to16}, %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0xb2,0x00,0x02,0x00,0x00]
+ vandps 512(%rdx){1to16}, %zmm23, %zmm30
+
+// CHECK: vandps -512(%rdx){1to16}, %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x72,0x80]
+ vandps -512(%rdx){1to16}, %zmm23, %zmm30
+
+// CHECK: vandps -516(%rdx){1to16}, %zmm23, %zmm30
+// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0xb2,0xfc,0xfd,0xff,0xff]
+ vandps -516(%rdx){1to16}, %zmm23, %zmm30
+
+// CHECK: vandnpd %zmm21, %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x21,0xd5,0x40,0x55,0xcd]
+ vandnpd %zmm21, %zmm21, %zmm25
+
+// CHECK: vandnpd %zmm21, %zmm21, %zmm25 {%k2}
+// CHECK: encoding: [0x62,0x21,0xd5,0x42,0x55,0xcd]
+ vandnpd %zmm21, %zmm21, %zmm25 {%k2}
+
+// CHECK: vandnpd %zmm21, %zmm21, %zmm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0xd5,0xc2,0x55,0xcd]
+ vandnpd %zmm21, %zmm21, %zmm25 {%k2} {z}
+
+// CHECK: vandnpd (%rcx), %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x09]
+ vandnpd (%rcx), %zmm21, %zmm25
+
+// CHECK: vandnpd 4660(%rax,%r14,8), %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x21,0xd5,0x40,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vandnpd 4660(%rax,%r14,8), %zmm21, %zmm25
+
+// CHECK: vandnpd (%rcx){1to8}, %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x09]
+ vandnpd (%rcx){1to8}, %zmm21, %zmm25
+
+// CHECK: vandnpd 8128(%rdx), %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x4a,0x7f]
+ vandnpd 8128(%rdx), %zmm21, %zmm25
+
+// CHECK: vandnpd 8192(%rdx), %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x8a,0x00,0x20,0x00,0x00]
+ vandnpd 8192(%rdx), %zmm21, %zmm25
+
+// CHECK: vandnpd -8192(%rdx), %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x4a,0x80]
+ vandnpd -8192(%rdx), %zmm21, %zmm25
+
+// CHECK: vandnpd -8256(%rdx), %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x8a,0xc0,0xdf,0xff,0xff]
+ vandnpd -8256(%rdx), %zmm21, %zmm25
+
+// CHECK: vandnpd 1016(%rdx){1to8}, %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x4a,0x7f]
+ vandnpd 1016(%rdx){1to8}, %zmm21, %zmm25
+
+// CHECK: vandnpd 1024(%rdx){1to8}, %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x8a,0x00,0x04,0x00,0x00]
+ vandnpd 1024(%rdx){1to8}, %zmm21, %zmm25
+
+// CHECK: vandnpd -1024(%rdx){1to8}, %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x4a,0x80]
+ vandnpd -1024(%rdx){1to8}, %zmm21, %zmm25
+
+// CHECK: vandnpd -1032(%rdx){1to8}, %zmm21, %zmm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x8a,0xf8,0xfb,0xff,0xff]
+ vandnpd -1032(%rdx){1to8}, %zmm21, %zmm25
+
+// CHECK: vandnps %zmm18, %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x55,0xca]
+ vandnps %zmm18, %zmm21, %zmm17
+
+// CHECK: vandnps %zmm18, %zmm21, %zmm17 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x54,0x41,0x55,0xca]
+ vandnps %zmm18, %zmm21, %zmm17 {%k1}
+
+// CHECK: vandnps %zmm18, %zmm21, %zmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x54,0xc1,0x55,0xca]
+ vandnps %zmm18, %zmm21, %zmm17 {%k1} {z}
+
+// CHECK: vandnps (%rcx), %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x09]
+ vandnps (%rcx), %zmm21, %zmm17
+
+// CHECK: vandnps 4660(%rax,%r14,8), %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vandnps 4660(%rax,%r14,8), %zmm21, %zmm17
+
+// CHECK: vandnps (%rcx){1to16}, %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x09]
+ vandnps (%rcx){1to16}, %zmm21, %zmm17
+
+// CHECK: vandnps 8128(%rdx), %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x4a,0x7f]
+ vandnps 8128(%rdx), %zmm21, %zmm17
+
+// CHECK: vandnps 8192(%rdx), %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x8a,0x00,0x20,0x00,0x00]
+ vandnps 8192(%rdx), %zmm21, %zmm17
+
+// CHECK: vandnps -8192(%rdx), %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x4a,0x80]
+ vandnps -8192(%rdx), %zmm21, %zmm17
+
+// CHECK: vandnps -8256(%rdx), %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x8a,0xc0,0xdf,0xff,0xff]
+ vandnps -8256(%rdx), %zmm21, %zmm17
+
+// CHECK: vandnps 508(%rdx){1to16}, %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x4a,0x7f]
+ vandnps 508(%rdx){1to16}, %zmm21, %zmm17
+
+// CHECK: vandnps 512(%rdx){1to16}, %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x8a,0x00,0x02,0x00,0x00]
+ vandnps 512(%rdx){1to16}, %zmm21, %zmm17
+
+// CHECK: vandnps -512(%rdx){1to16}, %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x4a,0x80]
+ vandnps -512(%rdx){1to16}, %zmm21, %zmm17
+
+// CHECK: vandnps -516(%rdx){1to16}, %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x8a,0xfc,0xfd,0xff,0xff]
+ vandnps -516(%rdx){1to16}, %zmm21, %zmm17
+
+// CHECK: vorpd %zmm24, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0x81,0x9d,0x40,0x56,0xd0]
+ vorpd %zmm24, %zmm28, %zmm18
+
+// CHECK: vorpd %zmm24, %zmm28, %zmm18 {%k1}
+// CHECK: encoding: [0x62,0x81,0x9d,0x41,0x56,0xd0]
+ vorpd %zmm24, %zmm28, %zmm18 {%k1}
+
+// CHECK: vorpd %zmm24, %zmm28, %zmm18 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x9d,0xc1,0x56,0xd0]
+ vorpd %zmm24, %zmm28, %zmm18 {%k1} {z}
+
+// CHECK: vorpd (%rcx), %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x11]
+ vorpd (%rcx), %zmm28, %zmm18
+
+// CHECK: vorpd 4660(%rax,%r14,8), %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x56,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vorpd 4660(%rax,%r14,8), %zmm28, %zmm18
+
+// CHECK: vorpd (%rcx){1to8}, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x11]
+ vorpd (%rcx){1to8}, %zmm28, %zmm18
+
+// CHECK: vorpd 8128(%rdx), %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x52,0x7f]
+ vorpd 8128(%rdx), %zmm28, %zmm18
+
+// CHECK: vorpd 8192(%rdx), %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x92,0x00,0x20,0x00,0x00]
+ vorpd 8192(%rdx), %zmm28, %zmm18
+
+// CHECK: vorpd -8192(%rdx), %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x52,0x80]
+ vorpd -8192(%rdx), %zmm28, %zmm18
+
+// CHECK: vorpd -8256(%rdx), %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x92,0xc0,0xdf,0xff,0xff]
+ vorpd -8256(%rdx), %zmm28, %zmm18
+
+// CHECK: vorpd 1016(%rdx){1to8}, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x52,0x7f]
+ vorpd 1016(%rdx){1to8}, %zmm28, %zmm18
+
+// CHECK: vorpd 1024(%rdx){1to8}, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x92,0x00,0x04,0x00,0x00]
+ vorpd 1024(%rdx){1to8}, %zmm28, %zmm18
+
+// CHECK: vorpd -1024(%rdx){1to8}, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x52,0x80]
+ vorpd -1024(%rdx){1to8}, %zmm28, %zmm18
+
+// CHECK: vorpd -1032(%rdx){1to8}, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x92,0xf8,0xfb,0xff,0xff]
+ vorpd -1032(%rdx){1to8}, %zmm28, %zmm18
+
+// CHECK: vorps %zmm23, %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x21,0x74,0x40,0x56,0xe7]
+ vorps %zmm23, %zmm17, %zmm28
+
+// CHECK: vorps %zmm23, %zmm17, %zmm28 {%k7}
+// CHECK: encoding: [0x62,0x21,0x74,0x47,0x56,0xe7]
+ vorps %zmm23, %zmm17, %zmm28 {%k7}
+
+// CHECK: vorps %zmm23, %zmm17, %zmm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x21,0x74,0xc7,0x56,0xe7]
+ vorps %zmm23, %zmm17, %zmm28 {%k7} {z}
+
+// CHECK: vorps (%rcx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x21]
+ vorps (%rcx), %zmm17, %zmm28
+
+// CHECK: vorps 4660(%rax,%r14,8), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x21,0x74,0x40,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vorps 4660(%rax,%r14,8), %zmm17, %zmm28
+
+// CHECK: vorps (%rcx){1to16}, %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x21]
+ vorps (%rcx){1to16}, %zmm17, %zmm28
+
+// CHECK: vorps 8128(%rdx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x62,0x7f]
+ vorps 8128(%rdx), %zmm17, %zmm28
+
+// CHECK: vorps 8192(%rdx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0xa2,0x00,0x20,0x00,0x00]
+ vorps 8192(%rdx), %zmm17, %zmm28
+
+// CHECK: vorps -8192(%rdx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x62,0x80]
+ vorps -8192(%rdx), %zmm17, %zmm28
+
+// CHECK: vorps -8256(%rdx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0xa2,0xc0,0xdf,0xff,0xff]
+ vorps -8256(%rdx), %zmm17, %zmm28
+
+// CHECK: vorps 508(%rdx){1to16}, %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x62,0x7f]
+ vorps 508(%rdx){1to16}, %zmm17, %zmm28
+
+// CHECK: vorps 512(%rdx){1to16}, %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0xa2,0x00,0x02,0x00,0x00]
+ vorps 512(%rdx){1to16}, %zmm17, %zmm28
+
+// CHECK: vorps -512(%rdx){1to16}, %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x62,0x80]
+ vorps -512(%rdx){1to16}, %zmm17, %zmm28
+
+// CHECK: vorps -516(%rdx){1to16}, %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0xa2,0xfc,0xfd,0xff,0xff]
+ vorps -516(%rdx){1to16}, %zmm17, %zmm28
+
+// CHECK: vxorpd %zmm27, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x01,0xed,0x40,0x57,0xe3]
+ vxorpd %zmm27, %zmm18, %zmm28
+
+// CHECK: vxorpd %zmm27, %zmm18, %zmm28 {%k4}
+// CHECK: encoding: [0x62,0x01,0xed,0x44,0x57,0xe3]
+ vxorpd %zmm27, %zmm18, %zmm28 {%k4}
+
+// CHECK: vxorpd %zmm27, %zmm18, %zmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x01,0xed,0xc4,0x57,0xe3]
+ vxorpd %zmm27, %zmm18, %zmm28 {%k4} {z}
+
+// CHECK: vxorpd (%rcx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x21]
+ vxorpd (%rcx), %zmm18, %zmm28
+
+// CHECK: vxorpd 4660(%rax,%r14,8), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x21,0xed,0x40,0x57,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vxorpd 4660(%rax,%r14,8), %zmm18, %zmm28
+
+// CHECK: vxorpd (%rcx){1to8}, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x21]
+ vxorpd (%rcx){1to8}, %zmm18, %zmm28
+
+// CHECK: vxorpd 8128(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x62,0x7f]
+ vxorpd 8128(%rdx), %zmm18, %zmm28
+
+// CHECK: vxorpd 8192(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0xa2,0x00,0x20,0x00,0x00]
+ vxorpd 8192(%rdx), %zmm18, %zmm28
+
+// CHECK: vxorpd -8192(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x62,0x80]
+ vxorpd -8192(%rdx), %zmm18, %zmm28
+
+// CHECK: vxorpd -8256(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0xa2,0xc0,0xdf,0xff,0xff]
+ vxorpd -8256(%rdx), %zmm18, %zmm28
+
+// CHECK: vxorpd 1016(%rdx){1to8}, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x62,0x7f]
+ vxorpd 1016(%rdx){1to8}, %zmm18, %zmm28
+
+// CHECK: vxorpd 1024(%rdx){1to8}, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0xa2,0x00,0x04,0x00,0x00]
+ vxorpd 1024(%rdx){1to8}, %zmm18, %zmm28
+
+// CHECK: vxorpd -1024(%rdx){1to8}, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x62,0x80]
+ vxorpd -1024(%rdx){1to8}, %zmm18, %zmm28
+
+// CHECK: vxorpd -1032(%rdx){1to8}, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0xa2,0xf8,0xfb,0xff,0xff]
+ vxorpd -1032(%rdx){1to8}, %zmm18, %zmm28
+
+// CHECK: vxorps %zmm18, %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x21,0x1c,0x40,0x57,0xc2]
+ vxorps %zmm18, %zmm28, %zmm24
+
+// CHECK: vxorps %zmm18, %zmm28, %zmm24 {%k4}
+// CHECK: encoding: [0x62,0x21,0x1c,0x44,0x57,0xc2]
+ vxorps %zmm18, %zmm28, %zmm24 {%k4}
+
+// CHECK: vxorps %zmm18, %zmm28, %zmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0x1c,0xc4,0x57,0xc2]
+ vxorps %zmm18, %zmm28, %zmm24 {%k4} {z}
+
+// CHECK: vxorps (%rcx), %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x01]
+ vxorps (%rcx), %zmm28, %zmm24
+
+// CHECK: vxorps 4660(%rax,%r14,8), %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x21,0x1c,0x40,0x57,0x84,0xf0,0x34,0x12,0x00,0x00]
+ vxorps 4660(%rax,%r14,8), %zmm28, %zmm24
+
+// CHECK: vxorps (%rcx){1to16}, %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x01]
+ vxorps (%rcx){1to16}, %zmm28, %zmm24
+
+// CHECK: vxorps 8128(%rdx), %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x42,0x7f]
+ vxorps 8128(%rdx), %zmm28, %zmm24
+
+// CHECK: vxorps 8192(%rdx), %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x82,0x00,0x20,0x00,0x00]
+ vxorps 8192(%rdx), %zmm28, %zmm24
+
+// CHECK: vxorps -8192(%rdx), %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x42,0x80]
+ vxorps -8192(%rdx), %zmm28, %zmm24
+
+// CHECK: vxorps -8256(%rdx), %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x82,0xc0,0xdf,0xff,0xff]
+ vxorps -8256(%rdx), %zmm28, %zmm24
+
+// CHECK: vxorps 508(%rdx){1to16}, %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x42,0x7f]
+ vxorps 508(%rdx){1to16}, %zmm28, %zmm24
+
+// CHECK: vxorps 512(%rdx){1to16}, %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x82,0x00,0x02,0x00,0x00]
+ vxorps 512(%rdx){1to16}, %zmm28, %zmm24
+
+// CHECK: vxorps -512(%rdx){1to16}, %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x42,0x80]
+ vxorps -512(%rdx){1to16}, %zmm28, %zmm24
+
+// CHECK: vxorps -516(%rdx){1to16}, %zmm28, %zmm24
+// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x82,0xfc,0xfd,0xff,0xff]
+ vxorps -516(%rdx){1to16}, %zmm28, %zmm24
// CHECK: vpmullq -1032(%rdx){1to4}, %ymm25, %ymm25
// CHECK: encoding: [0x62,0x62,0xb5,0x30,0x40,0x8a,0xf8,0xfb,0xff,0xff]
vpmullq -1032(%rdx){1to4}, %ymm25, %ymm25
+
+// CHECK: vandpd %xmm20, %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x95,0x00,0x54,0xec]
+ vandpd %xmm20, %xmm29, %xmm21
+
+// CHECK: vandpd %xmm20, %xmm29, %xmm21 {%k6}
+// CHECK: encoding: [0x62,0xa1,0x95,0x06,0x54,0xec]
+ vandpd %xmm20, %xmm29, %xmm21 {%k6}
+
+// CHECK: vandpd %xmm20, %xmm29, %xmm21 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0x95,0x86,0x54,0xec]
+ vandpd %xmm20, %xmm29, %xmm21 {%k6} {z}
+
+// CHECK: vandpd (%rcx), %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0x29]
+ vandpd (%rcx), %xmm29, %xmm21
+
+// CHECK: vandpd 291(%rax,%r14,8), %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x95,0x00,0x54,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vandpd 291(%rax,%r14,8), %xmm29, %xmm21
+
+// CHECK: vandpd (%rcx){1to2}, %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0x29]
+ vandpd (%rcx){1to2}, %xmm29, %xmm21
+
+// CHECK: vandpd 2032(%rdx), %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0x6a,0x7f]
+ vandpd 2032(%rdx), %xmm29, %xmm21
+
+// CHECK: vandpd 2048(%rdx), %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0xaa,0x00,0x08,0x00,0x00]
+ vandpd 2048(%rdx), %xmm29, %xmm21
+
+// CHECK: vandpd -2048(%rdx), %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0x6a,0x80]
+ vandpd -2048(%rdx), %xmm29, %xmm21
+
+// CHECK: vandpd -2064(%rdx), %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x54,0xaa,0xf0,0xf7,0xff,0xff]
+ vandpd -2064(%rdx), %xmm29, %xmm21
+
+// CHECK: vandpd 1016(%rdx){1to2}, %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0x6a,0x7f]
+ vandpd 1016(%rdx){1to2}, %xmm29, %xmm21
+
+// CHECK: vandpd 1024(%rdx){1to2}, %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0xaa,0x00,0x04,0x00,0x00]
+ vandpd 1024(%rdx){1to2}, %xmm29, %xmm21
+
+// CHECK: vandpd -1024(%rdx){1to2}, %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0x6a,0x80]
+ vandpd -1024(%rdx){1to2}, %xmm29, %xmm21
+
+// CHECK: vandpd -1032(%rdx){1to2}, %xmm29, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x95,0x10,0x54,0xaa,0xf8,0xfb,0xff,0xff]
+ vandpd -1032(%rdx){1to2}, %xmm29, %xmm21
+
+// CHECK: vandpd %ymm28, %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x01,0xd5,0x20,0x54,0xe4]
+ vandpd %ymm28, %ymm21, %ymm28
+
+// CHECK: vandpd %ymm28, %ymm21, %ymm28 {%k4}
+// CHECK: encoding: [0x62,0x01,0xd5,0x24,0x54,0xe4]
+ vandpd %ymm28, %ymm21, %ymm28 {%k4}
+
+// CHECK: vandpd %ymm28, %ymm21, %ymm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x01,0xd5,0xa4,0x54,0xe4]
+ vandpd %ymm28, %ymm21, %ymm28 {%k4} {z}
+
+// CHECK: vandpd (%rcx), %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0x21]
+ vandpd (%rcx), %ymm21, %ymm28
+
+// CHECK: vandpd 291(%rax,%r14,8), %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x21,0xd5,0x20,0x54,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vandpd 291(%rax,%r14,8), %ymm21, %ymm28
+
+// CHECK: vandpd (%rcx){1to4}, %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0x21]
+ vandpd (%rcx){1to4}, %ymm21, %ymm28
+
+// CHECK: vandpd 4064(%rdx), %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0x62,0x7f]
+ vandpd 4064(%rdx), %ymm21, %ymm28
+
+// CHECK: vandpd 4096(%rdx), %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0xa2,0x00,0x10,0x00,0x00]
+ vandpd 4096(%rdx), %ymm21, %ymm28
+
+// CHECK: vandpd -4096(%rdx), %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0x62,0x80]
+ vandpd -4096(%rdx), %ymm21, %ymm28
+
+// CHECK: vandpd -4128(%rdx), %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x54,0xa2,0xe0,0xef,0xff,0xff]
+ vandpd -4128(%rdx), %ymm21, %ymm28
+
+// CHECK: vandpd 1016(%rdx){1to4}, %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0x62,0x7f]
+ vandpd 1016(%rdx){1to4}, %ymm21, %ymm28
+
+// CHECK: vandpd 1024(%rdx){1to4}, %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0xa2,0x00,0x04,0x00,0x00]
+ vandpd 1024(%rdx){1to4}, %ymm21, %ymm28
+
+// CHECK: vandpd -1024(%rdx){1to4}, %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0x62,0x80]
+ vandpd -1024(%rdx){1to4}, %ymm21, %ymm28
+
+// CHECK: vandpd -1032(%rdx){1to4}, %ymm21, %ymm28
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x54,0xa2,0xf8,0xfb,0xff,0xff]
+ vandpd -1032(%rdx){1to4}, %ymm21, %ymm28
+
+// CHECK: vandps %xmm24, %xmm21, %xmm23
+// CHECK: encoding: [0x62,0x81,0x54,0x00,0x54,0xf8]
+ vandps %xmm24, %xmm21, %xmm23
+
+// CHECK: vandps %xmm24, %xmm21, %xmm23 {%k5}
+// CHECK: encoding: [0x62,0x81,0x54,0x05,0x54,0xf8]
+ vandps %xmm24, %xmm21, %xmm23 {%k5}
+
+// CHECK: vandps %xmm24, %xmm21, %xmm23 {%k5} {z}
+// CHECK: encoding: [0x62,0x81,0x54,0x85,0x54,0xf8]
+ vandps %xmm24, %xmm21, %xmm23 {%k5} {z}
+
+// CHECK: vandps (%rcx), %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0x39]
+ vandps (%rcx), %xmm21, %xmm23
+
+// CHECK: vandps 291(%rax,%r14,8), %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xa1,0x54,0x00,0x54,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vandps 291(%rax,%r14,8), %xmm21, %xmm23
+
+// CHECK: vandps (%rcx){1to4}, %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0x39]
+ vandps (%rcx){1to4}, %xmm21, %xmm23
+
+// CHECK: vandps 2032(%rdx), %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0x7a,0x7f]
+ vandps 2032(%rdx), %xmm21, %xmm23
+
+// CHECK: vandps 2048(%rdx), %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0xba,0x00,0x08,0x00,0x00]
+ vandps 2048(%rdx), %xmm21, %xmm23
+
+// CHECK: vandps -2048(%rdx), %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0x7a,0x80]
+ vandps -2048(%rdx), %xmm21, %xmm23
+
+// CHECK: vandps -2064(%rdx), %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x54,0xba,0xf0,0xf7,0xff,0xff]
+ vandps -2064(%rdx), %xmm21, %xmm23
+
+// CHECK: vandps 508(%rdx){1to4}, %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0x7a,0x7f]
+ vandps 508(%rdx){1to4}, %xmm21, %xmm23
+
+// CHECK: vandps 512(%rdx){1to4}, %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0xba,0x00,0x02,0x00,0x00]
+ vandps 512(%rdx){1to4}, %xmm21, %xmm23
+
+// CHECK: vandps -512(%rdx){1to4}, %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0x7a,0x80]
+ vandps -512(%rdx){1to4}, %xmm21, %xmm23
+
+// CHECK: vandps -516(%rdx){1to4}, %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x54,0xba,0xfc,0xfd,0xff,0xff]
+ vandps -516(%rdx){1to4}, %xmm21, %xmm23
+
+// CHECK: vandps %ymm23, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x21,0x6c,0x20,0x54,0xd7]
+ vandps %ymm23, %ymm18, %ymm26
+
+// CHECK: vandps %ymm23, %ymm18, %ymm26 {%k6}
+// CHECK: encoding: [0x62,0x21,0x6c,0x26,0x54,0xd7]
+ vandps %ymm23, %ymm18, %ymm26 {%k6}
+
+// CHECK: vandps %ymm23, %ymm18, %ymm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x21,0x6c,0xa6,0x54,0xd7]
+ vandps %ymm23, %ymm18, %ymm26 {%k6} {z}
+
+// CHECK: vandps (%rcx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x11]
+ vandps (%rcx), %ymm18, %ymm26
+
+// CHECK: vandps 291(%rax,%r14,8), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x21,0x6c,0x20,0x54,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vandps 291(%rax,%r14,8), %ymm18, %ymm26
+
+// CHECK: vandps (%rcx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x11]
+ vandps (%rcx){1to8}, %ymm18, %ymm26
+
+// CHECK: vandps 4064(%rdx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x52,0x7f]
+ vandps 4064(%rdx), %ymm18, %ymm26
+
+// CHECK: vandps 4096(%rdx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x92,0x00,0x10,0x00,0x00]
+ vandps 4096(%rdx), %ymm18, %ymm26
+
+// CHECK: vandps -4096(%rdx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x52,0x80]
+ vandps -4096(%rdx), %ymm18, %ymm26
+
+// CHECK: vandps -4128(%rdx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x54,0x92,0xe0,0xef,0xff,0xff]
+ vandps -4128(%rdx), %ymm18, %ymm26
+
+// CHECK: vandps 508(%rdx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x52,0x7f]
+ vandps 508(%rdx){1to8}, %ymm18, %ymm26
+
+// CHECK: vandps 512(%rdx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x92,0x00,0x02,0x00,0x00]
+ vandps 512(%rdx){1to8}, %ymm18, %ymm26
+
+// CHECK: vandps -512(%rdx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x52,0x80]
+ vandps -512(%rdx){1to8}, %ymm18, %ymm26
+
+// CHECK: vandps -516(%rdx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x54,0x92,0xfc,0xfd,0xff,0xff]
+ vandps -516(%rdx){1to8}, %ymm18, %ymm26
+
+// CHECK: vandnpd %xmm25, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x01,0xa5,0x00,0x55,0xc9]
+ vandnpd %xmm25, %xmm27, %xmm25
+
+// CHECK: vandnpd %xmm25, %xmm27, %xmm25 {%k5}
+// CHECK: encoding: [0x62,0x01,0xa5,0x05,0x55,0xc9]
+ vandnpd %xmm25, %xmm27, %xmm25 {%k5}
+
+// CHECK: vandnpd %xmm25, %xmm27, %xmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x01,0xa5,0x85,0x55,0xc9]
+ vandnpd %xmm25, %xmm27, %xmm25 {%k5} {z}
+
+// CHECK: vandnpd (%rcx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x09]
+ vandnpd (%rcx), %xmm27, %xmm25
+
+// CHECK: vandnpd 291(%rax,%r14,8), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x21,0xa5,0x00,0x55,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vandnpd 291(%rax,%r14,8), %xmm27, %xmm25
+
+// CHECK: vandnpd (%rcx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x09]
+ vandnpd (%rcx){1to2}, %xmm27, %xmm25
+
+// CHECK: vandnpd 2032(%rdx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x4a,0x7f]
+ vandnpd 2032(%rdx), %xmm27, %xmm25
+
+// CHECK: vandnpd 2048(%rdx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x8a,0x00,0x08,0x00,0x00]
+ vandnpd 2048(%rdx), %xmm27, %xmm25
+
+// CHECK: vandnpd -2048(%rdx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x4a,0x80]
+ vandnpd -2048(%rdx), %xmm27, %xmm25
+
+// CHECK: vandnpd -2064(%rdx), %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x00,0x55,0x8a,0xf0,0xf7,0xff,0xff]
+ vandnpd -2064(%rdx), %xmm27, %xmm25
+
+// CHECK: vandnpd 1016(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x4a,0x7f]
+ vandnpd 1016(%rdx){1to2}, %xmm27, %xmm25
+
+// CHECK: vandnpd 1024(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x8a,0x00,0x04,0x00,0x00]
+ vandnpd 1024(%rdx){1to2}, %xmm27, %xmm25
+
+// CHECK: vandnpd -1024(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x4a,0x80]
+ vandnpd -1024(%rdx){1to2}, %xmm27, %xmm25
+
+// CHECK: vandnpd -1032(%rdx){1to2}, %xmm27, %xmm25
+// CHECK: encoding: [0x62,0x61,0xa5,0x10,0x55,0x8a,0xf8,0xfb,0xff,0xff]
+ vandnpd -1032(%rdx){1to2}, %xmm27, %xmm25
+
+// CHECK: vandnpd %ymm22, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xed,0x20,0x55,0xf6]
+ vandnpd %ymm22, %ymm18, %ymm22
+
+// CHECK: vandnpd %ymm22, %ymm18, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xed,0x27,0x55,0xf6]
+ vandnpd %ymm22, %ymm18, %ymm22 {%k7}
+
+// CHECK: vandnpd %ymm22, %ymm18, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xed,0xa7,0x55,0xf6]
+ vandnpd %ymm22, %ymm18, %ymm22 {%k7} {z}
+
+// CHECK: vandnpd (%rcx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0x31]
+ vandnpd (%rcx), %ymm18, %ymm22
+
+// CHECK: vandnpd 291(%rax,%r14,8), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xed,0x20,0x55,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vandnpd 291(%rax,%r14,8), %ymm18, %ymm22
+
+// CHECK: vandnpd (%rcx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0x31]
+ vandnpd (%rcx){1to4}, %ymm18, %ymm22
+
+// CHECK: vandnpd 4064(%rdx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0x72,0x7f]
+ vandnpd 4064(%rdx), %ymm18, %ymm22
+
+// CHECK: vandnpd 4096(%rdx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0xb2,0x00,0x10,0x00,0x00]
+ vandnpd 4096(%rdx), %ymm18, %ymm22
+
+// CHECK: vandnpd -4096(%rdx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0x72,0x80]
+ vandnpd -4096(%rdx), %ymm18, %ymm22
+
+// CHECK: vandnpd -4128(%rdx), %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x20,0x55,0xb2,0xe0,0xef,0xff,0xff]
+ vandnpd -4128(%rdx), %ymm18, %ymm22
+
+// CHECK: vandnpd 1016(%rdx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0x72,0x7f]
+ vandnpd 1016(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vandnpd 1024(%rdx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0xb2,0x00,0x04,0x00,0x00]
+ vandnpd 1024(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vandnpd -1024(%rdx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0x72,0x80]
+ vandnpd -1024(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vandnpd -1032(%rdx){1to4}, %ymm18, %ymm22
+// CHECK: encoding: [0x62,0xe1,0xed,0x30,0x55,0xb2,0xf8,0xfb,0xff,0xff]
+ vandnpd -1032(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vandnps %xmm27, %xmm21, %xmm21
+// CHECK: encoding: [0x62,0x81,0x54,0x00,0x55,0xeb]
+ vandnps %xmm27, %xmm21, %xmm21
+
+// CHECK: vandnps %xmm27, %xmm21, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0x81,0x54,0x02,0x55,0xeb]
+ vandnps %xmm27, %xmm21, %xmm21 {%k2}
+
+// CHECK: vandnps %xmm27, %xmm21, %xmm21 {%k2} {z}
+// CHECK: encoding: [0x62,0x81,0x54,0x82,0x55,0xeb]
+ vandnps %xmm27, %xmm21, %xmm21 {%k2} {z}
+
+// CHECK: vandnps (%rcx), %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0x29]
+ vandnps (%rcx), %xmm21, %xmm21
+
+// CHECK: vandnps 291(%rax,%r14,8), %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x54,0x00,0x55,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vandnps 291(%rax,%r14,8), %xmm21, %xmm21
+
+// CHECK: vandnps (%rcx){1to4}, %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0x29]
+ vandnps (%rcx){1to4}, %xmm21, %xmm21
+
+// CHECK: vandnps 2032(%rdx), %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0x6a,0x7f]
+ vandnps 2032(%rdx), %xmm21, %xmm21
+
+// CHECK: vandnps 2048(%rdx), %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0xaa,0x00,0x08,0x00,0x00]
+ vandnps 2048(%rdx), %xmm21, %xmm21
+
+// CHECK: vandnps -2048(%rdx), %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0x6a,0x80]
+ vandnps -2048(%rdx), %xmm21, %xmm21
+
+// CHECK: vandnps -2064(%rdx), %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x55,0xaa,0xf0,0xf7,0xff,0xff]
+ vandnps -2064(%rdx), %xmm21, %xmm21
+
+// CHECK: vandnps 508(%rdx){1to4}, %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0x6a,0x7f]
+ vandnps 508(%rdx){1to4}, %xmm21, %xmm21
+
+// CHECK: vandnps 512(%rdx){1to4}, %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0xaa,0x00,0x02,0x00,0x00]
+ vandnps 512(%rdx){1to4}, %xmm21, %xmm21
+
+// CHECK: vandnps -512(%rdx){1to4}, %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0x6a,0x80]
+ vandnps -512(%rdx){1to4}, %xmm21, %xmm21
+
+// CHECK: vandnps -516(%rdx){1to4}, %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x55,0xaa,0xfc,0xfd,0xff,0xff]
+ vandnps -516(%rdx){1to4}, %xmm21, %xmm21
+
+// CHECK: vandnps %ymm25, %ymm23, %ymm19
+// CHECK: encoding: [0x62,0x81,0x44,0x20,0x55,0xd9]
+ vandnps %ymm25, %ymm23, %ymm19
+
+// CHECK: vandnps %ymm25, %ymm23, %ymm19 {%k1}
+// CHECK: encoding: [0x62,0x81,0x44,0x21,0x55,0xd9]
+ vandnps %ymm25, %ymm23, %ymm19 {%k1}
+
+// CHECK: vandnps %ymm25, %ymm23, %ymm19 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x44,0xa1,0x55,0xd9]
+ vandnps %ymm25, %ymm23, %ymm19 {%k1} {z}
+
+// CHECK: vandnps (%rcx), %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x19]
+ vandnps (%rcx), %ymm23, %ymm19
+
+// CHECK: vandnps 291(%rax,%r14,8), %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x44,0x20,0x55,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vandnps 291(%rax,%r14,8), %ymm23, %ymm19
+
+// CHECK: vandnps (%rcx){1to8}, %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x19]
+ vandnps (%rcx){1to8}, %ymm23, %ymm19
+
+// CHECK: vandnps 4064(%rdx), %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x5a,0x7f]
+ vandnps 4064(%rdx), %ymm23, %ymm19
+
+// CHECK: vandnps 4096(%rdx), %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x9a,0x00,0x10,0x00,0x00]
+ vandnps 4096(%rdx), %ymm23, %ymm19
+
+// CHECK: vandnps -4096(%rdx), %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x5a,0x80]
+ vandnps -4096(%rdx), %ymm23, %ymm19
+
+// CHECK: vandnps -4128(%rdx), %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x20,0x55,0x9a,0xe0,0xef,0xff,0xff]
+ vandnps -4128(%rdx), %ymm23, %ymm19
+
+// CHECK: vandnps 508(%rdx){1to8}, %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x5a,0x7f]
+ vandnps 508(%rdx){1to8}, %ymm23, %ymm19
+
+// CHECK: vandnps 512(%rdx){1to8}, %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x9a,0x00,0x02,0x00,0x00]
+ vandnps 512(%rdx){1to8}, %ymm23, %ymm19
+
+// CHECK: vandnps -512(%rdx){1to8}, %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x5a,0x80]
+ vandnps -512(%rdx){1to8}, %ymm23, %ymm19
+
+// CHECK: vandnps -516(%rdx){1to8}, %ymm23, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x44,0x30,0x55,0x9a,0xfc,0xfd,0xff,0xff]
+ vandnps -516(%rdx){1to8}, %ymm23, %ymm19
+
+// CHECK: vorpd %xmm18, %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x56,0xfa]
+ vorpd %xmm18, %xmm27, %xmm23
+
+// CHECK: vorpd %xmm18, %xmm27, %xmm23 {%k1}
+// CHECK: encoding: [0x62,0xa1,0xa5,0x01,0x56,0xfa]
+ vorpd %xmm18, %xmm27, %xmm23 {%k1}
+
+// CHECK: vorpd %xmm18, %xmm27, %xmm23 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0xa5,0x81,0x56,0xfa]
+ vorpd %xmm18, %xmm27, %xmm23 {%k1} {z}
+
+// CHECK: vorpd (%rcx), %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0x39]
+ vorpd (%rcx), %xmm27, %xmm23
+
+// CHECK: vorpd 291(%rax,%r14,8), %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x56,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vorpd 291(%rax,%r14,8), %xmm27, %xmm23
+
+// CHECK: vorpd (%rcx){1to2}, %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0x39]
+ vorpd (%rcx){1to2}, %xmm27, %xmm23
+
+// CHECK: vorpd 2032(%rdx), %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0x7a,0x7f]
+ vorpd 2032(%rdx), %xmm27, %xmm23
+
+// CHECK: vorpd 2048(%rdx), %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0xba,0x00,0x08,0x00,0x00]
+ vorpd 2048(%rdx), %xmm27, %xmm23
+
+// CHECK: vorpd -2048(%rdx), %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0x7a,0x80]
+ vorpd -2048(%rdx), %xmm27, %xmm23
+
+// CHECK: vorpd -2064(%rdx), %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x56,0xba,0xf0,0xf7,0xff,0xff]
+ vorpd -2064(%rdx), %xmm27, %xmm23
+
+// CHECK: vorpd 1016(%rdx){1to2}, %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0x7a,0x7f]
+ vorpd 1016(%rdx){1to2}, %xmm27, %xmm23
+
+// CHECK: vorpd 1024(%rdx){1to2}, %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0xba,0x00,0x04,0x00,0x00]
+ vorpd 1024(%rdx){1to2}, %xmm27, %xmm23
+
+// CHECK: vorpd -1024(%rdx){1to2}, %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0x7a,0x80]
+ vorpd -1024(%rdx){1to2}, %xmm27, %xmm23
+
+// CHECK: vorpd -1032(%rdx){1to2}, %xmm27, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x56,0xba,0xf8,0xfb,0xff,0xff]
+ vorpd -1032(%rdx){1to2}, %xmm27, %xmm23
+
+// CHECK: vorpd %ymm20, %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0x56,0xd4]
+ vorpd %ymm20, %ymm29, %ymm26
+
+// CHECK: vorpd %ymm20, %ymm29, %ymm26 {%k5}
+// CHECK: encoding: [0x62,0x21,0x95,0x25,0x56,0xd4]
+ vorpd %ymm20, %ymm29, %ymm26 {%k5}
+
+// CHECK: vorpd %ymm20, %ymm29, %ymm26 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0x95,0xa5,0x56,0xd4]
+ vorpd %ymm20, %ymm29, %ymm26 {%k5} {z}
+
+// CHECK: vorpd (%rcx), %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x11]
+ vorpd (%rcx), %ymm29, %ymm26
+
+// CHECK: vorpd 291(%rax,%r14,8), %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0x56,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vorpd 291(%rax,%r14,8), %ymm29, %ymm26
+
+// CHECK: vorpd (%rcx){1to4}, %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x11]
+ vorpd (%rcx){1to4}, %ymm29, %ymm26
+
+// CHECK: vorpd 4064(%rdx), %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x52,0x7f]
+ vorpd 4064(%rdx), %ymm29, %ymm26
+
+// CHECK: vorpd 4096(%rdx), %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x92,0x00,0x10,0x00,0x00]
+ vorpd 4096(%rdx), %ymm29, %ymm26
+
+// CHECK: vorpd -4096(%rdx), %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x52,0x80]
+ vorpd -4096(%rdx), %ymm29, %ymm26
+
+// CHECK: vorpd -4128(%rdx), %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x56,0x92,0xe0,0xef,0xff,0xff]
+ vorpd -4128(%rdx), %ymm29, %ymm26
+
+// CHECK: vorpd 1016(%rdx){1to4}, %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x52,0x7f]
+ vorpd 1016(%rdx){1to4}, %ymm29, %ymm26
+
+// CHECK: vorpd 1024(%rdx){1to4}, %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x92,0x00,0x04,0x00,0x00]
+ vorpd 1024(%rdx){1to4}, %ymm29, %ymm26
+
+// CHECK: vorpd -1024(%rdx){1to4}, %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x52,0x80]
+ vorpd -1024(%rdx){1to4}, %ymm29, %ymm26
+
+// CHECK: vorpd -1032(%rdx){1to4}, %ymm29, %ymm26
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x56,0x92,0xf8,0xfb,0xff,0xff]
+ vorpd -1032(%rdx){1to4}, %ymm29, %ymm26
+
+// CHECK: vorps %xmm27, %xmm28, %xmm19
+// CHECK: encoding: [0x62,0x81,0x1c,0x00,0x56,0xdb]
+ vorps %xmm27, %xmm28, %xmm19
+
+// CHECK: vorps %xmm27, %xmm28, %xmm19 {%k4}
+// CHECK: encoding: [0x62,0x81,0x1c,0x04,0x56,0xdb]
+ vorps %xmm27, %xmm28, %xmm19 {%k4}
+
+// CHECK: vorps %xmm27, %xmm28, %xmm19 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0x1c,0x84,0x56,0xdb]
+ vorps %xmm27, %xmm28, %xmm19 {%k4} {z}
+
+// CHECK: vorps (%rcx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x19]
+ vorps (%rcx), %xmm28, %xmm19
+
+// CHECK: vorps 291(%rax,%r14,8), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xa1,0x1c,0x00,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vorps 291(%rax,%r14,8), %xmm28, %xmm19
+
+// CHECK: vorps (%rcx){1to4}, %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x19]
+ vorps (%rcx){1to4}, %xmm28, %xmm19
+
+// CHECK: vorps 2032(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x5a,0x7f]
+ vorps 2032(%rdx), %xmm28, %xmm19
+
+// CHECK: vorps 2048(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x9a,0x00,0x08,0x00,0x00]
+ vorps 2048(%rdx), %xmm28, %xmm19
+
+// CHECK: vorps -2048(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x5a,0x80]
+ vorps -2048(%rdx), %xmm28, %xmm19
+
+// CHECK: vorps -2064(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x00,0x56,0x9a,0xf0,0xf7,0xff,0xff]
+ vorps -2064(%rdx), %xmm28, %xmm19
+
+// CHECK: vorps 508(%rdx){1to4}, %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x5a,0x7f]
+ vorps 508(%rdx){1to4}, %xmm28, %xmm19
+
+// CHECK: vorps 512(%rdx){1to4}, %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x9a,0x00,0x02,0x00,0x00]
+ vorps 512(%rdx){1to4}, %xmm28, %xmm19
+
+// CHECK: vorps -512(%rdx){1to4}, %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x5a,0x80]
+ vorps -512(%rdx){1to4}, %xmm28, %xmm19
+
+// CHECK: vorps -516(%rdx){1to4}, %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x1c,0x10,0x56,0x9a,0xfc,0xfd,0xff,0xff]
+ vorps -516(%rdx){1to4}, %xmm28, %xmm19
+
+// CHECK: vorps %ymm26, %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x01,0x2c,0x20,0x56,0xda]
+ vorps %ymm26, %ymm26, %ymm27
+
+// CHECK: vorps %ymm26, %ymm26, %ymm27 {%k1}
+// CHECK: encoding: [0x62,0x01,0x2c,0x21,0x56,0xda]
+ vorps %ymm26, %ymm26, %ymm27 {%k1}
+
+// CHECK: vorps %ymm26, %ymm26, %ymm27 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0x2c,0xa1,0x56,0xda]
+ vorps %ymm26, %ymm26, %ymm27 {%k1} {z}
+
+// CHECK: vorps (%rcx), %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x19]
+ vorps (%rcx), %ymm26, %ymm27
+
+// CHECK: vorps 291(%rax,%r14,8), %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x21,0x2c,0x20,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vorps 291(%rax,%r14,8), %ymm26, %ymm27
+
+// CHECK: vorps (%rcx){1to8}, %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x19]
+ vorps (%rcx){1to8}, %ymm26, %ymm27
+
+// CHECK: vorps 4064(%rdx), %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x5a,0x7f]
+ vorps 4064(%rdx), %ymm26, %ymm27
+
+// CHECK: vorps 4096(%rdx), %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x9a,0x00,0x10,0x00,0x00]
+ vorps 4096(%rdx), %ymm26, %ymm27
+
+// CHECK: vorps -4096(%rdx), %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x5a,0x80]
+ vorps -4096(%rdx), %ymm26, %ymm27
+
+// CHECK: vorps -4128(%rdx), %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x20,0x56,0x9a,0xe0,0xef,0xff,0xff]
+ vorps -4128(%rdx), %ymm26, %ymm27
+
+// CHECK: vorps 508(%rdx){1to8}, %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x5a,0x7f]
+ vorps 508(%rdx){1to8}, %ymm26, %ymm27
+
+// CHECK: vorps 512(%rdx){1to8}, %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x9a,0x00,0x02,0x00,0x00]
+ vorps 512(%rdx){1to8}, %ymm26, %ymm27
+
+// CHECK: vorps -512(%rdx){1to8}, %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x5a,0x80]
+ vorps -512(%rdx){1to8}, %ymm26, %ymm27
+
+// CHECK: vorps -516(%rdx){1to8}, %ymm26, %ymm27
+// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x56,0x9a,0xfc,0xfd,0xff,0xff]
+ vorps -516(%rdx){1to8}, %ymm26, %ymm27
+
+// CHECK: vxorpd %xmm23, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0x57,0xd7]
+ vxorpd %xmm23, %xmm21, %xmm18
+
+// CHECK: vxorpd %xmm23, %xmm21, %xmm18 {%k2}
+// CHECK: encoding: [0x62,0xa1,0xd5,0x02,0x57,0xd7]
+ vxorpd %xmm23, %xmm21, %xmm18 {%k2}
+
+// CHECK: vxorpd %xmm23, %xmm21, %xmm18 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0xd5,0x82,0x57,0xd7]
+ vxorpd %xmm23, %xmm21, %xmm18 {%k2} {z}
+
+// CHECK: vxorpd (%rcx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x11]
+ vxorpd (%rcx), %xmm21, %xmm18
+
+// CHECK: vxorpd 291(%rax,%r14,8), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0x57,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vxorpd 291(%rax,%r14,8), %xmm21, %xmm18
+
+// CHECK: vxorpd (%rcx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x11]
+ vxorpd (%rcx){1to2}, %xmm21, %xmm18
+
+// CHECK: vxorpd 2032(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x52,0x7f]
+ vxorpd 2032(%rdx), %xmm21, %xmm18
+
+// CHECK: vxorpd 2048(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x92,0x00,0x08,0x00,0x00]
+ vxorpd 2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vxorpd -2048(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x52,0x80]
+ vxorpd -2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vxorpd -2064(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x92,0xf0,0xf7,0xff,0xff]
+ vxorpd -2064(%rdx), %xmm21, %xmm18
+
+// CHECK: vxorpd 1016(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x52,0x7f]
+ vxorpd 1016(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vxorpd 1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x92,0x00,0x04,0x00,0x00]
+ vxorpd 1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vxorpd -1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x52,0x80]
+ vxorpd -1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vxorpd -1032(%rdx){1to2}, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x92,0xf8,0xfb,0xff,0xff]
+ vxorpd -1032(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vxorpd %ymm19, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0x57,0xc3]
+ vxorpd %ymm19, %ymm29, %ymm24
+
+// CHECK: vxorpd %ymm19, %ymm29, %ymm24 {%k7}
+// CHECK: encoding: [0x62,0x21,0x95,0x27,0x57,0xc3]
+ vxorpd %ymm19, %ymm29, %ymm24 {%k7}
+
+// CHECK: vxorpd %ymm19, %ymm29, %ymm24 {%k7} {z}
+// CHECK: encoding: [0x62,0x21,0x95,0xa7,0x57,0xc3]
+ vxorpd %ymm19, %ymm29, %ymm24 {%k7} {z}
+
+// CHECK: vxorpd (%rcx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x01]
+ vxorpd (%rcx), %ymm29, %ymm24
+
+// CHECK: vxorpd 291(%rax,%r14,8), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0x57,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vxorpd 291(%rax,%r14,8), %ymm29, %ymm24
+
+// CHECK: vxorpd (%rcx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x01]
+ vxorpd (%rcx){1to4}, %ymm29, %ymm24
+
+// CHECK: vxorpd 4064(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x42,0x7f]
+ vxorpd 4064(%rdx), %ymm29, %ymm24
+
+// CHECK: vxorpd 4096(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x82,0x00,0x10,0x00,0x00]
+ vxorpd 4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vxorpd -4096(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x42,0x80]
+ vxorpd -4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vxorpd -4128(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x57,0x82,0xe0,0xef,0xff,0xff]
+ vxorpd -4128(%rdx), %ymm29, %ymm24
+
+// CHECK: vxorpd 1016(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x42,0x7f]
+ vxorpd 1016(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vxorpd 1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x82,0x00,0x04,0x00,0x00]
+ vxorpd 1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vxorpd -1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x42,0x80]
+ vxorpd -1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vxorpd -1032(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x57,0x82,0xf8,0xfb,0xff,0xff]
+ vxorpd -1032(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vxorps %xmm19, %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xa1,0x6c,0x00,0x57,0xe3]
+ vxorps %xmm19, %xmm18, %xmm20
+
+// CHECK: vxorps %xmm19, %xmm18, %xmm20 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x6c,0x01,0x57,0xe3]
+ vxorps %xmm19, %xmm18, %xmm20 {%k1}
+
+// CHECK: vxorps %xmm19, %xmm18, %xmm20 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x6c,0x81,0x57,0xe3]
+ vxorps %xmm19, %xmm18, %xmm20 {%k1} {z}
+
+// CHECK: vxorps (%rcx), %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0x21]
+ vxorps (%rcx), %xmm18, %xmm20
+
+// CHECK: vxorps 291(%rax,%r14,8), %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xa1,0x6c,0x00,0x57,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vxorps 291(%rax,%r14,8), %xmm18, %xmm20
+
+// CHECK: vxorps (%rcx){1to4}, %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0x21]
+ vxorps (%rcx){1to4}, %xmm18, %xmm20
+
+// CHECK: vxorps 2032(%rdx), %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0x62,0x7f]
+ vxorps 2032(%rdx), %xmm18, %xmm20
+
+// CHECK: vxorps 2048(%rdx), %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0xa2,0x00,0x08,0x00,0x00]
+ vxorps 2048(%rdx), %xmm18, %xmm20
+
+// CHECK: vxorps -2048(%rdx), %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0x62,0x80]
+ vxorps -2048(%rdx), %xmm18, %xmm20
+
+// CHECK: vxorps -2064(%rdx), %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x00,0x57,0xa2,0xf0,0xf7,0xff,0xff]
+ vxorps -2064(%rdx), %xmm18, %xmm20
+
+// CHECK: vxorps 508(%rdx){1to4}, %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0x62,0x7f]
+ vxorps 508(%rdx){1to4}, %xmm18, %xmm20
+
+// CHECK: vxorps 512(%rdx){1to4}, %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0xa2,0x00,0x02,0x00,0x00]
+ vxorps 512(%rdx){1to4}, %xmm18, %xmm20
+
+// CHECK: vxorps -512(%rdx){1to4}, %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0x62,0x80]
+ vxorps -512(%rdx){1to4}, %xmm18, %xmm20
+
+// CHECK: vxorps -516(%rdx){1to4}, %xmm18, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x6c,0x10,0x57,0xa2,0xfc,0xfd,0xff,0xff]
+ vxorps -516(%rdx){1to4}, %xmm18, %xmm20
+
+// CHECK: vxorps %ymm24, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x01,0x5c,0x20,0x57,0xd8]
+ vxorps %ymm24, %ymm20, %ymm27
+
+// CHECK: vxorps %ymm24, %ymm20, %ymm27 {%k2}
+// CHECK: encoding: [0x62,0x01,0x5c,0x22,0x57,0xd8]
+ vxorps %ymm24, %ymm20, %ymm27 {%k2}
+
+// CHECK: vxorps %ymm24, %ymm20, %ymm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0x5c,0xa2,0x57,0xd8]
+ vxorps %ymm24, %ymm20, %ymm27 {%k2} {z}
+
+// CHECK: vxorps (%rcx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x19]
+ vxorps (%rcx), %ymm20, %ymm27
+
+// CHECK: vxorps 291(%rax,%r14,8), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x21,0x5c,0x20,0x57,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vxorps 291(%rax,%r14,8), %ymm20, %ymm27
+
+// CHECK: vxorps (%rcx){1to8}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x19]
+ vxorps (%rcx){1to8}, %ymm20, %ymm27
+
+// CHECK: vxorps 4064(%rdx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x5a,0x7f]
+ vxorps 4064(%rdx), %ymm20, %ymm27
+
+// CHECK: vxorps 4096(%rdx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x9a,0x00,0x10,0x00,0x00]
+ vxorps 4096(%rdx), %ymm20, %ymm27
+
+// CHECK: vxorps -4096(%rdx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x5a,0x80]
+ vxorps -4096(%rdx), %ymm20, %ymm27
+
+// CHECK: vxorps -4128(%rdx), %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x20,0x57,0x9a,0xe0,0xef,0xff,0xff]
+ vxorps -4128(%rdx), %ymm20, %ymm27
+
+// CHECK: vxorps 508(%rdx){1to8}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x5a,0x7f]
+ vxorps 508(%rdx){1to8}, %ymm20, %ymm27
+
+// CHECK: vxorps 512(%rdx){1to8}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x9a,0x00,0x02,0x00,0x00]
+ vxorps 512(%rdx){1to8}, %ymm20, %ymm27
+
+// CHECK: vxorps -512(%rdx){1to8}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x5a,0x80]
+ vxorps -512(%rdx){1to8}, %ymm20, %ymm27
+
+// CHECK: vxorps -516(%rdx){1to8}, %ymm20, %ymm27
+// CHECK: encoding: [0x62,0x61,0x5c,0x30,0x57,0x9a,0xfc,0xfd,0xff,0xff]
+ vxorps -516(%rdx){1to8}, %ymm20, %ymm27
+
+// CHECK: vandpd %xmm27, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0x81,0xb5,0x00,0x54,0xdb]
+ vandpd %xmm27, %xmm25, %xmm19
+
+// CHECK: vandpd %xmm27, %xmm25, %xmm19 {%k6}
+// CHECK: encoding: [0x62,0x81,0xb5,0x06,0x54,0xdb]
+ vandpd %xmm27, %xmm25, %xmm19 {%k6}
+
+// CHECK: vandpd %xmm27, %xmm25, %xmm19 {%k6} {z}
+// CHECK: encoding: [0x62,0x81,0xb5,0x86,0x54,0xdb]
+ vandpd %xmm27, %xmm25, %xmm19 {%k6} {z}
+
+// CHECK: vandpd (%rcx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x19]
+ vandpd (%rcx), %xmm25, %xmm19
+
+// CHECK: vandpd 4660(%rax,%r14,8), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xa1,0xb5,0x00,0x54,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vandpd 4660(%rax,%r14,8), %xmm25, %xmm19
+
+// CHECK: vandpd (%rcx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x19]
+ vandpd (%rcx){1to2}, %xmm25, %xmm19
+
+// CHECK: vandpd 2032(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x5a,0x7f]
+ vandpd 2032(%rdx), %xmm25, %xmm19
+
+// CHECK: vandpd 2048(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x9a,0x00,0x08,0x00,0x00]
+ vandpd 2048(%rdx), %xmm25, %xmm19
+
+// CHECK: vandpd -2048(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x5a,0x80]
+ vandpd -2048(%rdx), %xmm25, %xmm19
+
+// CHECK: vandpd -2064(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x54,0x9a,0xf0,0xf7,0xff,0xff]
+ vandpd -2064(%rdx), %xmm25, %xmm19
+
+// CHECK: vandpd 1016(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x5a,0x7f]
+ vandpd 1016(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vandpd 1024(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x9a,0x00,0x04,0x00,0x00]
+ vandpd 1024(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vandpd -1024(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x5a,0x80]
+ vandpd -1024(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vandpd -1032(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x54,0x9a,0xf8,0xfb,0xff,0xff]
+ vandpd -1032(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vandpd %ymm21, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0x54,0xc5]
+ vandpd %ymm21, %ymm29, %ymm24
+
+// CHECK: vandpd %ymm21, %ymm29, %ymm24 {%k2}
+// CHECK: encoding: [0x62,0x21,0x95,0x22,0x54,0xc5]
+ vandpd %ymm21, %ymm29, %ymm24 {%k2}
+
+// CHECK: vandpd %ymm21, %ymm29, %ymm24 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0x95,0xa2,0x54,0xc5]
+ vandpd %ymm21, %ymm29, %ymm24 {%k2} {z}
+
+// CHECK: vandpd (%rcx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x01]
+ vandpd (%rcx), %ymm29, %ymm24
+
+// CHECK: vandpd 4660(%rax,%r14,8), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0x54,0x84,0xf0,0x34,0x12,0x00,0x00]
+ vandpd 4660(%rax,%r14,8), %ymm29, %ymm24
+
+// CHECK: vandpd (%rcx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x01]
+ vandpd (%rcx){1to4}, %ymm29, %ymm24
+
+// CHECK: vandpd 4064(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x42,0x7f]
+ vandpd 4064(%rdx), %ymm29, %ymm24
+
+// CHECK: vandpd 4096(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x82,0x00,0x10,0x00,0x00]
+ vandpd 4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vandpd -4096(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x42,0x80]
+ vandpd -4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vandpd -4128(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x54,0x82,0xe0,0xef,0xff,0xff]
+ vandpd -4128(%rdx), %ymm29, %ymm24
+
+// CHECK: vandpd 1016(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x42,0x7f]
+ vandpd 1016(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vandpd 1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x82,0x00,0x04,0x00,0x00]
+ vandpd 1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vandpd -1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x42,0x80]
+ vandpd -1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vandpd -1032(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x54,0x82,0xf8,0xfb,0xff,0xff]
+ vandpd -1032(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vandps %xmm17, %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x34,0x00,0x54,0xf1]
+ vandps %xmm17, %xmm25, %xmm22
+
+// CHECK: vandps %xmm17, %xmm25, %xmm22 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x34,0x03,0x54,0xf1]
+ vandps %xmm17, %xmm25, %xmm22 {%k3}
+
+// CHECK: vandps %xmm17, %xmm25, %xmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x34,0x83,0x54,0xf1]
+ vandps %xmm17, %xmm25, %xmm22 {%k3} {z}
+
+// CHECK: vandps (%rcx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0x31]
+ vandps (%rcx), %xmm25, %xmm22
+
+// CHECK: vandps 4660(%rax,%r14,8), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x34,0x00,0x54,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vandps 4660(%rax,%r14,8), %xmm25, %xmm22
+
+// CHECK: vandps (%rcx){1to4}, %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0x31]
+ vandps (%rcx){1to4}, %xmm25, %xmm22
+
+// CHECK: vandps 2032(%rdx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0x72,0x7f]
+ vandps 2032(%rdx), %xmm25, %xmm22
+
+// CHECK: vandps 2048(%rdx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0xb2,0x00,0x08,0x00,0x00]
+ vandps 2048(%rdx), %xmm25, %xmm22
+
+// CHECK: vandps -2048(%rdx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0x72,0x80]
+ vandps -2048(%rdx), %xmm25, %xmm22
+
+// CHECK: vandps -2064(%rdx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x00,0x54,0xb2,0xf0,0xf7,0xff,0xff]
+ vandps -2064(%rdx), %xmm25, %xmm22
+
+// CHECK: vandps 508(%rdx){1to4}, %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0x72,0x7f]
+ vandps 508(%rdx){1to4}, %xmm25, %xmm22
+
+// CHECK: vandps 512(%rdx){1to4}, %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0xb2,0x00,0x02,0x00,0x00]
+ vandps 512(%rdx){1to4}, %xmm25, %xmm22
+
+// CHECK: vandps -512(%rdx){1to4}, %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0x72,0x80]
+ vandps -512(%rdx){1to4}, %xmm25, %xmm22
+
+// CHECK: vandps -516(%rdx){1to4}, %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x10,0x54,0xb2,0xfc,0xfd,0xff,0xff]
+ vandps -516(%rdx){1to4}, %xmm25, %xmm22
+
+// CHECK: vandps %ymm18, %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x34,0x20,0x54,0xf2]
+ vandps %ymm18, %ymm25, %ymm22
+
+// CHECK: vandps %ymm18, %ymm25, %ymm22 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x34,0x21,0x54,0xf2]
+ vandps %ymm18, %ymm25, %ymm22 {%k1}
+
+// CHECK: vandps %ymm18, %ymm25, %ymm22 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x34,0xa1,0x54,0xf2]
+ vandps %ymm18, %ymm25, %ymm22 {%k1} {z}
+
+// CHECK: vandps (%rcx), %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0x31]
+ vandps (%rcx), %ymm25, %ymm22
+
+// CHECK: vandps 4660(%rax,%r14,8), %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x34,0x20,0x54,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vandps 4660(%rax,%r14,8), %ymm25, %ymm22
+
+// CHECK: vandps (%rcx){1to8}, %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0x31]
+ vandps (%rcx){1to8}, %ymm25, %ymm22
+
+// CHECK: vandps 4064(%rdx), %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0x72,0x7f]
+ vandps 4064(%rdx), %ymm25, %ymm22
+
+// CHECK: vandps 4096(%rdx), %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0xb2,0x00,0x10,0x00,0x00]
+ vandps 4096(%rdx), %ymm25, %ymm22
+
+// CHECK: vandps -4096(%rdx), %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0x72,0x80]
+ vandps -4096(%rdx), %ymm25, %ymm22
+
+// CHECK: vandps -4128(%rdx), %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x54,0xb2,0xe0,0xef,0xff,0xff]
+ vandps -4128(%rdx), %ymm25, %ymm22
+
+// CHECK: vandps 508(%rdx){1to8}, %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0x72,0x7f]
+ vandps 508(%rdx){1to8}, %ymm25, %ymm22
+
+// CHECK: vandps 512(%rdx){1to8}, %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0xb2,0x00,0x02,0x00,0x00]
+ vandps 512(%rdx){1to8}, %ymm25, %ymm22
+
+// CHECK: vandps -512(%rdx){1to8}, %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0x72,0x80]
+ vandps -512(%rdx){1to8}, %ymm25, %ymm22
+
+// CHECK: vandps -516(%rdx){1to8}, %ymm25, %ymm22
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x54,0xb2,0xfc,0xfd,0xff,0xff]
+ vandps -516(%rdx){1to8}, %ymm25, %ymm22
+
+// CHECK: vandnpd %xmm23, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xa1,0xed,0x00,0x55,0xdf]
+ vandnpd %xmm23, %xmm18, %xmm19
+
+// CHECK: vandnpd %xmm23, %xmm18, %xmm19 {%k1}
+// CHECK: encoding: [0x62,0xa1,0xed,0x01,0x55,0xdf]
+ vandnpd %xmm23, %xmm18, %xmm19 {%k1}
+
+// CHECK: vandnpd %xmm23, %xmm18, %xmm19 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0xed,0x81,0x55,0xdf]
+ vandnpd %xmm23, %xmm18, %xmm19 {%k1} {z}
+
+// CHECK: vandnpd (%rcx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x19]
+ vandnpd (%rcx), %xmm18, %xmm19
+
+// CHECK: vandnpd 4660(%rax,%r14,8), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xa1,0xed,0x00,0x55,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vandnpd 4660(%rax,%r14,8), %xmm18, %xmm19
+
+// CHECK: vandnpd (%rcx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x19]
+ vandnpd (%rcx){1to2}, %xmm18, %xmm19
+
+// CHECK: vandnpd 2032(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x5a,0x7f]
+ vandnpd 2032(%rdx), %xmm18, %xmm19
+
+// CHECK: vandnpd 2048(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x9a,0x00,0x08,0x00,0x00]
+ vandnpd 2048(%rdx), %xmm18, %xmm19
+
+// CHECK: vandnpd -2048(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x5a,0x80]
+ vandnpd -2048(%rdx), %xmm18, %xmm19
+
+// CHECK: vandnpd -2064(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x55,0x9a,0xf0,0xf7,0xff,0xff]
+ vandnpd -2064(%rdx), %xmm18, %xmm19
+
+// CHECK: vandnpd 1016(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x5a,0x7f]
+ vandnpd 1016(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: vandnpd 1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x9a,0x00,0x04,0x00,0x00]
+ vandnpd 1024(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: vandnpd -1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x5a,0x80]
+ vandnpd -1024(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: vandnpd -1032(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xed,0x10,0x55,0x9a,0xf8,0xfb,0xff,0xff]
+ vandnpd -1032(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: vandnpd %ymm28, %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x01,0x95,0x20,0x55,0xcc]
+ vandnpd %ymm28, %ymm29, %ymm25
+
+// CHECK: vandnpd %ymm28, %ymm29, %ymm25 {%k7}
+// CHECK: encoding: [0x62,0x01,0x95,0x27,0x55,0xcc]
+ vandnpd %ymm28, %ymm29, %ymm25 {%k7}
+
+// CHECK: vandnpd %ymm28, %ymm29, %ymm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x01,0x95,0xa7,0x55,0xcc]
+ vandnpd %ymm28, %ymm29, %ymm25 {%k7} {z}
+
+// CHECK: vandnpd (%rcx), %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x09]
+ vandnpd (%rcx), %ymm29, %ymm25
+
+// CHECK: vandnpd 4660(%rax,%r14,8), %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vandnpd 4660(%rax,%r14,8), %ymm29, %ymm25
+
+// CHECK: vandnpd (%rcx){1to4}, %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x09]
+ vandnpd (%rcx){1to4}, %ymm29, %ymm25
+
+// CHECK: vandnpd 4064(%rdx), %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x4a,0x7f]
+ vandnpd 4064(%rdx), %ymm29, %ymm25
+
+// CHECK: vandnpd 4096(%rdx), %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x8a,0x00,0x10,0x00,0x00]
+ vandnpd 4096(%rdx), %ymm29, %ymm25
+
+// CHECK: vandnpd -4096(%rdx), %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x4a,0x80]
+ vandnpd -4096(%rdx), %ymm29, %ymm25
+
+// CHECK: vandnpd -4128(%rdx), %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0x55,0x8a,0xe0,0xef,0xff,0xff]
+ vandnpd -4128(%rdx), %ymm29, %ymm25
+
+// CHECK: vandnpd 1016(%rdx){1to4}, %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x4a,0x7f]
+ vandnpd 1016(%rdx){1to4}, %ymm29, %ymm25
+
+// CHECK: vandnpd 1024(%rdx){1to4}, %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x8a,0x00,0x04,0x00,0x00]
+ vandnpd 1024(%rdx){1to4}, %ymm29, %ymm25
+
+// CHECK: vandnpd -1024(%rdx){1to4}, %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x4a,0x80]
+ vandnpd -1024(%rdx){1to4}, %ymm29, %ymm25
+
+// CHECK: vandnpd -1032(%rdx){1to4}, %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0x55,0x8a,0xf8,0xfb,0xff,0xff]
+ vandnpd -1032(%rdx){1to4}, %ymm29, %ymm25
+
+// CHECK: vandnps %xmm23, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x21,0x74,0x00,0x55,0xcf]
+ vandnps %xmm23, %xmm17, %xmm25
+
+// CHECK: vandnps %xmm23, %xmm17, %xmm25 {%k5}
+// CHECK: encoding: [0x62,0x21,0x74,0x05,0x55,0xcf]
+ vandnps %xmm23, %xmm17, %xmm25 {%k5}
+
+// CHECK: vandnps %xmm23, %xmm17, %xmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0x74,0x85,0x55,0xcf]
+ vandnps %xmm23, %xmm17, %xmm25 {%k5} {z}
+
+// CHECK: vandnps (%rcx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x09]
+ vandnps (%rcx), %xmm17, %xmm25
+
+// CHECK: vandnps 4660(%rax,%r14,8), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x21,0x74,0x00,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vandnps 4660(%rax,%r14,8), %xmm17, %xmm25
+
+// CHECK: vandnps (%rcx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x09]
+ vandnps (%rcx){1to4}, %xmm17, %xmm25
+
+// CHECK: vandnps 2032(%rdx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x4a,0x7f]
+ vandnps 2032(%rdx), %xmm17, %xmm25
+
+// CHECK: vandnps 2048(%rdx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x8a,0x00,0x08,0x00,0x00]
+ vandnps 2048(%rdx), %xmm17, %xmm25
+
+// CHECK: vandnps -2048(%rdx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x4a,0x80]
+ vandnps -2048(%rdx), %xmm17, %xmm25
+
+// CHECK: vandnps -2064(%rdx), %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x00,0x55,0x8a,0xf0,0xf7,0xff,0xff]
+ vandnps -2064(%rdx), %xmm17, %xmm25
+
+// CHECK: vandnps 508(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x4a,0x7f]
+ vandnps 508(%rdx){1to4}, %xmm17, %xmm25
+
+// CHECK: vandnps 512(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x8a,0x00,0x02,0x00,0x00]
+ vandnps 512(%rdx){1to4}, %xmm17, %xmm25
+
+// CHECK: vandnps -512(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x4a,0x80]
+ vandnps -512(%rdx){1to4}, %xmm17, %xmm25
+
+// CHECK: vandnps -516(%rdx){1to4}, %xmm17, %xmm25
+// CHECK: encoding: [0x62,0x61,0x74,0x10,0x55,0x8a,0xfc,0xfd,0xff,0xff]
+ vandnps -516(%rdx){1to4}, %xmm17, %xmm25
+
+// CHECK: vandnps %ymm23, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xa1,0x64,0x20,0x55,0xd7]
+ vandnps %ymm23, %ymm19, %ymm18
+
+// CHECK: vandnps %ymm23, %ymm19, %ymm18 {%k6}
+// CHECK: encoding: [0x62,0xa1,0x64,0x26,0x55,0xd7]
+ vandnps %ymm23, %ymm19, %ymm18 {%k6}
+
+// CHECK: vandnps %ymm23, %ymm19, %ymm18 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0x64,0xa6,0x55,0xd7]
+ vandnps %ymm23, %ymm19, %ymm18 {%k6} {z}
+
+// CHECK: vandnps (%rcx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x11]
+ vandnps (%rcx), %ymm19, %ymm18
+
+// CHECK: vandnps 4660(%rax,%r14,8), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xa1,0x64,0x20,0x55,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vandnps 4660(%rax,%r14,8), %ymm19, %ymm18
+
+// CHECK: vandnps (%rcx){1to8}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x11]
+ vandnps (%rcx){1to8}, %ymm19, %ymm18
+
+// CHECK: vandnps 4064(%rdx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x52,0x7f]
+ vandnps 4064(%rdx), %ymm19, %ymm18
+
+// CHECK: vandnps 4096(%rdx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x92,0x00,0x10,0x00,0x00]
+ vandnps 4096(%rdx), %ymm19, %ymm18
+
+// CHECK: vandnps -4096(%rdx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x52,0x80]
+ vandnps -4096(%rdx), %ymm19, %ymm18
+
+// CHECK: vandnps -4128(%rdx), %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x20,0x55,0x92,0xe0,0xef,0xff,0xff]
+ vandnps -4128(%rdx), %ymm19, %ymm18
+
+// CHECK: vandnps 508(%rdx){1to8}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x52,0x7f]
+ vandnps 508(%rdx){1to8}, %ymm19, %ymm18
+
+// CHECK: vandnps 512(%rdx){1to8}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x92,0x00,0x02,0x00,0x00]
+ vandnps 512(%rdx){1to8}, %ymm19, %ymm18
+
+// CHECK: vandnps -512(%rdx){1to8}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x52,0x80]
+ vandnps -512(%rdx){1to8}, %ymm19, %ymm18
+
+// CHECK: vandnps -516(%rdx){1to8}, %ymm19, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x64,0x30,0x55,0x92,0xfc,0xfd,0xff,0xff]
+ vandnps -516(%rdx){1to8}, %ymm19, %ymm18
+
+// CHECK: vorpd %xmm18, %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x21,0x95,0x00,0x56,0xd2]
+ vorpd %xmm18, %xmm29, %xmm26
+
+// CHECK: vorpd %xmm18, %xmm29, %xmm26 {%k2}
+// CHECK: encoding: [0x62,0x21,0x95,0x02,0x56,0xd2]
+ vorpd %xmm18, %xmm29, %xmm26 {%k2}
+
+// CHECK: vorpd %xmm18, %xmm29, %xmm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0x95,0x82,0x56,0xd2]
+ vorpd %xmm18, %xmm29, %xmm26 {%k2} {z}
+
+// CHECK: vorpd (%rcx), %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x11]
+ vorpd (%rcx), %xmm29, %xmm26
+
+// CHECK: vorpd 4660(%rax,%r14,8), %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x21,0x95,0x00,0x56,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vorpd 4660(%rax,%r14,8), %xmm29, %xmm26
+
+// CHECK: vorpd (%rcx){1to2}, %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x11]
+ vorpd (%rcx){1to2}, %xmm29, %xmm26
+
+// CHECK: vorpd 2032(%rdx), %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x52,0x7f]
+ vorpd 2032(%rdx), %xmm29, %xmm26
+
+// CHECK: vorpd 2048(%rdx), %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x92,0x00,0x08,0x00,0x00]
+ vorpd 2048(%rdx), %xmm29, %xmm26
+
+// CHECK: vorpd -2048(%rdx), %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x52,0x80]
+ vorpd -2048(%rdx), %xmm29, %xmm26
+
+// CHECK: vorpd -2064(%rdx), %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x00,0x56,0x92,0xf0,0xf7,0xff,0xff]
+ vorpd -2064(%rdx), %xmm29, %xmm26
+
+// CHECK: vorpd 1016(%rdx){1to2}, %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x52,0x7f]
+ vorpd 1016(%rdx){1to2}, %xmm29, %xmm26
+
+// CHECK: vorpd 1024(%rdx){1to2}, %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x92,0x00,0x04,0x00,0x00]
+ vorpd 1024(%rdx){1to2}, %xmm29, %xmm26
+
+// CHECK: vorpd -1024(%rdx){1to2}, %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x52,0x80]
+ vorpd -1024(%rdx){1to2}, %xmm29, %xmm26
+
+// CHECK: vorpd -1032(%rdx){1to2}, %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x61,0x95,0x10,0x56,0x92,0xf8,0xfb,0xff,0xff]
+ vorpd -1032(%rdx){1to2}, %xmm29, %xmm26
+
+// CHECK: vorpd %ymm22, %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x21,0xe5,0x20,0x56,0xe6]
+ vorpd %ymm22, %ymm19, %ymm28
+
+// CHECK: vorpd %ymm22, %ymm19, %ymm28 {%k1}
+// CHECK: encoding: [0x62,0x21,0xe5,0x21,0x56,0xe6]
+ vorpd %ymm22, %ymm19, %ymm28 {%k1}
+
+// CHECK: vorpd %ymm22, %ymm19, %ymm28 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0xe5,0xa1,0x56,0xe6]
+ vorpd %ymm22, %ymm19, %ymm28 {%k1} {z}
+
+// CHECK: vorpd (%rcx), %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0x21]
+ vorpd (%rcx), %ymm19, %ymm28
+
+// CHECK: vorpd 4660(%rax,%r14,8), %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x21,0xe5,0x20,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vorpd 4660(%rax,%r14,8), %ymm19, %ymm28
+
+// CHECK: vorpd (%rcx){1to4}, %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0x21]
+ vorpd (%rcx){1to4}, %ymm19, %ymm28
+
+// CHECK: vorpd 4064(%rdx), %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0x62,0x7f]
+ vorpd 4064(%rdx), %ymm19, %ymm28
+
+// CHECK: vorpd 4096(%rdx), %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0xa2,0x00,0x10,0x00,0x00]
+ vorpd 4096(%rdx), %ymm19, %ymm28
+
+// CHECK: vorpd -4096(%rdx), %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0x62,0x80]
+ vorpd -4096(%rdx), %ymm19, %ymm28
+
+// CHECK: vorpd -4128(%rdx), %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x20,0x56,0xa2,0xe0,0xef,0xff,0xff]
+ vorpd -4128(%rdx), %ymm19, %ymm28
+
+// CHECK: vorpd 1016(%rdx){1to4}, %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0x62,0x7f]
+ vorpd 1016(%rdx){1to4}, %ymm19, %ymm28
+
+// CHECK: vorpd 1024(%rdx){1to4}, %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0xa2,0x00,0x04,0x00,0x00]
+ vorpd 1024(%rdx){1to4}, %ymm19, %ymm28
+
+// CHECK: vorpd -1024(%rdx){1to4}, %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0x62,0x80]
+ vorpd -1024(%rdx){1to4}, %ymm19, %ymm28
+
+// CHECK: vorpd -1032(%rdx){1to4}, %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x61,0xe5,0x30,0x56,0xa2,0xf8,0xfb,0xff,0xff]
+ vorpd -1032(%rdx){1to4}, %ymm19, %ymm28
+
+// CHECK: vorps %xmm24, %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x01,0x4c,0x00,0x56,0xe0]
+ vorps %xmm24, %xmm22, %xmm28
+
+// CHECK: vorps %xmm24, %xmm22, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x01,0x4c,0x06,0x56,0xe0]
+ vorps %xmm24, %xmm22, %xmm28 {%k6}
+
+// CHECK: vorps %xmm24, %xmm22, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x01,0x4c,0x86,0x56,0xe0]
+ vorps %xmm24, %xmm22, %xmm28 {%k6} {z}
+
+// CHECK: vorps (%rcx), %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0x21]
+ vorps (%rcx), %xmm22, %xmm28
+
+// CHECK: vorps 4660(%rax,%r14,8), %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x21,0x4c,0x00,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vorps 4660(%rax,%r14,8), %xmm22, %xmm28
+
+// CHECK: vorps (%rcx){1to4}, %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0x21]
+ vorps (%rcx){1to4}, %xmm22, %xmm28
+
+// CHECK: vorps 2032(%rdx), %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0x62,0x7f]
+ vorps 2032(%rdx), %xmm22, %xmm28
+
+// CHECK: vorps 2048(%rdx), %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0xa2,0x00,0x08,0x00,0x00]
+ vorps 2048(%rdx), %xmm22, %xmm28
+
+// CHECK: vorps -2048(%rdx), %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0x62,0x80]
+ vorps -2048(%rdx), %xmm22, %xmm28
+
+// CHECK: vorps -2064(%rdx), %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x00,0x56,0xa2,0xf0,0xf7,0xff,0xff]
+ vorps -2064(%rdx), %xmm22, %xmm28
+
+// CHECK: vorps 508(%rdx){1to4}, %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0x62,0x7f]
+ vorps 508(%rdx){1to4}, %xmm22, %xmm28
+
+// CHECK: vorps 512(%rdx){1to4}, %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0xa2,0x00,0x02,0x00,0x00]
+ vorps 512(%rdx){1to4}, %xmm22, %xmm28
+
+// CHECK: vorps -512(%rdx){1to4}, %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0x62,0x80]
+ vorps -512(%rdx){1to4}, %xmm22, %xmm28
+
+// CHECK: vorps -516(%rdx){1to4}, %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4c,0x10,0x56,0xa2,0xfc,0xfd,0xff,0xff]
+ vorps -516(%rdx){1to4}, %xmm22, %xmm28
+
+// CHECK: vorps %ymm25, %ymm24, %ymm20
+// CHECK: encoding: [0x62,0x81,0x3c,0x20,0x56,0xe1]
+ vorps %ymm25, %ymm24, %ymm20
+
+// CHECK: vorps %ymm25, %ymm24, %ymm20 {%k1}
+// CHECK: encoding: [0x62,0x81,0x3c,0x21,0x56,0xe1]
+ vorps %ymm25, %ymm24, %ymm20 {%k1}
+
+// CHECK: vorps %ymm25, %ymm24, %ymm20 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x3c,0xa1,0x56,0xe1]
+ vorps %ymm25, %ymm24, %ymm20 {%k1} {z}
+
+// CHECK: vorps (%rcx), %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0x21]
+ vorps (%rcx), %ymm24, %ymm20
+
+// CHECK: vorps 4660(%rax,%r14,8), %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x3c,0x20,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vorps 4660(%rax,%r14,8), %ymm24, %ymm20
+
+// CHECK: vorps (%rcx){1to8}, %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0x21]
+ vorps (%rcx){1to8}, %ymm24, %ymm20
+
+// CHECK: vorps 4064(%rdx), %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0x62,0x7f]
+ vorps 4064(%rdx), %ymm24, %ymm20
+
+// CHECK: vorps 4096(%rdx), %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0xa2,0x00,0x10,0x00,0x00]
+ vorps 4096(%rdx), %ymm24, %ymm20
+
+// CHECK: vorps -4096(%rdx), %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0x62,0x80]
+ vorps -4096(%rdx), %ymm24, %ymm20
+
+// CHECK: vorps -4128(%rdx), %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x20,0x56,0xa2,0xe0,0xef,0xff,0xff]
+ vorps -4128(%rdx), %ymm24, %ymm20
+
+// CHECK: vorps 508(%rdx){1to8}, %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0x62,0x7f]
+ vorps 508(%rdx){1to8}, %ymm24, %ymm20
+
+// CHECK: vorps 512(%rdx){1to8}, %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0xa2,0x00,0x02,0x00,0x00]
+ vorps 512(%rdx){1to8}, %ymm24, %ymm20
+
+// CHECK: vorps -512(%rdx){1to8}, %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0x62,0x80]
+ vorps -512(%rdx){1to8}, %ymm24, %ymm20
+
+// CHECK: vorps -516(%rdx){1to8}, %ymm24, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x3c,0x30,0x56,0xa2,0xfc,0xfd,0xff,0xff]
+ vorps -516(%rdx){1to8}, %ymm24, %ymm20
+
+// CHECK: vxorpd %xmm18, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0x57,0xf2]
+ vxorpd %xmm18, %xmm21, %xmm22
+
+// CHECK: vxorpd %xmm18, %xmm21, %xmm22 {%k3}
+// CHECK: encoding: [0x62,0xa1,0xd5,0x03,0x57,0xf2]
+ vxorpd %xmm18, %xmm21, %xmm22 {%k3}
+
+// CHECK: vxorpd %xmm18, %xmm21, %xmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0xd5,0x83,0x57,0xf2]
+ vxorpd %xmm18, %xmm21, %xmm22 {%k3} {z}
+
+// CHECK: vxorpd (%rcx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x31]
+ vxorpd (%rcx), %xmm21, %xmm22
+
+// CHECK: vxorpd 4660(%rax,%r14,8), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0x57,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vxorpd 4660(%rax,%r14,8), %xmm21, %xmm22
+
+// CHECK: vxorpd (%rcx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x31]
+ vxorpd (%rcx){1to2}, %xmm21, %xmm22
+
+// CHECK: vxorpd 2032(%rdx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x72,0x7f]
+ vxorpd 2032(%rdx), %xmm21, %xmm22
+
+// CHECK: vxorpd 2048(%rdx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0xb2,0x00,0x08,0x00,0x00]
+ vxorpd 2048(%rdx), %xmm21, %xmm22
+
+// CHECK: vxorpd -2048(%rdx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0x72,0x80]
+ vxorpd -2048(%rdx), %xmm21, %xmm22
+
+// CHECK: vxorpd -2064(%rdx), %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0x57,0xb2,0xf0,0xf7,0xff,0xff]
+ vxorpd -2064(%rdx), %xmm21, %xmm22
+
+// CHECK: vxorpd 1016(%rdx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x72,0x7f]
+ vxorpd 1016(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vxorpd 1024(%rdx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0xb2,0x00,0x04,0x00,0x00]
+ vxorpd 1024(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vxorpd -1024(%rdx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0x72,0x80]
+ vxorpd -1024(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vxorpd -1032(%rdx){1to2}, %xmm21, %xmm22
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0x57,0xb2,0xf8,0xfb,0xff,0xff]
+ vxorpd -1032(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vxorpd %ymm27, %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x01,0xd5,0x20,0x57,0xcb]
+ vxorpd %ymm27, %ymm21, %ymm25
+
+// CHECK: vxorpd %ymm27, %ymm21, %ymm25 {%k7}
+// CHECK: encoding: [0x62,0x01,0xd5,0x27,0x57,0xcb]
+ vxorpd %ymm27, %ymm21, %ymm25 {%k7}
+
+// CHECK: vxorpd %ymm27, %ymm21, %ymm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x01,0xd5,0xa7,0x57,0xcb]
+ vxorpd %ymm27, %ymm21, %ymm25 {%k7} {z}
+
+// CHECK: vxorpd (%rcx), %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x09]
+ vxorpd (%rcx), %ymm21, %ymm25
+
+// CHECK: vxorpd 4660(%rax,%r14,8), %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x21,0xd5,0x20,0x57,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vxorpd 4660(%rax,%r14,8), %ymm21, %ymm25
+
+// CHECK: vxorpd (%rcx){1to4}, %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x09]
+ vxorpd (%rcx){1to4}, %ymm21, %ymm25
+
+// CHECK: vxorpd 4064(%rdx), %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x4a,0x7f]
+ vxorpd 4064(%rdx), %ymm21, %ymm25
+
+// CHECK: vxorpd 4096(%rdx), %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x8a,0x00,0x10,0x00,0x00]
+ vxorpd 4096(%rdx), %ymm21, %ymm25
+
+// CHECK: vxorpd -4096(%rdx), %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x4a,0x80]
+ vxorpd -4096(%rdx), %ymm21, %ymm25
+
+// CHECK: vxorpd -4128(%rdx), %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x20,0x57,0x8a,0xe0,0xef,0xff,0xff]
+ vxorpd -4128(%rdx), %ymm21, %ymm25
+
+// CHECK: vxorpd 1016(%rdx){1to4}, %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x4a,0x7f]
+ vxorpd 1016(%rdx){1to4}, %ymm21, %ymm25
+
+// CHECK: vxorpd 1024(%rdx){1to4}, %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x8a,0x00,0x04,0x00,0x00]
+ vxorpd 1024(%rdx){1to4}, %ymm21, %ymm25
+
+// CHECK: vxorpd -1024(%rdx){1to4}, %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x4a,0x80]
+ vxorpd -1024(%rdx){1to4}, %ymm21, %ymm25
+
+// CHECK: vxorpd -1032(%rdx){1to4}, %ymm21, %ymm25
+// CHECK: encoding: [0x62,0x61,0xd5,0x30,0x57,0x8a,0xf8,0xfb,0xff,0xff]
+ vxorpd -1032(%rdx){1to4}, %ymm21, %ymm25
+
+// CHECK: vxorps %xmm21, %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x54,0x00,0x57,0xcd]
+ vxorps %xmm21, %xmm21, %xmm17
+
+// CHECK: vxorps %xmm21, %xmm21, %xmm17 {%k5}
+// CHECK: encoding: [0x62,0xa1,0x54,0x05,0x57,0xcd]
+ vxorps %xmm21, %xmm21, %xmm17 {%k5}
+
+// CHECK: vxorps %xmm21, %xmm21, %xmm17 {%k5} {z}
+// CHECK: encoding: [0x62,0xa1,0x54,0x85,0x57,0xcd]
+ vxorps %xmm21, %xmm21, %xmm17 {%k5} {z}
+
+// CHECK: vxorps (%rcx), %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x09]
+ vxorps (%rcx), %xmm21, %xmm17
+
+// CHECK: vxorps 4660(%rax,%r14,8), %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x54,0x00,0x57,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vxorps 4660(%rax,%r14,8), %xmm21, %xmm17
+
+// CHECK: vxorps (%rcx){1to4}, %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x09]
+ vxorps (%rcx){1to4}, %xmm21, %xmm17
+
+// CHECK: vxorps 2032(%rdx), %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x4a,0x7f]
+ vxorps 2032(%rdx), %xmm21, %xmm17
+
+// CHECK: vxorps 2048(%rdx), %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x8a,0x00,0x08,0x00,0x00]
+ vxorps 2048(%rdx), %xmm21, %xmm17
+
+// CHECK: vxorps -2048(%rdx), %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x4a,0x80]
+ vxorps -2048(%rdx), %xmm21, %xmm17
+
+// CHECK: vxorps -2064(%rdx), %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x00,0x57,0x8a,0xf0,0xf7,0xff,0xff]
+ vxorps -2064(%rdx), %xmm21, %xmm17
+
+// CHECK: vxorps 508(%rdx){1to4}, %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x4a,0x7f]
+ vxorps 508(%rdx){1to4}, %xmm21, %xmm17
+
+// CHECK: vxorps 512(%rdx){1to4}, %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x8a,0x00,0x02,0x00,0x00]
+ vxorps 512(%rdx){1to4}, %xmm21, %xmm17
+
+// CHECK: vxorps -512(%rdx){1to4}, %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x4a,0x80]
+ vxorps -512(%rdx){1to4}, %xmm21, %xmm17
+
+// CHECK: vxorps -516(%rdx){1to4}, %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x54,0x10,0x57,0x8a,0xfc,0xfd,0xff,0xff]
+ vxorps -516(%rdx){1to4}, %xmm21, %xmm17
+
+// CHECK: vxorps %ymm22, %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x21,0x34,0x20,0x57,0xe6]
+ vxorps %ymm22, %ymm25, %ymm28
+
+// CHECK: vxorps %ymm22, %ymm25, %ymm28 {%k3}
+// CHECK: encoding: [0x62,0x21,0x34,0x23,0x57,0xe6]
+ vxorps %ymm22, %ymm25, %ymm28 {%k3}
+
+// CHECK: vxorps %ymm22, %ymm25, %ymm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x21,0x34,0xa3,0x57,0xe6]
+ vxorps %ymm22, %ymm25, %ymm28 {%k3} {z}
+
+// CHECK: vxorps (%rcx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0x21]
+ vxorps (%rcx), %ymm25, %ymm28
+
+// CHECK: vxorps 4660(%rax,%r14,8), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x21,0x34,0x20,0x57,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vxorps 4660(%rax,%r14,8), %ymm25, %ymm28
+
+// CHECK: vxorps (%rcx){1to8}, %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0x21]
+ vxorps (%rcx){1to8}, %ymm25, %ymm28
+
+// CHECK: vxorps 4064(%rdx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0x62,0x7f]
+ vxorps 4064(%rdx), %ymm25, %ymm28
+
+// CHECK: vxorps 4096(%rdx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0xa2,0x00,0x10,0x00,0x00]
+ vxorps 4096(%rdx), %ymm25, %ymm28
+
+// CHECK: vxorps -4096(%rdx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0x62,0x80]
+ vxorps -4096(%rdx), %ymm25, %ymm28
+
+// CHECK: vxorps -4128(%rdx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x20,0x57,0xa2,0xe0,0xef,0xff,0xff]
+ vxorps -4128(%rdx), %ymm25, %ymm28
+
+// CHECK: vxorps 508(%rdx){1to8}, %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0x62,0x7f]
+ vxorps 508(%rdx){1to8}, %ymm25, %ymm28
+
+// CHECK: vxorps 512(%rdx){1to8}, %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0xa2,0x00,0x02,0x00,0x00]
+ vxorps 512(%rdx){1to8}, %ymm25, %ymm28
+
+// CHECK: vxorps -512(%rdx){1to8}, %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0x62,0x80]
+ vxorps -512(%rdx){1to8}, %ymm25, %ymm28
+
+// CHECK: vxorps -516(%rdx){1to8}, %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x34,0x30,0x57,0xa2,0xfc,0xfd,0xff,0xff]
+ vxorps -516(%rdx){1to8}, %ymm25, %ymm28
+
// CHECK: vprorq $123, -1032(%rdx){1to4}, %ymm21
// CHECK: encoding: [0x62,0xf1,0xd5,0x30,0x72,0x82,0xf8,0xfb,0xff,0xff,0x7b]
vprorq $123, -1032(%rdx){1to4}, %ymm21
+
+// CHECK: vpmuludq %xmm18, %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x21,0xcd,0x00,0xf4,0xda]
+ vpmuludq %xmm18, %xmm22, %xmm27
+
+// CHECK: vpmuludq %xmm18, %xmm22, %xmm27 {%k5}
+// CHECK: encoding: [0x62,0x21,0xcd,0x05,0xf4,0xda]
+ vpmuludq %xmm18, %xmm22, %xmm27 {%k5}
+
+// CHECK: vpmuludq %xmm18, %xmm22, %xmm27 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0xcd,0x85,0xf4,0xda]
+ vpmuludq %xmm18, %xmm22, %xmm27 {%k5} {z}
+
+// CHECK: vpmuludq (%rcx), %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x19]
+ vpmuludq (%rcx), %xmm22, %xmm27
+
+// CHECK: vpmuludq 291(%rax,%r14,8), %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x21,0xcd,0x00,0xf4,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmuludq 291(%rax,%r14,8), %xmm22, %xmm27
+
+// CHECK: vpmuludq (%rcx){1to2}, %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x19]
+ vpmuludq (%rcx){1to2}, %xmm22, %xmm27
+
+// CHECK: vpmuludq 2032(%rdx), %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x5a,0x7f]
+ vpmuludq 2032(%rdx), %xmm22, %xmm27
+
+// CHECK: vpmuludq 2048(%rdx), %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x9a,0x00,0x08,0x00,0x00]
+ vpmuludq 2048(%rdx), %xmm22, %xmm27
+
+// CHECK: vpmuludq -2048(%rdx), %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x5a,0x80]
+ vpmuludq -2048(%rdx), %xmm22, %xmm27
+
+// CHECK: vpmuludq -2064(%rdx), %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x9a,0xf0,0xf7,0xff,0xff]
+ vpmuludq -2064(%rdx), %xmm22, %xmm27
+
+// CHECK: vpmuludq 1016(%rdx){1to2}, %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x5a,0x7f]
+ vpmuludq 1016(%rdx){1to2}, %xmm22, %xmm27
+
+// CHECK: vpmuludq 1024(%rdx){1to2}, %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x9a,0x00,0x04,0x00,0x00]
+ vpmuludq 1024(%rdx){1to2}, %xmm22, %xmm27
+
+// CHECK: vpmuludq -1024(%rdx){1to2}, %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x5a,0x80]
+ vpmuludq -1024(%rdx){1to2}, %xmm22, %xmm27
+
+// CHECK: vpmuludq -1032(%rdx){1to2}, %xmm22, %xmm27
+// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x9a,0xf8,0xfb,0xff,0xff]
+ vpmuludq -1032(%rdx){1to2}, %xmm22, %xmm27
+
+// CHECK: vpmuludq %ymm18, %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x21,0xa5,0x20,0xf4,0xf2]
+ vpmuludq %ymm18, %ymm27, %ymm30
+
+// CHECK: vpmuludq %ymm18, %ymm27, %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x21,0xa5,0x27,0xf4,0xf2]
+ vpmuludq %ymm18, %ymm27, %ymm30 {%k7}
+
+// CHECK: vpmuludq %ymm18, %ymm27, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x21,0xa5,0xa7,0xf4,0xf2]
+ vpmuludq %ymm18, %ymm27, %ymm30 {%k7} {z}
+
+// CHECK: vpmuludq (%rcx), %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x31]
+ vpmuludq (%rcx), %ymm27, %ymm30
+
+// CHECK: vpmuludq 291(%rax,%r14,8), %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x21,0xa5,0x20,0xf4,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmuludq 291(%rax,%r14,8), %ymm27, %ymm30
+
+// CHECK: vpmuludq (%rcx){1to4}, %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x31]
+ vpmuludq (%rcx){1to4}, %ymm27, %ymm30
+
+// CHECK: vpmuludq 4064(%rdx), %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x72,0x7f]
+ vpmuludq 4064(%rdx), %ymm27, %ymm30
+
+// CHECK: vpmuludq 4096(%rdx), %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0xb2,0x00,0x10,0x00,0x00]
+ vpmuludq 4096(%rdx), %ymm27, %ymm30
+
+// CHECK: vpmuludq -4096(%rdx), %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x72,0x80]
+ vpmuludq -4096(%rdx), %ymm27, %ymm30
+
+// CHECK: vpmuludq -4128(%rdx), %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0xb2,0xe0,0xef,0xff,0xff]
+ vpmuludq -4128(%rdx), %ymm27, %ymm30
+
+// CHECK: vpmuludq 1016(%rdx){1to4}, %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x72,0x7f]
+ vpmuludq 1016(%rdx){1to4}, %ymm27, %ymm30
+
+// CHECK: vpmuludq 1024(%rdx){1to4}, %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0xb2,0x00,0x04,0x00,0x00]
+ vpmuludq 1024(%rdx){1to4}, %ymm27, %ymm30
+
+// CHECK: vpmuludq -1024(%rdx){1to4}, %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x72,0x80]
+ vpmuludq -1024(%rdx){1to4}, %ymm27, %ymm30
+
+// CHECK: vpmuludq -1032(%rdx){1to4}, %ymm27, %ymm30
+// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0xb2,0xf8,0xfb,0xff,0xff]
+ vpmuludq -1032(%rdx){1to4}, %ymm27, %ymm30
+
+// CHECK: vpmuldq %xmm26, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x02,0xbd,0x00,0x28,0xca]
+ vpmuldq %xmm26, %xmm24, %xmm25
+
+// CHECK: vpmuldq %xmm26, %xmm24, %xmm25 {%k3}
+// CHECK: encoding: [0x62,0x02,0xbd,0x03,0x28,0xca]
+ vpmuldq %xmm26, %xmm24, %xmm25 {%k3}
+
+// CHECK: vpmuldq %xmm26, %xmm24, %xmm25 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0xbd,0x83,0x28,0xca]
+ vpmuldq %xmm26, %xmm24, %xmm25 {%k3} {z}
+
+// CHECK: vpmuldq (%rcx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x09]
+ vpmuldq (%rcx), %xmm24, %xmm25
+
+// CHECK: vpmuldq 291(%rax,%r14,8), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x22,0xbd,0x00,0x28,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmuldq 291(%rax,%r14,8), %xmm24, %xmm25
+
+// CHECK: vpmuldq (%rcx){1to2}, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x09]
+ vpmuldq (%rcx){1to2}, %xmm24, %xmm25
+
+// CHECK: vpmuldq 2032(%rdx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x4a,0x7f]
+ vpmuldq 2032(%rdx), %xmm24, %xmm25
+
+// CHECK: vpmuldq 2048(%rdx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x8a,0x00,0x08,0x00,0x00]
+ vpmuldq 2048(%rdx), %xmm24, %xmm25
+
+// CHECK: vpmuldq -2048(%rdx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x4a,0x80]
+ vpmuldq -2048(%rdx), %xmm24, %xmm25
+
+// CHECK: vpmuldq -2064(%rdx), %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x8a,0xf0,0xf7,0xff,0xff]
+ vpmuldq -2064(%rdx), %xmm24, %xmm25
+
+// CHECK: vpmuldq 1016(%rdx){1to2}, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x4a,0x7f]
+ vpmuldq 1016(%rdx){1to2}, %xmm24, %xmm25
+
+// CHECK: vpmuldq 1024(%rdx){1to2}, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x8a,0x00,0x04,0x00,0x00]
+ vpmuldq 1024(%rdx){1to2}, %xmm24, %xmm25
+
+// CHECK: vpmuldq -1024(%rdx){1to2}, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x4a,0x80]
+ vpmuldq -1024(%rdx){1to2}, %xmm24, %xmm25
+
+// CHECK: vpmuldq -1032(%rdx){1to2}, %xmm24, %xmm25
+// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmuldq -1032(%rdx){1to2}, %xmm24, %xmm25
+
+// CHECK: vpmuldq %ymm26, %ymm18, %ymm19
+// CHECK: encoding: [0x62,0x82,0xed,0x20,0x28,0xda]
+ vpmuldq %ymm26, %ymm18, %ymm19
+
+// CHECK: vpmuldq %ymm26, %ymm18, %ymm19 {%k6}
+// CHECK: encoding: [0x62,0x82,0xed,0x26,0x28,0xda]
+ vpmuldq %ymm26, %ymm18, %ymm19 {%k6}
+
+// CHECK: vpmuldq %ymm26, %ymm18, %ymm19 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0xed,0xa6,0x28,0xda]
+ vpmuldq %ymm26, %ymm18, %ymm19 {%k6} {z}
+
+// CHECK: vpmuldq (%rcx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x19]
+ vpmuldq (%rcx), %ymm18, %ymm19
+
+// CHECK: vpmuldq 291(%rax,%r14,8), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xa2,0xed,0x20,0x28,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmuldq 291(%rax,%r14,8), %ymm18, %ymm19
+
+// CHECK: vpmuldq (%rcx){1to4}, %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x19]
+ vpmuldq (%rcx){1to4}, %ymm18, %ymm19
+
+// CHECK: vpmuldq 4064(%rdx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x5a,0x7f]
+ vpmuldq 4064(%rdx), %ymm18, %ymm19
+
+// CHECK: vpmuldq 4096(%rdx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x9a,0x00,0x10,0x00,0x00]
+ vpmuldq 4096(%rdx), %ymm18, %ymm19
+
+// CHECK: vpmuldq -4096(%rdx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x5a,0x80]
+ vpmuldq -4096(%rdx), %ymm18, %ymm19
+
+// CHECK: vpmuldq -4128(%rdx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x9a,0xe0,0xef,0xff,0xff]
+ vpmuldq -4128(%rdx), %ymm18, %ymm19
+
+// CHECK: vpmuldq 1016(%rdx){1to4}, %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x5a,0x7f]
+ vpmuldq 1016(%rdx){1to4}, %ymm18, %ymm19
+
+// CHECK: vpmuldq 1024(%rdx){1to4}, %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0x00,0x04,0x00,0x00]
+ vpmuldq 1024(%rdx){1to4}, %ymm18, %ymm19
+
+// CHECK: vpmuldq -1024(%rdx){1to4}, %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x5a,0x80]
+ vpmuldq -1024(%rdx){1to4}, %ymm18, %ymm19
+
+// CHECK: vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0xf8,0xfb,0xff,0xff]
+ vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19