(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
- def : Pat<(v8i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
+ def : Pat<(v8i32 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
- def : Pat<(v16i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
+ def : Pat<(v16i16 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
- def : Pat<(v32i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
+ def : Pat<(v32i8 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
; AVX2-NEXT: vmovntdqa (%rdi), %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: test_v8i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovntdqa (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_v8i32:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v8i32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovaps (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm0
+; AVX512-NEXT: retq
%1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
ret <8 x i32> %1
}
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: test_arg_v8i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovntdqa (%rdi), %ymm1
-; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_arg_v8i32:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm1
-; AVX512BW-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_arg_v8i32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpaddd (%rdi), %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_arg_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
%2 = add <8 x i32> %arg, %1
ret <8 x i32> %2