return false;
}
- /// Return true if the node is a math/logic binary operator.
- virtual bool isBinOp(unsigned Opcode) const {
- switch (Opcode) {
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::SHL:
- case ISD::SRL:
- case ISD::SRA:
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SREM:
- case ISD::UREM:
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FDIV:
- case ISD::FREM:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
- case ISD::FMINNUM_IEEE:
- case ISD::FMAXNUM_IEEE:
- case ISD::FMAXIMUM:
- case ISD::FMINIMUM:
- return true;
- default:
- return false;
- }
- }
-
/// Returns true if the opcode is a commutative binary operation.
virtual bool isCommutativeBinOp(unsigned Opcode) const {
// FIXME: This should get its info from the td file.
case ISD::UADDSAT:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
return true;
default:
return false;
}
}
+ /// Return true if the node is a math/logic binary operator.
+ virtual bool isBinOp(unsigned Opcode) const {
+ // A commutative binop must be a binop.
+ if (isCommutativeBinOp(Opcode))
+ return true;
+ // These are non-commutative binops.
+ switch (Opcode) {
+ case ISD::SUB:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ return true;
+ default:
+ return false;
+ }
+ }
+
/// Return true if it's free to truncate a value of type FromTy to type
/// ToTy. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
/// by referencing its sub-register AX.
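
The header change above makes isCommutativeBinOp() the single list of commutative opcodes and lets isBinOp() delegate to it, so newly added commutative opcodes (such as the ISD::FMINNUM_IEEE / ISD::FMAXNUM_IEEE cases added above) are recognized as binary operators without maintaining a second switch. The following is a minimal, self-contained C++ sketch of that delegation pattern; the Opcode enum and the MiniLowering class are illustrative stand-ins, not LLVM's ISD namespace or the TargetLoweringBase API.

#include <cassert>

namespace sketch {

enum Opcode { ADD, MUL, AND, FMAXNUM, SUB, SHL, SDIV, FDIV, BITCAST };

struct MiniLowering {
  // Single source of truth for commutativity.
  virtual bool isCommutativeBinOp(unsigned Op) const {
    switch (Op) {
    case ADD:
    case MUL:
    case AND:
    case FMAXNUM:
      return true;
    default:
      return false;
    }
  }

  // A commutative binop is always a binop, so only the non-commutative
  // opcodes are listed here; new commutative opcodes are picked up
  // automatically through the delegation above.
  virtual bool isBinOp(unsigned Op) const {
    if (isCommutativeBinOp(Op))
      return true;
    switch (Op) {
    case SUB:
    case SHL:
    case SDIV:
    case FDIV:
      return true;
    default:
      return false;
    }
  }

  virtual ~MiniLowering() = default;
};

} // namespace sketch

int main() {
  sketch::MiniLowering TLI;
  assert(TLI.isBinOp(sketch::ADD));      // commutative, therefore a binop
  assert(TLI.isBinOp(sketch::SUB));      // non-commutative binop
  assert(!TLI.isBinOp(sketch::BITCAST)); // not a binary operator
  return 0;
}

The test diffs that follow are consistent with DAG combines consulting a hook like isBinOp() to narrow binary ops fed by subvector extracts: the reduction sequences now operate on xmm registers after vextracti128/vextracti64x4 instead of staying in ymm/zmm, though the combine itself is not part of this excerpt.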
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v8i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> %a0)
ret i64 %1
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> %a0)
ret i64 %1
}
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v8i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> %a0)
ret i64 %1
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> %a0)
ret i64 %1
}
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v8i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> %a0)
ret i64 %1
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> %a0)
ret i64 %1
}
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v8i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> %a0)
ret i64 %1
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> %a0)
ret i64 %1
}
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v8i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> %a0)
ret i64 %1
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> %a0)
ret i64 %1
}
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v8i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> %a0)
ret i64 %1
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> %a0)
ret i64 %1
}
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v8i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> %a0)
ret i64 %1
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> %a0)
ret i64 %1
}
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v8i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> %a0)
ret i64 %1
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> %a0)
ret i64 %1
}
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq