return false;
}
+ /// Try to convert an extract element of a vector binary operation into an
+ /// extract element followed by a scalar operation.
+ virtual bool shouldScalarizeBinop(SDValue VecOp) const {
+ return false;
+ }
+
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
// even if the vector itself has multiple uses.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
-static bool isAnyConstantBuildVector(const SDNode *N) {
- return ISD::isBuildVectorOfConstantSDNodes(N) ||
- ISD::isBuildVectorOfConstantFPSDNodes(N);
+static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ return isConstantOrConstantVector(V, NoOpaques) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
return SDValue(EVE, 0);
}
+/// Transform a vector binary operation into a scalar binary operation by moving
+/// the math/logic after an extract element of a vector.
+static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
+ bool LegalOperations) {
+ SDValue Vec = ExtElt->getOperand(0);
+ SDValue Index = ExtElt->getOperand(1);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+ return SDValue();
+
+ // Targets may want to avoid this to prevent an expensive register transfer.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldScalarizeBinop(Vec))
+ return SDValue();
+
+ // Extracting an element of a vector constant is constant-folded, so this
+ // transform is just replacing a vector op with a scalar op while moving the
+ // extract.
+ SDValue Op0 = Vec.getOperand(0);
+ SDValue Op1 = Vec.getOperand(1);
+ if (isAnyConstantBuildVector(Op0, true) ||
+ isAnyConstantBuildVector(Op1, true)) {
+ // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
+ // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
+ SDLoc DL(ExtElt);
+ EVT VT = ExtElt->getValueType(0);
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
+ return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
+ }
+
+ return SDValue();
+}
+
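A minimal IR-level sketch of the combine (function names invented, not part of the patch; compare the ext_fadd_v4f32 test further down): extracting lane 2 from the constant operand constant-folds to 3.0, so the vector fadd is replaced by a scalar fadd performed after the extract.

define float @sketch_before(<4 x float> %x) {
  %bo = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}
; after the combine, the DAG is equivalent to:
define float @sketch_after(<4 x float> %x) {
  %ext = extractelement <4 x float> %x, i32 2
  %res = fadd float %ext, 3.0
  ret float %res
}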
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue VecOp = N->getOperand(0);
SDValue Index = N->getOperand(1);
}
}
+ if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+ return BO;
+
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
// we may introduce new vector instructions which are not backed by TD
if (!N1->hasOneUse())
return SDValue();
- bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
- bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+ bool N0AnyConst = isAnyConstantBuildVector(N0);
+ bool N1AnyConst = isAnyConstantBuildVector(N1);
if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
return SDValue();
if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
return (Index % ResVT.getVectorNumElements()) == 0;
}
+bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+ // If the vector op is not supported, try to convert to scalar.
+ EVT VecVT = VecOp.getValueType();
+ if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
+ return true;
+
+ // If the vector op is supported, but the scalar op is not, the transform may
+ // not be worthwhile.
+ EVT ScalarVT = VecVT.getScalarType();
+ return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
+}
+
bool X86TargetLowering::isCheapToSpeculateCttz() const {
// Speculate cttz only if we can directly use TZCNT.
return Subtarget.hasBMI();
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
+ /// Scalar ops always have equal or better analysis/performance/power than
+ /// the vector equivalent, so this always makes sense if the scalar op is
+ /// supported.
+ bool shouldScalarizeBinop(SDValue) const override;
+
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
unsigned AddrSpace) const override {
// If we can replace more than 2 scalar stores, there will be a reduction
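The remaining hunks are regenerated test expectations showing the combine firing. In the first one, a vector 'and' whose only use is an element extract is now done as a scalar 'andb' on the extracted byte. A minimal sketch of that shape, with invented names and illustrative types (the 95 mask value for the extracted lane is taken from the asm below):

define i8 @sketch_and_extract(<4 x i8> %v) {
  %m = and <4 x i8> %v, <i8 95, i8 95, i8 95, i8 95>
  %e = extractelement <4 x i8> %m, i32 2
  ret i8 %e
}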
; CHECK: # %bb.0:
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: pextrw $4, %xmm0, %eax
+; CHECK-NEXT: pextrw $2, %xmm0, %eax
+; CHECK-NEXT: andb $95, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%Vp = bitcast <4 x i8>* %V to <3 x i8>*
define float @ext_fadd_v4f32(<4 x float> %x) {
; CHECK-LABEL: ext_fadd_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: addss {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%bo = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
%ext = extractelement <4 x float> %bo, i32 2
define float @ext_fsub_v4f32(<4 x float> %x) {
; CHECK-LABEL: ext_fsub_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = <u,2.0E+0,u,u>
-; CHECK-NEXT: subps %xmm0, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: subss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%bo = fsub <4 x float> <float 1.0, float 2.0, float 3.0, float 42.0>, %x
define float @ext_fmul_v4f32(<4 x float> %x) {
; CHECK-LABEL: ext_fmul_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%bo = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
%ext = extractelement <4 x float> %bo, i32 3
ret float %ext
}
+; TODO: X / 1.0 --> X
+
define float @ext_fdiv_v4f32(<4 x float> %x) {
; CHECK-LABEL: ext_fdiv_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: divps %xmm1, %xmm0
+; CHECK-NEXT: divss {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%bo = fdiv <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
%ext = extractelement <4 x float> %bo, i32 0
define float @ext_fdiv_v4f32_constant_op0(<4 x float> %x) {
; CHECK-LABEL: ext_fdiv_v4f32_constant_op0:
; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = <u,2.0E+0,u,u>
-; CHECK-NEXT: divps %xmm0, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: divss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%bo = fdiv <4 x float> <float 1.0, float 2.0, float 3.0, float 42.0>, %x
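The horizontal-reduction diffs below all follow one pattern: the signed/unsigned min/max sequences XOR the input with a splat constant (or with all-ones), run phminposuw, and then undo the XOR. Because the undo XOR only feeds the lane-0 extract, the combine moves it after the extract, where it becomes a scalar xorl/xorb (or notl/notb) on the GPR result. A minimal sketch of the tail of those sequences, with invented names (the phminposuw result is modeled here as a plain argument):

define i16 @sketch_reduce_tail(<8 x i16> %minpos) {
  %undo = xor <8 x i16> %minpos, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
  %r = extractelement <8 x i16> %undo, i32 0
  ret i16 %r
}
; with the combine: extract lane 0 first, then xor i16 %lane, 32767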
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $127, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $127, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: xorb $127, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: xorb $127, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: xorb $127, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: xorb $127, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: xorb $127, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1
; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1
; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1
; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: xorb $127, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: xorb $127, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1
; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: xorb $127, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: xorb $127, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: xorb $127, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $-128, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $-128, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: xorb $-128, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: xorb $-128, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: xorb $-128, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: xorb $-128, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: xorb $-128, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE42-NEXT: pminsw %xmm3, %xmm1
; X86-SSE42-NEXT: pminsw %xmm2, %xmm0
; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-NEXT: pminsw %xmm3, %xmm1
; X64-SSE42-NEXT: pminsw %xmm2, %xmm0
; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: xorb $-128, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: xorb $-128, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: xorb $-128, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: xorb $-128, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: xorb $-128, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: notl %eax
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: retq
;
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: retq
;
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor %xmm0, %xmm1
+; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE42-NEXT: psrlw $8, %xmm0
+; X86-SSE42-NEXT: pminub %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: notb %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor %xmm0, %xmm1
+; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
+; X64-SSE42-NEXT: psrlw $8, %xmm0
+; X64-SSE42-NEXT: pminub %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: retq
;
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: retq
;
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: notl %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: notl %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor %xmm0, %xmm1
+; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE42-NEXT: psrlw $8, %xmm0
+; X86-SSE42-NEXT: pminub %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: notb %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: notb %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor %xmm0, %xmm1
+; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
+; X64-SSE42-NEXT: psrlw $8, %xmm0
+; X64-SSE42-NEXT: pminub %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-NEXT: pmaxuw %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: notl %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: notl %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-NEXT: pmaxuw %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE42-NEXT: pmaxub %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor %xmm0, %xmm1
+; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE42-NEXT: psrlw $8, %xmm0
+; X86-SSE42-NEXT: pminub %xmm1, %xmm0
+; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: notb %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: notb %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
; X64-SSE42-NEXT: pmaxub %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor %xmm0, %xmm1
+; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
+; X64-SSE42-NEXT: psrlw $8, %xmm0
+; X64-SSE42-NEXT: pminub %xmm1, %xmm0
+; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
+; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
+; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32: # %bb.0:
-; X32-NEXT: movl $15, %eax
-; X32-NEXT: vmovd %eax, %xmm1
-; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
-; X32-NEXT: vpextrw $0, %xmm0, %eax
+; X32-NEXT: vmovd %xmm0, %eax
+; X32-NEXT: andl $15, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64: # %bb.0:
-; X64-NEXT: movl $15, %eax
-; X64-NEXT: vmovd %eax, %xmm1
-; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
-; X64-NEXT: vpextrw $0, %xmm0, %eax
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: andl $15, %eax
; X64-NEXT: retq
%1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%2 = extractelement <8 x i16> %1, i32 0
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64: # %bb.0:
-; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT: vmovq %xmm0, %rax
+; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = and <2 x i64> %a0, <i64 65535, i64 -1>
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64: # %bb.0:
-; X64-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
+; X64-NEXT: shrq $32, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 32, i64 32>
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64: # %bb.0:
-; X64-NEXT: vpsrlq $32, %xmm0, %xmm0
-; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483648,1]
-; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X64-NEXT: shrq $32, %rax
+; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 32, i64 63>
%2 = extractelement <2 x i64> %1, i32 0
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64: # %bb.0:
-; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
-; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8]
-; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
-; X64-NEXT: vpsllq $20, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X64-NEXT: sarq $61, %rax
+; X64-NEXT: shll $20, %eax
+; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 61, i64 60>
%2 = shl <2 x i64> %1, <i64 20, i64 16>
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64: # %bb.0:
; X64-NEXT: sarq $30, %rdi
-; X64-NEXT: vmovq %rdi, %xmm0
-; X64-NEXT: vpsrlq $3, %xmm0, %xmm0
-; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
+; X64-NEXT: shrq $3, %rdi
+; X64-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
; X64-NEXT: retq
%1 = ashr i64 %a0, 30
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
; CHECK: # %bb.0:
; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
-; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: retq
%1 = fadd <2 x double> %a, <double 0x4338000000000000, double 0x4338000000000000>
; SSE2-LABEL: test_eq_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrd $1, %xmm1, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
; SSE2-LABEL: test_ge_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ge_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrd $1, %xmm1, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
; SSE2-LABEL: test_eq_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrd $1, %xmm1, %eax
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
; SSE2-LABEL: test_le_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_le_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrd $1, %xmm1, %eax
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movdqa (%eax), %xmm5
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: movdqa (%ecx), %xmm3
+; X86-SSE-NEXT: movdqa (%ecx), %xmm2
; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6
; X86-SSE-NEXT: pxor %xmm0, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
-; X86-SSE-NEXT: movd %xmm2, %eax
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm6[2,3,0,1]
-; X86-SSE-NEXT: movd %xmm2, %esi
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
+; X86-SSE-NEXT: movd %xmm3, %eax
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
+; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
-; X86-SSE-NEXT: movd %edx, %xmm2
+; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X86-SSE-NEXT: movd %xmm5, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm5
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
-; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %eax
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm6
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %eax
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; X86-SSE-NEXT: movd %xmm4, %eax
-; X86-SSE-NEXT: movd %xmm3, %esi
+; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %edx, %xmm4
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm3, %esi
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
-; X86-SSE-NEXT: movd %edx, %xmm3
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X86-SSE-NEXT: movd %edx, %xmm2
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm6[0,0]
+; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0]
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl 32(%ecx)
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
-; X86-SSE-NEXT: movd %edx, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,2,2,3]
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007
-; X86-SSE-NEXT: movd %eax, %xmm2
-; X86-SSE-NEXT: pmuludq %xmm0, %xmm2
-; X86-SSE-NEXT: movd %xmm2, (%eax)
-; X86-SSE-NEXT: movdqa %xmm1, (%eax)
+; X86-SSE-NEXT: movdqa %xmm0, (%eax)
; X86-SSE-NEXT: movdqa %xmm4, (%eax)
+; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
+; X86-SSE-NEXT: movl %eax, (%eax)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X86-AVX1-NEXT: vpinsrd $1, (%esp), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
-; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Folded Reload
-; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8199,8199,8199,8199]
-; X86-AVX1-NEXT: vpmulld %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-AVX1-NEXT: # imm = 0x2007
+; X86-AVX1-NEXT: movl %eax, (%eax)
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
+; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X86-AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm1
-; X86-AVX1-NEXT: vmovd %xmm1, (%eax)
; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX1-NEXT: addl $16, %esp
; X86-AVX1-NEXT: popl %esi
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl 32(%esi)
-; X86-AVX2-NEXT: vmovd %edx, %xmm0
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
-; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
-; X86-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovd %xmm0, (%eax)
-; X86-AVX2-NEXT: vmovdqa %ymm1, (%eax)
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
+; X86-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
+; X86-AVX2-NEXT: movl %eax, (%eax)
+; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax)
; X86-AVX2-NEXT: popl %esi
; X86-AVX2-NEXT: popl %edi
; X86-AVX2-NEXT: vzeroupper
; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6
; X64-SSE-NEXT: pxor %xmm0, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-SSE-NEXT: movdqa %xmm5, %xmm4
-; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X64-SSE-NEXT: movdqa %xmm5, %xmm3
+; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm8
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
-; X64-SSE-NEXT: movd %xmm3, %eax
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
-; X64-SSE-NEXT: movd %xmm3, %ecx
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
+; X64-SSE-NEXT: movd %xmm4, %eax
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1]
+; X64-SSE-NEXT: movd %xmm4, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm7
; X64-SSE-NEXT: movd %xmm6, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
-; X64-SSE-NEXT: movd %edx, %xmm3
+; X64-SSE-NEXT: movd %edx, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X64-SSE-NEXT: movd %xmm5, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm5
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
-; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
+; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
; X64-SSE-NEXT: movd %xmm6, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X64-SSE-NEXT: movd %xmm6, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm6
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
; X64-SSE-NEXT: movd %xmm7, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X64-SSE-NEXT: movd %xmm7, %ecx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm7
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
-; X64-SSE-NEXT: movd %xmm4, %eax
+; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: movd %xmm2, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
-; X64-SSE-NEXT: movd %xmm4, %eax
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X64-SSE-NEXT: movd %xmm2, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl 32(%rsi)
-; X64-SSE-NEXT: movd %edx, %xmm1
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8199,8199,8199,8199]
-; X64-SSE-NEXT: pmuludq %xmm4, %xmm0
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
-; X64-SSE-NEXT: pmuludq %xmm4, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-SSE-NEXT: pmuludq %xmm4, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[0,0]
-; X64-SSE-NEXT: pmuludq %xmm4, %xmm5
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007
-; X64-SSE-NEXT: movd %eax, %xmm3
-; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE-NEXT: movd %xmm3, (%rax)
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm5
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
+; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
+; X64-SSE-NEXT: movl %eax, (%rax)
; X64-SSE-NEXT: movdqa %xmm2, (%rax)
; X64-SSE-NEXT: movdqa %xmm0, (%rax)
; X64-SSE-NEXT: retq
; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2
-; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm2
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
-; X64-AVX1-NEXT: vmovd %r8d, %xmm2
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT: vmovd %xmm1, (%rax)
+; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X64-AVX1-NEXT: imull $8199, %r8d, %eax # imm = 0x2007
+; X64-AVX1-NEXT: movl %eax, (%rax)
; X64-AVX1-NEXT: vmovaps %ymm0, (%rax)
; X64-AVX1-NEXT: popq %rbx
; X64-AVX1-NEXT: popq %rbp
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl 32(%rsi)
-; X64-AVX2-NEXT: vmovd %edx, %xmm0
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
-; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
-; X64-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovd %xmm0, (%rax)
-; X64-AVX2-NEXT: vmovdqa %ymm1, (%rax)
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
+; X64-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
+; X64-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
+; X64-AVX2-NEXT: movl %eax, (%rax)
+; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
%a0 = load <9 x i16>, <9 x i16>* %p0, align 64
; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: pslld %xmm4, %xmm5
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
-; SSE2-NEXT: psubd %xmm2, %xmm4
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
-; SSE2-NEXT: psrld %xmm3, %xmm1
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: movl $32, %ecx
+; SSE2-NEXT: subl %eax, %ecx
+; SSE2-NEXT: movd %ecx, %xmm4
+; SSE2-NEXT: psrld %xmm4, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: pslld %xmm4, %xmm5
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
-; X32-SSE-NEXT: psubd %xmm2, %xmm4
-; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
-; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
-; X32-SSE-NEXT: psrld %xmm3, %xmm1
+; X32-SSE-NEXT: movd %xmm2, %eax
+; X32-SSE-NEXT: movl $32, %ecx
+; X32-SSE-NEXT: subl %eax, %ecx
+; X32-SSE-NEXT: movd %ecx, %xmm4
+; X32-SSE-NEXT: psrld %xmm4, %xmm1
; X32-SSE-NEXT: por %xmm5, %xmm1
+; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: pand %xmm2, %xmm0
; X32-SSE-NEXT: pandn %xmm1, %xmm2
; X32-SSE-NEXT: por %xmm2, %xmm0
define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v4i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: xorps %xmm3, %xmm3
-; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: pslld %xmm3, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
-; SSE2-NEXT: psubd %xmm1, %xmm3
-; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
-; SSE2-NEXT: psrld %xmm2, %xmm0
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pslld %xmm1, %xmm2
+; SSE2-NEXT: movl $32, %ecx
+; SSE2-NEXT: subl %eax, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: psrld %xmm1, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
;
; X32-SSE-LABEL: splatvar_funnnel_v4i32:
; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: xorps %xmm2, %xmm2
-; X32-SSE-NEXT: xorps %xmm3, %xmm3
-; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm4
-; X32-SSE-NEXT: pslld %xmm3, %xmm4
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
-; X32-SSE-NEXT: psubd %xmm1, %xmm3
-; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
-; X32-SSE-NEXT: psrld %xmm2, %xmm0
-; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $31, %eax
+; X32-SSE-NEXT: movd %eax, %xmm1
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: pslld %xmm1, %xmm2
+; X32-SSE-NEXT: movl $32, %ecx
+; X32-SSE-NEXT: subl %eax, %ecx
+; X32-SSE-NEXT: movd %ecx, %xmm1
+; X32-SSE-NEXT: psrld %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat)
; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: psrld %xmm4, %xmm5
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
-; SSE2-NEXT: psubd %xmm2, %xmm4
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
-; SSE2-NEXT: pslld %xmm3, %xmm0
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: movl $32, %ecx
+; SSE2-NEXT: subl %eax, %ecx
+; SSE2-NEXT: movd %ecx, %xmm4
+; SSE2-NEXT: pslld %xmm4, %xmm0
; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; X32-SSE-NEXT: movdqa %xmm1, %xmm5
; X32-SSE-NEXT: psrld %xmm4, %xmm5
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
-; X32-SSE-NEXT: psubd %xmm2, %xmm4
-; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
-; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
-; X32-SSE-NEXT: pslld %xmm3, %xmm0
+; X32-SSE-NEXT: movd %xmm2, %eax
+; X32-SSE-NEXT: movl $32, %ecx
+; X32-SSE-NEXT: subl %eax, %ecx
+; X32-SSE-NEXT: movd %ecx, %xmm4
+; X32-SSE-NEXT: pslld %xmm4, %xmm0
; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: pandn %xmm0, %xmm2
; X32-SSE-NEXT: por %xmm1, %xmm2
define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v4i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm3, %xmm3
-; SSE2-NEXT: psubd %xmm1, %xmm3
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm3
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: pslld %xmm1, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32,32,32,32]
-; SSE2-NEXT: psubd %xmm3, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; SSE2-NEXT: psrld %xmm2, %xmm0
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: andl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pslld %xmm1, %xmm2
+; SSE2-NEXT: movl $32, %ecx
+; SSE2-NEXT: subl %eax, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: psrld %xmm1, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
;
; X32-SSE-LABEL: splatvar_funnnel_v4i32:
; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT: xorps %xmm2, %xmm2
-; X32-SSE-NEXT: pxor %xmm3, %xmm3
-; X32-SSE-NEXT: psubd %xmm1, %xmm3
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm3
-; X32-SSE-NEXT: pxor %xmm1, %xmm1
-; X32-SSE-NEXT: movss {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm4
-; X32-SSE-NEXT: pslld %xmm1, %xmm4
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [32,32,32,32]
-; X32-SSE-NEXT: psubd %xmm3, %xmm1
-; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT: psrld %xmm2, %xmm0
-; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: negl %eax
+; X32-SSE-NEXT: andl $31, %eax
+; X32-SSE-NEXT: movd %eax, %xmm1
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: pslld %xmm1, %xmm2
+; X32-SSE-NEXT: movl $32, %ecx
+; X32-SSE-NEXT: subl %eax, %ecx
+; X32-SSE-NEXT: movd %ecx, %xmm1
+; X32-SSE-NEXT: psrld %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat)
define i32 @AGEP1(<4 x i32*> %param) nounwind {
; CHECK-LABEL: AGEP1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0
-; CHECK-NEXT: vpextrd $3, %xmm0, %eax
-; CHECK-NEXT: movl (%eax), %eax
+; CHECK-NEXT: vextractps $3, %xmm0, %eax
+; CHECK-NEXT: movl 16(%eax), %eax
; CHECK-NEXT: retl
%A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
%k = extractelement <4 x i32*> %A2, i32 3
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pmaxsw %xmm3, %xmm1
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pmaxsw %xmm4, %xmm0
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: xorb $127, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pmaxsb %xmm3, %xmm1
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pmaxsb %xmm4, %xmm0
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pmaxsw %xmm3, %xmm1
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pmaxsw %xmm4, %xmm0
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: xorb $127, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pmaxsb %xmm3, %xmm1
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pmaxsb %xmm4, %xmm0
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0)
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pminsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pminsw %xmm3, %xmm1
; SSE41-NEXT: pminsw %xmm2, %xmm0
; SSE41-NEXT: pminsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pminsw %xmm4, %xmm0
; SSE41-NEXT: pminsw %xmm2, %xmm0
; SSE41-NEXT: pminsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: xorb $-128, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0)
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pminsb %xmm3, %xmm1
; SSE41-NEXT: pminsb %xmm2, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pminsb %xmm4, %xmm0
; SSE41-NEXT: pminsb %xmm2, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0)
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pminsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pminsw %xmm3, %xmm1
; SSE41-NEXT: pminsw %xmm2, %xmm0
; SSE41-NEXT: pminsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pminsw %xmm4, %xmm0
; SSE41-NEXT: pminsw %xmm2, %xmm0
; SSE41-NEXT: pminsw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: xorb $-128, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0)
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pminsb %xmm3, %xmm1
; SSE41-NEXT: pminsb %xmm2, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE41-NEXT: pminsb %xmm4, %xmm0
; SSE41-NEXT: pminsb %xmm2, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pminub %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: notl %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0)
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: notb %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0)
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: notl %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0)
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: phminposuw %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: notb %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0)
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_rotate_v4i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: xorps %xmm3, %xmm3
-; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: pslld %xmm3, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
-; SSE2-NEXT: psubd %xmm1, %xmm3
-; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
-; SSE2-NEXT: psrld %xmm2, %xmm0
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pslld %xmm1, %xmm2
+; SSE2-NEXT: movl $32, %ecx
+; SSE2-NEXT: subl %eax, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: psrld %xmm1, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_rotate_v4i32:
;
; X32-SSE-LABEL: splatvar_rotate_v4i32:
; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: xorps %xmm2, %xmm2
-; X32-SSE-NEXT: xorps %xmm3, %xmm3
-; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm4
-; X32-SSE-NEXT: pslld %xmm3, %xmm4
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
-; X32-SSE-NEXT: psubd %xmm1, %xmm3
-; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
-; X32-SSE-NEXT: psrld %xmm2, %xmm0
-; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $31, %eax
+; X32-SSE-NEXT: movd %eax, %xmm1
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: pslld %xmm1, %xmm2
+; X32-SSE-NEXT: movl $32, %ecx
+; X32-SSE-NEXT: subl %eax, %ecx
+; X32-SSE-NEXT: movd %ecx, %xmm1
+; X32-SSE-NEXT: psrld %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
%splat32 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %splat
; X64-LIN-LABEL: PR17487:
; X64-LIN: # %bb.0:
; X64-LIN-NEXT: movd %edi, %xmm0
-; X64-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; X64-LIN-NEXT: pandn {{.*}}(%rip), %xmm0
-; X64-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-LIN-NEXT: movq %xmm0, %rcx
-; X64-LIN-NEXT: xorl %eax, %eax
-; X64-LIN-NEXT: cmpq $1, %rcx
-; X64-LIN-NEXT: setne %al
+; X64-LIN-NEXT: pextrw $0, %xmm0, %eax
+; X64-LIN-NEXT: andl $1, %eax
; X64-LIN-NEXT: retq
;
; X64-WIN-LABEL: PR17487:
; X64-WIN: # %bb.0:
+; X64-WIN-NEXT: andb $1, %cl
; X64-WIN-NEXT: movzbl %cl, %eax
-; X64-WIN-NEXT: movd %eax, %xmm0
-; X64-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; X64-WIN-NEXT: pandn __xmm@{{.*}}(%rip), %xmm0
-; X64-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-WIN-NEXT: movq %xmm0, %rcx
-; X64-WIN-NEXT: xorl %eax, %eax
-; X64-WIN-NEXT: cmpq $1, %rcx
-; X64-WIN-NEXT: setne %al
; X64-WIN-NEXT: retq
%tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1
%tmp1 = zext <2 x i1> %tmp to <2 x i64>