; SSE-LABEL: combine_vec_srem_by_pow2b:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $31, %xmm1
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psrad $31, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psrld $29, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
-; SSE-NEXT: psrld $30, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
-; SSE-NEXT: paddd %xmm0, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: psrad $3, %xmm1
-; SSE-NEXT: movdqa %xmm2, %xmm3
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psrld $29, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psrld $31, %xmm3
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; SSE-NEXT: psrld $30, %xmm1
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; SSE-NEXT: paddd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psrad $3, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: psrad $1, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT: psrad $2, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; SSE-NEXT: pmulld {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubd %xmm2, %xmm0
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; SSE-NEXT: psrad $2, %xmm1
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE-NEXT: pmulld {{.*}}(%rip), %xmm1
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
-; AVX1-NEXT: vpsrld $29, %xmm2, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
-; AVX1-NEXT: vpsrld $30, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $29, %xmm1, %xmm2
+; AVX1-NEXT: vpsrld $31, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpsrld $30, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
; AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
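For context, the CHECK lines above exercise srem of a <4 x i32> by per-lane power-of-two constants: psrad $31 extracts the sign, psrld $31/$30/$29 builds the per-lane rounding bias, paddd plus psrad $1/$2/$3 forms the truncated quotient, and pmulld plus psubd reconstruct the remainder. A minimal IR sketch that produces this kind of output under llc with SSE4.1/AVX enabled is shown below; the divisors <1, 2, 4, 8> are inferred from the shift amounts in the checks and are an assumption, not copied from the original test file.

; Sketch only (assumed constants), not the verbatim test source:
; srem by a non-uniform power-of-two vector expands to the
; sign-bias / arithmetic-shift / multiply-back / subtract sequence
; checked above.
define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
  %1 = srem <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
  ret <4 x i32> %1
}
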
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: psrld $30, %xmm3
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psrld $31, %xmm2
-; SSE-NEXT: psrld $29, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psrld $29, %xmm2
+; SSE-NEXT: psrld $31, %xmm1
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
; AVX1-NEXT: vpsrld $28, %xmm1, %xmm2
; AVX1-NEXT: vpsrld $30, %xmm1, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrld $31, %xmm0, %xmm3
-; AVX1-NEXT: vpsrld $29, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpsrld $29, %xmm1, %xmm3
+; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $4, %xmm1, %xmm2