unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
- case X86::SHUFPSrri: {
- assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
- if (!Subtarget.hasSSE2()) return nullptr;
-
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- if (B != C) return nullptr;
- int64_t M = MI->getOperand(3).getImm();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addOperand(Dest).addOperand(Src).addImm(M);
- break;
- }
- case X86::SHUFPDrri: {
- assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
- if (!Subtarget.hasSSE2()) return nullptr;
-
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- if (B != C) return nullptr;
- unsigned M = MI->getOperand(3).getImm();
-
- // Convert to PSHUFD mask.
- M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
-
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addOperand(Dest).addOperand(Src).addImm(M);
- break;
- }
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
; X32-LABEL: buildvector:
; X32: ## BB#0: ## %entry
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; X32-NEXT: movaps %xmm0, %xmm2
+; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X32-NEXT: addss %xmm1, %xmm0
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; X32-NEXT: addss %xmm2, %xmm1
;
; X64-LABEL: buildvector:
; X64: ## BB#0: ## %entry
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; X64-NEXT: movaps %xmm0, %xmm2
+; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X64-NEXT: addss %xmm1, %xmm0
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; X64-NEXT: addss %xmm2, %xmm1
; SSE2: # BB#0:
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4b:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
; SSE2: # BB#0:
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5b:
; SSSE3: # BB#0:
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test1:
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test6:
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test2b:
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test2b:
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test2b: