 define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) {
 ; X32-LABEL: shuffle_combine_packusdw_pshufb:
 ; X32:       # BB#0:
-; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-NEXT:    vpsrld $16, %ymm0, %ymm0
 ; X32-NEXT:    vpackusdw %ymm0, %ymm0, %ymm0
 ; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: shuffle_combine_packusdw_pshufb:
 ; X64:       # BB#0:
-; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT:    vpsrld $16, %ymm0, %ymm0
 ; X64-NEXT:    vpackusdw %ymm0, %ymm0, %ymm0
 ; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
 ; X64-NEXT:    retq
-  %1 = and <8 x i32> %a0, <i32 255, i32 65535, i32 255, i32 65535, i32 255, i32 255, i32 255, i32 65535>
+  %1 = lshr <8 x i32> %a0, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
   %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1)
   %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8>
   ret <16 x i16> %3