; Diff excerpt: replicate single 16-bit words of xmm5 across whole 128-bit
; lanes (words 0/1 fill the low lanes, words 4/5 fill the high lanes).
; The "-"/"+" pair rewrites vinserti128 from a two-operand shorthand to the
; explicit four-operand form (dest, src1, src2/m128, imm8).
; NOTE(review): the shorthand was presumably expanded by the assembler to
; the same encoding with dest == src1 -- confirm against the toolchain used.
;
; Step 1: interleave xmm5 with itself so each dword holds one word twice.
vpunpcklwd xmm4, xmm5, xmm5 ; xmm4=(00 00 01 01 02 02 03 03)
vpunpckhwd xmm5, xmm5, xmm5 ; xmm5=(04 04 05 05 06 06 07 07)
; Step 2: keep xmm4 as the low 128 bits of ymm4, put xmm5 in the high 128 bits
; (imm8 = 1 selects the upper lane as the insertion target).
- vinserti128 ymm4, xmm5, 1
+ vinserti128 ymm4, ymm4, xmm5, 1 ; ymm4=(00 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07)
; Step 3: vpshufd works per 128-bit lane; imm8 0x00 copies dword 0 of each
; lane into all four dwords of that lane, 0x55 does the same with dword 1.
vpshufd ymm0, ymm4, 0x00 ; ymm0=col0_4=(00 00 00 00 00 00 00 00 04 04 04 04 04 04 04 04)
vpshufd ymm1, ymm4, 0x55 ; ymm1=col1_5=(01 01 01 01 01 01 01 01 05 05 05 05 05 05 05 05)
; Diff excerpt: load two 64-bit groups of samples, merge pairs of xmm
; registers into ymm registers, then widen bytes to words.
; NOTE(review): hunk headers are missing, so these lines may not be adjacent
; in the real file; ymm0-ymm5 are consumed below without visible producers.
;
; movq reads 64 bits from memory into the low half of the xmm register.
; The address is base pointer (ebx / edx) + eax scaled by SIZEOF_JSAMPLE.
; NOTE(review): XMM_MMWORD is presumably a 64-bit operand-size macro -- confirm.
; NOTE(review): movq is the legacy-SSE mnemonic while its neighbours are
; VEX-encoded; unless a macro remaps it, check whether vmovq was intended.
movq xmm6, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]
movq xmm7, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]
; Old two-operand shorthand (removed): destination doubles as first source.
- vinserti128 ymm0, xmm1, 1
- vinserti128 ymm2, xmm3, 1
- vinserti128 ymm4, xmm5, 1
- vinserti128 ymm6, xmm7, 1
; New explicit form (added): dest, src1, src2, imm8. With imm8 = 1 the even
; register keeps its low 128 bits and the odd register fills the high 128 bits.
+ vinserti128 ymm0, ymm0, xmm1, 1 ; ymm0=(xmm0 | xmm1)
+ vinserti128 ymm2, ymm2, xmm3, 1 ; ymm2=(xmm2 | xmm3)
+ vinserti128 ymm4, ymm4, xmm5, 1 ; ymm4=(xmm4 | xmm5)
+ vinserti128 ymm6, ymm6, xmm7, 1 ; ymm6=(xmm6 | xmm7)
vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's)
; Interleave the low 8 bytes of each 128-bit lane of ymm0 with zero bytes,
; i.e. zero-extend those bytes to 16-bit words.
vpunpcklbw ymm0, ymm0, ymm1
; Diff excerpt: spread individual 16-bit words of xmm5 over full 128-bit
; lanes of a ymm register (low lane from words 0-3, high lane from words 4-7).
; The "-" line is the former two-operand vinserti128 spelling; the "+" line
; spells out all operands (dest, src1, src2/m128, imm8).
; NOTE(review): the two spellings are presumably equivalent when dest == src1;
; verify with the assembler version in use.
;
; Duplicate every word of xmm5 in place: low four words, then high four words.
vpunpcklwd xmm4, xmm5, xmm5 ; xmm4=(00 00 01 01 02 02 03 03)
vpunpckhwd xmm5, xmm5, xmm5 ; xmm5=(04 04 05 05 06 06 07 07)
; Join the halves: low 128 bits stay xmm4, imm8 = 1 writes xmm5 to the high 128 bits.
- vinserti128 ymm4, xmm5, 1
+ vinserti128 ymm4, ymm4, xmm5, 1 ; ymm4=(00 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07)
; Per-lane dword broadcast: selector 0x00 replicates dword 0 of each lane,
; selector 0x55 replicates dword 1 of each lane.
vpshufd ymm0, ymm4, 0x00 ; ymm0=col0_4=(00 00 00 00 00 00 00 00 04 04 04 04 04 04 04 04)
vpshufd ymm1, ymm4, 0x55 ; ymm1=col1_5=(01 01 01 01 01 01 01 01 05 05 05 05 05 05 05 05)