Thanks to Jason for pointing that out on #vp8. ;-)
Change-Id: I5330a753e752a8704b78a409597472628e0b26a5
movsxd rax, dword ptr arg(3) ;src_stride;
sub rsi, rax
pxor mm0, mm0
- movd mm1, [rsi]
- movd mm2, [rsi+4]
- punpcklbw mm1, mm0
- punpcklbw mm2, mm0
- paddw mm1, mm2
- pshufw mm2, mm1, 0x0e
- paddw mm1, mm2
- pshufw mm2, mm1, 0x01
- paddw mm1, mm2
+ movq mm1, [rsi]
+ psadbw mm1, mm0
; from left
dec rsi
movsxd rax, dword ptr arg(3) ;src_stride;
sub rsi, rax
pxor mm0, mm0
- movd mm1, [rsi]
- movd mm2, [rsi+4]
- punpcklbw mm1, mm0
- punpcklbw mm2, mm0
- paddw mm1, mm2
- pshufw mm2, mm1, 0x0e
- paddw mm1, mm2
- pshufw mm2, mm1, 0x01
- paddw mm1, mm2
+ movq mm1, [rsi]
+ psadbw mm1, mm0
; add up
paddw mm1, [GLOBAL(dc_4)]