From: Loren Merritt Date: Wed, 24 Feb 2010 07:50:23 +0000 (-0800) Subject: Fix a buffer overread on odd input resolutions X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9eb6ec9f017c49a7d6979c72ce0d65a0fc104f0f;p=libx264 Fix a buffer overread on odd input resolutions --- diff --git a/common/mc.c b/common/mc.c index ac740cfc..d062af39 100644 --- a/common/mc.c +++ b/common/mc.c @@ -317,7 +317,7 @@ MC_COPY( 16 ) MC_COPY( 8 ) MC_COPY( 4 ) -static void plane_copy( uint8_t *dst, int i_dst, +void x264_plane_copy_c( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int w, int h) { while( h-- ) @@ -483,7 +483,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf ) pf->copy[PIXEL_8x8] = mc_copy_w8; pf->copy[PIXEL_4x4] = mc_copy_w4; - pf->plane_copy = plane_copy; + pf->plane_copy = x264_plane_copy_c; pf->hpel_filter = hpel_filter; pf->prefetch_fenc = prefetch_fenc_null; diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm index 9e073e3a..edafa61a 100644 --- a/common/x86/mc-a2.asm +++ b/common/x86/mc-a2.asm @@ -598,48 +598,44 @@ cglobal x264_sfence ret ;----------------------------------------------------------------------------- -; void x264_plane_copy_mmxext( uint8_t *dst, int i_dst, -; uint8_t *src, int i_src, int w, int h) +; void x264_plane_copy_core_mmxext( uint8_t *dst, int i_dst, +; uint8_t *src, int i_src, int w, int h) ;----------------------------------------------------------------------------- -cglobal x264_plane_copy_mmxext, 6,7 +; assumes i_dst and w are multiples of 16, and i_dst>w +cglobal x264_plane_copy_core_mmxext, 6,7 movsxdifnidn r1, r1d movsxdifnidn r3, r3d - add r4d, 3 - and r4d, ~3 - mov r6d, r4d - and r6d, ~15 - sub r1, r6 - sub r3, r6 + movsxdifnidn r4, r4d + sub r1, r4 + sub r3, r4 .loopy: mov r6d, r4d - sub r6d, 64 - jl .endx + sub r6d, 63 .loopx: prefetchnta [r2+256] movq mm0, [r2 ] movq mm1, [r2+ 8] - movq mm2, [r2+16] - movq mm3, [r2+24] - movq mm4, [r2+32] - movq mm5, [r2+40] - movq mm6, [r2+48] - movq mm7, [r2+56] movntq [r0 ], mm0 movntq [r0+ 8], mm1 + movq mm2, [r2+16] + movq mm3, [r2+24] movntq [r0+16], mm2 movntq [r0+24], mm3 + movq mm4, [r2+32] + movq mm5, [r2+40] movntq [r0+32], mm4 movntq [r0+40], mm5 + movq mm6, [r2+48] + movq mm7, [r2+56] movntq [r0+48], mm6 movntq [r0+56], mm7 add r2, 64 add r0, 64 sub r6d, 64 - jge .loopx -.endx: + jg .loopx prefetchnta [r2+256] - add r6d, 48 - jl .end16 + add r6d, 63 + jle .end16 .loop16: movq mm0, [r2 ] movq mm1, [r2+8] @@ -648,20 +644,12 @@ cglobal x264_plane_copy_mmxext, 6,7 add r2, 16 add r0, 16 sub r6d, 16 - jge .loop16 + jg .loop16 .end16: - add r6d, 12 - jl .end4 -.loop4: - movd mm2, [r2+r6] - movd [r0+r6], mm2 - sub r6d, 4 - jge .loop4 -.end4: - add r2, r3 add r0, r1 + add r2, r3 dec r5d - jg .loopy + jg .loopy sfence emms RET diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c index ecdb4e80..a6cabd55 100644 --- a/common/x86/mc-c.c +++ b/common/x86/mc-c.c @@ -88,7 +88,8 @@ extern void x264_mc_chroma_ssse3( uint8_t *src, int i_src_stride, extern void x264_mc_chroma_ssse3_cache64( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int dx, int dy, int i_width, int i_height ); -extern void x264_plane_copy_mmxext( uint8_t *, int, uint8_t *, int, int w, int h); +extern void x264_plane_copy_core_mmxext( uint8_t *, int, uint8_t *, int, int w, int h); +extern void x264_plane_copy_c( uint8_t *, int, uint8_t *, int, int w, int h); extern void *x264_memcpy_aligned_mmx( void * dst, const void * src, size_t n ); extern void *x264_memcpy_aligned_sse2( void * dst, const void * src, size_t n ); extern void x264_memzero_aligned_mmx( void * dst, int n ); @@ -339,10 +340,23 @@ void x264_hpel_filter_ssse3( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_ #else HPEL(16, sse2, sse2, sse2, sse2) HPEL(16, ssse3, ssse3, ssse3, ssse3) - #endif HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2) +static void x264_plane_copy_mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int w, int h) +{ + if( w < 256 ) { // tiny resolutions don't want non-temporal hints. dunno the exact threshold. + x264_plane_copy_c( dst, i_dst, src, i_src, w, h ); + } else if(i_src > 0) { + // have to use plain memcpy on the last line (in memory order) to avoid overreading src + x264_plane_copy_core_mmxext( dst, i_dst, src, i_src, (w+15)&~15, h-1 ); + memcpy( dst+i_dst*(h-1), src+i_src*(h-1), w ); + } else { + memcpy( dst, src, w ); + x264_plane_copy_core_mmxext( dst+i_dst, i_dst, src+i_src, i_src, (w+15)&~15, h-1 ); + } +} + void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ) { if( !(cpu&X264_CPU_MMX) )