MC_COPY( 8 )
MC_COPY( 4 )
-static void plane_copy( uint8_t *dst, int i_dst,
+void x264_plane_copy_c( uint8_t *dst, int i_dst,
uint8_t *src, int i_src, int w, int h)
{
while( h-- )
pf->copy[PIXEL_8x8] = mc_copy_w8;
pf->copy[PIXEL_4x4] = mc_copy_w4;
- pf->plane_copy = plane_copy;
+ pf->plane_copy = x264_plane_copy_c;
pf->hpel_filter = hpel_filter;
pf->prefetch_fenc = prefetch_fenc_null;
ret
;-----------------------------------------------------------------------------
-; void x264_plane_copy_mmxext( uint8_t *dst, int i_dst,
-; uint8_t *src, int i_src, int w, int h)
+; void x264_plane_copy_core_mmxext( uint8_t *dst, int i_dst,
+; uint8_t *src, int i_src, int w, int h)
;-----------------------------------------------------------------------------
-cglobal x264_plane_copy_mmxext, 6,7
+; assumes i_dst and w are multiples of 16, and i_dst>w
+cglobal x264_plane_copy_core_mmxext, 6,7
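+; register args from cglobal: r0=dst, r1=i_dst, r2=src, r3=i_src, r4=w, r5=h; r6 = bytes left in the row
+; the C wrapper passes w rounded up to a multiple of 16, so each row may be read/written slightly past
+; the true width: i_dst>w keeps the extra writes inside dst's stride padding, and the wrapper copies the
+; last row (in memory order) with memcpy so src is never overread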
movsxdifnidn r1, r1d
movsxdifnidn r3, r3d
- add r4d, 3
- and r4d, ~3
- mov r6d, r4d
- and r6d, ~15
- sub r1, r6
- sub r3, r6
+ movsxdifnidn r4, r4d
+ sub r1, r4
+ sub r3, r4
.loopy:
mov r6d, r4d
- sub r6d, 64
- jl .endx
+ sub r6d, 63
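+; r6d = w-63: the 64-byte loop below keeps going (jg) while at least 64 bytes of the row remain;
+; its body always runs at least once, which is safe because the C wrapper only uses this path for w >= 256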
.loopx:
prefetchnta [r2+256]
movq mm0, [r2 ]
movq mm1, [r2+ 8]
- movq mm2, [r2+16]
- movq mm3, [r2+24]
- movq mm4, [r2+32]
- movq mm5, [r2+40]
- movq mm6, [r2+48]
- movq mm7, [r2+56]
movntq [r0 ], mm0
movntq [r0+ 8], mm1
+ movq mm2, [r2+16]
+ movq mm3, [r2+24]
movntq [r0+16], mm2
movntq [r0+24], mm3
+ movq mm4, [r2+32]
+ movq mm5, [r2+40]
movntq [r0+32], mm4
movntq [r0+40], mm5
+ movq mm6, [r2+48]
+ movq mm7, [r2+56]
movntq [r0+48], mm6
movntq [r0+56], mm7
add r2, 64
add r0, 64
sub r6d, 64
- jge .loopx
-.endx:
+ jg .loopx
prefetchnta [r2+256]
- add r6d, 48
- jl .end16
+ add r6d, 63
+ jle .end16
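+; adding 63 back undoes the bias, leaving the exact byte count remaining in the row (a multiple of 16)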
.loop16:
movq mm0, [r2 ]
movq mm1, [r2+8]
movntq [r0 ], mm0
movntq [r0+8], mm1
add r2, 16
add r0, 16
sub r6d, 16
- jge .loop16
+ jg .loop16
.end16:
- add r6d, 12
- jl .end4
-.loop4:
- movd mm2, [r2+r6]
- movd [r0+r6], mm2
- sub r6d, 4
- jge .loop4
-.end4:
- add r2, r3
add r0, r1
+ add r2, r3
dec r5d
jg .loopy
sfence
emms
RET
extern void x264_mc_chroma_ssse3_cache64( uint8_t *src, int i_src_stride,
uint8_t *dst, int i_dst_stride,
int dx, int dy, int i_width, int i_height );
-extern void x264_plane_copy_mmxext( uint8_t *, int, uint8_t *, int, int w, int h);
+extern void x264_plane_copy_core_mmxext( uint8_t *, int, uint8_t *, int, int w, int h);
+extern void x264_plane_copy_c( uint8_t *, int, uint8_t *, int, int w, int h);
extern void *x264_memcpy_aligned_mmx( void * dst, const void * src, size_t n );
extern void *x264_memcpy_aligned_sse2( void * dst, const void * src, size_t n );
extern void x264_memzero_aligned_mmx( void * dst, int n );
#else
HPEL(16, sse2, sse2, sse2, sse2)
HPEL(16, ssse3, ssse3, ssse3, ssse3)
-
#endif
HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
+static void x264_plane_copy_mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int w, int h)
+{
+ if( w < 256 ) { // tiny resolutions don't want non-temporal hints. dunno the exact threshold.
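+ // (the mmxext core uses non-temporal stores, movntq, which bypass the cache; a small plane is
+ //  likely to be read again immediately, so the plain cached C copy is faster for it)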
+ x264_plane_copy_c( dst, i_dst, src, i_src, w, h );
+ } else if( i_src > 0 ) {
+ // have to use plain memcpy on the last line (in memory order) to avoid overreading src
+ x264_plane_copy_core_mmxext( dst, i_dst, src, i_src, (w+15)&~15, h-1 );
+ memcpy( dst+i_dst*(h-1), src+i_src*(h-1), w );
+ } else {
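+ // with a non-positive stride the first row is the highest-addressed (last in memory order), so it
+ // is the one at risk of being overread: memcpy it, then run the core on the remaining h-1 rows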
+ memcpy( dst, src, w );
+ x264_plane_copy_core_mmxext( dst+i_dst, i_dst, src+i_src, i_src, (w+15)&~15, h-1 );
+ }
+}
+
void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
{
if( !(cpu&X264_CPU_MMX) )