From: Loren Merritt <pengvado@videolan.org>
Date: Sat, 15 Sep 2007 06:34:05 +0000 (+0000)
Subject: implement multithreaded me=esa
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=463437926e73b3d18542804cbef31f277d115cc2;p=libx264

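implement multithreaded me=esa

x264_frame_filter() now builds the integral image for the row range
handled by each call instead of for the whole frame once b_end is set,
and x264_validate_parameters() no longer warns "threads are not yet
compatible with ESA" and falls back to UMH.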


git-svn-id: svn://svn.videolan.org/x264/trunk@676 df754926-b1dd-0310-bc7b-ec298dee348c
---

diff --git a/common/mc.c b/common/mc.c
index 6d3a226b..dfede0c0 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -386,8 +386,8 @@ void x264_frame_filter( int cpu, x264_frame_t *frame, int b_interlaced, int mb_y
 {
     const int x_inc = 16, y_inc = 16;
     const int stride = frame->i_stride[0] << b_interlaced;
-    const int start = (mb_y*16 >> b_interlaced) - 8;
-    const int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8;
+    int start = (mb_y*16 >> b_interlaced) - 8;
+    int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8;
     int x, y;
 
     if( mb_y & b_interlaced )
@@ -432,28 +432,33 @@ void x264_frame_filter( int cpu, x264_frame_t *frame, int b_interlaced, int mb_y
     /* generate integral image:
      * frame->integral contains 2 planes. in the upper plane, each element is
      * the sum of an 8x8 pixel region with top-left corner on that point.
-     * in the lower plane, 4x4 sums (needed only with --analyse p4x4). */
+     * in the lower plane, 4x4 sums (needed only with --partitions p4x4). */
 
-    if( frame->integral && b_end )
+    if( frame->integral )
     {
-        //FIXME slice
-        memset( frame->integral - 32 * stride - 32, 0, stride * sizeof(uint16_t) );
-        for( y = -32; y < frame->i_lines[0] + 31; y++ )
+        if( start < 0 )
+        {
+            memset( frame->integral - 32 * stride - 32, 0, stride * sizeof(uint16_t) );
+            start = -32;
+        }
+        if( b_end )
+            height += 24;
+        for( y = start; y < height; y++ )
         {
             uint8_t  *ref  = frame->plane[0] + y * stride - 32;
             uint16_t *line = frame->integral + (y+1) * stride - 31;
             uint16_t v = line[0] = 0;
             for( x = 0; x < stride-1; x++ )
                 line[x] = v += ref[x] + line[x-stride] - line[x-stride-1];
-        }
-        for( y = -31; y < frame->i_lines[0] + 24; y++ )
-        {
-            uint16_t *line = frame->integral + y * stride - 31;
-            uint16_t *sum4 = line + frame->i_stride[0] * (frame->i_lines[0] + 64);
-            for( x = -31; x < stride - 40; x++, line++, sum4++ )
+            line -= 8*stride;
+            if( y >= 8-31 )
             {
-                sum4[0] =  line[4+4*stride] - line[4] - line[4*stride] + line[0];
-                line[0] += line[8+8*stride] - line[8] - line[8*stride];
+                uint16_t *sum4 = line + frame->i_stride[0] * (frame->i_lines[0] + 64);
+                for( x = 1; x < stride-8; x++, line++, sum4++ )
+                {
+                    sum4[0] =  line[4+4*stride] - line[4] - line[4*stride] + line[0];
+                    line[0] += line[8+8*stride] - line[8] - line[8*stride];
+                }
             }
         }
     }
diff --git a/encoder/encoder.c b/encoder/encoder.c
index a48ed1e7..128cded2 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -356,11 +356,6 @@ static int x264_validate_parameters( x264_t *h )
         x264_log( h, X264_LOG_WARNING, "not compiled with pthread support!\n");
         h->param.i_threads = 1;
 #else
-        if( h->param.analyse.i_me_method == X264_ME_ESA )
-        {
-            x264_log( h, X264_LOG_WARNING, "threads are not yet compatible with ESA\n");
-            h->param.analyse.i_me_method = X264_ME_UMH;
-        }
         if( h->param.i_scenecut_threshold >= 0 )
             h->param.b_pre_scenecut = 1;
 #endif