From: Loren Merritt Date: Sat, 15 Sep 2007 06:34:05 +0000 (+0000) Subject: implement multithreaded me=esa X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=463437926e73b3d18542804cbef31f277d115cc2;p=libx264 implement multithreaded me=esa git-svn-id: svn://svn.videolan.org/x264/trunk@676 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/common/mc.c b/common/mc.c index 6d3a226b..dfede0c0 100644 --- a/common/mc.c +++ b/common/mc.c @@ -386,8 +386,8 @@ void x264_frame_filter( int cpu, x264_frame_t *frame, int b_interlaced, int mb_y { const int x_inc = 16, y_inc = 16; const int stride = frame->i_stride[0] << b_interlaced; - const int start = (mb_y*16 >> b_interlaced) - 8; - const int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8; + int start = (mb_y*16 >> b_interlaced) - 8; + int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8; int x, y; if( mb_y & b_interlaced ) @@ -432,28 +432,33 @@ void x264_frame_filter( int cpu, x264_frame_t *frame, int b_interlaced, int mb_y /* generate integral image: * frame->integral contains 2 planes. in the upper plane, each element is * the sum of an 8x8 pixel region with top-left corner on that point. - * in the lower plane, 4x4 sums (needed only with --analyse p4x4). */ + * in the lower plane, 4x4 sums (needed only with --partitions p4x4). */ - if( frame->integral && b_end ) + if( frame->integral ) { - //FIXME slice - memset( frame->integral - 32 * stride - 32, 0, stride * sizeof(uint16_t) ); - for( y = -32; y < frame->i_lines[0] + 31; y++ ) + if( start < 0 ) + { + memset( frame->integral - 32 * stride - 32, 0, stride * sizeof(uint16_t) ); + start = -32; + } + if( b_end ) + height += 24; + for( y = start; y < height; y++ ) { uint8_t *ref = frame->plane[0] + y * stride - 32; uint16_t *line = frame->integral + (y+1) * stride - 31; uint16_t v = line[0] = 0; for( x = 0; x < stride-1; x++ ) line[x] = v += ref[x] + line[x-stride] - line[x-stride-1]; - } - for( y = -31; y < frame->i_lines[0] + 24; y++ ) - { - uint16_t *line = frame->integral + y * stride - 31; - uint16_t *sum4 = line + frame->i_stride[0] * (frame->i_lines[0] + 64); - for( x = -31; x < stride - 40; x++, line++, sum4++ ) + line -= 8*stride; + if( y >= 8-31 ) { - sum4[0] = line[4+4*stride] - line[4] - line[4*stride] + line[0]; - line[0] += line[8+8*stride] - line[8] - line[8*stride]; + uint16_t *sum4 = line + frame->i_stride[0] * (frame->i_lines[0] + 64); + for( x = 1; x < stride-8; x++, line++, sum4++ ) + { + sum4[0] = line[4+4*stride] - line[4] - line[4*stride] + line[0]; + line[0] += line[8+8*stride] - line[8] - line[8*stride]; + } } } } diff --git a/encoder/encoder.c b/encoder/encoder.c index a48ed1e7..128cded2 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -356,11 +356,6 @@ static int x264_validate_parameters( x264_t *h ) x264_log( h, X264_LOG_WARNING, "not compiled with pthread support!\n"); h->param.i_threads = 1; #else - if( h->param.analyse.i_me_method == X264_ME_ESA ) - { - x264_log( h, X264_LOG_WARNING, "threads are not yet compatible with ESA\n"); - h->param.analyse.i_me_method = X264_ME_UMH; - } if( h->param.i_scenecut_threshold >= 0 ) h->param.b_pre_scenecut = 1; #endif