From: Loren Merritt
Date: Mon, 10 Oct 2005 22:51:26 +0000 (+0000)
Subject: allow non-mod16 resolutions
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d69837d312aa09c020416008c26f7008783d8c7f;p=libx264

allow non-mod16 resolutions

git-svn-id: svn://svn.videolan.org/x264/trunk@327 df754926-b1dd-0310-bc7b-ec298dee348c
---

diff --git a/common/frame.c b/common/frame.c
index ca0f78af..68879721 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -222,6 +222,41 @@ void x264_frame_expand_border_lowres( x264_frame_t *frame )
         plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_lines_lowres, 32 );
 }
 
+/* Fill the area between the picture size given in h->param and the
+ * macroblock-aligned size (i_mb_width*16 x i_mb_height*16) of every plane
+ * by repeating the last pixel of each row and then the last row. */
+void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
+{
+    int i, y;
+    for( i = 0; i < frame->i_plane; i++ )
+    {
+        /* planes after the first are handled at half width and height */
+        int i_subsample = i ? 1 : 0;
+        int i_width = h->param.i_width >> i_subsample;
+        int i_height = h->param.i_height >> i_subsample;
+        int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
+        int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
+
+        if( i_padx )
+        {
+            /* extend every picture row to the right with its last pixel */
+            for( y = 0; y < i_height; y++ )
+                memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
+                         frame->plane[i][y*frame->i_stride[i] + i_width - 1],
+                         i_padx );
+        }
+        if( i_pady )
+        {
+            /* copy the last picture row, including its right-hand padding,
+             * into each padding row below it */
+            for( y = i_height; y < i_height + i_pady; y++ )
+                memcpy( &frame->plane[i][y*frame->i_stride[i]],
+                        &frame->plane[i][(i_height-1)*frame->i_stride[i]],
+                        i_width + i_padx );
+        }
+    }
+}
+
 
 /* Deblocking filter */
 
diff --git a/common/frame.h b/common/frame.h
index a29d7234..60512f30 100644
--- a/common/frame.h
+++ b/common/frame.h
@@ -72,6 +72,7 @@ void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_pictur
 void x264_frame_expand_border( x264_frame_t *frame );
 void x264_frame_expand_border_filtered( x264_frame_t *frame );
 void x264_frame_expand_border_lowres( x264_frame_t *frame );
+void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame );
 
 void x264_frame_deblocking_filter( x264_t *h, int i_slice_type );
 
diff --git a/common/pixel.c b/common/pixel.c
index 2e139f5f..1e3a015f 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -106,6 +106,55 @@ PIXEL_SSD_C( pixel_ssd_8x4, 8, 4 )
 PIXEL_SSD_C( pixel_ssd_4x8, 4, 8 )
 PIXEL_SSD_C( pixel_ssd_4x4, 4, 4 )
 
+/* Sum of squared differences between two i_width x i_height pixel areas.
+ * Whole 16x16, 8x16 and 8x8 blocks go through the pf->ssd[] functions;
+ * the columns and rows left over are accumulated one pixel at a time. */
+int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
+{
+    /* dimensions rounded down to a multiple of 8 */
+    const int i_width8 = i_width & ~7;
+    const int i_height8 = i_height & ~7;
+    int64_t i_total = 0;
+    int row, col;
+
+    /* bands of 16 rows: 16x16 blocks, then one 8x16 block if 8 or more columns remain */
+    for( row = 0; row < i_height-15; row += 16 )
+    {
+        for( col = 0; col < i_width-15; col += 16 )
+            i_total += pf->ssd[PIXEL_16x16]( pix1 + row*i_pix1 + col, i_pix1,
+                                             pix2 + row*i_pix2 + col, i_pix2 );
+        if( col < i_width-7 )
+            i_total += pf->ssd[PIXEL_8x16]( pix1 + row*i_pix1 + col, i_pix1,
+                                            pix2 + row*i_pix2 + col, i_pix2 );
+    }
+
+    /* one further band of 8 rows, if present, in 8x8 blocks */
+    if( row < i_height-7 )
+    {
+        for( col = 0; col < i_width-7; col += 8 )
+            i_total += pf->ssd[PIXEL_8x8]( pix1 + row*i_pix1 + col, i_pix1,
+                                           pix2 + row*i_pix2 + col, i_pix2 );
+    }
+
+    /* right-hand strip beside the rows handled by the block functions */
+    for( row = 0; row < i_height8; row++ )
+        for( col = i_width8; col < i_width; col++ )
+        {
+            const int i_diff = pix1[row*i_pix1+col] - pix2[row*i_pix2+col];
+            i_total += i_diff * i_diff;
+        }
+
+    /* bottom strip, over the full width */
+    for( row = i_height8; row < i_height; row++ )
+        for( col = 0; col < i_width; col++ )
+        {
+            const int i_diff = pix1[row*i_pix1+col] - pix2[row*i_pix2+col];
+            i_total += i_diff * i_diff;
+        }
+
+    return i_total;
+}
+
 
 static inline void pixel_sub_wxh( int16_t *diff, int i_size,
                                   uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
diff --git a/common/pixel.h b/common/pixel.h
index dc59fc0c..f8012bab 100644
--- a/common/pixel.h
+++ b/common/pixel.h
@@ -69,5 +69,6 @@ typedef struct
 } x264_pixel_function_t;
 
 void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
+int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height );
 
 #endif
diff --git a/encoder/encoder.c b/encoder/encoder.c
index dec9ad45..6f32938a 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -87,29 +87,6 @@ static int64_t i_mtime_filter = 0;
  ******************************* x264 libs **********************************
  *
  ****************************************************************************/
-static int64_t x264_sqe(
x264_t *h, uint8_t *pix1, int i_pix_stride, uint8_t *pix2, int i_pix2_stride, int i_width, int i_height ) -{ - int64_t i_sqe = 0; - int x, y; - -#define SSD(size) i_sqe += h->pixf.ssd[size]( pix1+y*i_pix_stride+x, i_pix_stride, \ - pix2+y*i_pix2_stride+x, i_pix2_stride ); - for( y = 0; y < i_height-15; y += 16 ) - { - for( x = 0; x < i_width-15; x += 16 ) - SSD(PIXEL_16x16); - if( x < i_width-7 ) - SSD(PIXEL_8x16); - } - if( y < i_height-7 ) - for( x = 0; x < i_width-7; x += 8 ) - SSD(PIXEL_8x8); -#undef SSD - x264_cpu_restore( h->param.cpu ); - - return i_sqe; -} - static float x264_psnr( int64_t i_sqe, int64_t i_size ) { double f_mse = (double)i_sqe / ((double)65025.0 * (double)i_size); @@ -363,9 +340,9 @@ static int x264_validate_parameters( x264_t *h ) return -1; } - if( h->param.i_width % 16 != 0 || h->param.i_height % 16 != 0 ) + if( h->param.i_width % 2 || h->param.i_height % 2 ) { - x264_log( h, X264_LOG_ERROR, "width %% 16 != 0 or height %% 16 != 0 (%dx%d)\n", + x264_log( h, X264_LOG_ERROR, "width or height not divisible by 2 (%dx%d)\n", h->param.i_width, h->param.i_height ); return -1; } @@ -376,7 +353,7 @@ static int x264_validate_parameters( x264_t *h ) } h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_SLICE_MAX ); - h->param.i_threads = X264_MIN( h->param.i_threads, h->param.i_height / 16 ); + h->param.i_threads = X264_MIN( h->param.i_threads, (h->param.i_height + 15) / 16 ); #if !(HAVE_PTHREAD) if( h->param.i_threads > 1 ) { @@ -396,6 +373,13 @@ static int x264_validate_parameters( x264_t *h ) h->param.analyse.b_psnr = 0; } + if( ( h->param.i_width % 16 || h->param.i_height % 16 ) && !h->mb.b_lossless ) + { + x264_log( h, X264_LOG_WARNING, + "width or height not divisible by 16 (%dx%d), compression will suffer.\n", + h->param.i_width, h->param.i_height ); + } + h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 ); if( h->param.i_keyint_max <= 0 ) h->param.i_keyint_max = 1; @@ -1208,6 +1192,9 @@ int 
x264_encoder_encode( x264_t *h, x264_frame_copy_picture( h, fenc, pic_in ); + if( h->param.i_width % 16 || h->param.i_height % 16 ) + x264_frame_expand_border_mod16( h, fenc ); + fenc->i_frame = h->frames.i_input++; x264_frame_put( h->frames.next, fenc ); @@ -1556,9 +1543,10 @@ do_encode: int64_t i_sqe_y, i_sqe_u, i_sqe_v; /* PSNR */ - i_sqe_y = x264_sqe( h, frame_psnr->plane[0], frame_psnr->i_stride[0], h->fenc->plane[0], h->fenc->i_stride[0], h->param.i_width, h->param.i_height ); - i_sqe_u = x264_sqe( h, frame_psnr->plane[1], frame_psnr->i_stride[1], h->fenc->plane[1], h->fenc->i_stride[1], h->param.i_width/2, h->param.i_height/2); - i_sqe_v = x264_sqe( h, frame_psnr->plane[2], frame_psnr->i_stride[2], h->fenc->plane[2], h->fenc->i_stride[2], h->param.i_width/2, h->param.i_height/2); + i_sqe_y = x264_pixel_ssd_wxh( &h->pixf, frame_psnr->plane[0], frame_psnr->i_stride[0], h->fenc->plane[0], h->fenc->i_stride[0], h->param.i_width, h->param.i_height ); + i_sqe_u = x264_pixel_ssd_wxh( &h->pixf, frame_psnr->plane[1], frame_psnr->i_stride[1], h->fenc->plane[1], h->fenc->i_stride[1], h->param.i_width/2, h->param.i_height/2); + i_sqe_v = x264_pixel_ssd_wxh( &h->pixf, frame_psnr->plane[2], frame_psnr->i_stride[2], h->fenc->plane[2], h->fenc->i_stride[2], h->param.i_width/2, h->param.i_height/2); + x264_cpu_restore( h->param.cpu ); h->stat.i_sqe_global[i_slice_type] += i_sqe_y + i_sqe_u + i_sqe_v; h->stat.f_psnr_average[i_slice_type] += x264_psnr( i_sqe_y + i_sqe_u + i_sqe_v, 3 * h->param.i_width * h->param.i_height / 2 ); diff --git a/encoder/set.c b/encoder/set.c index b95a7876..7ee0b432 100644 --- a/encoder/set.c +++ b/encoder/set.c @@ -129,22 +129,12 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param ) sps->b_direct8x8_inference = 1; } - if( param->i_width % 16 != 0 || param->i_height % 16 != 0 ) - { - sps->b_crop = 1; - sps->crop.i_left = 0; - sps->crop.i_right = ( 16 - param->i_width % 16)/2; - sps->crop.i_top = 0; - sps->crop.i_bottom = ( 16 - 
param->i_height % 16)/2; - } - else - { - sps->b_crop = 0; - sps->crop.i_left = 0; - sps->crop.i_right = 0; - sps->crop.i_top = 0; - sps->crop.i_bottom = 0; - } + sps->crop.i_left = 0; + sps->crop.i_top = 0; + sps->crop.i_right = (- param->i_width) & 15; + sps->crop.i_bottom = (- param->i_height) & 15; + sps->b_crop = sps->crop.i_left || sps->crop.i_top || + sps->crop.i_right || sps->crop.i_bottom; sps->b_vui = 0; sps->vui.b_aspect_ratio_info_present = 0; @@ -275,10 +265,10 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps ) bs_write( s, 1, sps->b_crop ); if( sps->b_crop ) { - bs_write_ue( s, sps->crop.i_left ); - bs_write_ue( s, sps->crop.i_right ); - bs_write_ue( s, sps->crop.i_top ); - bs_write_ue( s, sps->crop.i_bottom ); + bs_write_ue( s, sps->crop.i_left / 2 ); + bs_write_ue( s, sps->crop.i_right / 2 ); + bs_write_ue( s, sps->crop.i_top / 2 ); + bs_write_ue( s, sps->crop.i_bottom / 2 ); } bs_write( s, 1, sps->b_vui );