From 8ade503619aff45e5be0ee544d8ab8c867eb5720 Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Sat, 9 Jul 2011 19:21:00 -0700 Subject: [PATCH] Fix SSIM calculation with sliced threads --- common/common.h | 1 + common/pixel.c | 3 ++- common/pixel.h | 2 +- encoder/encoder.c | 7 +++++-- tools/checkasm.c | 5 +++-- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/common/common.h b/common/common.h index d9e87997..0b146fcb 100644 --- a/common/common.h +++ b/common/common.h @@ -830,6 +830,7 @@ struct x264_t /* Metrics */ int64_t i_ssd[3]; double f_ssim; + int i_ssim_cnt; } frame; /* Cumulated stats */ diff --git a/common/pixel.c b/common/pixel.c index f1e45915..aa490308 100644 --- a/common/pixel.c +++ b/common/pixel.c @@ -619,7 +619,7 @@ static float ssim_end4( int sum0[5][4], int sum1[5][4], int width ) float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int stride1, pixel *pix2, int stride2, - int width, int height, void *buf ) + int width, int height, void *buf, int *cnt ) { int z = 0; float ssim = 0.0; @@ -638,6 +638,7 @@ float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, for( int x = 0; x < width-1; x += 4 ) ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) ); } + *cnt = (height-1) * (width-1); return ssim; } diff --git a/common/pixel.h b/common/pixel.h index 830fd637..862b1424 100644 --- a/common/pixel.h +++ b/common/pixel.h @@ -125,7 +125,7 @@ typedef struct void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ); void x264_pixel_ssd_nv12( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v ); uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height ); -float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, void *buf ); +float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, void *buf, int *cnt ); int x264_field_vsad( x264_t *h, int mb_x, int mb_y ); #endif diff --git a/encoder/encoder.c b/encoder/encoder.c index 001c8cdf..44f4519c 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -1819,6 +1819,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop ) if( h->param.analyse.b_ssim ) { + int ssim_cnt; x264_emms(); /* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks, * and overlap by 4 */ @@ -1827,7 +1828,8 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop ) x264_pixel_ssim_wxh( &h->pixf, h->fdec->plane[0] + 2+minpix_y*h->fdec->i_stride[0], h->fdec->i_stride[0], h->fenc->plane[0] + 2+minpix_y*h->fenc->i_stride[0], h->fenc->i_stride[0], - h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer ); + h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer, &ssim_cnt ); + h->stat.frame.i_ssim_cnt += ssim_cnt; } } } @@ -2446,6 +2448,7 @@ static int x264_threaded_slices_write( x264_t *h ) for( int j = 0; j < 3; j++ ) h->stat.frame.i_ssd[j] += t->stat.frame.i_ssd[j]; h->stat.frame.f_ssim += t->stat.frame.f_ssim; + h->stat.frame.i_ssim_cnt += t->stat.frame.i_ssim_cnt; } return 0; @@ -3126,7 +3129,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, if( h->param.analyse.b_ssim ) { double ssim_y = h->stat.frame.f_ssim - / (((h->param.i_width-6)>>2) * ((h->param.i_height-6)>>2)); + / h->stat.frame.i_ssim_cnt; h->stat.f_ssim_mean_y[h->sh.i_type] += ssim_y * dur; snprintf( psz_message + strlen(psz_message), 80 - strlen(psz_message), " SSIM Y:%.5f", ssim_y ); diff --git a/tools/checkasm.c b/tools/checkasm.c index 88ae68f7..2a483914 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -488,12 +488,13 @@ static int check_pixel( int cpu_ref, int cpu_new ) if( pixel_asm.ssim_4x4x2_core != pixel_ref.ssim_4x4x2_core || pixel_asm.ssim_end4 != pixel_ref.ssim_end4 ) { + int cnt; float res_c, res_a; ALIGNED_16( int sums[5][4] ) = {{0}}; used_asm = ok = 1; x264_emms(); - res_c = x264_pixel_ssim_wxh( &pixel_c, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3 ); - res_a = x264_pixel_ssim_wxh( &pixel_asm, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3 ); + res_c = x264_pixel_ssim_wxh( &pixel_c, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3, &cnt ); + res_a = x264_pixel_ssim_wxh( &pixel_asm, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3, &cnt ); if( fabs( res_c - res_a ) > 1e-6 ) { ok = 0; -- 2.40.0