From c0874f2441aa23245d227e820e5e0b09550ca578 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Fri, 5 Feb 2016 11:30:12 -0800 Subject: [PATCH] Enable computing of FastSSIM for HBD build This commit adds the computation of fastSSIM for highbitdepth build; it also modifies the hbdmetric test to be more generic and applicable for fastSSIM. The 255 used for calculating ssim constants c1 and c2 is not exactly scaled by 4x and 16x to 1023 and 4095, which therefore requires the metric test to have a threshold more tolerant than 0, currently at 0.03dB. Change-Id: I631829da7773de400e77fc36004156e5e126c7e0 --- test/hbd_metrics_test.cc | 64 +++++++++++--- vp10/encoder/encoder.c | 19 +++-- vp9/encoder/vp9_encoder.c | 18 ++-- vpx_dsp/fastssim.c | 172 +++++++++++++++++++++++++++++--------- vpx_dsp/ssim.h | 5 +- 5 files changed, 213 insertions(+), 65 deletions(-) diff --git a/test/hbd_metrics_test.cc b/test/hbd_metrics_test.cc index 75b7c9bd6..fa9cfc158 100644 --- a/test/hbd_metrics_test.cc +++ b/test/hbd_metrics_test.cc @@ -27,11 +27,40 @@ using libvpx_test::ACMRandom; namespace { typedef double (*LBDMetricFunc)(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, - double *weight); + const YV12_BUFFER_CONFIG *dest); typedef double (*HBDMetricFunc)(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, - double *weight, unsigned int bd); + uint32_t bd); + +double compute_hbd_fastssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, + uint32_t bit_depth) { + double tempy, tempu, tempv; + return vpx_calc_fastssim(source, dest, + &tempy, &tempu, &tempv, bit_depth); +} + +double compute_fastssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest) { + double tempy, tempu, tempv; + return vpx_calc_fastssim(source, dest, + &tempy, &tempu, &tempv, 8); +} + +double compute_hbd_vpxssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, + uint32_t bit_depth) { + double ssim, weight; + ssim = vpx_highbd_calc_ssim(source, 
dest, &weight, bit_depth); + return 100 * pow(ssim / weight, 8.0); +} + +double compute_vpxssim(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest) { + double ssim, weight; + ssim = vpx_calc_ssim(source, dest, &weight); + return 100 * pow(ssim / weight, 8.0); +} class HBDMetricsTestBase { public: @@ -46,7 +75,7 @@ class HBDMetricsTestBase { YV12_BUFFER_CONFIG lbd_src, lbd_dst; YV12_BUFFER_CONFIG hbd_src, hbd_dst; ACMRandom rnd(ACMRandom::DeterministicSeed()); - double lbd_score, hbd_score, lbd_db, hbd_db, lbd_w, hbd_w; + double lbd_db, hbd_db; memset(&lbd_src, 0, sizeof(lbd_src)); memset(&lbd_dst, 0, sizeof(lbd_dst)); @@ -63,18 +92,18 @@ class HBDMetricsTestBase { uint16_t spel, dpel; spel = lbd_src.buffer_alloc[i]; // Create some distortion for dst buffer. - lbd_dst.buffer_alloc[i] = rnd.Rand8(); - dpel = lbd_dst.buffer_alloc[i]; + dpel = rnd.Rand8(); + lbd_dst.buffer_alloc[i] = (uint8_t)dpel; ((uint16_t*)(hbd_src.buffer_alloc))[i] = spel << (bit_depth_ - 8); ((uint16_t*)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8); i++; } - lbd_score = lbd_metric_(&lbd_src, &lbd_dst, &lbd_w); - hbd_score = hbd_metric_(&hbd_src, &hbd_dst, &hbd_w, bit_depth_); + lbd_db = lbd_metric_(&lbd_src, &lbd_dst); + hbd_db = hbd_metric_(&hbd_src, &hbd_dst, bit_depth_); - lbd_db = 100 * pow(lbd_score / lbd_w, 8.0); - hbd_db = 100 * pow(hbd_score / hbd_w, 8.0); + printf("%10f \n", lbd_db); + printf("%10f \n", hbd_db); vpx_free_frame_buffer(&lbd_src); vpx_free_frame_buffer(&lbd_dst); @@ -111,13 +140,22 @@ TEST_P(HBDMetricsTest, RunAccuracyCheck) { // Allow small variation due to floating point operations. static const double kSsim_thresh = 0.001; +// Allow some variation from accumulated errors in floating point operations. 
+static const double kFSsim_thresh = 0.01; INSTANTIATE_TEST_CASE_P( - C, HBDMetricsTest, + VPXSSIM, HBDMetricsTest, ::testing::Values( - MetricTestTParam(&vpx_calc_ssim, &vpx_highbd_calc_ssim, 10, + MetricTestTParam(&compute_vpxssim, &compute_hbd_vpxssim, 10, kSsim_thresh), - MetricTestTParam(&vpx_calc_ssim, &vpx_highbd_calc_ssim, 12, + MetricTestTParam(&compute_vpxssim, &compute_hbd_vpxssim, 12, kSsim_thresh))); +INSTANTIATE_TEST_CASE_P( + FASTSSIM, HBDMetricsTest, + ::testing::Values( + MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, 10, + kFSsim_thresh), + MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim, 12, + kFSsim_thresh))); } // namespace diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 739791ce6..8cd677b9e 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -4372,14 +4372,25 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags, if (cm->show_frame) { YV12_BUFFER_CONFIG *orig = cpi->Source; YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; + uint32_t bit_depth = 8; +#if CONFIG_VP9_HIGHBITDEPTH + uint32_t in_bit_depth = 8; +#endif cpi->count++; +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + in_bit_depth = cpi->oxcf.input_bit_depth; + bit_depth = cm->bit_depth; + } +#endif + if (cpi->b_calculate_psnr) { PSNR_STATS psnr; YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; #if CONFIG_VP9_HIGHBITDEPTH calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd, - cpi->oxcf.input_bit_depth); + in_bit_depth); #else calc_psnr(orig, recon, &psnr); #endif // CONFIG_VP9_HIGHBITDEPTH @@ -4494,14 +4505,10 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags, } } -#if CONFIG_VP9_HIGHBITDEPTH - if (!cm->use_highbitdepth) -#endif { double y, u, v, frame_all; - frame_all = vpx_calc_fastssim(orig, recon, &y, &u, &v); + frame_all = vpx_calc_fastssim(orig, recon, &y, &u, &v, bit_depth); adjust_image_stat(y, u, v, frame_all, &cpi->fastssim); - /* TODO(JBB): add 10/12 bit 
support */ } #if CONFIG_VP9_HIGHBITDEPTH if (!cm->use_highbitdepth) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 019abfe7e..3067afea7 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -4537,7 +4537,17 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->bytes += (int)(*size); if (cm->show_frame) { + uint32_t bit_depth = 8; +#if CONFIG_VP9_HIGHBITDEPTH + uint32_t in_bit_depth = 8; +#endif cpi->count++; +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + in_bit_depth = cpi->oxcf.input_bit_depth; + bit_depth = cm->bit_depth; + } +#endif if (cpi->b_calculate_psnr) { YV12_BUFFER_CONFIG *orig = cpi->Source; @@ -4546,7 +4556,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, PSNR_STATS psnr; #if CONFIG_VP9_HIGHBITDEPTH calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd, - cpi->oxcf.input_bit_depth); + in_bit_depth); #else calc_psnr(orig, recon, &psnr); #endif // CONFIG_VP9_HIGHBITDEPTH @@ -4665,15 +4675,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } } -#if CONFIG_VP9_HIGHBITDEPTH - if (!cm->use_highbitdepth) -#endif { double y, u, v, frame_all; frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u, - &v); + &v, bit_depth); adjust_image_stat(y, u, v, frame_all, &cpi->fastssim); - /* TODO(JBB): add 10/12 bit support */ } #if CONFIG_VP9_HIGHBITDEPTH if (!cm->use_highbitdepth) diff --git a/vpx_dsp/fastssim.c b/vpx_dsp/fastssim.c index 569f18b69..590df9510 100644 --- a/vpx_dsp/fastssim.c +++ b/vpx_dsp/fastssim.c @@ -19,19 +19,23 @@ #include "vpx_dsp/ssim.h" #include "vpx_ports/system_state.h" -/* TODO(jbb): High bit depth version of this code needed */ typedef struct fs_level fs_level; typedef struct fs_ctx fs_ctx; #define SSIM_C1 (255 * 255 * 0.01 * 0.01) #define SSIM_C2 (255 * 255 * 0.03 * 0.03) - +#if CONFIG_VP9_HIGHBITDEPTH +#define SSIM_C1_10 (1023 * 1023 * 0.01 * 0.01) +#define SSIM_C1_12 (4095 * 4095 * 0.01 * 
0.01) +#define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03) +#define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03) +#endif #define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b)) #define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b)) struct fs_level { - uint16_t *im1; - uint16_t *im2; + uint32_t *im1; + uint32_t *im2; double *ssim; int w; int h; @@ -82,7 +86,7 @@ static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) { level_size += sizeof(*_ctx->level[l].ssim) - 1; level_size /= sizeof(*_ctx->level[l].ssim); level_size *= sizeof(*_ctx->level[l].ssim); - _ctx->level[l].im1 = (uint16_t *) data; + _ctx->level[l].im1 = (uint32_t *)data; _ctx->level[l].im2 = _ctx->level[l].im1 + im_size; data += level_size; _ctx->level[l].ssim = (double *) data; @@ -98,10 +102,10 @@ static void fs_ctx_clear(fs_ctx *_ctx) { } static void fs_downsample_level(fs_ctx *_ctx, int _l) { - const uint16_t *src1; - const uint16_t *src2; - uint16_t *dst1; - uint16_t *dst2; + const uint32_t *src1; + const uint32_t *src2; + uint32_t *dst1; + uint32_t *dst2; int w2; int h2; int w; @@ -137,8 +141,40 @@ static void fs_downsample_level(fs_ctx *_ctx, int _l) { static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1, int _s1ystride, const unsigned char *_src2, int _s2ystride, int _w, int _h) { - uint16_t *dst1; - uint16_t *dst2; + uint32_t *dst1; + uint32_t *dst2; + int w; + int h; + int i; + int j; + w = _ctx->level[0].w; + h = _ctx->level[0].h; + dst1 = _ctx->level[0].im1; + dst2 = _ctx->level[0].im2; + for (j = 0; j < h; j++) { + int j0; + int j1; + j0 = 2 * j; + j1 = FS_MINI(j0 + 1, _h); + for (i = 0; i < w; i++) { + int i0; + int i1; + i0 = 2 * i; + i1 = FS_MINI(i0 + 1, _w); + dst1[j * w + i] = _src1[j0 * _s1ystride + i0] + + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0] + + _src1[j1 * _s1ystride + i1]; + dst2[j * w + i] = _src2[j0 * _s2ystride + i0] + + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0] + + _src2[j1 * _s2ystride + i1]; + } + } +} +static void 
hbd_fs_downsample_level0(fs_ctx *_ctx, const uint16_t *_src1, + int _s1ystride, const uint16_t *_src2, + int _s2ystride, int _w, int _h) { + uint32_t *dst1; + uint32_t *dst2; int w; int h; int i; @@ -167,11 +203,11 @@ static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1, } } -static void fs_apply_luminance(fs_ctx *_ctx, int _l) { +static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { unsigned *col_sums_x; unsigned *col_sums_y; - uint16_t *im1; - uint16_t *im2; + uint32_t *im1; + uint32_t *im2; double *ssim; double c1; int w; @@ -180,6 +216,15 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) { int j1offs; int i; int j; + double ssim_c1 = SSIM_C1; +#if CONFIG_VP9_HIGHBITDEPTH + if (bit_depth == 10) + ssim_c1 = SSIM_C1_10; + if (bit_depth == 12) + ssim_c1 = SSIM_C1_12; +#else + assert(bit_depth == 8); +#endif w = _ctx->level[_l].w; h = _ctx->level[_l].h; col_sums_x = _ctx->col_buf; @@ -198,7 +243,7 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) { col_sums_y[i] += im2[j1offs + i]; } ssim = _ctx->level[_l].ssim; - c1 = (double) (SSIM_C1 * 4096 * (1 << 4 * _l)); + c1 = (double) (ssim_c1 * 4096 * (1 << 4 * _l)); for (j = 0; j < h; j++) { unsigned mux; unsigned muy; @@ -296,9 +341,9 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) { } \ while (0) -static void fs_calc_structure(fs_ctx *_ctx, int _l) { - uint16_t *im1; - uint16_t *im2; +static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) { + uint32_t *im1; + uint32_t *im2; unsigned *gx_buf; unsigned *gy_buf; double *ssim; @@ -311,6 +356,16 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l) { int h; int i; int j; + double ssim_c2 = SSIM_C2; +#if CONFIG_VP9_HIGHBITDEPTH + if (bit_depth == 10) + ssim_c2 = SSIM_C2_10; + if (bit_depth == 12) + ssim_c2 = SSIM_C2_12; +#else + assert(bit_depth == 8); +#endif + w = _ctx->level[_l].w; h = _ctx->level[_l].h; im1 = _ctx->level[_l].im1; @@ -320,7 +375,7 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l) { 
stride = w + 8; gy_buf = gx_buf + 8 * stride; memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf)); - c2 = SSIM_C2 * (1 << 4 * _l) * 16 * 104; + c2 = ssim_c2 * (1 << 4 * _l) * 16 * 104; for (j = 0; j < h + 4; j++) { if (j < h - 1) { for (i = 0; i < w - 1; i++) { @@ -423,6 +478,13 @@ static double fs_average(fs_ctx *_ctx, int _l) { return pow(ret / (w * h), FS_WEIGHTS[_l]); } +static double convert_ssim_db(double _ssim, double _weight) { + assert(_weight >= _ssim); + if ((_weight - _ssim) < 1e-10) + return MAX_SSIM_DB; + return 10 * (log10(_weight) - log10(_weight - _ssim)); +} + static double calc_ssim(const unsigned char *_src, int _systride, const unsigned char *_dst, int _dystride, int _w, int _h) { fs_ctx ctx; @@ -432,42 +494,76 @@ static double calc_ssim(const unsigned char *_src, int _systride, fs_ctx_init(&ctx, _w, _h, FS_NLEVELS); fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h); for (l = 0; l < FS_NLEVELS - 1; l++) { - fs_calc_structure(&ctx, l); + fs_calc_structure(&ctx, l, 8); ret *= fs_average(&ctx, l); fs_downsample_level(&ctx, l + 1); } - fs_calc_structure(&ctx, l); - fs_apply_luminance(&ctx, l); + fs_calc_structure(&ctx, l, 8); + fs_apply_luminance(&ctx, l, 8); ret *= fs_average(&ctx, l); fs_ctx_clear(&ctx); return ret; } -static double convert_ssim_db(double _ssim, double _weight) { - assert(_weight >= _ssim); - if ((_weight - _ssim) < 1e-10) - return MAX_SSIM_DB; - return 10 * (log10(_weight) - log10(_weight - _ssim)); + +#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)(x)) << 1)) + +static double calc_hbd_ssim(const uint8_t *_src, int _systride, + const uint8_t *_dst, int _dystride, + int _w, int _h, uint32_t bit_depth) { + fs_ctx ctx; + double ret; + int l; + ret = 1; + fs_ctx_init(&ctx, _w, _h, FS_NLEVELS); + hbd_fs_downsample_level0(&ctx, + CONVERT_TO_SHORTPTR(_src), _systride, + CONVERT_TO_SHORTPTR(_dst), _dystride, + _w, _h); + for (l = 0; l < FS_NLEVELS - 1; l++) { + fs_calc_structure(&ctx, l, bit_depth); + ret *= 
fs_average(&ctx, l); + fs_downsample_level(&ctx, l + 1); + } + fs_calc_structure(&ctx, l, bit_depth); + fs_apply_luminance(&ctx, l, bit_depth); + ret *= fs_average(&ctx, l); + fs_ctx_clear(&ctx); + return ret; } double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v) { + double *ssim_y, double *ssim_u, double *ssim_v, + uint32_t bit_depth) { double ssimv; vpx_clear_system_state(); - *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer, - dest->y_stride, source->y_crop_width, - source->y_crop_height); + if (bit_depth == 8) { + *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer, + dest->y_stride, source->y_crop_width, + source->y_crop_height); + *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer, + dest->uv_stride, source->uv_crop_width, + source->uv_crop_height); + *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer, + dest->uv_stride, source->uv_crop_width, + source->uv_crop_height); + } else if (bit_depth == 10 || bit_depth == 12) { + *ssim_y = calc_hbd_ssim(source->y_buffer, source->y_stride, dest->y_buffer, + dest->y_stride, source->y_crop_width, + source->y_crop_height, bit_depth); + *ssim_u = calc_hbd_ssim(source->u_buffer, source->uv_stride, dest->u_buffer, + dest->uv_stride, source->uv_crop_width, + source->uv_crop_height, bit_depth); + *ssim_v = calc_hbd_ssim(source->v_buffer, source->uv_stride, dest->v_buffer, + dest->uv_stride, source->uv_crop_width, + source->uv_crop_height, bit_depth); - *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer, - dest->uv_stride, source->uv_crop_width, - source->uv_crop_height); + } else { + assert(0); + } - *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer, - dest->uv_stride, source->uv_crop_width, - source->uv_crop_height); ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v)); - return convert_ssim_db(ssimv, 1.0); } 
diff --git a/vpx_dsp/ssim.h b/vpx_dsp/ssim.h index 0074c3622..b1b64301a 100644 --- a/vpx_dsp/ssim.h +++ b/vpx_dsp/ssim.h @@ -72,7 +72,8 @@ double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source, double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v); + double *ssim_y, double *ssim_u, + double *ssim_v, uint32_t bit_depth); double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, @@ -82,7 +83,7 @@ double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source, double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *weight, - unsigned int bd); + uint32_t bd); #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus -- 2.40.0