From ee87e20d53d16bcf7971468079c0ae0d63e41c44 Mon Sep 17 00:00:00 2001 From: Jim Bankoski Date: Tue, 21 Apr 2015 10:05:37 -0700 Subject: [PATCH] Adds a new temporal consistency metric to libvpx. Change-Id: Id61699ebf57ae4f8af96a468740c852b2f45f8e1 --- test/consistency_test.cc | 224 ++++++++++++++++++++++++++++++++++ test/test.mk | 2 + vp9/encoder/vp9_encoder.c | 84 ++++++++----- vp9/encoder/vp9_encoder.h | 7 ++ vp9/encoder/vp9_ssim.c | 247 +++++++++++++++++++++++++++++++++++++- vp9/encoder/vp9_ssim.h | 46 +++++++ 6 files changed, 578 insertions(+), 32 deletions(-) create mode 100644 test/consistency_test.cc diff --git a/test/consistency_test.cc b/test/consistency_test.cc new file mode 100644 index 000000000..66f694c68 --- /dev/null +++ b/test/consistency_test.cc @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+// NOTE(review): system header names below were missing here; confirm them.
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_mem/vpx_mem.h"
+
+extern "C"
+double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+                            uint8_t *img2, int img2_pitch,
+                            int width, int height,
+                            Ssimv *sv2, Metrics *m,
+                            int do_inconsistency);
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class ConsistencyTestBase : public ::testing::Test {
+ public:
+  ConsistencyTestBase(int width, int height) : width_(width), height_(height) {}
+  // Allocates the static frame buffers and the static Ssimv state array.
+  static void SetUpTestCase() {
+    source_data_[0] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_[0] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    source_data_[1] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_[1] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    ssim_array_ = new Ssimv[kDataBufferSize / 16];
+  }
+  // Zeroes every Ssimv entry; memset takes bytes, so scale by the entry size.
+  static void ClearSsim() {
+    memset(ssim_array_, 0, sizeof(*ssim_array_) * (kDataBufferSize / 16));
+  }
+  static void TearDownTestCase() {
+    vpx_free(source_data_[0]);
+    source_data_[0] = NULL;
+    vpx_free(reference_data_[0]);
+    reference_data_[0] = NULL;
+    vpx_free(source_data_[1]);
+    source_data_[1] = NULL;
+    vpx_free(reference_data_[1]);
+    reference_data_[1] = NULL;
+    // ssim_array_ came from new[], so it needs the array form of delete.
+    delete[] ssim_array_;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Handle frames up to 640x480
+  static const int kDataAlignment = 16;
+  static const int kDataBufferSize = 640*480;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+  // Fills the width x height region of |data| with bytes from rnd_.
+  void FillRandom(uint8_t *data, int stride, int width, int height) {
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < width; ++w) {
+        data[h * stride + w] = rnd_.Rand8();
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    FillRandom(data, stride, width_, height_);
+  }
+  // Copies one whole kDataBufferSize-byte buffer, ignoring strides.
+  void Copy(uint8_t *reference, uint8_t *source) {
+    memcpy(reference, source, kDataBufferSize);
+  }
+  // Approximate in-place box blur: horizontal pass, then vertical pass.
+  void Blur(uint8_t *data, int stride, int taps) {
+    int sum = 0;  // NOTE(review): never reset between rows or columns.
+    int half_taps = taps / 2;
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < taps; ++w) {
+        sum += data[w + h * stride];
+      }
+      for (int w = taps; w < width_; ++w) {
+        sum += data[w + h * stride] - data[w - taps + h * stride];
+        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
+      }
+    }
+    for (int w = 0; w < width_; ++w) {
+      for (int h = 0; h < taps; ++h) {
+        sum += data[h + w * stride];  // NOTE(review): reads row w, column h.
+      }
+      for (int h = taps; h < height_; ++h) {
+        sum += data[w + h * stride] - data[(h - taps) * stride + w];
+        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
+      }
+    }
+  }
+  int width_, height_;
+  static uint8_t* source_data_[2];
+  int source_stride_;
+  static uint8_t* reference_data_[2];
+  int reference_stride_;
+  static Ssimv *ssim_array_;
+  Metrics metrics_;
+
+  ACMRandom rnd_;
+};
+
+#if CONFIG_VP9_ENCODER
+typedef std::tr1::tuple<int, int> ConsistencyParam;
+class ConsistencyVP9Test
+    : public ConsistencyTestBase,
+      public ::testing::WithParamInterface<ConsistencyParam> {
+ public:
+  ConsistencyVP9Test() : ConsistencyTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  double CheckConsistency(int frame) {
+    EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
+    return
+        vp9_get_ssim_metrics(source_data_[frame], source_stride_,
+                             reference_data_[frame], reference_stride_,
+                             width_, height_, ssim_array_, &metrics_, 1);
+  }
+};
+#endif  // CONFIG_VP9_ENCODER
+
+uint8_t* ConsistencyTestBase::source_data_[2] = {NULL, NULL};
+uint8_t* ConsistencyTestBase::reference_data_[2] = {NULL, NULL};
+Ssimv* ConsistencyTestBase::ssim_array_ = NULL;
+
+#if CONFIG_VP9_ENCODER
+TEST_P(ConsistencyVP9Test, ConsistencyIsZero) { + FillRandom(source_data_[0], source_stride_); + Copy(source_data_[1], source_data_[0]); + Copy(reference_data_[0], source_data_[0]); + Blur(reference_data_[0], reference_stride_, 3); + Copy(reference_data_[1], source_data_[0]); + Blur(reference_data_[1], reference_stride_, 3); + + double inconsistency = CheckConsistency(1); + inconsistency = CheckConsistency(0); + EXPECT_EQ(inconsistency, 0.0) + << "Should have 0 inconsistency if they are exactly the same."; + + // If the sources themselves are inconsistent, the reference frames' + // inconsistency should be less than when the sources are consistent. + FillRandom(source_data_[0], source_stride_); + FillRandom(source_data_[1], source_stride_); + FillRandom(reference_data_[0], reference_stride_); + FillRandom(reference_data_[1], reference_stride_); + CheckConsistency(0); + inconsistency = CheckConsistency(1); + + Copy(source_data_[1], source_data_[0]); + CheckConsistency(0); + double inconsistency2 = CheckConsistency(1); + EXPECT_LT(inconsistency, inconsistency2) + << "Should have less inconsistency if source itself is inconsistent."; + + // Less of a blur should be less inconsistent than more blur coming off + // a frame with no blur. 
+ ClearSsim(); + FillRandom(source_data_[0], source_stride_); + Copy(source_data_[1], source_data_[0]); + Copy(reference_data_[0], source_data_[0]); + Copy(reference_data_[1], source_data_[0]); + Blur(reference_data_[1], reference_stride_, 4); + CheckConsistency(0); + inconsistency = CheckConsistency(1); + ClearSsim(); + Copy(reference_data_[1], source_data_[0]); + Blur(reference_data_[1], reference_stride_, 8); + CheckConsistency(0); + inconsistency2 = CheckConsistency(1); + + EXPECT_LT(inconsistency, inconsistency2) + << "Stronger Blur should produce more inconsistency."; +} +#endif // CONFIG_VP9_ENCODER + + +using std::tr1::make_tuple; + +//------------------------------------------------------------------------------ +// C functions + +#if CONFIG_VP9_ENCODER +const ConsistencyParam c_vp9_tests[] = { + make_tuple(320, 240), + make_tuple(318, 242), + make_tuple(318, 238), +}; +INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test, + ::testing::ValuesIn(c_vp9_tests)); +#endif + +} // namespace diff --git a/test/test.mk b/test/test.mk index 5baf23470..91a93f120 100644 --- a/test/test.mk +++ b/test/test.mk @@ -151,6 +151,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc ifeq ($(CONFIG_VP9_ENCODER),yes) LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc + endif ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index c6bc6aaf0..1b8785dbf 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1618,7 +1618,8 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, #if CONFIG_INTERNAL_STATS cpi->b_calculate_ssimg = 0; cpi->b_calculate_blockiness = 1; - + cpi->b_calculate_consistency = 1; + cpi->total_inconsistency = 0; cpi->count = 0; cpi->bytes = 0; @@ -1669,6 +1670,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, 
cpi->total_blockiness = 0; } + if (cpi->b_calculate_consistency) { + cpi->ssim_vars = vpx_malloc(sizeof(*cpi->ssim_vars)*720*480); + } + #endif cpi->first_time_stamp_ever = INT64_MAX; @@ -1865,6 +1870,12 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, return cpi; } +#define SNPRINT(H, T) \ + snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T)) + +#define SNPRINT2(H, T, V) \ + snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) + void vp9_remove_compressor(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -1878,8 +1889,9 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #if CONFIG_INTERNAL_STATS vp9_clear_system_state(); - // printf("\n8x8-4x4:%d-%d\n", cpi->t8x8_count, cpi->t4x4_count); if (cpi->oxcf.pass != 1) { + char headings[512] = {0}; + char results[512] = {0}; FILE *f = fopen("opsnr.stt", "a"); double time_encoded = (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) / 10000000.000; @@ -1897,39 +1909,39 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vpx_sse_to_psnr((double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error); const double total_ssim = 100 * pow(cpi->summed_quality / - cpi->summed_weights, 8.0); + cpi->summed_weights, 8.0); + + snprintf(headings, sizeof(headings), + "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" + "VPXSSIM\tFASTSIM\tPSNRHVS"); + snprintf(results, sizeof(results), + "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" + "%7.3f\t%7.3f\t%7.3f", dr, + cpi->total / cpi->count, total_psnr, + cpi->totalp / cpi->count, totalp_psnr, total_ssim, + cpi->total_fastssim_all / cpi->count, + cpi->total_psnrhvs_all / cpi->count); if (cpi->b_calculate_blockiness) { - fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" - "VPXSSIM\tVPSSIMP\tFASTSSIM\tPSNRHVS\tTime(ms)\n"); - fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" - "%7.3f\t%7.3f\t%8.0f\n", - dr, cpi->total / cpi->count, total_psnr, - cpi->totalp / cpi->count, totalp_psnr, total_ssim, - cpi->total_fastssim_all / cpi->count, - cpi->total_psnrhvs_all / 
cpi->count, - total_encode_time); - } else { - fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" - "VPXSSIM\tVPSSIMP\tBlockiness\tFASTSSIM\tPSNRHVS\tTime(ms)\n"); - fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" - "%7.3f\t%7.3f\t%7.3f\t%8.0f\n", - dr, cpi->total / cpi->count, total_psnr, - cpi->totalp / cpi->count, totalp_psnr, total_ssim, - cpi->total_blockiness / cpi->count, - cpi->total_fastssim_all / cpi->count, - cpi->total_psnrhvs_all / cpi->count, - total_encode_time); + SNPRINT(headings, "\t Block"); + SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count); } - } + if (cpi->b_calculate_consistency) { + double consistency = + vpx_sse_to_psnr((double)cpi->totalp_samples, peak, + (double)cpi->total_inconsistency); - if (cpi->b_calculate_ssimg) { - fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t Time(ms)\n"); - fprintf(f, "%7.2f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr, - cpi->total_ssimg_y / cpi->count, - cpi->total_ssimg_u / cpi->count, - cpi->total_ssimg_v / cpi->count, - cpi->total_ssimg_all / cpi->count, total_encode_time); + SNPRINT(headings, "\tConsist"); + SNPRINT2(results, "\t%7.3f", consistency); + } + + if (cpi->b_calculate_ssimg) { + SNPRINT(headings, "\t SSIMG"); + SNPRINT2(results, "\t%7.3f", cpi->total_ssimg_all / cpi->count); + } + + fprintf(f, "%s\t Time\n", headings); + fprintf(f, "%s\t%8.0f\n", results, total_encode_time); } fclose(f); @@ -4201,6 +4213,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cm->frame_to_show->y_stride, cpi->Source->y_width, cpi->Source->y_height); + if (cpi->b_calculate_consistency) + cpi->total_inconsistency += vp9_get_ssim_metrics(cpi->Source->y_buffer, + cpi->Source->y_stride, + cm->frame_to_show->y_buffer, + cm->frame_to_show->y_stride, + cpi->Source->y_width, + cpi->Source->y_height, + cpi->ssim_vars, + &cpi->metrics, 1); + if (cpi->b_calculate_ssimg) { double y, u, v, frame_all; #if CONFIG_VP9_HIGHBITDEPTH diff --git a/vp9/encoder/vp9_encoder.h 
b/vp9/encoder/vp9_encoder.h index 267c79620..9906929cf 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -34,6 +34,9 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" +#if CONFIG_INTERNAL_STATS +#include "vp9/encoder/vp9_ssim.h" +#endif #include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" @@ -429,6 +432,10 @@ typedef struct VP9_COMP { int b_calculate_ssimg; int b_calculate_blockiness; + int b_calculate_consistency; + double total_inconsistency; + Ssimv *ssim_vars; + Metrics metrics; #endif int b_calculate_psnr; diff --git a/vp9/encoder/vp9_ssim.c b/vp9/encoder/vp9_ssim.c index 5dbfbf53b..88db5dda0 100644 --- a/vp9/encoder/vp9_ssim.c +++ b/vp9/encoder/vp9_ssim.c @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include #include "./vp9_rtcd.h" - #include "vp9/encoder/vp9_ssim.h" void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r, @@ -201,6 +201,251 @@ double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, return ssim_all; } +// traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity +// +// Re working out the math -> +// +// ssim(x,y) = (2*mean(x)*mean(y) + c1)*(2*cov(x,y)+c2) / +// ((mean(x)^2+mean(y)^2+c1)*(var(x)+var(y)+c2)) +// +// mean(x) = sum(x) / n +// +// cov(x,y) = (n*sum(xi*yi)-sum(x)*sum(y))/(n*n) +// +// var(x) = (n*sum(xi*xi)-sum(xi)*sum(xi))/(n*n) +// +// ssim(x,y) = +// (2*sum(x)*sum(y)/(n*n) + c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))/(n*n)+c2) / +// (((sum(x)*sum(x)+sum(y)*sum(y))/(n*n) +c1) * +// ((n*sum(xi*xi) - sum(xi)*sum(xi))/(n*n)+ +// (n*sum(yi*yi) - sum(yi)*sum(yi))/(n*n)+c2))) +// +// factoring out n*n +// +// ssim(x,y) = +// (2*sum(x)*sum(y) + n*n*c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))+n*n*c2) / +// (((sum(x)*sum(x)+sum(y)*sum(y)) + n*n*c1) * +// 
(n*sum(xi*xi)-sum(xi)*sum(xi)+n*sum(yi*yi)-sum(yi)*sum(yi)+n*n*c2))
+//
+// Replace c1 with n*n * c1 for the final step that leads to this code:
+// The final step scales by 12 bits so we don't lose precision in the constants.
+// ssimv_similarity() returns this ssim for the n-pixel sums held in |sv|.
+double ssimv_similarity(Ssimv *sv, int64_t n) {
+  // Scale the constants by number of pixels.
+  const int64_t c1 = (cc1 * n * n) >> 12;
+  const int64_t c2 = (cc2 * n * n) >> 12;
+
+  const double l = 1.0 * (2 * sv->sum_s * sv->sum_r + c1) /
+      (sv->sum_s * sv->sum_s + sv->sum_r * sv->sum_r + c1);
+
+  // Since these variables are unsigned sums, convert to double so
+  // math is done in double arithmetic.
+  const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2)
+      / (n * sv->sum_sq_s - sv->sum_s * sv->sum_s + n * sv->sum_sq_r
+         - sv->sum_r * sv->sum_r + c2);
+
+  return l * v;
+}
+
+// The first term of the ssim metric is a luminance factor.
+//
+// (2*mean(x)*mean(y) + c1)/ (mean(x)^2+mean(y)^2+c1)
+//
+// This luminance factor is super sensitive to the dark side of luminance
+// values and completely insensitive on the white side. Check out 2 sets,
+// (1,3) and (250,252): the term gives 2*1*3/(1+9) = .60 versus
+// 2*250*252/ (250^2+252^2) => .99997
+//
+// As a result, in this tweaked version of the calculation the
+// luminance is taken as a percentage off from the peak possible.
+//
+// 255 * 255 - (sum_s - sum_r) / count * (sum_s - sum_r) / count
+// The second factor is the same covariance term as in ssimv_similarity().
+double ssimv_similarity2(Ssimv *sv, int64_t n) {
+  // Scale the constants by number of pixels.
+  const int64_t c1 = (cc1 * n * n) >> 12;
+  const int64_t c2 = (cc2 * n * n) >> 12;
+
+  const double mean_diff = (1.0 * sv->sum_s - sv->sum_r) / n;
+  const double l = (255 * 255 - mean_diff * mean_diff + c1) / (255 * 255 + c1);
+
+  // Since these variables are unsigned sums, convert to double so
+  // math is done in double arithmetic.
+  const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2)
+      / (n * sv->sum_sq_s - sv->sum_s * sv->sum_s +
+         n * sv->sum_sq_r - sv->sum_r * sv->sum_r + c2);
+
+  return l * v;
+}
+void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2, int img2_pitch,
+                 Ssimv *sv) {
+  vp9_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch,
+                     &sv->sum_s, &sv->sum_r, &sv->sum_sq_s, &sv->sum_sq_r,
+                     &sv->sum_sxr);  // sv->ssim is not touched here.
+}
+// Computes ssim on a 4x4 grid of 8x8 windows, updates |m|, replaces sv2[]
+// (one entry per grid point; the previous frame's on entry), returns m->ssimc.
+double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+                            uint8_t *img2, int img2_pitch,
+                            int width, int height,
+                            Ssimv *sv2, Metrics *m,
+                            int do_inconsistency) {
+  double dssim_total = 0;
+  double ssim_total = 0;
+  double ssim2_total = 0;
+  double inconsistency_total = 0;
+  int i, j;
+  int c = 0;
+  double norm;
+  double old_ssim_total = 0;
+  vp9_clear_system_state();
+  // We can sample points as frequently as we like; start with 1 per 4x4.
+  for (i = 0; i < height; i += 4,
+       img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
+    for (j = 0; j < width; j += 4, ++c) {
+      Ssimv sv = {0};
+      double ssim;
+      double ssim2;
+      double dssim;
+      uint32_t var_new;
+      uint32_t var_old;
+      uint32_t mean_new;
+      uint32_t mean_old;
+      double ssim_new;
+      double ssim_old;
+
+      // Not sure there's a great way to handle the edge pixels
+      // in ssim when using a window. Seems biased against edge pixels
+      // however you handle this. This uses only samples that are
+      // fully in the frame.
+      if (j + 8 <= width && i + 8 <= height) {
+        ssimv_parms(img1 + j, img1_pitch, img2 + j, img2_pitch, &sv);
+      }
+
+      ssim = ssimv_similarity(&sv, 64);
+      ssim2 = ssimv_similarity2(&sv, 64);
+
+      sv.ssim = ssim2;
+
+      // dssim is calculated to use as an actual error metric and
+      // is scaled up to the same range as sum square error.
+      // Since we are subsampling every 16th point maybe this should be
+      // *16 ?
+      dssim = 255 * 255 * (1 - ssim2) / 2;
+
+      // Here I introduce a new error metric: consistency-weighted
+      // SSIM-inconsistency.  This metric isolates frames where the
+      // SSIM 'suddenly' changes, e.g. if one frame in every 8 is much
+      // sharper or blurrier than the others. Higher values indicate a
+      // temporally inconsistent SSIM. There are two ideas at work:
+      //
+      // 1) 'SSIM-inconsistency': the total inconsistency value
+      // reflects how much SSIM values are changing between this
+      // source / reference frame pair and the previous pair.
+      //
+      // 2) 'consistency-weighted': weights de-emphasize areas in the
+      // frame where the scene content has changed. Changes in scene
+      // content are detected via changes in local variance and local
+      // mean.
+      //
+      // Thus the overall measure reflects how inconsistent the SSIM
+      // values are, over consistent regions of the frame.
+      //
+      // The metric has three terms:
+      //
+      // term 1 -> uses change in scene Variance to weight error score
+      //  2 * var(Fi)*var(Fi-1) / (var(Fi)^2+var(Fi-1)^2)
+      //  larger changes from one frame to the next mean we care
+      //  less about consistency.
+      //
+      // term 2 -> uses change in local scene luminance to weight error
+      //  2 * avg(Fi)*avg(Fi-1) / (avg(Fi)^2+avg(Fi-1)^2)
+      //  larger changes from one frame to the next mean we care
+      //  less about consistency.
+      //
+      // term 3 -> measures inconsistency in ssim scores between frames
+      //   1 - (2 * ssim(Fi)*ssim(Fi-1) / (ssim(Fi)^2+ssim(Fi-1)^2)).
+      //
+      // This term compares the ssim score for the same location in 2
+      // subsequent frames.
+      var_new = sv.sum_sq_s - sv.sum_s * sv.sum_s / 64;
+      var_old = sv2[c].sum_sq_s - sv2[c].sum_s * sv2[c].sum_s / 64;
+      mean_new = sv.sum_s;
+      mean_old = sv2[c].sum_s;
+      ssim_new = sv.ssim;
+      ssim_old = sv2[c].ssim;
+
+      if (do_inconsistency) {
+        // We do the metric once for every 4x4 block in the image. Since
+        // we are scaling the error to SSE for use in a psnr calculation
+        // 1.0 = 4x4x255x255 the worst error we can possibly have.
+        static const double kScaling = 4. * 4 * 255 * 255;
+
+        // The constants have to be non 0 to avoid potential divide by 0
+        // issues; other than that they affect kind of a weighting between
+        // the terms.  No testing of what the right terms should be has been
+        // done.
+        static const double c1 = 1, c2 = 1, c3 = 1;
+
+        // This measures how much consistent variance is in two consecutive
+        // source frames. 1.0 means they have exactly the same variance.
+        const double variance_term = (2.0 * var_old * var_new + c1) /
+            (1.0 * var_old * var_old + 1.0 * var_new * var_new + c1);
+
+        // This measures how consistent the local means are between two
+        // consecutive frames. 1.0 means they have exactly the same mean.
+        const double mean_term = (2.0 * mean_old * mean_new + c2) /
+            (1.0 * mean_old * mean_old + 1.0 * mean_new * mean_new + c2);
+
+        // This measures how consistent the ssims of two
+        // consecutive frames are. 1.0 means they are exactly the same.
+        double ssim_term = pow((2.0 * ssim_old * ssim_new + c3) /
+                               (ssim_old * ssim_old + ssim_new * ssim_new + c3),
+                               5);
+
+        double this_inconsistency;
+
+        // Floating point math sometimes makes this > 1 by a tiny bit.
+        // We want the metric to scale between 0 and 1.0 so we can convert
+        // it to an snr scaled value.
+        if (ssim_term > 1)
+          ssim_term = 1;
+
+        // This converts the consistency metric to an inconsistency metric
+        // (so we can scale it like psnr to something like sum square error).
+        // The reason for the variance and mean terms is the assumption that
+        // if there are big changes in the source we should penalize
+        // inconsistency in ssim scores a bit less as it will be less visible
+        // to the user.
+        this_inconsistency = (1 - ssim_term) * variance_term * mean_term;
+
+        this_inconsistency *= kScaling;
+        inconsistency_total += this_inconsistency;
+      }
+      sv2[c] = sv;
+      ssim_total += ssim;
+      ssim2_total += ssim2;
+      dssim_total += dssim;
+
+      old_ssim_total += ssim_old;
+    }
+  }
+
+  norm = c > 0 ? 1. / c : 0;  // c == number of grid points summed above.
+  ssim_total *= norm;
+  ssim2_total *= norm;
+  m->ssim2 = ssim2_total;
+  m->ssim = ssim_total;
+  if (old_ssim_total == 0)
+    inconsistency_total = 0;
+
+  m->ssimc = inconsistency_total;
+
+  m->dssim = dssim_total;  // m->dssimd and m->ssimcd are not written here.
+  return inconsistency_total;
+}
+
+
 #if CONFIG_VP9_HIGHBITDEPTH double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, diff --git a/vp9/encoder/vp9_ssim.h b/vp9/encoder/vp9_ssim.h index ed1bb83de..10f14c4d2 100644 --- a/vp9/encoder/vp9_ssim.h +++ b/vp9/encoder/vp9_ssim.h @@ -17,6 +17,52 @@ extern "C" { #include "vpx_scale/yv12config.h"
+// metrics used for calculating ssim, ssim2, dssim, and ssimc
+typedef struct {
+  // sum of source samples (over 8x8 region)
+  uint64_t sum_s;
+
+  // sum of reference samples (over 8x8 region)
+  uint64_t sum_r;
+
+  // sum of squared source samples (over 8x8 region)
+  uint64_t sum_sq_s;
+
+  // sum of squared reference samples (over 8x8 region)
+  uint64_t sum_sq_r;
+
+  // sum of source times reference (over 8x8 region)
+  uint64_t sum_sxr;
+
+  // calculated ssim score between source and reference
+  double ssim;
+} Ssimv;
+
+// metrics collected on a frame basis
+typedef struct {
+  // ssim consistency error metric (see code for explanation)
+  double ssimc;
+
+  // standard ssim
+  double ssim;
+
+  // revised ssim (see code for explanation)
+  double ssim2;
+
+  // ssim restated as an error metric like sse
+  double dssim;
+
+  // dssim converted to decibels
+  double dssimd;
+
+  // ssimc converted to decibels
+  double ssimcd;
+} Metrics;
+
+double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
+                            int img2_pitch, int width, int height, Ssimv *sv2,
+                            Metrics *m, int do_inconsistency);
+
 double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, double *weight); -- 2.40.0