granicus.if.org Git - libvpx/blob - third_party/libyuv/source/compare.cc

   1 /*
   2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS. All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "libyuv/compare.h"
  12
  13 #include <float.h>
  14 #include <math.h>
  15 #ifdef _OPENMP
  16 #include <omp.h>
  17 #endif
  18
  19 #include "libyuv/basic_types.h"
  20 #include "libyuv/cpu_id.h"
  21 #include "libyuv/row.h"
  22 #include "libyuv/video_common.h"
  23
  24 #ifdef __cplusplus
  25 namespace libyuv {
  26 extern "C" {
  27 #endif
  28
  29 // hash seed of 5381 recommended.
  30 // Internal C version of HashDjb2 with int sized count for efficiency.
  31 uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
  32
  33 // This module is for Visual C x86
  34 #if !defined(LIBYUV_DISABLE_X86) && \
  35     (defined(_M_IX86) || \
  36     (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
  37 #define HAS_HASHDJB2_SSE41
  38 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
  39
  40 #if _MSC_VER >= 1700
  41 #define HAS_HASHDJB2_AVX2
  42 uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
  43 #endif
  44
  45 #endif  // HAS_HASHDJB2_SSE41
  46
  47 // hash seed of 5381 recommended.
  48 LIBYUV_API
  49 uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
  50   const int kBlockSize = 1 << 15;  // 32768;
  51   int remainder;
  52   uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
  53 #if defined(HAS_HASHDJB2_SSE41)
  54   if (TestCpuFlag(kCpuHasSSE41)) {
  55     HashDjb2_SSE = HashDjb2_SSE41;
  56   }
  57 #endif
  58 #if defined(HAS_HASHDJB2_AVX2)
  59   if (TestCpuFlag(kCpuHasAVX2)) {
  60     HashDjb2_SSE = HashDjb2_AVX2;
  61   }
  62 #endif
  63
  64   while (count >= (uint64)(kBlockSize)) {
  65     seed = HashDjb2_SSE(src, kBlockSize, seed);
  66     src += kBlockSize;
  67     count -= kBlockSize;
  68   }
  69   remainder = (int)(count) & ~15;
  70   if (remainder) {
  71     seed = HashDjb2_SSE(src, remainder, seed);
  72     src += remainder;
  73     count -= remainder;
  74   }
  75   remainder = (int)(count) & 15;
  76   if (remainder) {
  77     seed = HashDjb2_C(src, remainder, seed);
  78   }
  79   return seed;
  80 }
  81
  82 static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
  83   int x;
  84   for (x = 0; x < width - 1; x += 2) {
  85     if (argb[0] != 255) {  // First byte is not Alpha of 255, so not ARGB.
  86       return FOURCC_BGRA;
  87     }
  88     if (argb[3] != 255) {  // 4th byte is not Alpha of 255, so not BGRA.
  89       return FOURCC_ARGB;
  90     }
  91     if (argb[4] != 255) {  // Second pixel first byte is not Alpha of 255.
  92       return FOURCC_BGRA;
  93     }
  94     if (argb[7] != 255) {  // Second pixel 4th byte is not Alpha of 255.
  95       return FOURCC_ARGB;
  96     }
  97     argb += 8;
  98   }
  99   if (width & 1) {
 100     if (argb[0] != 255) {  // First byte is not Alpha of 255, so not ARGB.
 101       return FOURCC_BGRA;
 102     }
 103     if (argb[3] != 255) {  // 4th byte is not Alpha of 255, so not BGRA.
 104       return FOURCC_ARGB;
 105     }
 106   }
 107   return 0;
 108 }
 109
 110 // Scan an opaque argb image and return fourcc based on alpha offset.
 111 // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
 112 LIBYUV_API
 113 uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
 114   uint32 fourcc = 0;
 115   int h;
 116
 117   // Coalesce rows.
 118   if (stride_argb == width * 4) {
 119     width *= height;
 120     height = 1;
 121     stride_argb = 0;
 122   }
 123   for (h = 0; h < height && fourcc == 0; ++h) {
 124     fourcc = ARGBDetectRow_C(argb, width);
 125     argb += stride_argb;
 126   }
 127   return fourcc;
 128 }
 129
 130 uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
 131 #if !defined(LIBYUV_DISABLE_NEON) && \
 132     (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
 133 #define HAS_SUMSQUAREERROR_NEON
 134 uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
 135 #endif
 136 #if !defined(LIBYUV_DISABLE_X86) && \
 137     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
 138 #define HAS_SUMSQUAREERROR_SSE2
 139 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
 140 #endif
 141 // Visual C 2012 required for AVX2.
 142 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
 143 #define HAS_SUMSQUAREERROR_AVX2
 144 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
 145 #endif
 146
 147 // TODO(fbarchard): Refactor into row function.
 148 LIBYUV_API
 149 uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
 150                              int count) {
 151   // SumSquareError returns values 0 to 65535 for each squared difference.
 152   // Up to 65536 of those can be summed and remain within a uint32.
 153   // After each block of 65536 pixels, accumulate into a uint64.
 154   const int kBlockSize = 65536;
 155   int remainder = count & (kBlockSize - 1) & ~31;
 156   uint64 sse = 0;
 157   int i;
 158   uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
 159       SumSquareError_C;
 160 #if defined(HAS_SUMSQUAREERROR_NEON)
 161   if (TestCpuFlag(kCpuHasNEON)) {
 162     SumSquareError = SumSquareError_NEON;
 163   }
 164 #endif
 165 #if defined(HAS_SUMSQUAREERROR_SSE2)
 166   if (TestCpuFlag(kCpuHasSSE2)) {
 167     // Note only used for multiples of 16 so count is not checked.
 168     SumSquareError = SumSquareError_SSE2;
 169   }
 170 #endif
 171 #if defined(HAS_SUMSQUAREERROR_AVX2)
 172   if (TestCpuFlag(kCpuHasAVX2)) {
 173     // Note only used for multiples of 32 so count is not checked.
 174     SumSquareError = SumSquareError_AVX2;
 175   }
 176 #endif
 177 #ifdef _OPENMP
 178 #pragma omp parallel for reduction(+: sse)
 179 #endif
 180   for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
 181     sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
 182   }
 183   src_a += count & ~(kBlockSize - 1);
 184   src_b += count & ~(kBlockSize - 1);
 185   if (remainder) {
 186     sse += SumSquareError(src_a, src_b, remainder);
 187     src_a += remainder;
 188     src_b += remainder;
 189   }
 190   remainder = count & 31;
 191   if (remainder) {
 192     sse += SumSquareError_C(src_a, src_b, remainder);
 193   }
 194   return sse;
 195 }
 196
 197 LIBYUV_API
 198 uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
 199                                   const uint8* src_b, int stride_b,
 200                                   int width, int height) {
 201   uint64 sse = 0;
 202   int h;
 203   // Coalesce rows.
 204   if (stride_a == width &&
 205       stride_b == width) {
 206     width *= height;
 207     height = 1;
 208     stride_a = stride_b = 0;
 209   }
 210   for (h = 0; h < height; ++h) {
 211     sse += ComputeSumSquareError(src_a, src_b, width);
 212     src_a += stride_a;
 213     src_b += stride_b;
 214   }
 215   return sse;
 216 }
 217
 218 LIBYUV_API
 219 double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
 220   double psnr;
 221   if (sse > 0) {
 222     double mse = (double)(count) / (double)(sse);
 223     psnr = 10.0 * log10(255.0 * 255.0 * mse);
 224   } else {
 225     psnr = kMaxPsnr;      // Limit to prevent divide by 0
 226   }
 227
 228   if (psnr > kMaxPsnr)
 229     psnr = kMaxPsnr;
 230
 231   return psnr;
 232 }
 233
 234 LIBYUV_API
 235 double CalcFramePsnr(const uint8* src_a, int stride_a,
 236                      const uint8* src_b, int stride_b,
 237                      int width, int height) {
 238   const uint64 samples = width * height;
 239   const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
 240                                                 src_b, stride_b,
 241                                                 width, height);
 242   return SumSquareErrorToPsnr(sse, samples);
 243 }
 244
 245 LIBYUV_API
 246 double I420Psnr(const uint8* src_y_a, int stride_y_a,
 247                 const uint8* src_u_a, int stride_u_a,
 248                 const uint8* src_v_a, int stride_v_a,
 249                 const uint8* src_y_b, int stride_y_b,
 250                 const uint8* src_u_b, int stride_u_b,
 251                 const uint8* src_v_b, int stride_v_b,
 252                 int width, int height) {
 253   const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
 254                                                   src_y_b, stride_y_b,
 255                                                   width, height);
 256   const int width_uv = (width + 1) >> 1;
 257   const int height_uv = (height + 1) >> 1;
 258   const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
 259                                                   src_u_b, stride_u_b,
 260                                                   width_uv, height_uv);
 261   const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
 262                                                   src_v_b, stride_v_b,
 263                                                   width_uv, height_uv);
 264   const uint64 samples = width * height + 2 * (width_uv * height_uv);
 265   const uint64 sse = sse_y + sse_u + sse_v;
 266   return SumSquareErrorToPsnr(sse, samples);
 267 }
 268
 269 static const int64 cc1 =  26634;  // (64^2*(.01*255)^2
 270 static const int64 cc2 = 239708;  // (64^2*(.03*255)^2
 271
 272 static double Ssim8x8_C(const uint8* src_a, int stride_a,
 273                         const uint8* src_b, int stride_b) {
 274   int64 sum_a = 0;
 275   int64 sum_b = 0;
 276   int64 sum_sq_a = 0;
 277   int64 sum_sq_b = 0;
 278   int64 sum_axb = 0;
 279
 280   int i;
 281   for (i = 0; i < 8; ++i) {
 282     int j;
 283     for (j = 0; j < 8; ++j) {
 284       sum_a += src_a[j];
 285       sum_b += src_b[j];
 286       sum_sq_a += src_a[j] * src_a[j];
 287       sum_sq_b += src_b[j] * src_b[j];
 288       sum_axb += src_a[j] * src_b[j];
 289     }
 290
 291     src_a += stride_a;
 292     src_b += stride_b;
 293   }
 294
 295   {
 296     const int64 count = 64;
 297     // scale the constants by number of pixels
 298     const int64 c1 = (cc1 * count * count) >> 12;
 299     const int64 c2 = (cc2 * count * count) >> 12;
 300
 301     const int64 sum_a_x_sum_b = sum_a * sum_b;
 302
 303     const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
 304                          (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
 305
 306     const int64 sum_a_sq = sum_a*sum_a;
 307     const int64 sum_b_sq = sum_b*sum_b;
 308
 309     const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
 310                          (count * sum_sq_a - sum_a_sq +
 311                           count * sum_sq_b - sum_b_sq + c2);
 312
 313     if (ssim_d == 0.0) {
 314       return DBL_MAX;
 315     }
 316     return ssim_n * 1.0 / ssim_d;
 317   }
 318 }
 319
 320 // We are using a 8x8 moving window with starting location of each 8x8 window
 321 // on the 4x4 pixel grid. Such arrangement allows the windows to overlap
 322 // block boundaries to penalize blocking artifacts.
 323 LIBYUV_API
 324 double CalcFrameSsim(const uint8* src_a, int stride_a,
 325                      const uint8* src_b, int stride_b,
 326                      int width, int height) {
 327   int samples = 0;
 328   double ssim_total = 0;
 329   double (*Ssim8x8)(const uint8* src_a, int stride_a,
 330                     const uint8* src_b, int stride_b) = Ssim8x8_C;
 331
 332   // sample point start with each 4x4 location
 333   int i;
 334   for (i = 0; i < height - 8; i += 4) {
 335     int j;
 336     for (j = 0; j < width - 8; j += 4) {
 337       ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
 338       samples++;
 339     }
 340
 341     src_a += stride_a * 4;
 342     src_b += stride_b * 4;
 343   }
 344
 345   ssim_total /= samples;
 346   return ssim_total;
 347 }
 348
 349 LIBYUV_API
 350 double I420Ssim(const uint8* src_y_a, int stride_y_a,
 351                 const uint8* src_u_a, int stride_u_a,
 352                 const uint8* src_v_a, int stride_v_a,
 353                 const uint8* src_y_b, int stride_y_b,
 354                 const uint8* src_u_b, int stride_u_b,
 355                 const uint8* src_v_b, int stride_v_b,
 356                 int width, int height) {
 357   const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
 358                                       src_y_b, stride_y_b, width, height);
 359   const int width_uv = (width + 1) >> 1;
 360   const int height_uv = (height + 1) >> 1;
 361   const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
 362                                       src_u_b, stride_u_b,
 363                                       width_uv, height_uv);
 364   const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
 365                                       src_v_b, stride_v_b,
 366                                       width_uv, height_uv);
 367   return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
 368 }
 369
 370 #ifdef __cplusplus
 371 }  // extern "C"
 372 }  // namespace libyuv
 373 #endif