granicus.if.org Git - libvpx/blob - vpx_dsp/variance.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12
  13 #include "./vpx_config.h"
  14 #include "./vpx_dsp_rtcd.h"
  15
  16 #include "vpx_ports/mem.h"
  17 #include "vpx/vpx_integer.h"
  18
  19 #include "vpx_dsp/variance.h"
  20
  21 static const uint8_t bilinear_filters[8][2] = {
  22   { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  23   { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 },
  24 };
  25
  26 uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
  27                             int b_stride) {
  28   int distortion = 0;
  29   int r, c;
  30
  31   for (r = 0; r < 4; ++r) {
  32     for (c = 0; c < 4; ++c) {
  33       int diff = a[c] - b[c];
  34       distortion += diff * diff;
  35     }
  36
  37     a += a_stride;
  38     b += b_stride;
  39   }
  40
  41   return distortion;
  42 }
  43
  44 uint32_t vpx_get_mb_ss_c(const int16_t *a) {
  45   unsigned int i, sum = 0;
  46
  47   for (i = 0; i < 256; ++i) {
  48     sum += a[i] * a[i];
  49   }
  50
  51   return sum;
  52 }
  53
  54 static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
  55                      int b_stride, int w, int h, uint32_t *sse, int *sum) {
  56   int i, j;
  57
  58   *sum = 0;
  59   *sse = 0;
  60
  61   for (i = 0; i < h; ++i) {
  62     for (j = 0; j < w; ++j) {
  63       const int diff = a[j] - b[j];
  64       *sum += diff;
  65       *sse += diff * diff;
  66     }
  67
  68     a += a_stride;
  69     b += b_stride;
  70   }
  71 }
  72
  73 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
  74 // or vertical direction to produce the filtered output block. Used to implement
  75 // the first-pass of 2-D separable filter.
  76 //
  77 // Produces int16_t output to retain precision for the next pass. Two filter
  78 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
  79 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
  80 // It defines the offset required to move from one input to the next.
  81 static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
  82                                               unsigned int src_pixels_per_line,
  83                                               int pixel_step,
  84                                               unsigned int output_height,
  85                                               unsigned int output_width,
  86                                               const uint8_t *filter) {
  87   unsigned int i, j;
  88
  89   for (i = 0; i < output_height; ++i) {
  90     for (j = 0; j < output_width; ++j) {
  91       b[j] = ROUND_POWER_OF_TWO(
  92           (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
  93
  94       ++a;
  95     }
  96
  97     a += src_pixels_per_line - output_width;
  98     b += output_width;
  99   }
 100 }
 101
 102 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
 103 // or vertical direction to produce the filtered output block. Used to implement
 104 // the second-pass of 2-D separable filter.
 105 //
 106 // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
 107 // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
 108 // filter is applied horizontally (pixel_step = 1) or vertically
 109 // (pixel_step = stride). It defines the offset required to move from one input
 110 // to the next. Output is 8-bit.
 111 static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
 112                                                unsigned int src_pixels_per_line,
 113                                                unsigned int pixel_step,
 114                                                unsigned int output_height,
 115                                                unsigned int output_width,
 116                                                const uint8_t *filter) {
 117   unsigned int i, j;
 118
 119   for (i = 0; i < output_height; ++i) {
 120     for (j = 0; j < output_width; ++j) {
 121       b[j] = ROUND_POWER_OF_TWO(
 122           (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
 123       ++a;
 124     }
 125
 126     a += src_pixels_per_line - output_width;
 127     b += output_width;
 128   }
 129 }
 130
 131 #define VAR(W, H)                                                    \
 132   uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 133                                      const uint8_t *b, int b_stride, \
 134                                      uint32_t *sse) {                \
 135     int sum;                                                         \
 136     variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
 137     return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
 138   }
 139
 140 #define SUBPIX_VAR(W, H)                                                \
 141   uint32_t vpx_sub_pixel_variance##W##x##H##_c(                         \
 142       const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
 143       const uint8_t *b, int b_stride, uint32_t *sse) {                  \
 144     uint16_t fdata3[(H + 1) * W];                                       \
 145     uint8_t temp2[H * W];                                               \
 146                                                                         \
 147     var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
 148                                       bilinear_filters[xoffset]);       \
 149     var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
 150                                        bilinear_filters[yoffset]);      \
 151                                                                         \
 152     return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse);       \
 153   }
 154
 155 #define SUBPIX_AVG_VAR(W, H)                                            \
 156   uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                     \
 157       const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
 158       const uint8_t *b, int b_stride, uint32_t *sse,                    \
 159       const uint8_t *second_pred) {                                     \
 160     uint16_t fdata3[(H + 1) * W];                                       \
 161     uint8_t temp2[H * W];                                               \
 162     DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                         \
 163                                                                         \
 164     var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
 165                                       bilinear_filters[xoffset]);       \
 166     var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
 167                                        bilinear_filters[yoffset]);      \
 168                                                                         \
 169     vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W);              \
 170                                                                         \
 171     return vpx_variance##W##x##H##_c(temp3, W, b, b_stride, sse);       \
 172   }
 173
 174 /* Identical to the variance call except it takes an additional parameter, sum,
 175  * and returns that value using pass-by-reference instead of returning
 176  * sse - sum^2 / w*h
 177  */
 178 #define GET_VAR(W, H)                                                         \
 179   void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
 180                                const uint8_t *b, int b_stride, uint32_t *sse, \
 181                                int *sum) {                                    \
 182     variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
 183   }
 184
 185 /* Identical to the variance call except it does not calculate the
 186  * sse - sum^2 / w*h and returns sse in addtion to modifying the passed in
 187  * variable.
 188  */
 189 #define MSE(W, H)                                               \
 190   uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
 191                                 const uint8_t *b, int b_stride, \
 192                                 uint32_t *sse) {                \
 193     int sum;                                                    \
 194     variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
 195     return *sse;                                                \
 196   }
 197
 198 /* All three forms of the variance are available in the same sizes. */
 199 #define VARIANCES(W, H) \
 200   VAR(W, H)             \
 201   SUBPIX_VAR(W, H)      \
 202   SUBPIX_AVG_VAR(W, H)
 203
 204 VARIANCES(64, 64)
 205 VARIANCES(64, 32)
 206 VARIANCES(32, 64)
 207 VARIANCES(32, 32)
 208 VARIANCES(32, 16)
 209 VARIANCES(16, 32)
 210 VARIANCES(16, 16)
 211 VARIANCES(16, 8)
 212 VARIANCES(8, 16)
 213 VARIANCES(8, 8)
 214 VARIANCES(8, 4)
 215 VARIANCES(4, 8)
 216 VARIANCES(4, 4)
 217
 218 GET_VAR(16, 16)
 219 GET_VAR(8, 8)
 220
 221 MSE(16, 16)
 222 MSE(16, 8)
 223 MSE(8, 16)
 224 MSE(8, 8)
 225
 226 void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
 227                          int height, const uint8_t *ref, int ref_stride) {
 228   int i, j;
 229   /* comp_pred and pred must be 16 byte aligned. */
 230   assert(((intptr_t)comp_pred & 0xf) == 0);
 231   assert(((intptr_t)pred & 0xf) == 0);
 232
 233   for (i = 0; i < height; ++i) {
 234     for (j = 0; j < width; ++j) {
 235       const int tmp = pred[j] + ref[j];
 236       comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
 237     }
 238     comp_pred += width;
 239     pred += width;
 240     ref += ref_stride;
 241   }
 242 }
 243
 244 #if CONFIG_VP9_HIGHBITDEPTH
 245 static void highbd_variance64(const uint8_t *a8, int a_stride,
 246                               const uint8_t *b8, int b_stride, int w, int h,
 247                               uint64_t *sse, int64_t *sum) {
 248   int i, j;
 249
 250   uint16_t *a = CONVERT_TO_SHORTPTR(a8);
 251   uint16_t *b = CONVERT_TO_SHORTPTR(b8);
 252   *sum = 0;
 253   *sse = 0;
 254
 255   for (i = 0; i < h; ++i) {
 256     for (j = 0; j < w; ++j) {
 257       const int diff = a[j] - b[j];
 258       *sum += diff;
 259       *sse += diff * diff;
 260     }
 261     a += a_stride;
 262     b += b_stride;
 263   }
 264 }
 265
 266 static void highbd_8_variance(const uint8_t *a8, int a_stride,
 267                               const uint8_t *b8, int b_stride, int w, int h,
 268                               uint32_t *sse, int *sum) {
 269   uint64_t sse_long = 0;
 270   int64_t sum_long = 0;
 271   highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
 272   *sse = (uint32_t)sse_long;
 273   *sum = (int)sum_long;
 274 }
 275
 276 static void highbd_10_variance(const uint8_t *a8, int a_stride,
 277                                const uint8_t *b8, int b_stride, int w, int h,
 278                                uint32_t *sse, int *sum) {
 279   uint64_t sse_long = 0;
 280   int64_t sum_long = 0;
 281   highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
 282   *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
 283   *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
 284 }
 285
 286 static void highbd_12_variance(const uint8_t *a8, int a_stride,
 287                                const uint8_t *b8, int b_stride, int w, int h,
 288                                uint32_t *sse, int *sum) {
 289   uint64_t sse_long = 0;
 290   int64_t sum_long = 0;
 291   highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
 292   *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
 293   *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
 294 }
 295
 296 #define HIGHBD_VAR(W, H)                                                       \
 297   uint32_t vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
 298                                               const uint8_t *b, int b_stride,  \
 299                                               uint32_t *sse) {                 \
 300     int sum;                                                                   \
 301     highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);              \
 302     return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                  \
 303   }                                                                            \
 304                                                                                \
 305   uint32_t vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 306                                                const uint8_t *b, int b_stride, \
 307                                                uint32_t *sse) {                \
 308     int sum;                                                                   \
 309     int64_t var;                                                               \
 310     highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
 311     var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
 312     return (var >= 0) ? (uint32_t)var : 0;                                     \
 313   }                                                                            \
 314                                                                                \
 315   uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 316                                                const uint8_t *b, int b_stride, \
 317                                                uint32_t *sse) {                \
 318     int sum;                                                                   \
 319     int64_t var;                                                               \
 320     highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
 321     var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
 322     return (var >= 0) ? (uint32_t)var : 0;                                     \
 323   }
 324
 325 #define HIGHBD_GET_VAR(S)                                                    \
 326   void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
 327                                         const uint8_t *ref, int ref_stride,  \
 328                                         uint32_t *sse, int *sum) {           \
 329     highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
 330   }                                                                          \
 331                                                                              \
 332   void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
 333                                          const uint8_t *ref, int ref_stride, \
 334                                          uint32_t *sse, int *sum) {          \
 335     highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
 336   }                                                                          \
 337                                                                              \
 338   void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
 339                                          const uint8_t *ref, int ref_stride, \
 340                                          uint32_t *sse, int *sum) {          \
 341     highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
 342   }
 343
 344 #define HIGHBD_MSE(W, H)                                                      \
 345   uint32_t vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
 346                                          const uint8_t *ref, int ref_stride,  \
 347                                          uint32_t *sse) {                     \
 348     int sum;                                                                  \
 349     highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
 350     return *sse;                                                              \
 351   }                                                                           \
 352                                                                               \
 353   uint32_t vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
 354                                           const uint8_t *ref, int ref_stride, \
 355                                           uint32_t *sse) {                    \
 356     int sum;                                                                  \
 357     highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
 358     return *sse;                                                              \
 359   }                                                                           \
 360                                                                               \
 361   uint32_t vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
 362                                           const uint8_t *ref, int ref_stride, \
 363                                           uint32_t *sse) {                    \
 364     int sum;                                                                  \
 365     highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
 366     return *sse;                                                              \
 367   }
 368
 369 static void highbd_var_filter_block2d_bil_first_pass(
 370     const uint8_t *src_ptr8, uint16_t *output_ptr,
 371     unsigned int src_pixels_per_line, int pixel_step,
 372     unsigned int output_height, unsigned int output_width,
 373     const uint8_t *filter) {
 374   unsigned int i, j;
 375   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
 376   for (i = 0; i < output_height; ++i) {
 377     for (j = 0; j < output_width; ++j) {
 378       output_ptr[j] = ROUND_POWER_OF_TWO(
 379           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
 380           FILTER_BITS);
 381
 382       ++src_ptr;
 383     }
 384
 385     // Next row...
 386     src_ptr += src_pixels_per_line - output_width;
 387     output_ptr += output_width;
 388   }
 389 }
 390
 391 static void highbd_var_filter_block2d_bil_second_pass(
 392     const uint16_t *src_ptr, uint16_t *output_ptr,
 393     unsigned int src_pixels_per_line, unsigned int pixel_step,
 394     unsigned int output_height, unsigned int output_width,
 395     const uint8_t *filter) {
 396   unsigned int i, j;
 397
 398   for (i = 0; i < output_height; ++i) {
 399     for (j = 0; j < output_width; ++j) {
 400       output_ptr[j] = ROUND_POWER_OF_TWO(
 401           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
 402           FILTER_BITS);
 403       ++src_ptr;
 404     }
 405
 406     src_ptr += src_pixels_per_line - output_width;
 407     output_ptr += output_width;
 408   }
 409 }
 410
 411 #define HIGHBD_SUBPIX_VAR(W, H)                                              \
 412   uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
 413       const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
 414       const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
 415     uint16_t fdata3[(H + 1) * W];                                            \
 416     uint16_t temp2[H * W];                                                   \
 417                                                                              \
 418     highbd_var_filter_block2d_bil_first_pass(                                \
 419         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
 420     highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
 421                                               bilinear_filters[yoffset]);    \
 422                                                                              \
 423     return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
 424                                               dst, dst_stride, sse);         \
 425   }                                                                          \
 426                                                                              \
 427   uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
 428       const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
 429       const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
 430     uint16_t fdata3[(H + 1) * W];                                            \
 431     uint16_t temp2[H * W];                                                   \
 432                                                                              \
 433     highbd_var_filter_block2d_bil_first_pass(                                \
 434         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
 435     highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
 436                                               bilinear_filters[yoffset]);    \
 437                                                                              \
 438     return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
 439                                                dst, dst_stride, sse);        \
 440   }                                                                          \
 441                                                                              \
 442   uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
 443       const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
 444       const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
 445     uint16_t fdata3[(H + 1) * W];                                            \
 446     uint16_t temp2[H * W];                                                   \
 447                                                                              \
 448     highbd_var_filter_block2d_bil_first_pass(                                \
 449         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
 450     highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
 451                                               bilinear_filters[yoffset]);    \
 452                                                                              \
 453     return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
 454                                                dst, dst_stride, sse);        \
 455   }
 456
 457 #define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
 458   uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
 459       const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
 460       const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
 461       const uint8_t *second_pred) {                                          \
 462     uint16_t fdata3[(H + 1) * W];                                            \
 463     uint16_t temp2[H * W];                                                   \
 464     DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
 465                                                                              \
 466     highbd_var_filter_block2d_bil_first_pass(                                \
 467         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
 468     highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
 469                                               bilinear_filters[yoffset]);    \
 470                                                                              \
 471     vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                       \
 472                              CONVERT_TO_BYTEPTR(temp2), W);                  \
 473                                                                              \
 474     return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
 475                                               dst, dst_stride, sse);         \
 476   }                                                                          \
 477                                                                              \
 478   uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
 479       const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
 480       const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
 481       const uint8_t *second_pred) {                                          \
 482     uint16_t fdata3[(H + 1) * W];                                            \
 483     uint16_t temp2[H * W];                                                   \
 484     DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
 485                                                                              \
 486     highbd_var_filter_block2d_bil_first_pass(                                \
 487         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
 488     highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
 489                                               bilinear_filters[yoffset]);    \
 490                                                                              \
 491     vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                       \
 492                              CONVERT_TO_BYTEPTR(temp2), W);                  \
 493                                                                              \
 494     return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
 495                                                dst, dst_stride, sse);        \
 496   }                                                                          \
 497                                                                              \
 498   uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
 499       const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
 500       const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
 501       const uint8_t *second_pred) {                                          \
 502     uint16_t fdata3[(H + 1) * W];                                            \
 503     uint16_t temp2[H * W];                                                   \
 504     DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
 505                                                                              \
 506     highbd_var_filter_block2d_bil_first_pass(                                \
 507         src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
 508     highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
 509                                               bilinear_filters[yoffset]);    \
 510                                                                              \
 511     vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                       \
 512                              CONVERT_TO_BYTEPTR(temp2), W);                  \
 513                                                                              \
 514     return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
 515                                                dst, dst_stride, sse);        \
 516   }
 517
 518 /* All three forms of the variance are available in the same sizes. */
 519 #define HIGHBD_VARIANCES(W, H) \
 520   HIGHBD_VAR(W, H)             \
 521   HIGHBD_SUBPIX_VAR(W, H)      \
 522   HIGHBD_SUBPIX_AVG_VAR(W, H)
 523
 524 HIGHBD_VARIANCES(64, 64)
 525 HIGHBD_VARIANCES(64, 32)
 526 HIGHBD_VARIANCES(32, 64)
 527 HIGHBD_VARIANCES(32, 32)
 528 HIGHBD_VARIANCES(32, 16)
 529 HIGHBD_VARIANCES(16, 32)
 530 HIGHBD_VARIANCES(16, 16)
 531 HIGHBD_VARIANCES(16, 8)
 532 HIGHBD_VARIANCES(8, 16)
 533 HIGHBD_VARIANCES(8, 8)
 534 HIGHBD_VARIANCES(8, 4)
 535 HIGHBD_VARIANCES(4, 8)
 536 HIGHBD_VARIANCES(4, 4)
 537
 538 HIGHBD_GET_VAR(8)
 539 HIGHBD_GET_VAR(16)
 540
 541 HIGHBD_MSE(16, 16)
 542 HIGHBD_MSE(16, 8)
 543 HIGHBD_MSE(8, 16)
 544 HIGHBD_MSE(8, 8)
 545
 546 void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
 547                               int width, int height, const uint8_t *ref8,
 548                               int ref_stride) {
 549   int i, j;
 550   uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
 551   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
 552   for (i = 0; i < height; ++i) {
 553     for (j = 0; j < width; ++j) {
 554       const int tmp = pred[j] + ref[j];
 555       comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
 556     }
 557     comp_pred += width;
 558     pred += width;
 559     ref += ref_stride;
 560   }
 561 }
 562 #endif  // CONFIG_VP9_HIGHBITDEPTH