granicus.if.org Git - libvpx/commitdiff
High bit-depth support for wedge partition expt
authorDebargha Mukherjee <debargha@google.com>
Sat, 11 Apr 2015 00:45:02 +0000 (17:45 -0700)
committerDebargha Mukherjee <debargha@google.com>
Mon, 13 Apr 2015 16:28:15 +0000 (09:28 -0700)
Change-Id: Idbd27e66d4f4a7953f888137d5752856215a6760

vp9/common/vp9_reconinter.c
vp9/common/vp9_rtcd_defs.pl
vp9/encoder/vp9_encoder.c
vp9/encoder/vp9_sad.c
vp9/encoder/vp9_variance.c

index 4afc785aad1e506fce3c671fb1acc574f65b74c8..82ca30011b071458baa5aa979fb7658f28ec51f3 100644 (file)
@@ -551,6 +551,28 @@ static void build_masked_compound(uint8_t *dst, int dst_stride,
     }
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_masked_compound_highbd(uint8_t *dst_8, int dst_stride,
+                                         uint8_t *dst2_8, int dst2_stride,
+                                         int wedge_index, BLOCK_SIZE sb_type,
+                                         int h, int w) {
+  int i, j;
+  uint8_t mask[4096];
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
+  uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8);
+  vp9_generate_masked_weight(wedge_index, sb_type, h, w, mask, 64);
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) {
+      int m = mask[i * 64 + j];
+      dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
+                                 dst2[i * dst2_stride + j] *
+                                 ((1 << WEDGE_WEIGHT_BITS) - m) +
+                                 (1 << (WEDGE_WEIGHT_BITS - 1))) >>
+                                 WEDGE_WEIGHT_BITS;
+    }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 #if CONFIG_SUPERTX
 void generate_masked_weight_extend(int wedge_index, int plane,
                                    BLOCK_SIZE sb_type, int h, int w,
@@ -686,7 +708,14 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
 #if CONFIG_WEDGE_PARTITION
     if (ref && get_wedge_bits(mi->mbmi.sb_type)
         && mi->mbmi.use_wedge_interinter) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      uint8_t tmp_dst_[8192];
+      uint8_t *tmp_dst =
+          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
+          CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
+#else
       uint8_t tmp_dst[4096];
+#endif
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         highbd_inter_predictor(pre, pre_buf->stride, tmp_dst, 64,
@@ -722,13 +751,27 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
                                    wedge_offset_x, wedge_offset_y, h, w);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #else   // CONFIG_SUPERTX
-      build_masked_compound(dst, dst_buf->stride, tmp_dst, 64,
-                            mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type,
-                            h, w);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+        build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, 64,
+                                     mi->mbmi.interinter_wedge_index,
+                                     mi->mbmi.sb_type, h, w);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        build_masked_compound(dst, dst_buf->stride, tmp_dst, 64,
+                              mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type,
+                              h, w);
 #endif  // CONFIG_SUPERTX
     } else {
-      inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
-                      subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+        highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+                               subpel_x, subpel_y, sf, w, h, ref, kernel,
+                               xs, ys, xd->bd);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+                        subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
     }
 
 #else  // CONFIG_WEDGE_PARTITION
@@ -1410,7 +1453,14 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
 #if CONFIG_WEDGE_PARTITION
     if (ref && get_wedge_bits(mi->mbmi.sb_type)
         && mi->mbmi.use_wedge_interinter) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      uint8_t tmp_dst_[8192];
+      uint8_t *tmp_dst =
+          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
+          CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
+#else
       uint8_t tmp_dst[4096];
+#endif
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         highbd_inter_predictor(buf_ptr, buf_stride, tmp_dst, 64,
@@ -1445,24 +1495,32 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
                                    wedge_offset_x, wedge_offset_y, h, w);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #else   // CONFIG_SUPERTX
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+        build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, 64,
+                                     mi->mbmi.interinter_wedge_index,
+                                     mi->mbmi.sb_type, h, w);
+      } else {
+        build_masked_compound(dst, dst_buf->stride, tmp_dst, 64,
+                              mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type,
+                              h, w);
+      }
+#else
       build_masked_compound(dst, dst_buf->stride, tmp_dst, 64,
                             mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type,
                             h, w);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_SUPERTX
     } else {
 #if CONFIG_VP9_HIGHBITDEPTH
-      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
         highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride,
                                subpel_x, subpel_y, sf, w, h, ref, kernel,
                                xs, ys, xd->bd);
-      } else {
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
         inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
                         subpel_y, sf, w, h, ref, kernel, xs, ys);
-      }
-#else
-      inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
-                      subpel_y, sf, w, h, ref, kernel, xs, ys);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
     }
 
 #else  // CONFIG_WEDGE_PARTITION
index 5e46e4de9a2bdccae50522bc5718a7d521b05094..3234c8fdd8da39b780c15c9586ee9093400f8657 100644 (file)
@@ -2127,6 +2127,281 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/unsigned int vp9_highbd_12_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
   specialize qw/vp9_highbd_12_mse8x8/;
 
+  if (vpx_config("CONFIG_WEDGE_PARTITION") eq "yes") {
+    add_proto qw/unsigned int vp9_highbd_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance32x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance16x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance64x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance32x64/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance32x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance64x64/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance16x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance16x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance8x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance8x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance8x4/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance4x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_variance4x4/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance32x16/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance16x32/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance64x32/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance32x64/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance32x32/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance64x64/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance16x16/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance16x8/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance8x16/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance8x8/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance8x4/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance4x8/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_variance4x4/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance32x16/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance16x32/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance64x32/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance32x64/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance32x32/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance64x64/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance16x16/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance16x8/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance8x16/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance8x8/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance8x4/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance4x8/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_variance4x4/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance64x64/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance64x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance32x64/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance32x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance32x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance16x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance16x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance16x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance8x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance8x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance8x4/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance4x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_masked_sub_pixel_variance4x4/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance64x64/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance64x32/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance32x64/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance32x32/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance32x16/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance16x32/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance16x16/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance16x8/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance8x16/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance8x8/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance8x4/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance4x8/;
+
+    add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_10_masked_sub_pixel_variance4x4/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance64x64/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance64x32/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance32x64/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance32x32/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance32x16/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance16x32/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance16x16/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance16x8/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance8x16/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance8x8/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance8x4/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance4x8/;
+
+    add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse";
+    specialize qw/vp9_highbd_12_masked_sub_pixel_variance4x4/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad64x64/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad32x64/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad64x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad32x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad16x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad32x32/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad16x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad16x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad8x16/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad8x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad8x4/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad4x8/;
+
+    add_proto qw/unsigned int vp9_highbd_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *mask, int mask_stride";
+    specialize qw/vp9_highbd_masked_sad4x4/;
+  }
+
   # ENCODEMB INVOKE
 
   add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
index 3589dd903e06e5973f668d3ecc2c89c4c8c259df..39fa6d428ba5a8569b7e28bf04dca5cff6609243 100644 (file)
@@ -821,6 +821,56 @@ MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad4x4x3)
 MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad4x4x8)
 MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad4x4x4d)
 
#if CONFIG_WEDGE_PARTITION
/* Installs the masked (wedge-partition) distortion functions for block size
 * BT into the encoder's function-pointer table: masked SAD (msdf), masked
 * variance (mvf) and masked sub-pixel variance (msvf). */
#define HIGHBD_MBFP(BT, MSDF, MVF, MSVF)         \
  cpi->fn_ptr[BT].msdf            = MSDF; \
  cpi->fn_ptr[BT].mvf             = MVF;  \
  cpi->fn_ptr[BT].msvf            = MSVF;

/* Generates per-bit-depth wrappers around a highbd masked SAD function.
 * _bits8 forwards unchanged; _bits10 and _bits12 shift the SAD down by 2 and
 * 4 bits respectively — presumably to normalize 10-/12-bit pixel SADs back
 * toward the 8-bit scale, like the MAKE_BFP_SAD* wrappers above (confirm
 * against those wrappers' shift amounts). */
#define MAKE_MBFP_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
                                   int source_stride, \
                                   const uint8_t *ref_ptr, \
                                   int ref_stride, \
                                   const uint8_t *m, \
                                   int m_stride) {  \
  return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
                m, m_stride); \
} \
static unsigned int fnname##_bits10(const uint8_t *src_ptr, \
                                    int source_stride, \
                                    const uint8_t *ref_ptr, \
                                    int ref_stride, \
                                    const uint8_t *m, \
                                    int m_stride) {  \
  return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
                m, m_stride) >> 2; \
} \
static unsigned int fnname##_bits12(const uint8_t *src_ptr, \
                                    int source_stride, \
                                    const uint8_t *ref_ptr, \
                                    int ref_stride, \
                                    const uint8_t *m, \
                                    int m_stride) {  \
  return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
                m, m_stride) >> 4; \
}

/* One _bits8/_bits10/_bits12 wrapper triple per supported block size. */
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad64x64)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad64x32)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad32x64)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad32x32)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad32x16)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad16x32)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad16x16)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad16x8)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad8x16)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad8x8)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad8x4)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad4x8)
MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad4x4)
#endif  // CONFIG_WEDGE_PARTITION
+
 static void  highbd_set_var_fns(VP9_COMP *const cpi) {
   VP9_COMMON *const cm = &cpi->common;
   if (cm->use_highbitdepth) {
@@ -955,6 +1005,61 @@ static void  highbd_set_var_fns(VP9_COMP *const cpi) {
                    vp9_highbd_sad4x4x3_bits8,
                    vp9_highbd_sad4x4x8_bits8,
                    vp9_highbd_sad4x4x4d_bits8)
+
#if CONFIG_WEDGE_PARTITION
        /* Masked (wedge) SAD/variance function pointers for 8-bit depth:
         * the _bits8 SAD wrappers and the unscaled masked variance
         * functions are installed. */
        HIGHBD_MBFP(BLOCK_64X64,
                    vp9_highbd_masked_sad64x64_bits8,
                    vp9_highbd_masked_variance64x64,
                    vp9_highbd_masked_sub_pixel_variance64x64)
        HIGHBD_MBFP(BLOCK_64X32,
                    vp9_highbd_masked_sad64x32_bits8,
                    vp9_highbd_masked_variance64x32,
                    vp9_highbd_masked_sub_pixel_variance64x32)
        HIGHBD_MBFP(BLOCK_32X64,
                    vp9_highbd_masked_sad32x64_bits8,
                    vp9_highbd_masked_variance32x64,
                    vp9_highbd_masked_sub_pixel_variance32x64)
        HIGHBD_MBFP(BLOCK_32X32,
                    vp9_highbd_masked_sad32x32_bits8,
                    vp9_highbd_masked_variance32x32,
                    vp9_highbd_masked_sub_pixel_variance32x32)
        HIGHBD_MBFP(BLOCK_32X16,
                    vp9_highbd_masked_sad32x16_bits8,
                    vp9_highbd_masked_variance32x16,
                    vp9_highbd_masked_sub_pixel_variance32x16)
        HIGHBD_MBFP(BLOCK_16X32,
                    vp9_highbd_masked_sad16x32_bits8,
                    vp9_highbd_masked_variance16x32,
                    vp9_highbd_masked_sub_pixel_variance16x32)
        HIGHBD_MBFP(BLOCK_16X16,
                    vp9_highbd_masked_sad16x16_bits8,
                    vp9_highbd_masked_variance16x16,
                    vp9_highbd_masked_sub_pixel_variance16x16)
        HIGHBD_MBFP(BLOCK_8X16,
                    vp9_highbd_masked_sad8x16_bits8,
                    vp9_highbd_masked_variance8x16,
                    vp9_highbd_masked_sub_pixel_variance8x16)
        HIGHBD_MBFP(BLOCK_16X8,
                    vp9_highbd_masked_sad16x8_bits8,
                    vp9_highbd_masked_variance16x8,
                    vp9_highbd_masked_sub_pixel_variance16x8)
        HIGHBD_MBFP(BLOCK_8X8,
                    vp9_highbd_masked_sad8x8_bits8,
                    vp9_highbd_masked_variance8x8,
                    vp9_highbd_masked_sub_pixel_variance8x8)
        HIGHBD_MBFP(BLOCK_4X8,
                    vp9_highbd_masked_sad4x8_bits8,
                    vp9_highbd_masked_variance4x8,
                    vp9_highbd_masked_sub_pixel_variance4x8)
        HIGHBD_MBFP(BLOCK_8X4,
                    vp9_highbd_masked_sad8x4_bits8,
                    vp9_highbd_masked_variance8x4,
                    vp9_highbd_masked_sub_pixel_variance8x4)
        HIGHBD_MBFP(BLOCK_4X4,
                    vp9_highbd_masked_sad4x4_bits8,
                    vp9_highbd_masked_variance4x4,
                    vp9_highbd_masked_sub_pixel_variance4x4)
#endif  // CONFIG_WEDGE_PARTITION
         break;
 
       case VPX_BITS_10:
@@ -1087,6 +1192,61 @@ static void  highbd_set_var_fns(VP9_COMP *const cpi) {
                    vp9_highbd_sad4x4x3_bits10,
                    vp9_highbd_sad4x4x8_bits10,
                    vp9_highbd_sad4x4x4d_bits10)
+
#if CONFIG_WEDGE_PARTITION
        /* Masked (wedge) SAD/variance function pointers for 10-bit depth:
         * _bits10 SAD wrappers and the highbd_10 masked variance
         * functions. */
        HIGHBD_MBFP(BLOCK_64X64,
                    vp9_highbd_masked_sad64x64_bits10,
                    vp9_highbd_10_masked_variance64x64,
                    vp9_highbd_10_masked_sub_pixel_variance64x64)
        HIGHBD_MBFP(BLOCK_64X32,
                    vp9_highbd_masked_sad64x32_bits10,
                    vp9_highbd_10_masked_variance64x32,
                    vp9_highbd_10_masked_sub_pixel_variance64x32)
        HIGHBD_MBFP(BLOCK_32X64,
                    vp9_highbd_masked_sad32x64_bits10,
                    vp9_highbd_10_masked_variance32x64,
                    vp9_highbd_10_masked_sub_pixel_variance32x64)
        HIGHBD_MBFP(BLOCK_32X32,
                    vp9_highbd_masked_sad32x32_bits10,
                    vp9_highbd_10_masked_variance32x32,
                    vp9_highbd_10_masked_sub_pixel_variance32x32)
        HIGHBD_MBFP(BLOCK_32X16,
                    vp9_highbd_masked_sad32x16_bits10,
                    vp9_highbd_10_masked_variance32x16,
                    vp9_highbd_10_masked_sub_pixel_variance32x16)
        HIGHBD_MBFP(BLOCK_16X32,
                    vp9_highbd_masked_sad16x32_bits10,
                    vp9_highbd_10_masked_variance16x32,
                    vp9_highbd_10_masked_sub_pixel_variance16x32)
        HIGHBD_MBFP(BLOCK_16X16,
                    vp9_highbd_masked_sad16x16_bits10,
                    vp9_highbd_10_masked_variance16x16,
                    vp9_highbd_10_masked_sub_pixel_variance16x16)
        HIGHBD_MBFP(BLOCK_8X16,
                    vp9_highbd_masked_sad8x16_bits10,
                    vp9_highbd_10_masked_variance8x16,
                    vp9_highbd_10_masked_sub_pixel_variance8x16)
        HIGHBD_MBFP(BLOCK_16X8,
                    vp9_highbd_masked_sad16x8_bits10,
                    vp9_highbd_10_masked_variance16x8,
                    vp9_highbd_10_masked_sub_pixel_variance16x8)
        HIGHBD_MBFP(BLOCK_8X8,
                    vp9_highbd_masked_sad8x8_bits10,
                    vp9_highbd_10_masked_variance8x8,
                    vp9_highbd_10_masked_sub_pixel_variance8x8)
        HIGHBD_MBFP(BLOCK_4X8,
                    vp9_highbd_masked_sad4x8_bits10,
                    vp9_highbd_10_masked_variance4x8,
                    vp9_highbd_10_masked_sub_pixel_variance4x8)
        HIGHBD_MBFP(BLOCK_8X4,
                    vp9_highbd_masked_sad8x4_bits10,
                    vp9_highbd_10_masked_variance8x4,
                    vp9_highbd_10_masked_sub_pixel_variance8x4)
        HIGHBD_MBFP(BLOCK_4X4,
                    vp9_highbd_masked_sad4x4_bits10,
                    vp9_highbd_10_masked_variance4x4,
                    vp9_highbd_10_masked_sub_pixel_variance4x4)
#endif  // CONFIG_WEDGE_PARTITION
         break;
 
       case VPX_BITS_12:
@@ -1219,6 +1379,61 @@ static void  highbd_set_var_fns(VP9_COMP *const cpi) {
                    vp9_highbd_sad4x4x3_bits12,
                    vp9_highbd_sad4x4x8_bits12,
                    vp9_highbd_sad4x4x4d_bits12)
+
#if CONFIG_WEDGE_PARTITION
        /* Masked (wedge) SAD/variance function pointers for 12-bit depth:
         * _bits12 SAD wrappers and the highbd_12 masked variance
         * functions. */
        HIGHBD_MBFP(BLOCK_64X64,
                    vp9_highbd_masked_sad64x64_bits12,
                    vp9_highbd_12_masked_variance64x64,
                    vp9_highbd_12_masked_sub_pixel_variance64x64)
        HIGHBD_MBFP(BLOCK_64X32,
                    vp9_highbd_masked_sad64x32_bits12,
                    vp9_highbd_12_masked_variance64x32,
                    vp9_highbd_12_masked_sub_pixel_variance64x32)
        HIGHBD_MBFP(BLOCK_32X64,
                    vp9_highbd_masked_sad32x64_bits12,
                    vp9_highbd_12_masked_variance32x64,
                    vp9_highbd_12_masked_sub_pixel_variance32x64)
        HIGHBD_MBFP(BLOCK_32X32,
                    vp9_highbd_masked_sad32x32_bits12,
                    vp9_highbd_12_masked_variance32x32,
                    vp9_highbd_12_masked_sub_pixel_variance32x32)
        HIGHBD_MBFP(BLOCK_32X16,
                    vp9_highbd_masked_sad32x16_bits12,
                    vp9_highbd_12_masked_variance32x16,
                    vp9_highbd_12_masked_sub_pixel_variance32x16)
        HIGHBD_MBFP(BLOCK_16X32,
                    vp9_highbd_masked_sad16x32_bits12,
                    vp9_highbd_12_masked_variance16x32,
                    vp9_highbd_12_masked_sub_pixel_variance16x32)
        HIGHBD_MBFP(BLOCK_16X16,
                    vp9_highbd_masked_sad16x16_bits12,
                    vp9_highbd_12_masked_variance16x16,
                    vp9_highbd_12_masked_sub_pixel_variance16x16)
        HIGHBD_MBFP(BLOCK_8X16,
                    vp9_highbd_masked_sad8x16_bits12,
                    vp9_highbd_12_masked_variance8x16,
                    vp9_highbd_12_masked_sub_pixel_variance8x16)
        HIGHBD_MBFP(BLOCK_16X8,
                    vp9_highbd_masked_sad16x8_bits12,
                    vp9_highbd_12_masked_variance16x8,
                    vp9_highbd_12_masked_sub_pixel_variance16x8)
        HIGHBD_MBFP(BLOCK_8X8,
                    vp9_highbd_masked_sad8x8_bits12,
                    vp9_highbd_12_masked_variance8x8,
                    vp9_highbd_12_masked_sub_pixel_variance8x8)
        HIGHBD_MBFP(BLOCK_4X8,
                    vp9_highbd_masked_sad4x8_bits12,
                    vp9_highbd_12_masked_variance4x8,
                    vp9_highbd_12_masked_sub_pixel_variance4x8)
        HIGHBD_MBFP(BLOCK_8X4,
                    vp9_highbd_masked_sad8x4_bits12,
                    vp9_highbd_12_masked_variance8x4,
                    vp9_highbd_12_masked_sub_pixel_variance8x4)
        HIGHBD_MBFP(BLOCK_4X4,
                    vp9_highbd_masked_sad4x4_bits12,
                    vp9_highbd_12_masked_variance4x4,
                    vp9_highbd_12_masked_sub_pixel_variance4x4)
#endif  // CONFIG_WEDGE_PARTITION
         break;
 
       default:
index e7eda9781fb0cc0995776d91f6a21d0451618397..f926a5500e413863bfa2c7bf1f5a2a01be3aa283 100644 (file)
@@ -317,4 +317,53 @@ MASKSADMxN(8, 8)
 MASKSADMxN(8, 4)
 MASKSADMxN(4, 8)
 MASKSADMxN(4, 4)
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE unsigned int highbd_masked_sad(const uint8_t *a8, int a_stride,
+                                             const uint8_t *b8, int b_stride,
+                                             const uint8_t *m, int m_stride,
+                                             int width, int height) {
+  int y, x;
+  unsigned int sad = 0;
+  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++)
+      sad += m[x] * abs(a[x] - b[x]);
+
+    a += a_stride;
+    b += b_stride;
+    m += m_stride;
+  }
+  sad = (sad + 31) >> 6;
+
+  return sad;
+}
+
/* Expands to the per-size C entry point vp9_highbd_masked_sad<m>x<n>_c
 * referenced by the rtcd tables; each simply forwards to the shared
 * highbd_masked_sad() kernel with the block dimensions baked in. */
#define highbd_MASKSADMxN(m, n) \
unsigned int vp9_highbd_masked_sad##m##x##n##_c(const uint8_t *src, \
                                                int src_stride, \
                                                const uint8_t *ref, \
                                                int ref_stride, \
                                                const uint8_t *msk, \
                                                int msk_stride) { \
  return highbd_masked_sad(src, src_stride, ref, ref_stride, \
                           msk, msk_stride, m, n); \
}

/* One entry point per supported block size. */
highbd_MASKSADMxN(64, 64)
highbd_MASKSADMxN(64, 32)
highbd_MASKSADMxN(32, 64)
highbd_MASKSADMxN(32, 32)
highbd_MASKSADMxN(32, 16)
highbd_MASKSADMxN(16, 32)
highbd_MASKSADMxN(16, 16)
highbd_MASKSADMxN(16, 8)
highbd_MASKSADMxN(8, 16)
highbd_MASKSADMxN(8, 8)
highbd_MASKSADMxN(8, 4)
highbd_MASKSADMxN(4, 8)
highbd_MASKSADMxN(4, 4)
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_WEDGE_PARTITION
index 7225e92c82570353e184da41a569a709336fb22a..74c77287fb115847808280bbdebeaaccf6be82cc 100644 (file)
@@ -743,4 +743,213 @@ MASK_SUBPIX_VAR(64, 32)
 
 MASK_VAR(64, 64)
 MASK_SUBPIX_VAR(64, 64)
+
+#if CONFIG_VP9_HIGHBITDEPTH
/* Accumulates the mask-weighted sum and sum-of-squares of differences between
 * two high-bitdepth blocks.  a8/b8 alias uint16_t pixel rows via
 * CONVERT_TO_SHORTPTR; m holds per-pixel weights in [0, 64].  On return, *sum
 * is normalized by the 6-bit weight range (rounded away from zero) and *sse
 * by the squared weight range (12 bits). */
void highbd_masked_variance64(const uint8_t *a8, int  a_stride,
                              const uint8_t *b8, int  b_stride,
                              const uint8_t *m, int  m_stride,
                              int  w, int  h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      /* |diff| <= 4095 * 64 = 262080 for 12-bit input, so diff itself fits
       * in int, but diff * diff (~6.9e10) does not: promote to 64 bits
       * before squaring to avoid signed-overflow undefined behavior. */
      const int diff = (a[j] - b[j]) * (m[j]);
      *sum += diff;
      *sse += (int64_t)diff * diff;
    }

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  /* Undo the 6-bit mask-weight scaling on the sum (sign-symmetric rounding)
   * and the 12-bit squared-weight scaling on the sum of squares. */
  *sum = (*sum >= 0) ? ((*sum + 31) >> 6) : -((-*sum + 31) >> 6);
  *sse = (*sse + 2047) >> 12;
}
+
/* 8-bit-depth front end: runs the 64-bit masked accumulation and narrows the
 * results to the unsigned int / int outputs expected by the variance tables.
 * No extra scaling is applied at this depth. */
void highbd_masked_variance(const uint8_t *a8, int  a_stride,
                            const uint8_t *b8, int  b_stride,
                            const uint8_t *m, int  m_stride,
                            int  w, int  h,
                            unsigned int *sse, int *sum) {
  int64_t sum64 = 0;
  uint64_t sse64 = 0;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
                           w, h, &sse64, &sum64);
  *sum = (int)sum64;
  *sse = (unsigned int)sse64;
}
+
/* 10-bit-depth front end: runs the 64-bit masked accumulation, then scales
 * the statistics down (sum by 2 bits, sse by 4 bits) to the 8-bit domain
 * before narrowing to the output types. */
void highbd_10_masked_variance(const uint8_t *a8, int  a_stride,
                               const uint8_t *b8, int  b_stride,
                               const uint8_t *m, int  m_stride,
                               int  w, int  h,
                               unsigned int *sse, int *sum) {
  int64_t sum64 = 0;
  uint64_t sse64 = 0;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
                           w, h, &sse64, &sum64);
  *sum = ROUND_POWER_OF_TWO(sum64, 2);
  *sse = ROUND_POWER_OF_TWO(sse64, 4);
}
+
/* 12-bit-depth front end: runs the 64-bit masked accumulation, then scales
 * the statistics down (sum by 4 bits, sse by 8 bits) to the 8-bit domain
 * before narrowing to the output types. */
void highbd_12_masked_variance(const uint8_t *a8, int  a_stride,
                               const uint8_t *b8, int  b_stride,
                               const uint8_t *m, int  m_stride,
                               int  w, int  h,
                               unsigned int *sse, int *sum) {
  int64_t sum64 = 0;
  uint64_t sse64 = 0;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
                           w, h, &sse64, &sum64);
  *sum = ROUND_POWER_OF_TWO(sum64, 4);
  *sse = ROUND_POWER_OF_TWO(sse64, 8);
}
+
/* Defines the three masked-variance entry points for a WxH block size:
 * vp9_highbd_masked_variance<W>x<H>_c and the _10_/_12_ bit-depth variants.
 * Each gathers the masked sum and SSE at the matching bit depth, stores the
 * SSE through *sse, and returns the variance via the standard identity
 * sse - sum^2 / (W*H), with the product widened to int64_t to avoid
 * overflow. */
#define HIGHBD_MASK_VAR(W, H) \
unsigned int vp9_highbd_masked_variance##W##x##H##_c(const uint8_t *a, \
                                                     int a_stride, \
                                                     const uint8_t *b, \
                                                     int b_stride, \
                                                     const uint8_t *m, \
                                                     int m_stride, \
                                                     unsigned int *sse) { \
  int sum; \
  highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
                         W, H, sse, &sum); \
  return *sse - (((int64_t)sum * sum) / (W * H)); \
} \
\
unsigned int vp9_highbd_10_masked_variance##W##x##H##_c(const uint8_t *a, \
                                                        int a_stride, \
                                                        const uint8_t *b, \
                                                        int b_stride, \
                                                        const uint8_t *m, \
                                                        int m_stride, \
                                                        unsigned int *sse) { \
  int sum; \
  highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
                            W, H, sse, &sum); \
  return *sse - (((int64_t)sum * sum) / (W * H)); \
} \
\
unsigned int vp9_highbd_12_masked_variance##W##x##H##_c(const uint8_t *a, \
                                                        int a_stride, \
                                                        const uint8_t *b, \
                                                        int b_stride, \
                                                        const uint8_t *m, \
                                                        int m_stride, \
                                                        unsigned int *sse) { \
  int sum; \
  highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
                            W, H, sse, &sum); \
  return *sse - (((int64_t)sum * sum) / (W * H)); \
}
+
/* Defines the masked sub-pixel variance entry points for a WxH block size
 * (plain, _10_ and _12_ bit-depth variants).  Each interpolates the source
 * block at the fractional position (xoffset, yoffset) with the standard
 * two-pass 2-tap bilinear filter into temp2 (stride W), then evaluates the
 * matching masked variance of the filtered block against dst. */
#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
unsigned int vp9_highbd_masked_sub_pixel_variance##W##x##H##_c( \
  const uint8_t *src, int  src_stride, \
  int xoffset, int  yoffset, \
  const uint8_t *dst, int dst_stride, \
  const uint8_t *msk, int msk_stride, \
  unsigned int *sse) { \
  uint16_t fdata3[(H + 1) * W]; \
  uint16_t temp2[H * W]; \
\
  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
                                           H + 1, W, \
                                           BILINEAR_FILTERS_2TAP(xoffset)); \
  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
                                            BILINEAR_FILTERS_2TAP(yoffset)); \
\
  return vp9_highbd_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                 W, dst, dst_stride, \
                                                 msk, msk_stride, sse); \
} \
\
unsigned int vp9_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
  const uint8_t *src, int  src_stride, \
  int xoffset, int  yoffset, \
  const uint8_t *dst, int dst_stride, \
  const uint8_t *msk, int msk_stride, \
  unsigned int *sse) { \
  uint16_t fdata3[(H + 1) * W]; \
  uint16_t temp2[H * W]; \
\
  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
                                           H + 1, W, \
                                           BILINEAR_FILTERS_2TAP(xoffset)); \
  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
                                            BILINEAR_FILTERS_2TAP(yoffset)); \
\
  return vp9_highbd_10_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                    W, dst, dst_stride, \
                                                    msk, msk_stride, sse); \
} \
\
unsigned int vp9_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
  const uint8_t *src, int  src_stride, \
  int xoffset, int  yoffset, \
  const uint8_t *dst, int dst_stride, \
  const uint8_t *msk, int msk_stride, \
  unsigned int *sse) { \
  uint16_t fdata3[(H + 1) * W]; \
  uint16_t temp2[H * W]; \
\
  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
                                           H + 1, W, \
                                           BILINEAR_FILTERS_2TAP(xoffset)); \
  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
                                            BILINEAR_FILTERS_2TAP(yoffset)); \
\
  return vp9_highbd_12_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                    W, dst, dst_stride, \
                                                    msk, msk_stride, sse); \
}
+
/* Instantiate the masked variance and masked sub-pixel variance entry points
 * for every supported block size. */
HIGHBD_MASK_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 4)

HIGHBD_MASK_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(4, 8)

HIGHBD_MASK_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 4)

HIGHBD_MASK_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 8)

HIGHBD_MASK_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(8, 16)

HIGHBD_MASK_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 8)

HIGHBD_MASK_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 16)

HIGHBD_MASK_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(16, 32)

HIGHBD_MASK_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 16)

HIGHBD_MASK_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 32)

HIGHBD_MASK_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(32, 64)

HIGHBD_MASK_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 32)

HIGHBD_MASK_VAR(64, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 64)
+
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_WEDGE_PARTITION