From 343c092e2e130f21e547e49dc52b8f6078f2bbed Mon Sep 17 00:00:00 2001 From: Debargha Mukherjee Date: Fri, 10 Apr 2015 17:45:02 -0700 Subject: [PATCH] High bit-depth support for wedge partition expt Change-Id: Idbd27e66d4f4a7953f888137d5752856215a6760 --- vp9/common/vp9_reconinter.c | 82 +++++++++-- vp9/common/vp9_rtcd_defs.pl | 275 ++++++++++++++++++++++++++++++++++++ vp9/encoder/vp9_encoder.c | 215 ++++++++++++++++++++++++++++ vp9/encoder/vp9_sad.c | 49 +++++++ vp9/encoder/vp9_variance.c | 209 +++++++++++++++++++++++++++ 5 files changed, 818 insertions(+), 12 deletions(-) diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 4afc785aa..82ca30011 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -551,6 +551,28 @@ static void build_masked_compound(uint8_t *dst, int dst_stride, } } +#if CONFIG_VP9_HIGHBITDEPTH +static void build_masked_compound_highbd(uint8_t *dst_8, int dst_stride, + uint8_t *dst2_8, int dst2_stride, + int wedge_index, BLOCK_SIZE sb_type, + int h, int w) { /* Blends an h x w area of two high bit-depth predictions in place: each dst sample becomes (dst * m + dst2 * ((1 << WEDGE_WEIGHT_BITS) - m) + rounding) >> WEDGE_WEIGHT_BITS, where m is the weight vp9_generate_masked_weight() wrote for wedge_index/sb_type. dst_8 and dst2_8 are never dereferenced directly; both go through CONVERT_TO_SHORTPTR first, and both strides index uint16_t samples. */ + int i, j; + uint8_t mask[4096]; /* weights are read with row stride 64, so 4096 entries cover h, w up to 64 */ + uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); + uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8); + vp9_generate_masked_weight(wedge_index, sb_type, h, w, mask, 64); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * 64 + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> + WEDGE_WEIGHT_BITS; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + #if CONFIG_SUPERTX void generate_masked_weight_extend(int wedge_index, int plane, BLOCK_SIZE sb_type, int h, int w, @@ -686,7 +708,14 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #if CONFIG_WEDGE_PARTITION if (ref && get_wedge_bits(mi->mbmi.sb_type) && mi->mbmi.use_wedge_interinter) { +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint8_t, tmp_dst_[8192]); /* NOTE(review): aligned because libvpx's CONVERT_TO_BYTEPTR() halves the address and CONVERT_TO_SHORTPTR() doubles it again, so an odd base address would not round-trip; a plain uint8_t array carries no alignment guarantee. 8192 bytes = 64 * 64 uint16_t samples (the buffer is used with stride 64). Assumes vpx_ports/mem.h is already reachable from this file - confirm. */ + uint8_t *tmp_dst = + (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
+ CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; +#else uint8_t tmp_dst[4096]; +#endif #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_inter_predictor(pre, pre_buf->stride, tmp_dst, 64, @@ -722,13 +751,27 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, wedge_offset_x, wedge_offset_y, h, w); #endif // CONFIG_VP9_HIGHBITDEPTH #else // CONFIG_SUPERTX - build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, - mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, - h, w); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, 64, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, h, w); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, + mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, + h, w); #endif // CONFIG_SUPERTX } else { - inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, + xs, ys, xd->bd); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); } #else // CONFIG_WEDGE_PARTITION @@ -1410,7 +1453,14 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #if CONFIG_WEDGE_PARTITION if (ref && get_wedge_bits(mi->mbmi.sb_type) && mi->mbmi.use_wedge_interinter) { +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint8_t, tmp_dst_[8192]); /* NOTE(review): aligned because libvpx's CONVERT_TO_BYTEPTR() halves the address and CONVERT_TO_SHORTPTR() doubles it again, so an odd base address would not round-trip; a plain uint8_t array carries no alignment guarantee. 8192 bytes = 64 * 64 uint16_t samples (the buffer is used with stride 64). Assumes vpx_ports/mem.h is already reachable from this file - confirm. */ + uint8_t *tmp_dst = + (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
+ CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; +#else uint8_t tmp_dst[4096]; +#endif #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, @@ -1445,24 +1495,32 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, wedge_offset_x, wedge_offset_y, h, w); #endif // CONFIG_VP9_HIGHBITDEPTH #else // CONFIG_SUPERTX +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, 64, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, h, w); + } else { + build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, + mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, + h, w); + } +#else build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); +#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_SUPERTX } else { #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); - } else { + else +#endif // CONFIG_VP9_HIGHBITDEPTH inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); - } -#else - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH } #else // CONFIG_WEDGE_PARTITION diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 5e46e4de9..3234c8fdd 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -2127,6 +2127,281 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; specialize 
qw/vp9_highbd_12_mse8x8/; + if (vpx_config("CONFIG_WEDGE_PARTITION") eq "yes") { + add_proto qw/unsigned int vp9_highbd_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance32x16/; + + add_proto qw/unsigned int vp9_highbd_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance16x32/; + + add_proto qw/unsigned int vp9_highbd_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance64x32/; + + add_proto qw/unsigned int vp9_highbd_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance32x64/; + + add_proto qw/unsigned int vp9_highbd_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance32x32/; + + add_proto qw/unsigned int vp9_highbd_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance64x64/; + + add_proto qw/unsigned int vp9_highbd_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance16x16/; + + add_proto qw/unsigned int vp9_highbd_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance16x8/; + + add_proto qw/unsigned int vp9_highbd_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance8x16/; + + add_proto qw/unsigned int vp9_highbd_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance8x8/; + + add_proto qw/unsigned int vp9_highbd_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance8x4/; + + add_proto qw/unsigned int vp9_highbd_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance4x8/; + + add_proto qw/unsigned int vp9_highbd_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance4x4/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance32x16/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance16x32/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance64x32/, "const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance64x32/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance32x64/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance32x32/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance64x64/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance16x16/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance16x8/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance8x16/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize 
qw/vp9_highbd_10_masked_variance8x8/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance8x4/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance4x8/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance4x4/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance32x16/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance16x32/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance64x32/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance32x64/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int 
ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance32x32/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance64x64/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance16x16/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance16x8/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance8x16/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance8x8/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance8x4/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance4x8/; + + add_proto qw/unsigned int 
vp9_highbd_12_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_variance4x4/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance64x64/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance64x32/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance32x64/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance32x32/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance32x16/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize 
qw/vp9_highbd_masked_sub_pixel_variance16x32/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance16x16/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance16x8/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance8x16/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance8x8/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance8x4/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance4x8/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const 
uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance4x4/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance64x64/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance64x32/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance32x64/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance32x32/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance32x16/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance16x32/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance16x16/, "const 
uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance16x16/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance16x8/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance8x16/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance8x8/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance8x4/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance4x8/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize 
qw/vp9_highbd_10_masked_sub_pixel_variance4x4/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance64x64/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance64x32/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance32x64/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance32x32/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance32x16/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance16x32/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int 
yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance16x16/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance16x8/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance8x16/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance8x8/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance8x4/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance4x8/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance4x4/; + + add_proto qw/unsigned int 
vp9_highbd_masked_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad64x64/; + + add_proto qw/unsigned int vp9_highbd_masked_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad32x64/; + + add_proto qw/unsigned int vp9_highbd_masked_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad64x32/; + + add_proto qw/unsigned int vp9_highbd_masked_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad32x16/; + + add_proto qw/unsigned int vp9_highbd_masked_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad16x32/; + + add_proto qw/unsigned int vp9_highbd_masked_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad32x32/; + + add_proto qw/unsigned int vp9_highbd_masked_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad16x16/; + + add_proto qw/unsigned int vp9_highbd_masked_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad16x8/; + + add_proto qw/unsigned int vp9_highbd_masked_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize 
qw/vp9_highbd_masked_sad8x16/; + + add_proto qw/unsigned int vp9_highbd_masked_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad8x8/; + + add_proto qw/unsigned int vp9_highbd_masked_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad8x4/; + + add_proto qw/unsigned int vp9_highbd_masked_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad4x8/; + + add_proto qw/unsigned int vp9_highbd_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad4x4/; + } + # ENCODEMB INVOKE add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 3589dd903..39fa6d428 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -821,6 +821,56 @@ MAKE_BFP_SAD3_WRAPPER(vp9_highbd_sad4x4x3) MAKE_BFP_SAD8_WRAPPER(vp9_highbd_sad4x4x8) MAKE_BFP_SAD4D_WRAPPER(vp9_highbd_sad4x4x4d) +#if CONFIG_WEDGE_PARTITION +#define HIGHBD_MBFP(BT, MSDF, MVF, MSVF) /* Stores the three masked function pointers (msdf, mvf, msvf) for block size BT in cpi->fn_ptr[BT]. Expands to three complete statements, each with its own semicolon, and reads a variable named cpi from the expansion site; call sites use it without a trailing semicolon. */ \ + cpi->fn_ptr[BT].msdf = MSDF; \ + cpi->fn_ptr[BT].mvf = MVF; \ + cpi->fn_ptr[BT].msvf = MSVF; + +#define MAKE_MBFP_SAD_WRAPPER(fnname) /* Defines static wrappers fnname_bits8, fnname_bits10 and fnname_bits12 around the masked SAD function fnname. All three forward their arguments unchanged; _bits8 returns the result as is, _bits10 returns it shifted right by 2 and _bits12 shifted right by 4. */ \ +static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ + int source_stride, \ + const uint8_t *ref_ptr, \ + int ref_stride, \ + const uint8_t *m, \ + int m_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + m, m_stride); \ +} \ +static unsigned int fnname##_bits10(const uint8_t *src_ptr, \ + int source_stride, \ + const uint8_t *ref_ptr, \ + int 
ref_stride, \ + const uint8_t *m, \ + int m_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + m, m_stride) >> 2; \ +} \ +static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ + int source_stride, \ + const uint8_t *ref_ptr, \ + int ref_stride, \ + const uint8_t *m, \ + int m_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + m, m_stride) >> 4; \ +} + +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad64x64) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad64x32) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad32x64) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad32x32) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad32x16) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad16x32) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad16x16) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad16x8) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad8x16) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad8x8) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad8x4) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad4x8) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad4x4) +#endif // CONFIG_WEDGE_PARTITION + static void highbd_set_var_fns(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; if (cm->use_highbitdepth) { @@ -955,6 +1005,61 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vp9_highbd_sad4x4x3_bits8, vp9_highbd_sad4x4x8_bits8, vp9_highbd_sad4x4x4d_bits8) + +#if CONFIG_WEDGE_PARTITION + HIGHBD_MBFP(BLOCK_64X64, + vp9_highbd_masked_sad64x64_bits8, + vp9_highbd_masked_variance64x64, + vp9_highbd_masked_sub_pixel_variance64x64) + HIGHBD_MBFP(BLOCK_64X32, + vp9_highbd_masked_sad64x32_bits8, + vp9_highbd_masked_variance64x32, + vp9_highbd_masked_sub_pixel_variance64x32) + HIGHBD_MBFP(BLOCK_32X64, + vp9_highbd_masked_sad32x64_bits8, + vp9_highbd_masked_variance32x64, + vp9_highbd_masked_sub_pixel_variance32x64) + HIGHBD_MBFP(BLOCK_32X32, + vp9_highbd_masked_sad32x32_bits8, + vp9_highbd_masked_variance32x32, + vp9_highbd_masked_sub_pixel_variance32x32) + 
HIGHBD_MBFP(BLOCK_32X16, + vp9_highbd_masked_sad32x16_bits8, + vp9_highbd_masked_variance32x16, + vp9_highbd_masked_sub_pixel_variance32x16) + HIGHBD_MBFP(BLOCK_16X32, + vp9_highbd_masked_sad16x32_bits8, + vp9_highbd_masked_variance16x32, + vp9_highbd_masked_sub_pixel_variance16x32) + HIGHBD_MBFP(BLOCK_16X16, + vp9_highbd_masked_sad16x16_bits8, + vp9_highbd_masked_variance16x16, + vp9_highbd_masked_sub_pixel_variance16x16) + HIGHBD_MBFP(BLOCK_8X16, + vp9_highbd_masked_sad8x16_bits8, + vp9_highbd_masked_variance8x16, + vp9_highbd_masked_sub_pixel_variance8x16) + HIGHBD_MBFP(BLOCK_16X8, + vp9_highbd_masked_sad16x8_bits8, + vp9_highbd_masked_variance16x8, + vp9_highbd_masked_sub_pixel_variance16x8) + HIGHBD_MBFP(BLOCK_8X8, + vp9_highbd_masked_sad8x8_bits8, + vp9_highbd_masked_variance8x8, + vp9_highbd_masked_sub_pixel_variance8x8) + HIGHBD_MBFP(BLOCK_4X8, + vp9_highbd_masked_sad4x8_bits8, + vp9_highbd_masked_variance4x8, + vp9_highbd_masked_sub_pixel_variance4x8) + HIGHBD_MBFP(BLOCK_8X4, + vp9_highbd_masked_sad8x4_bits8, + vp9_highbd_masked_variance8x4, + vp9_highbd_masked_sub_pixel_variance8x4) + HIGHBD_MBFP(BLOCK_4X4, + vp9_highbd_masked_sad4x4_bits8, + vp9_highbd_masked_variance4x4, + vp9_highbd_masked_sub_pixel_variance4x4) +#endif // CONFIG_WEDGE_PARTITION break; case VPX_BITS_10: @@ -1087,6 +1192,61 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vp9_highbd_sad4x4x3_bits10, vp9_highbd_sad4x4x8_bits10, vp9_highbd_sad4x4x4d_bits10) + +#if CONFIG_WEDGE_PARTITION + HIGHBD_MBFP(BLOCK_64X64, + vp9_highbd_masked_sad64x64_bits10, + vp9_highbd_10_masked_variance64x64, + vp9_highbd_10_masked_sub_pixel_variance64x64) + HIGHBD_MBFP(BLOCK_64X32, + vp9_highbd_masked_sad64x32_bits10, + vp9_highbd_10_masked_variance64x32, + vp9_highbd_10_masked_sub_pixel_variance64x32) + HIGHBD_MBFP(BLOCK_32X64, + vp9_highbd_masked_sad32x64_bits10, + vp9_highbd_10_masked_variance32x64, + vp9_highbd_10_masked_sub_pixel_variance32x64) + HIGHBD_MBFP(BLOCK_32X32, + 
vp9_highbd_masked_sad32x32_bits10, + vp9_highbd_10_masked_variance32x32, + vp9_highbd_10_masked_sub_pixel_variance32x32) + HIGHBD_MBFP(BLOCK_32X16, + vp9_highbd_masked_sad32x16_bits10, + vp9_highbd_10_masked_variance32x16, + vp9_highbd_10_masked_sub_pixel_variance32x16) + HIGHBD_MBFP(BLOCK_16X32, + vp9_highbd_masked_sad16x32_bits10, + vp9_highbd_10_masked_variance16x32, + vp9_highbd_10_masked_sub_pixel_variance16x32) + HIGHBD_MBFP(BLOCK_16X16, + vp9_highbd_masked_sad16x16_bits10, + vp9_highbd_10_masked_variance16x16, + vp9_highbd_10_masked_sub_pixel_variance16x16) + HIGHBD_MBFP(BLOCK_8X16, + vp9_highbd_masked_sad8x16_bits10, + vp9_highbd_10_masked_variance8x16, + vp9_highbd_10_masked_sub_pixel_variance8x16) + HIGHBD_MBFP(BLOCK_16X8, + vp9_highbd_masked_sad16x8_bits10, + vp9_highbd_10_masked_variance16x8, + vp9_highbd_10_masked_sub_pixel_variance16x8) + HIGHBD_MBFP(BLOCK_8X8, + vp9_highbd_masked_sad8x8_bits10, + vp9_highbd_10_masked_variance8x8, + vp9_highbd_10_masked_sub_pixel_variance8x8) + HIGHBD_MBFP(BLOCK_4X8, + vp9_highbd_masked_sad4x8_bits10, + vp9_highbd_10_masked_variance4x8, + vp9_highbd_10_masked_sub_pixel_variance4x8) + HIGHBD_MBFP(BLOCK_8X4, + vp9_highbd_masked_sad8x4_bits10, + vp9_highbd_10_masked_variance8x4, + vp9_highbd_10_masked_sub_pixel_variance8x4) + HIGHBD_MBFP(BLOCK_4X4, + vp9_highbd_masked_sad4x4_bits10, + vp9_highbd_10_masked_variance4x4, + vp9_highbd_10_masked_sub_pixel_variance4x4) +#endif // CONFIG_WEDGE_PARTITION break; case VPX_BITS_12: @@ -1219,6 +1379,61 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vp9_highbd_sad4x4x3_bits12, vp9_highbd_sad4x4x8_bits12, vp9_highbd_sad4x4x4d_bits12) + +#if CONFIG_WEDGE_PARTITION + HIGHBD_MBFP(BLOCK_64X64, + vp9_highbd_masked_sad64x64_bits12, + vp9_highbd_12_masked_variance64x64, + vp9_highbd_12_masked_sub_pixel_variance64x64) + HIGHBD_MBFP(BLOCK_64X32, + vp9_highbd_masked_sad64x32_bits12, + vp9_highbd_12_masked_variance64x32, + vp9_highbd_12_masked_sub_pixel_variance64x32) + 
HIGHBD_MBFP(BLOCK_32X64,
+                    vp9_highbd_masked_sad32x64_bits12,
+                    vp9_highbd_12_masked_variance32x64,
+                    vp9_highbd_12_masked_sub_pixel_variance32x64)
+        HIGHBD_MBFP(BLOCK_32X32,
+                    vp9_highbd_masked_sad32x32_bits12,
+                    vp9_highbd_12_masked_variance32x32,
+                    vp9_highbd_12_masked_sub_pixel_variance32x32)
+        HIGHBD_MBFP(BLOCK_32X16,
+                    vp9_highbd_masked_sad32x16_bits12,
+                    vp9_highbd_12_masked_variance32x16,
+                    vp9_highbd_12_masked_sub_pixel_variance32x16)
+        HIGHBD_MBFP(BLOCK_16X32,
+                    vp9_highbd_masked_sad16x32_bits12,
+                    vp9_highbd_12_masked_variance16x32,
+                    vp9_highbd_12_masked_sub_pixel_variance16x32)
+        HIGHBD_MBFP(BLOCK_16X16,
+                    vp9_highbd_masked_sad16x16_bits12,
+                    vp9_highbd_12_masked_variance16x16,
+                    vp9_highbd_12_masked_sub_pixel_variance16x16)
+        HIGHBD_MBFP(BLOCK_8X16,
+                    vp9_highbd_masked_sad8x16_bits12,
+                    vp9_highbd_12_masked_variance8x16,
+                    vp9_highbd_12_masked_sub_pixel_variance8x16)
+        HIGHBD_MBFP(BLOCK_16X8,
+                    vp9_highbd_masked_sad16x8_bits12,
+                    vp9_highbd_12_masked_variance16x8,
+                    vp9_highbd_12_masked_sub_pixel_variance16x8)
+        HIGHBD_MBFP(BLOCK_8X8,
+                    vp9_highbd_masked_sad8x8_bits12,
+                    vp9_highbd_12_masked_variance8x8,
+                    vp9_highbd_12_masked_sub_pixel_variance8x8)
+        HIGHBD_MBFP(BLOCK_4X8,
+                    vp9_highbd_masked_sad4x8_bits12,
+                    vp9_highbd_12_masked_variance4x8,
+                    vp9_highbd_12_masked_sub_pixel_variance4x8)
+        HIGHBD_MBFP(BLOCK_8X4,
+                    vp9_highbd_masked_sad8x4_bits12,
+                    vp9_highbd_12_masked_variance8x4,
+                    vp9_highbd_12_masked_sub_pixel_variance8x4)
+        HIGHBD_MBFP(BLOCK_4X4,
+                    vp9_highbd_masked_sad4x4_bits12,
+                    vp9_highbd_12_masked_variance4x4,
+                    vp9_highbd_12_masked_sub_pixel_variance4x4)
+#endif  // CONFIG_WEDGE_PARTITION
         break;
 
       default:
diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c
index e7eda9781..f926a5500 100644
--- a/vp9/encoder/vp9_sad.c
+++ b/vp9/encoder/vp9_sad.c
@@ -317,4 +317,59 @@ MASKSADMxN(8, 8)
 MASKSADMxN(8, 4)
 MASKSADMxN(4, 8)
 MASKSADMxN(4, 4)
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Mask-weighted SAD of two high bit-depth blocks. a8 and b8 are converted
+// with CONVERT_TO_SHORTPTR and read as uint16_t samples; m holds one weight
+// per sample. The weighted total is biased by 31 and shifted right by 6
+// bits before it is returned.
+static INLINE unsigned int highbd_masked_sad(const uint8_t *a8, int a_stride,
+                                             const uint8_t *b8, int b_stride,
+                                             const uint8_t *m, int m_stride,
+                                             int width, int height) {
+  int y, x;
+  unsigned int sad = 0;
+  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++)
+      sad += m[x] * abs(a[x] - b[x]);
+
+    a += a_stride;
+    b += b_stride;
+    m += m_stride;
+  }
+  sad = (sad + 31) >> 6;
+
+  return sad;
+}
+
+// Defines vp9_highbd_masked_sad<m>x<n>_c, which runs highbd_masked_sad() on
+// a block m samples wide and n samples high.
+#define highbd_MASKSADMxN(m, n) \
+unsigned int vp9_highbd_masked_sad##m##x##n##_c(const uint8_t *src, \
+                                                int src_stride, \
+                                                const uint8_t *ref, \
+                                                int ref_stride, \
+                                                const uint8_t *msk, \
+                                                int msk_stride) { \
+  return highbd_masked_sad(src, src_stride, ref, ref_stride, \
+                           msk, msk_stride, m, n); \
+}
+
+highbd_MASKSADMxN(64, 64)
+highbd_MASKSADMxN(64, 32)
+highbd_MASKSADMxN(32, 64)
+highbd_MASKSADMxN(32, 32)
+highbd_MASKSADMxN(32, 16)
+highbd_MASKSADMxN(16, 32)
+highbd_MASKSADMxN(16, 16)
+highbd_MASKSADMxN(16, 8)
+highbd_MASKSADMxN(8, 16)
+highbd_MASKSADMxN(8, 8)
+highbd_MASKSADMxN(8, 4)
+highbd_MASKSADMxN(4, 8)
+highbd_MASKSADMxN(4, 4)
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_WEDGE_PARTITION
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 7225e92c8..74c77287f 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -743,4 +743,233 @@ MASK_SUBPIX_VAR(64, 32)
 
 MASK_VAR(64, 64)
 MASK_SUBPIX_VAR(64, 64)
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Accumulates the mask-weighted difference (a - b) * m and its square over
+// a w x h block. a8 and b8 are converted with CONVERT_TO_SHORTPTR and read
+// as uint16_t samples. Before returning, the magnitude of *sum is biased
+// by 31 and shifted right by 6 bits (its sign is kept), and *sse is biased
+// by 2047 and shifted right by 12 bits.
+void highbd_masked_variance64(const uint8_t *a8, int a_stride,
+                              const uint8_t *b8, int b_stride,
+                              const uint8_t *m, int m_stride,
+                              int w, int h,
+                              uint64_t *sse, int64_t *sum) {
+  int i, j;
+  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+
+  *sum = 0;
+  *sse = 0;
+
+  for (i = 0; i < h; i++) {
+    for (j = 0; j < w; j++) {
+      const int diff = (a[j] - b[j]) * (m[j]);
+      *sum += diff;
+      // diff is a uint16_t sample difference scaled by a mask weight, so its
+      // square can exceed INT_MAX; widen to 64 bits before multiplying.
+      *sse += (int64_t)diff * diff;
+    }
+
+    a += a_stride;
+    b += b_stride;
+    m += m_stride;
+  }
+  *sum = (*sum >= 0) ? ((*sum + 31) >> 6) : -((-*sum + 31) >> 6);
+  *sse = (*sse + 2047) >> 12;
+}
+
+// Runs highbd_masked_variance64() and stores the 64-bit totals in *sse and
+// *sum without any further scaling.
+void highbd_masked_variance(const uint8_t *a8, int a_stride,
+                            const uint8_t *b8, int b_stride,
+                            const uint8_t *m, int m_stride,
+                            int w, int h,
+                            unsigned int *sse, int *sum) {
+  uint64_t sse_long = 0;
+  int64_t sum_long = 0;
+  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
+                           w, h, &sse_long, &sum_long);
+  *sse = sse_long;
+  *sum = sum_long;
+}
+
+// Like highbd_masked_variance(), but the totals are scaled first: sum with
+// ROUND_POWER_OF_TWO(sum, 2) and sse with ROUND_POWER_OF_TWO(sse, 4).
+void highbd_10_masked_variance(const uint8_t *a8, int a_stride,
+                               const uint8_t *b8, int b_stride,
+                               const uint8_t *m, int m_stride,
+                               int w, int h,
+                               unsigned int *sse, int *sum) {
+  uint64_t sse_long = 0;
+  int64_t sum_long = 0;
+  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
+                           w, h, &sse_long, &sum_long);
+  *sum = ROUND_POWER_OF_TWO(sum_long, 2);
+  *sse = ROUND_POWER_OF_TWO(sse_long, 4);
+}
+
+// Like highbd_masked_variance(), but the totals are scaled first: sum with
+// ROUND_POWER_OF_TWO(sum, 4) and sse with ROUND_POWER_OF_TWO(sse, 8).
+void highbd_12_masked_variance(const uint8_t *a8, int a_stride,
+                               const uint8_t *b8, int b_stride,
+                               const uint8_t *m, int m_stride,
+                               int w, int h,
+                               unsigned int *sse, int *sum) {
+  uint64_t sse_long = 0;
+  int64_t sum_long = 0;
+  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride,
+                           w, h, &sse_long, &sum_long);
+  *sum = ROUND_POWER_OF_TWO(sum_long, 4);
+  *sse = ROUND_POWER_OF_TWO(sse_long, 8);
+}
+
+// Defines vp9_highbd_masked_variance<W>x<H>_c and its _10_ / _12_ versions.
+// Each stores the masked sse in *sse and returns
+// *sse - sum * sum / (W * H).
+#define HIGHBD_MASK_VAR(W, H) \
+unsigned int vp9_highbd_masked_variance##W##x##H##_c(const uint8_t *a, \
+                                                     int a_stride, \
+                                                     const uint8_t *b, \
+                                                     int b_stride, \
+                                                     const uint8_t *m, \
+                                                     int m_stride, \
+                                                     unsigned int *sse) { \
+  int sum; \
+  highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
+                         W, H, sse, &sum); \
+  return *sse - (((int64_t)sum * sum) / (W * H)); \
+} \
+\
+unsigned int vp9_highbd_10_masked_variance##W##x##H##_c(const uint8_t *a, \
+                                                        int a_stride, \
+                                                        const uint8_t *b, \
+                                                        int b_stride, \
+                                                        const uint8_t *m, \
+                                                        int m_stride, \
+                                                        unsigned int *sse) { \
+  int sum; \
+  highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
+                            W, H, sse, &sum); \
+  return *sse - (((int64_t)sum * sum) / (W * H)); \
+} \
+\
+unsigned int vp9_highbd_12_masked_variance##W##x##H##_c(const uint8_t *a, \
+                                                        int a_stride, \
+                                                        const uint8_t *b, \
+                                                        int b_stride, \
+                                                        const uint8_t *m, \
+                                                        int m_stride, \
+                                                        unsigned int *sse) { \
+  int sum; \
+  highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, \
+                            W, H, sse, &sum); \
+  return *sse - (((int64_t)sum * sum) / (W * H)); \
+}
+
+// Defines vp9_highbd_masked_sub_pixel_variance<W>x<H>_c and its _10_ / _12_
+// versions. src is run through the two bilinear filter passes selected by
+// xoffset and yoffset, and the W x H result is handed to the matching
+// masked variance function together with dst and the mask.
+#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
+unsigned int vp9_highbd_masked_sub_pixel_variance##W##x##H##_c( \
+  const uint8_t *src, int src_stride, \
+  int xoffset, int yoffset, \
+  const uint8_t *dst, int dst_stride, \
+  const uint8_t *msk, int msk_stride, \
+  unsigned int *sse) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint16_t temp2[H * W]; \
+\
+  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+                                           H + 1, W, \
+                                           BILINEAR_FILTERS_2TAP(xoffset)); \
+  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  return vp9_highbd_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
+                                                 W, dst, dst_stride, \
+                                                 msk, msk_stride, sse); \
+} \
+\
+unsigned int vp9_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
+  const uint8_t *src, int src_stride, \
+  int xoffset, int yoffset, \
+  const uint8_t *dst, int dst_stride, \
+  const uint8_t *msk, int msk_stride, \
+  unsigned int *sse) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint16_t temp2[H * W]; \
+\
+  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+                                           H + 1, W, \
+                                           BILINEAR_FILTERS_2TAP(xoffset)); \
+  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  return vp9_highbd_10_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
+                                                    W, dst, dst_stride, \
+                                                    msk, msk_stride, sse); \
+} \
+\
+unsigned int vp9_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
+  const uint8_t *src, int src_stride, \
+  int xoffset, int yoffset, \
+  const uint8_t *dst, int dst_stride, \
+  const uint8_t *msk, int msk_stride, \
+  unsigned int *sse) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint16_t temp2[H * W]; \
+\
+  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+                                           H + 1, W, \
+                                           BILINEAR_FILTERS_2TAP(xoffset)); \
+  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  return vp9_highbd_12_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
+                                                    W, dst, dst_stride, \
+                                                    msk, msk_stride, sse); \
+}
+
+HIGHBD_MASK_VAR(4, 4)
+HIGHBD_MASK_SUBPIX_VAR(4, 4)
+
+HIGHBD_MASK_VAR(4, 8)
+HIGHBD_MASK_SUBPIX_VAR(4, 8)
+
+HIGHBD_MASK_VAR(8, 4)
+HIGHBD_MASK_SUBPIX_VAR(8, 4)
+
+HIGHBD_MASK_VAR(8, 8)
+HIGHBD_MASK_SUBPIX_VAR(8, 8)
+
+HIGHBD_MASK_VAR(8, 16)
+HIGHBD_MASK_SUBPIX_VAR(8, 16)
+
+HIGHBD_MASK_VAR(16, 8)
+HIGHBD_MASK_SUBPIX_VAR(16, 8)
+
+HIGHBD_MASK_VAR(16, 16)
+HIGHBD_MASK_SUBPIX_VAR(16, 16)
+
+HIGHBD_MASK_VAR(16, 32)
+HIGHBD_MASK_SUBPIX_VAR(16, 32)
+
+HIGHBD_MASK_VAR(32, 16)
+HIGHBD_MASK_SUBPIX_VAR(32, 16)
+
+HIGHBD_MASK_VAR(32, 32)
+HIGHBD_MASK_SUBPIX_VAR(32, 32)
+
+HIGHBD_MASK_VAR(32, 64)
+HIGHBD_MASK_SUBPIX_VAR(32, 64)
+
+HIGHBD_MASK_VAR(64, 32)
+HIGHBD_MASK_SUBPIX_VAR(64, 32)
+
+HIGHBD_MASK_VAR(64, 64)
+HIGHBD_MASK_SUBPIX_VAR(64, 64)
+
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_WEDGE_PARTITION
-- 
2.49.0