From c79ae1713cc18f0a7952bffa3c244ab1d07aa249 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 12 Nov 2012 16:18:35 -0800 Subject: [PATCH] Use full 32-pixel edge for superblock bestrefmv motion vector ordering. Change-Id: I417e39867c020a17d85370972446a8ce2bbe9a6d --- vp9/common/findnearmv.c | 176 ++++++++++++++++++++++++++++++++++------ vp9/common/rtcd_defs.sh | 10 +++ 2 files changed, 162 insertions(+), 24 deletions(-) diff --git a/vp9/common/findnearmv.c b/vp9/common/findnearmv.c index 8cc0474a7..a2bb0f207 100644 --- a/vp9/common/findnearmv.c +++ b/vp9/common/findnearmv.c @@ -42,23 +42,39 @@ vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc, } #define SP(x) (((x) & 7) << 1) -unsigned int vp9_sad3x16_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) { +unsigned int vp9_sad3x16_c(const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16); } -unsigned int vp9_sad16x3_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad) { +unsigned int vp9_sad16x3_c(const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3); } +#if CONFIG_SUPERBLOCKS +unsigned int vp9_sad3x32_c(const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 32); +} + +unsigned int vp9_sad32x3_c(const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 3); +} +#endif + #if CONFIG_SUBPELREFMV unsigned int vp9_variance2x16_c(const unsigned char *src_ptr, const int source_stride, @@ -93,7 
+109,7 @@ unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char *src_ptr, const unsigned char *dst_ptr, const int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[16 * 3]; // Temp data bufffer used in filtering + unsigned short FData3[16 * 3]; // Temp data buffer used in filtering unsigned char temp2[20 * 16]; const short *HFilter, *VFilter; @@ -114,7 +130,7 @@ unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr, const unsigned char *dst_ptr, const int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[2 * 17]; // Temp data bufffer used in filtering + unsigned short FData3[2 * 17]; // Temp data buffer used in filtering unsigned char temp2[2 * 16]; const short *HFilter, *VFilter; @@ -127,6 +143,76 @@ unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr, return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); } + +#if CONFIG_SUPERBLOCKS +unsigned int vp9_variance2x32_c(const unsigned char *src_ptr, + const int source_stride, + const unsigned char *ref_ptr, + const int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 32, &var, &avg); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 6)); /* 2 * 32 = 64 samples, so scale by 1/64 */ +} + +unsigned int vp9_variance32x2_c(const unsigned char *src_ptr, + const int source_stride, + const unsigned char *ref_ptr, + const int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 2, &var, &avg); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 6)); /* 32 * 2 = 64 samples, so scale by 1/64 */ +} + +unsigned int vp9_sub_pixel_variance32x2_c(const unsigned char *src_ptr, + const int src_pixels_per_line, + const int xoffset, + const int yoffset, + const unsigned char *dst_ptr, + const int dst_pixels_per_line, + unsigned int *sse) { + unsigned short FData3[32 * 3]; // Temp data buffer used in filtering + unsigned char temp2[20 * 32]; + const short 
*HFilter, *VFilter; + + HFilter = vp9_bilinear_filters[xoffset]; + VFilter = vp9_bilinear_filters[yoffset]; + + var_filter_block2d_bil_first_pass(src_ptr, FData3, + src_pixels_per_line, 1, 3, 32, HFilter); + var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 2, 32, VFilter); + + return vp9_variance32x2_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); /* compare all 32 filtered columns, not just the first 16 */ +} + +unsigned int vp9_sub_pixel_variance2x32_c(const unsigned char *src_ptr, + const int src_pixels_per_line, + const int xoffset, + const int yoffset, + const unsigned char *dst_ptr, + const int dst_pixels_per_line, + unsigned int *sse) { + unsigned short FData3[2 * 33]; // Temp data buffer used in filtering + unsigned char temp2[2 * 32]; + const short *HFilter, *VFilter; + + HFilter = vp9_bilinear_filters[xoffset]; + VFilter = vp9_bilinear_filters[yoffset]; + + var_filter_block2d_bil_first_pass(src_ptr, FData3, + src_pixels_per_line, 1, 33, 2, HFilter); + var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 32, 2, VFilter); + + return vp9_variance2x32_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); /* compare all 32 filtered rows, not just the first 16 */ +} +#endif #endif /* check a list of motion vectors by sad score using a number rows of pixels @@ -194,15 +280,39 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, offset = ref_y_stride * row_offset + col_offset; score = 0; if (xd->up_available) { - vp9_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - above_src, xd->dst.y_stride, &sse); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + vp9_sub_pixel_variance32x2_c(above_ref + offset, ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src, xd->dst.y_stride, &sse); + } else { +#endif + vp9_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src, xd->dst.y_stride, &sse); +#if CONFIG_SUPERBLOCKS + } +#endif score += sse; } if (xd->left_available) { - vp9_sub_pixel_variance2x16_c(left_ref + offset, 
ref_y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - left_src, xd->dst.y_stride, &sse); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + vp9_sub_pixel_variance2x32_c(left_ref + offset, ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + left_src, xd->dst.y_stride, &sse); + } else { +#endif + vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + left_src, xd->dst.y_stride, &sse); +#if CONFIG_SUPERBLOCKS + } +#endif score += sse; } #else @@ -213,12 +323,30 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, offset = ref_y_stride * row_offset + col_offset; score = 0; if (xd->up_available) { - score += vp9_sad16x3(above_src, xd->dst.y_stride, - above_ref + offset, ref_y_stride, INT_MAX); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + score += vp9_sad32x3(above_src, xd->dst.y_stride, + above_ref + offset, ref_y_stride, INT_MAX); + } else { +#endif + score += vp9_sad16x3(above_src, xd->dst.y_stride, + above_ref + offset, ref_y_stride, INT_MAX); +#if CONFIG_SUPERBLOCKS + } +#endif } if (xd->left_available) { - score += vp9_sad3x16(left_src, xd->dst.y_stride, - left_ref + offset, ref_y_stride, INT_MAX); +#if CONFIG_SUPERBLOCKS + if (xd->mode_info_context->mbmi.encoded_as_sb) { + score += vp9_sad3x32(left_src, xd->dst.y_stride, + left_ref + offset, ref_y_stride, INT_MAX); + } else { +#endif + score += vp9_sad3x16(left_src, xd->dst.y_stride, + left_ref + offset, ref_y_stride, INT_MAX); +#if CONFIG_SUPERBLOCKS + } +#endif } #endif // Add the entry to our list and then resort the list on score. 
diff --git a/vp9/common/rtcd_defs.sh b/vp9/common/rtcd_defs.sh index 8b3ee0c71..14e9d7a75 100644 --- a/vp9/common/rtcd_defs.sh +++ b/vp9/common/rtcd_defs.sh @@ -233,6 +233,16 @@ specialize vp9_sad16x3 sse2 prototype unsigned int vp9_sad3x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" specialize vp9_sad3x16 sse2 +if [ "$CONFIG_SUPERBLOCKS" = "yes" ]; then + +prototype unsigned int vp9_sad32x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" +specialize vp9_sad32x3 + +prototype unsigned int vp9_sad3x32 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad" +specialize vp9_sad3x32 + +fi + # # Encoder functions below this point. # -- 2.40.0