From: Yunqing Wang Date: Fri, 14 Feb 2014 23:55:49 +0000 (-0800) Subject: Use fast HEX search in real time mode X-Git-Tag: v1.4.0~2312^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=360c80cfb95559b3a49fc4624a24dee01e1abb6a;p=libvpx Use fast HEX search in real time mode In good quality mode motion search, the best matches are normally found after searching in a large area. In real time mode, to make encoding fast, a center-biased fast HEX search is used, which converges quickly most of the time. A 4-point diamond search is also carried out as the following refining search, which gives more precise results, and maintains good motion search quality. At speed 5, the borg test on rtc set showed an overall PSNR loss of 0.936%. The encoding speed gain is 4% - 5%. Change-Id: I42cd68bb56a09ca1b86293c99d5f7312225ca7ae --- diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 94e49bd4a..10dee52ed 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -855,6 +855,184 @@ int vp9_square_search(const MACROBLOCK *x, square_num_candidates, square_candidates); }; +// Number of candidates in first hex search +#define FIRST_HEX_CANDIDATES 6 +// Index of previous hex search's best match +#define PRE_BEST_CANDIDATE 6 +// Number of candidates in following hex search +#define NEXT_HEX_CANDIDATES 3 +// Number of candidates in refining search +#define REFINE_CANDIDATES 4 + +int vp9_fast_hex_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv) { + const MACROBLOCKD* const xd = &x->e_mbd; + static const MV hex[FIRST_HEX_CANDIDATES] = { + { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} + }; + static const MV next_chkpts[PRE_BEST_CANDIDATE][NEXT_HEX_CANDIDATES] = { + {{ -2, 0}, { -1, -2}, {1, -2}}, + {{ -1, -2}, {1, -2}, {2, 0}}, + {{1, -2}, {2, 0}, {1, 2}}, + {{2, 0}, {1, 2}, { -1, 2}}, + {{1, 2}, { -1, 2}, { -2, 0}}, + {{ -1, 2}, { -2, 0}, { -1, -2}} + }; + static const MV neighbors[REFINE_CANDIDATES] = { + {0, -1}, { -1, 0}, {1, 0}, {0, 1} + }; + int i, j; + + const uint8_t *what = x->plane[0].src.buf; + const int what_stride = x->plane[0].src.stride; + const int in_what_stride = xd->plane[0].pre[0].stride; + int br, bc; + MV this_mv; + unsigned int bestsad = 0x7fffffff; + unsigned int thissad; + const uint8_t *base_offset; + const uint8_t *this_offset; + int k = -1; + int best_site = -1; + const int max_hex_search = 512; + const int max_dia_search = 32; + + const int *mvjsadcost = x->nmvjointsadcost; + int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; + + const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + + // Adjust ref_mv to make sure it is within MV range + clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + br = ref_mv->row; + bc = ref_mv->col; + + // Check the start point + base_offset = xd->plane[0].pre[0].buf; + this_offset = base_offset + (br * in_what_stride) + bc; + this_mv.row = br; + this_mv.col = bc; + bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) + + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); + + // Initial 6-point hex search + if (check_bounds(x, br, bc, 2)) { + for (i = 0; i < FIRST_HEX_CANDIDATES; i++) { + this_mv.row = br + hex[i].row; + this_mv.col = bc + hex[i].col; + this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, + bestsad); + CHECK_BETTER + } + } else { + for (i = 0; i < FIRST_HEX_CANDIDATES; i++) { + this_mv.row = br + hex[i].row; + this_mv.col = bc + hex[i].col; + if (!is_mv_in(x, &this_mv)) + continue; + this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, + bestsad); + CHECK_BETTER + } + } + + // Continue hex search if we find a better match in first round + if (best_site != -1) { + br += hex[best_site].row; + bc += hex[best_site].col; + k = best_site; + + // Allow search covering maximum MV range + for (j = 1; j < max_hex_search; j++) { + best_site = -1; + + if (check_bounds(x, br, bc, 2)) { + for (i = 0; i < 3; i++) { + this_mv.row = br + next_chkpts[k][i].row; + this_mv.col = bc + next_chkpts[k][i].col; + this_offset = base_offset + (this_mv.row * in_what_stride) + + this_mv.col; + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, + bestsad); + CHECK_BETTER + } + } else { + for (i = 0; i < 3; i++) { + this_mv.row = br + next_chkpts[k][i].row; + this_mv.col = bc + next_chkpts[k][i].col; + if (!is_mv_in(x, &this_mv)) + continue; + this_offset = base_offset + (this_mv.row * in_what_stride) + + this_mv.col; + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, + bestsad); + CHECK_BETTER + } + } + + if (best_site == -1) { + break; + } else { + br += next_chkpts[k][best_site].row; + bc += next_chkpts[k][best_site].col; + k += 5 + best_site; + if (k >= 12) k -= 12; + else if (k >= 6) k -= 6; + } + } + } + + // Check 4 1-away neighbors + for (j = 0; j < max_dia_search; j++) { + best_site = -1; + + if (check_bounds(x, br, bc, 1)) { + for (i = 0; i < REFINE_CANDIDATES; i++) { + this_mv.row = br + neighbors[i].row; + this_mv.col = bc + neighbors[i].col; + this_offset = base_offset + (this_mv.row * in_what_stride) + + this_mv.col; + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, + bestsad); + CHECK_BETTER + } + } else { + for (i = 0; i < REFINE_CANDIDATES; i++) { + this_mv.row = br + neighbors[i].row; + this_mv.col = bc + neighbors[i].col; + if (!is_mv_in(x, &this_mv)) + continue; + this_offset = base_offset + (this_mv.row * in_what_stride) + + this_mv.col; + thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, + bestsad); + CHECK_BETTER + } + } + + if (best_site == -1) { + break; + } else { + br += neighbors[best_site].row; + bc += neighbors[best_site].col; + } + } + + best_mv->row = br; + best_mv->col = bc; + + return bestsad; +} + #undef CHECK_BETTER int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 4414f3d49..ff4b1df75 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -75,6 +75,14 @@ int vp9_square_search(const MACROBLOCK *x, int use_mvcost, const MV *center_mv, MV *best_mv); +int vp9_fast_hex_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv); typedef int (fractional_mv_step_fp) ( const MACROBLOCK *x, diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 84a0e6069..b83c1f98c 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -861,6 +861,8 @@ static void set_rt_speed_feature(VP9_COMMON *cm, } sf->frame_parameter_update = 0; sf->encode_breakout_thresh = 1000; + + sf->search_method = FAST_HEX; } if (speed >= 6) { sf->always_this_block_size = BLOCK_16X16; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 4831b6f72..1d15f0904 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -136,7 +136,8 @@ typedef enum { NSTEP = 1, HEX = 2, BIGDIA = 3, - SQUARE = 4 + SQUARE = 4, + FAST_HEX = 5 } SEARCH_METHODS; typedef enum { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index d617f783d..11b2ae874 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2437,7 +2437,11 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // Further step/diamond searches as necessary further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - if (cpi->sf.search_method == HEX) { + if (cpi->sf.search_method == FAST_HEX) { + bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, + &cpi->fn_ptr[bsize], 1, + &ref_mv.as_mv, &tmp_mv->as_mv); + } else if (cpi->sf.search_method == HEX) { bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, &cpi->fn_ptr[bsize], 1, &ref_mv.as_mv, &tmp_mv->as_mv);