From 71b43b0ff0aad2ab222a56e7fc0564ea00892de4 Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Tue, 6 Aug 2013 15:53:35 -0700 Subject: [PATCH] Clean ups of the subpel search functions Removes some unused code and speed features, and organizes the interfaces for fractional mv step functions for use in new speed features to come. In the process a new speed feature - number of iterations per step during the subpel search - is exposed. No change when this parameter is set as the original value of 3. Results: subpel_iters_per_step = 3: baseline subpel_iters_per_step = 2: psnr -0.067%, 1% speedup subpel_iters_per_step = 1: psnr -0.331%, 3-4% speedup Change-Id: I2eba8a21f6461be8caf56af04a5337257a5693a8 --- vp9/encoder/vp9_mbgraph.c | 2 +- vp9/encoder/vp9_mcomp.c | 795 ++++-------------------------- vp9/encoder/vp9_mcomp.h | 38 +- vp9/encoder/vp9_onyx_if.c | 19 +- vp9/encoder/vp9_onyx_int.h | 10 +- vp9/encoder/vp9_rdopt.c | 18 +- vp9/encoder/vp9_temporal_filter.c | 1 + 7 files changed, 149 insertions(+), 734 deletions(-) diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 67b809f8d..314e195d2 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -59,7 +59,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, x, dst_mv, ref_mv, x->errorperbit, &v_fn_ptr, - NULL, NULL, + 0, cpi->sf.subpel_iters_per_step, NULL, NULL, & distortion, &sse); } diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 77d6554b0..014f54a49 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -245,13 +245,15 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { }, \ v = INT_MAX;) -int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1) { +int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1) { uint8_t *z = x->plane[0].src.buf; int src_stride = x->plane[0].src.stride; MACROBLOCKD *xd = &x->e_mbd; @@ -262,9 +264,9 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; - unsigned int halfiters = 4; - unsigned int quarteriters = 4; - unsigned int eighthiters = 4; + unsigned int halfiters = iters_per_step; + unsigned int quarteriters = iters_per_step; + unsigned int eighthiters = iters_per_step; int thismse; int maxc, minc, maxr, minr; int y_stride; @@ -289,7 +291,6 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, tr = br; tc = bc; - offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; // central mv @@ -303,7 +304,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, // TODO: Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) - while (--halfiters) { + while (halfiters--) { // 1/2 pel CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); @@ -337,41 +338,46 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, // TODO: Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel - hstep >>= 1; - while (--quarteriters) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + while (quarteriters--) { + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(down, tr + hstep, tc); - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - // no reason to check the same one again. - if (tr == br && tc == bc) - break; + switch (whichdir) { + case 0: + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 1: + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; + case 2: + CHECK_BETTER(diag, tr + hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(diag, tr + hstep, tc + hstep); + break; + } - tr = br; - tc = bc; + // no reason to check the same one again. + if (tr == br && tc == bc) + break; + + tr = br; + tc = bc; + } } - if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) { + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + forced_stop == 0) { hstep >>= 1; - while (--eighthiters) { + while (eighthiters--) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); @@ -402,6 +408,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, tc = bc; } } + bestmv->as_mv.row = br; bestmv->as_mv.col = bc; @@ -418,14 +425,17 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \ z, src_stride, &sse, second_pred) -int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1, - const uint8_t *second_pred, int w, int h) { +int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, + const uint8_t *second_pred, + int w, int h) { uint8_t *z = x->plane[0].src.buf; int src_stride = x->plane[0].src.stride; MACROBLOCKD *xd = &x->e_mbd; @@ -436,9 +446,9 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; - unsigned int halfiters = 4; - unsigned int quarteriters = 4; - unsigned int eighthiters = 4; + unsigned int halfiters = iters_per_step; + unsigned int quarteriters = iters_per_step; + unsigned int eighthiters = iters_per_step; int thismse; int maxc, minc, maxr, minr; int y_stride; @@ -485,7 +495,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, // Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) - while (--halfiters) { + while (halfiters--) { // 1/2 pel CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); @@ -519,41 +529,46 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, // Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel - hstep >>= 1; - while (--quarteriters) { - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(down, tr + hstep, tc); - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + while (quarteriters--) { + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(down, tr + hstep, tc); - switch (whichdir) { - case 0: - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - } + whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - // no reason to check the same one again. - if (tr == br && tc == bc) - break; + switch (whichdir) { + case 0: + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 1: + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; + case 2: + CHECK_BETTER(diag, tr + hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(diag, tr + hstep, tc + hstep); + break; + } - tr = br; - tc = bc; + // no reason to check the same one again. + if (tr == br && tc == bc) + break; + + tr = br; + tc = bc; + } } - if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) { + if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + forced_stop == 0) { hstep >>= 1; - while (--eighthiters) { + while (eighthiters--) { CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); @@ -594,7 +609,6 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, return besterr; } - #undef MVC #undef PRE #undef DIST @@ -603,627 +617,8 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, #undef MIN #undef MAX -int vp9_find_best_sub_pixel_step(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1) { - int bestmse = INT_MAX; - int_mv startmv; - int_mv this_mv; - int_mv orig_mv; - int yrow_movedback = 0, ycol_movedback = 0; - uint8_t *z = x->plane[0].src.buf; - int src_stride = x->plane[0].src.stride; - int left, right, up, down, diag; - unsigned int sse; - int whichdir; - int thismse; - int y_stride; - MACROBLOCKD *xd = &x->e_mbd; - - uint8_t *y = xd->plane[0].pre[0].buf + - (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + - bestmv->as_mv.col; - y_stride = xd->plane[0].pre[0].stride; - - // central mv - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - orig_mv = *bestmv; - - // calculate central point error - bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 8; - thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 8; - thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - // for(whichdir =0;whichdir<4;whichdir++) - // { - this_mv = startmv; - - switch (whichdir) { - case 0: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, src_stride, - &sse); - break; - case 1: - this_mv.as_mv.col += 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, src_stride, - &sse); - break; - case 2: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse); - break; - case 3: - default: - this_mv.as_mv.col += 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - -// } - - - // time to check quarter pels. - if (bestmv->as_mv.row < startmv.as_mv.row) { - y -= y_stride; - yrow_movedback = 1; - } - - if (bestmv->as_mv.col < startmv.as_mv.col) { - y--; - ycol_movedback = 1; - } - - startmv = *bestmv; - - - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col = startmv.as_mv.col - 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, - src_stride, &sse); - } - - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 4; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row = startmv.as_mv.row - 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), - z, src_stride, &sse); - } - - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 4; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - -// for(whichdir=0;whichdir<4;whichdir++) -// { - this_mv = startmv; - - switch (whichdir) { - case 0: - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 2; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, - SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse); - } - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - y_stride - 1, y_stride, - SP(6), SP(6), z, src_stride, &sse); - } - } - - break; - case 1: - this_mv.as_mv.col += 2; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse); - } - - break; - case 2: - this_mv.as_mv.row += 2; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; - thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, - src_stride, &sse); - } - - break; - case 3: - this_mv.as_mv.col += 2; - this_mv.as_mv.row += 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - if (!(xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv))) - return bestmse; - - /* Now do 1/8th pixel */ - if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) { - y -= y_stride; - yrow_movedback = 1; - } - - if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) { - y--; - ycol_movedback = 1; - } - - startmv = *bestmv; - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col = startmv.as_mv.col - 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } - - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 2; - thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row = startmv.as_mv.row - 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); - } - - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 2; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - -// for(whichdir=0;whichdir<4;whichdir++) -// { - this_mv = startmv; - - switch (whichdir) { - case 0: - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 1; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, - SP(7), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - y_stride - 1, y_stride, - SP(7), SP(7), z, src_stride, &sse); - } - } - - break; - case 1: - this_mv.as_mv.col += 1; - - if (startmv.as_mv.row & 7) { - this_mv.as_mv.row -= 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; - thismse = vfp->svf(y - y_stride, y_stride, - SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse); - } - - break; - case 2: - this_mv.as_mv.row += 1; - - if (startmv.as_mv.col & 7) { - this_mv.as_mv.col -= 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - } else { - this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; - thismse = vfp->svf(y - 1, y_stride, - SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse); - } - - break; - case 3: - this_mv.as_mv.col += 1; - this_mv.as_mv.row += 1; - thismse = vfp->svf(y, y_stride, - SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), - z, src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - return bestmse; -} - #undef SP -int vp9_find_best_half_pixel_step(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], - int *distortion, - unsigned int *sse1) { - int bestmse = INT_MAX; - int_mv startmv; - int_mv this_mv; - uint8_t *z = x->plane[0].src.buf; - int src_stride = x->plane[0].src.stride; - int left, right, up, down, diag; - unsigned int sse; - int whichdir; - int thismse; - int y_stride; - MACROBLOCKD *xd = &x->e_mbd; - - uint8_t *y = xd->plane[0].pre[0].buf + - (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col; - y_stride = xd->plane[0].pre[0].stride; - - // central mv - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - startmv = *bestmv; - - // calculate central point error - bestmse = vfp->vf(y, y_stride, z, src_stride, sse1); - *distortion = bestmse; - bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - - // go left then right and check error - this_mv.as_mv.row = startmv.as_mv.row; - this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); - thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse); - left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (left < bestmse) { - *bestmv = this_mv; - bestmse = left; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.col += 8; - thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse); - right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (right < bestmse) { - *bestmv = this_mv; - bestmse = right; - *distortion = thismse; - *sse1 = sse; - } - - // go up then down and check error - this_mv.as_mv.col = startmv.as_mv.col; - this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); - thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse); - up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); - - if (up < bestmse) { - *bestmv = this_mv; - bestmse = up; - *distortion = thismse; - *sse1 = sse; - } - - this_mv.as_mv.row += 8; - thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse); - down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (down < bestmse) { - *bestmv = this_mv; - bestmse = down; - *distortion = thismse; - *sse1 = sse; - } - - // now check 1 more diagonal - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - this_mv = startmv; - - switch (whichdir) { - case 0: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, - z, src_stride, &sse); - break; - case 1: - this_mv.as_mv.col += 4; - this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; - thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, - z, src_stride, &sse); - break; - case 2: - this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse); - break; - case 3: - default: - this_mv.as_mv.col += 4; - this_mv.as_mv.row += 4; - thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse); - break; - } - - diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (diag < bestmse) { - *bestmv = this_mv; - bestmse = diag; - *distortion = thismse; - *sse1 = sse; - } - - return bestmse; -} - #define CHECK_BOUNDS(range) \ {\ all_in = 1;\ diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 5d0c0e8ba..b91e6fd34 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -67,12 +67,19 @@ int vp9_square_search(MACROBLOCK *x, int_mv *center_mv, int_mv *best_mv); -typedef int (fractional_mv_step_fp) (MACROBLOCK *x, int_mv - *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse); -extern fractional_mv_step_fp vp9_find_best_sub_pixel_step_iteratively; -extern fractional_mv_step_fp vp9_find_best_sub_pixel_step; -extern fractional_mv_step_fp vp9_find_best_half_pixel_step; +typedef int (fractional_mv_step_fp) ( + MACROBLOCK *x, + int_mv *bestmv, + int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, // 0 - full, 1 - qtr only, 2 - half only + int iters_per_step, + int *mvjcost, + int *mvcost[2], + int *distortion, + unsigned int *sse); +extern fractional_mv_step_fp vp9_find_best_sub_pixel_iterative; typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x, int_mv *ref_mv, int sad_per_bit, @@ -95,14 +102,17 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, int *mvjcost, int *mvcost[2], int_mv *center_mv); -int vp9_find_best_sub_pixel_comp(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, - const uint8_t *second_pred, - int w, int h); +int vp9_find_best_sub_pixel_comp_iterative( + MACROBLOCK *x, + int_mv *bestmv, int_mv *ref_mv, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, // 0 - full, 1 - qtr only, 2 - half only + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, unsigned int *sse1, + const uint8_t *second_pred, + int w, int h); int vp9_refining_search_8p_c(MACROBLOCK *x, int_mv *ref_mv, int error_per_bit, diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index d1db91086..cf5ae5252 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -713,9 +713,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->search_method = NSTEP; sf->auto_filter = 1; sf->recode_loop = 1; - sf->quarter_pixel_search = 1; - sf->half_pixel_search = 1; - sf->iterative_sub_pixel = 1; + sf->subpel_search_method = SUBPEL_ITERATIVE; + sf->subpel_iters_per_step = 3; sf->optimize_coefficients = !cpi->oxcf.lossless; sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; @@ -831,6 +830,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0; sf->auto_mv_step_size = 1; sf->search_method = SQUARE; + sf->subpel_iters_per_step = 2; } if (speed == 3) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; @@ -851,6 +851,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_splitmv = 1; sf->auto_mv_step_size = 1; sf->search_method = BIGDIA; + sf->subpel_iters_per_step = 1; } if (speed == 4) { sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES; @@ -875,6 +876,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_splitmv = 1; sf->search_method = HEX; + sf->subpel_iters_per_step = 1; } /* if (speed == 2) { @@ -918,12 +920,11 @@ void vp9_set_speed_features(VP9_COMP *cpi) { cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4; - if (cpi->sf.iterative_sub_pixel == 1) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step_iteratively; - } else if (cpi->sf.quarter_pixel_search) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step; - } else if (cpi->sf.half_pixel_search) { - cpi->find_fractional_mv_step = vp9_find_best_half_pixel_step; + if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative; + } else { + // TODO(debargha): Other methods to come + assert(0); } cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 7eae7007f..124910769 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -232,14 +232,18 @@ typedef enum { FLAG_SKIP_INTRA_LOWVAR = 32, } MODE_SEARCH_SKIP_LOGIC; +typedef enum { + SUBPEL_ITERATIVE = 0, + // Other methods to come +} SUBPEL_SEARCH_METHODS; + typedef struct { int RD; SEARCH_METHODS search_method; int auto_filter; int recode_loop; - int iterative_sub_pixel; - int half_pixel_search; - int quarter_pixel_search; + SUBPEL_SEARCH_METHODS subpel_search_method; + int subpel_iters_per_step; int thresh_mult[MAX_MODES]; int max_step_search_steps; int reduce_first_step_size; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 238c9815b..90d35f86a 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1967,6 +1967,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, unsigned int sse; cpi->find_fractional_mv_step(x, &mode_mv[NEWMV], bsi->ref_mv, x->errorperbit, v_fn_ptr, + 0, cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &distortion, &sse); @@ -2547,6 +2548,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv, x->errorperbit, &cpi->fn_ptr[block_size], + 0, cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, &sse); } @@ -2673,13 +2675,15 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, - &ref_mv[id], - x->errorperbit, - &cpi->fn_ptr[block_size], - x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, - pw, ph); + bestsme = vp9_find_best_sub_pixel_comp_iterative( + x, &tmp_mv, + &ref_mv[id], + x->errorperbit, + &cpi->fn_ptr[block_size], + 0, cpi->sf.subpel_iters_per_step, + x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, + pw, ph); } if (id) diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 11d601bb3..3052e8f70 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -170,6 +170,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], + 0, cpi->sf.subpel_iters_per_step, NULL, NULL, &distortion, &sse); } -- 2.40.0