]> granicus.if.org Git - libvpx/commitdiff
Clean ups of the subpel search functions
authorDeb Mukherjee <debargha@google.com>
Tue, 6 Aug 2013 22:53:35 +0000 (15:53 -0700)
committerDeb Mukherjee <debargha@google.com>
Wed, 7 Aug 2013 00:23:50 +0000 (17:23 -0700)
Removes some unused code and speed features, and organizes the
interfaces for fractional mv step functions for use in new speed
features to come.

In the process a new speed feature - number of iterations per
step during the subpel search - is exposed.

No change when this parameter is set as the original value of 3.

Results:
subpel_iters_per_step = 3: baseline
subpel_iters_per_step = 2: psnr -0.067%, 1% speedup
subpel_iters_per_step = 1: psnr -0.331%, 3-4% speedup

Change-Id: I2eba8a21f6461be8caf56af04a5337257a5693a8

vp9/encoder/vp9_mbgraph.c
vp9/encoder/vp9_mcomp.c
vp9/encoder/vp9_mcomp.h
vp9/encoder/vp9_onyx_if.c
vp9/encoder/vp9_onyx_int.h
vp9/encoder/vp9_rdopt.c
vp9/encoder/vp9_temporal_filter.c

index 67b809f8dd5991136f06c4f7ee715f049fbf607f..314e195d251ee8cfb000ed38599e2e3447eed753 100644 (file)
@@ -59,7 +59,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
         x,
         dst_mv, ref_mv,
         x->errorperbit, &v_fn_ptr,
-        NULL, NULL,
+        0, cpi->sf.subpel_iters_per_step, NULL, NULL,
         & distortion, &sse);
   }
 
index 77d6554b0f84cf7b6f2efb348f708318f9f56244..014f54a49d5cd05144c03ea12323e52a2acedaa4 100644 (file)
@@ -245,13 +245,15 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
     },                                                                   \
     v = INT_MAX;)
 
-int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
-                                             int_mv *bestmv, int_mv *ref_mv,
-                                             int error_per_bit,
-                                             const vp9_variance_fn_ptr_t *vfp,
-                                             int *mvjcost, int *mvcost[2],
-                                             int *distortion,
-                                             unsigned int *sse1) {
+int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
+                                      int_mv *bestmv, int_mv *ref_mv,
+                                      int error_per_bit,
+                                      const vp9_variance_fn_ptr_t *vfp,
+                                      int forced_stop,
+                                      int iters_per_step,
+                                      int *mvjcost, int *mvcost[2],
+                                      int *distortion,
+                                      unsigned int *sse1) {
   uint8_t *z = x->plane[0].src.buf;
   int src_stride = x->plane[0].src.stride;
   MACROBLOCKD *xd = &x->e_mbd;
@@ -262,9 +264,9 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
   unsigned int left, right, up, down, diag;
   unsigned int sse;
   unsigned int whichdir;
-  unsigned int halfiters = 4;
-  unsigned int quarteriters = 4;
-  unsigned int eighthiters = 4;
+  unsigned int halfiters = iters_per_step;
+  unsigned int quarteriters = iters_per_step;
+  unsigned int eighthiters = iters_per_step;
   int thismse;
   int maxc, minc, maxr, minr;
   int y_stride;
@@ -289,7 +291,6 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
   tr = br;
   tc = bc;
 
-
   offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
 
   // central mv
@@ -303,7 +304,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
 
   // TODO: Each subsequent iteration checks at least one point in
   // common with the last iteration could be 2 ( if diag selected)
-  while (--halfiters) {
+  while (halfiters--) {
     // 1/2 pel
     CHECK_BETTER(left, tr, tc - hstep);
     CHECK_BETTER(right, tr, tc + hstep);
@@ -337,41 +338,46 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
 
   // TODO: Each subsequent iteration checks at least one point in common with
   // the last iteration could be 2 ( if diag selected) 1/4 pel
-  hstep >>= 1;
-  while (--quarteriters) {
-    CHECK_BETTER(left, tr, tc - hstep);
-    CHECK_BETTER(right, tr, tc + hstep);
-    CHECK_BETTER(up, tr - hstep, tc);
-    CHECK_BETTER(down, tr + hstep, tc);
 
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+  if (forced_stop != 2) {
+    hstep >>= 1;
+    while (quarteriters--) {
+      CHECK_BETTER(left, tr, tc - hstep);
+      CHECK_BETTER(right, tr, tc + hstep);
+      CHECK_BETTER(up, tr - hstep, tc);
+      CHECK_BETTER(down, tr + hstep, tc);
 
-    switch (whichdir) {
-      case 0:
-        CHECK_BETTER(diag, tr - hstep, tc - hstep);
-        break;
-      case 1:
-        CHECK_BETTER(diag, tr - hstep, tc + hstep);
-        break;
-      case 2:
-        CHECK_BETTER(diag, tr + hstep, tc - hstep);
-        break;
-      case 3:
-        CHECK_BETTER(diag, tr + hstep, tc + hstep);
-        break;
-    }
+      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
 
-    // no reason to check the same one again.
-    if (tr == br && tc == bc)
-      break;
+      switch (whichdir) {
+        case 0:
+          CHECK_BETTER(diag, tr - hstep, tc - hstep);
+          break;
+        case 1:
+          CHECK_BETTER(diag, tr - hstep, tc + hstep);
+          break;
+        case 2:
+          CHECK_BETTER(diag, tr + hstep, tc - hstep);
+          break;
+        case 3:
+          CHECK_BETTER(diag, tr + hstep, tc + hstep);
+          break;
+      }
 
-    tr = br;
-    tc = bc;
+      // no reason to check the same one again.
+      if (tr == br && tc == bc)
+        break;
+
+      tr = br;
+      tc = bc;
+    }
   }
 
-  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) {
+  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+      forced_stop == 0) {
     hstep >>= 1;
-    while (--eighthiters) {
+    while (eighthiters--) {
       CHECK_BETTER(left, tr, tc - hstep);
       CHECK_BETTER(right, tr, tc + hstep);
       CHECK_BETTER(up, tr - hstep, tc);
@@ -402,6 +408,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
       tc = bc;
     }
   }
+
   bestmv->as_mv.row = br;
   bestmv->as_mv.col = bc;
 
@@ -418,14 +425,17 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
     vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
               z, src_stride, &sse, second_pred)
 
-int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
-                                 int_mv *bestmv, int_mv *ref_mv,
-                                 int error_per_bit,
-                                 const vp9_variance_fn_ptr_t *vfp,
-                                 int *mvjcost, int *mvcost[2],
-                                 int *distortion,
-                                 unsigned int *sse1,
-                                 const uint8_t *second_pred, int w, int h) {
+int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
+                                           int_mv *bestmv, int_mv *ref_mv,
+                                           int error_per_bit,
+                                           const vp9_variance_fn_ptr_t *vfp,
+                                           int forced_stop,
+                                           int iters_per_step,
+                                           int *mvjcost, int *mvcost[2],
+                                           int *distortion,
+                                           unsigned int *sse1,
+                                           const uint8_t *second_pred,
+                                           int w, int h) {
   uint8_t *z = x->plane[0].src.buf;
   int src_stride = x->plane[0].src.stride;
   MACROBLOCKD *xd = &x->e_mbd;
@@ -436,9 +446,9 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
   unsigned int left, right, up, down, diag;
   unsigned int sse;
   unsigned int whichdir;
-  unsigned int halfiters = 4;
-  unsigned int quarteriters = 4;
-  unsigned int eighthiters = 4;
+  unsigned int halfiters = iters_per_step;
+  unsigned int quarteriters = iters_per_step;
+  unsigned int eighthiters = iters_per_step;
   int thismse;
   int maxc, minc, maxr, minr;
   int y_stride;
@@ -485,7 +495,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
 
   // Each subsequent iteration checks at least one point in
   // common with the last iteration could be 2 ( if diag selected)
-  while (--halfiters) {
+  while (halfiters--) {
     // 1/2 pel
     CHECK_BETTER(left, tr, tc - hstep);
     CHECK_BETTER(right, tr, tc + hstep);
@@ -519,41 +529,46 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
 
   // Each subsequent iteration checks at least one point in common with
   // the last iteration could be 2 ( if diag selected) 1/4 pel
-  hstep >>= 1;
-  while (--quarteriters) {
-    CHECK_BETTER(left, tr, tc - hstep);
-    CHECK_BETTER(right, tr, tc + hstep);
-    CHECK_BETTER(up, tr - hstep, tc);
-    CHECK_BETTER(down, tr + hstep, tc);
 
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
+  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+  if (forced_stop != 2) {
+    hstep >>= 1;
+    while (quarteriters--) {
+      CHECK_BETTER(left, tr, tc - hstep);
+      CHECK_BETTER(right, tr, tc + hstep);
+      CHECK_BETTER(up, tr - hstep, tc);
+      CHECK_BETTER(down, tr + hstep, tc);
 
-    switch (whichdir) {
-      case 0:
-        CHECK_BETTER(diag, tr - hstep, tc - hstep);
-        break;
-      case 1:
-        CHECK_BETTER(diag, tr - hstep, tc + hstep);
-        break;
-      case 2:
-        CHECK_BETTER(diag, tr + hstep, tc - hstep);
-        break;
-      case 3:
-        CHECK_BETTER(diag, tr + hstep, tc + hstep);
-        break;
-    }
+      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
 
-    // no reason to check the same one again.
-    if (tr == br && tc == bc)
-      break;
+      switch (whichdir) {
+        case 0:
+          CHECK_BETTER(diag, tr - hstep, tc - hstep);
+          break;
+        case 1:
+          CHECK_BETTER(diag, tr - hstep, tc + hstep);
+          break;
+        case 2:
+          CHECK_BETTER(diag, tr + hstep, tc - hstep);
+          break;
+        case 3:
+          CHECK_BETTER(diag, tr + hstep, tc + hstep);
+          break;
+      }
 
-    tr = br;
-    tc = bc;
+      // no reason to check the same one again.
+      if (tr == br && tc == bc)
+        break;
+
+      tr = br;
+      tc = bc;
+    }
   }
 
-  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)) {
+  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+      forced_stop == 0) {
     hstep >>= 1;
-    while (--eighthiters) {
+    while (eighthiters--) {
       CHECK_BETTER(left, tr, tc - hstep);
       CHECK_BETTER(right, tr, tc + hstep);
       CHECK_BETTER(up, tr - hstep, tc);
@@ -594,7 +609,6 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
   return besterr;
 }
 
-
 #undef MVC
 #undef PRE
 #undef DIST
@@ -603,627 +617,8 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
 #undef MIN
 #undef MAX
 
-int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
-                                 int_mv *bestmv, int_mv *ref_mv,
-                                 int error_per_bit,
-                                 const vp9_variance_fn_ptr_t *vfp,
-                                 int *mvjcost, int *mvcost[2], int *distortion,
-                                 unsigned int *sse1) {
-  int bestmse = INT_MAX;
-  int_mv startmv;
-  int_mv this_mv;
-  int_mv orig_mv;
-  int yrow_movedback = 0, ycol_movedback = 0;
-  uint8_t *z = x->plane[0].src.buf;
-  int src_stride = x->plane[0].src.stride;
-  int left, right, up, down, diag;
-  unsigned int sse;
-  int whichdir;
-  int thismse;
-  int y_stride;
-  MACROBLOCKD *xd = &x->e_mbd;
-
-  uint8_t *y = xd->plane[0].pre[0].buf +
-               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
-               bestmv->as_mv.col;
-  y_stride = xd->plane[0].pre[0].stride;
-
-  // central mv
-  bestmv->as_mv.row <<= 3;
-  bestmv->as_mv.col <<= 3;
-  startmv = *bestmv;
-  orig_mv = *bestmv;
-
-  // calculate central point error
-  bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
-  *distortion = bestmse;
-  bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-
-  // go left then right and check error
-  this_mv.as_mv.row = startmv.as_mv.row;
-  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
-  thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
-  left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (left < bestmse) {
-    *bestmv = this_mv;
-    bestmse = left;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  this_mv.as_mv.col += 8;
-  thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
-  right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                                error_per_bit);
-
-  if (right < bestmse) {
-    *bestmv = this_mv;
-    bestmse = right;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  // go up then down and check error
-  this_mv.as_mv.col = startmv.as_mv.col;
-  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
-  thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
-  up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
-
-  if (up < bestmse) {
-    *bestmv = this_mv;
-    bestmse = up;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  this_mv.as_mv.row += 8;
-  thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
-  down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (down < bestmse) {
-    *bestmv = this_mv;
-    bestmse = down;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-
-  // now check 1 more diagonal
-  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-  // for(whichdir =0;whichdir<4;whichdir++)
-  // {
-  this_mv = startmv;
-
-  switch (whichdir) {
-    case 0:
-      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
-      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
-      thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, src_stride,
-                                    &sse);
-      break;
-    case 1:
-      this_mv.as_mv.col += 4;
-      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
-      thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, src_stride,
-                                    &sse);
-      break;
-    case 2:
-      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
-      this_mv.as_mv.row += 4;
-      thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse);
-      break;
-    case 3:
-    default:
-      this_mv.as_mv.col += 4;
-      this_mv.as_mv.row += 4;
-      thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse);
-      break;
-  }
-
-  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (diag < bestmse) {
-    *bestmv = this_mv;
-    bestmse = diag;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-//  }
-
-
-  // time to check quarter pels.
-  if (bestmv->as_mv.row < startmv.as_mv.row) {
-    y -= y_stride;
-    yrow_movedback = 1;
-  }
-
-  if (bestmv->as_mv.col < startmv.as_mv.col) {
-    y--;
-    ycol_movedback = 1;
-  }
-
-  startmv = *bestmv;
-
-
-
-  // go left then right and check error
-  this_mv.as_mv.row = startmv.as_mv.row;
-
-  if (startmv.as_mv.col & 7) {
-    this_mv.as_mv.col = startmv.as_mv.col - 2;
-    thismse = vfp->svf(y, y_stride,
-                       SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                       z, src_stride, &sse);
-  } else {
-    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
-    thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
-                       src_stride, &sse);
-  }
-
-  left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (left < bestmse) {
-    *bestmv = this_mv;
-    bestmse = left;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  this_mv.as_mv.col += 4;
-  thismse = vfp->svf(y, y_stride,
-                     SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                     z, src_stride, &sse);
-  right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                                error_per_bit);
-
-  if (right < bestmse) {
-    *bestmv = this_mv;
-    bestmse = right;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  // go up then down and check error
-  this_mv.as_mv.col = startmv.as_mv.col;
-
-  if (startmv.as_mv.row & 7) {
-    this_mv.as_mv.row = startmv.as_mv.row - 2;
-    thismse = vfp->svf(y, y_stride,
-                       SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                       z, src_stride, &sse);
-  } else {
-    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
-    thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6),
-                       z, src_stride, &sse);
-  }
-
-  up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
-
-  if (up < bestmse) {
-    *bestmv = this_mv;
-    bestmse = up;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  this_mv.as_mv.row += 4;
-  thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                     z, src_stride, &sse);
-  down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-
-  if (down < bestmse) {
-    *bestmv = this_mv;
-    bestmse = down;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-
-  // now check 1 more diagonal
-  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-//  for(whichdir=0;whichdir<4;whichdir++)
-//  {
-  this_mv = startmv;
-
-  switch (whichdir) {
-    case 0:
-
-      if (startmv.as_mv.row & 7) {
-        this_mv.as_mv.row -= 2;
-
-        if (startmv.as_mv.col & 7) {
-          this_mv.as_mv.col -= 2;
-          thismse = vfp->svf(y, y_stride,
-                             SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                             z, src_stride, &sse);
-        } else {
-          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
-          thismse = vfp->svf(y - 1, y_stride,
-                             SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse);
-        }
-      } else {
-        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
-
-        if (startmv.as_mv.col & 7) {
-          this_mv.as_mv.col -= 2;
-          thismse = vfp->svf(y - y_stride, y_stride,
-                             SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse);
-        } else {
-          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
-          thismse = vfp->svf(y - y_stride - 1, y_stride,
-                             SP(6), SP(6), z, src_stride, &sse);
-        }
-      }
-
-      break;
-    case 1:
-      this_mv.as_mv.col += 2;
-
-      if (startmv.as_mv.row & 7) {
-        this_mv.as_mv.row -= 2;
-        thismse = vfp->svf(y, y_stride,
-                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                           z, src_stride, &sse);
-      } else {
-        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
-        thismse = vfp->svf(y - y_stride, y_stride,
-                           SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse);
-      }
-
-      break;
-    case 2:
-      this_mv.as_mv.row += 2;
-
-      if (startmv.as_mv.col & 7) {
-        this_mv.as_mv.col -= 2;
-        thismse = vfp->svf(y, y_stride,
-                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                           z, src_stride, &sse);
-      } else {
-        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
-        thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
-                           src_stride, &sse);
-      }
-
-      break;
-    case 3:
-      this_mv.as_mv.col += 2;
-      this_mv.as_mv.row += 2;
-      thismse = vfp->svf(y, y_stride,
-                         SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                         z, src_stride, &sse);
-      break;
-  }
-
-  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (diag < bestmse) {
-    *bestmv = this_mv;
-    bestmse = diag;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  if (!(xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv)))
-    return bestmse;
-
-  /* Now do 1/8th pixel */
-  if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) {
-    y -= y_stride;
-    yrow_movedback = 1;
-  }
-
-  if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) {
-    y--;
-    ycol_movedback = 1;
-  }
-
-  startmv = *bestmv;
-
-  // go left then right and check error
-  this_mv.as_mv.row = startmv.as_mv.row;
-
-  if (startmv.as_mv.col & 7) {
-    this_mv.as_mv.col = startmv.as_mv.col - 1;
-    thismse = vfp->svf(y, y_stride,
-                       SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                       z, src_stride, &sse);
-  } else {
-    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
-    thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row),
-                       z, src_stride, &sse);
-  }
-
-  left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (left < bestmse) {
-    *bestmv = this_mv;
-    bestmse = left;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  this_mv.as_mv.col += 2;
-  thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                     z, src_stride, &sse);
-  right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                                error_per_bit);
-
-  if (right < bestmse) {
-    *bestmv = this_mv;
-    bestmse = right;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  // go up then down and check error
-  this_mv.as_mv.col = startmv.as_mv.col;
-
-  if (startmv.as_mv.row & 7) {
-    this_mv.as_mv.row = startmv.as_mv.row - 1;
-    thismse = vfp->svf(y, y_stride,
-                       SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                       z, src_stride, &sse);
-  } else {
-    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
-    thismse = vfp->svf(y - y_stride, y_stride,
-                       SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
-  }
-
-  up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
-
-  if (up < bestmse) {
-    *bestmv = this_mv;
-    bestmse = up;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  this_mv.as_mv.row += 2;
-  thismse = vfp->svf(y, y_stride,
-                     SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                     z, src_stride, &sse);
-  down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (down < bestmse) {
-    *bestmv = this_mv;
-    bestmse = down;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  // now check 1 more diagonal
-  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-//  for(whichdir=0;whichdir<4;whichdir++)
-//  {
-  this_mv = startmv;
-
-  switch (whichdir) {
-    case 0:
-
-      if (startmv.as_mv.row & 7) {
-        this_mv.as_mv.row -= 1;
-
-        if (startmv.as_mv.col & 7) {
-          this_mv.as_mv.col -= 1;
-          thismse = vfp->svf(y, y_stride,
-                             SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                             z, src_stride, &sse);
-        } else {
-          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
-          thismse = vfp->svf(y - 1, y_stride,
-                             SP(7), SP(this_mv.as_mv.row),
-                             z, src_stride, &sse);
-        }
-      } else {
-        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
-
-        if (startmv.as_mv.col & 7) {
-          this_mv.as_mv.col -= 1;
-          thismse = vfp->svf(y - y_stride, y_stride,
-                             SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
-        } else {
-          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
-          thismse = vfp->svf(y - y_stride - 1, y_stride,
-                             SP(7), SP(7), z, src_stride, &sse);
-        }
-      }
-
-      break;
-    case 1:
-      this_mv.as_mv.col += 1;
-
-      if (startmv.as_mv.row & 7) {
-        this_mv.as_mv.row -= 1;
-        thismse = vfp->svf(y, y_stride,
-                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                           z, src_stride, &sse);
-      } else {
-        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
-        thismse = vfp->svf(y - y_stride, y_stride,
-                           SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
-      }
-
-      break;
-    case 2:
-      this_mv.as_mv.row += 1;
-
-      if (startmv.as_mv.col & 7) {
-        this_mv.as_mv.col -= 1;
-        thismse = vfp->svf(y, y_stride,
-                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                           z, src_stride, &sse);
-      } else {
-        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
-        thismse = vfp->svf(y - 1, y_stride,
-                           SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse);
-      }
-
-      break;
-    case 3:
-      this_mv.as_mv.col += 1;
-      this_mv.as_mv.row += 1;
-      thismse = vfp->svf(y, y_stride,
-                         SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                         z, src_stride, &sse);
-      break;
-  }
-
-  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (diag < bestmse) {
-    *bestmv = this_mv;
-    bestmse = diag;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  return bestmse;
-}
-
 #undef SP
 
-int vp9_find_best_half_pixel_step(MACROBLOCK *x,
-                                  int_mv *bestmv, int_mv *ref_mv,
-                                  int error_per_bit,
-                                  const vp9_variance_fn_ptr_t *vfp,
-                                  int *mvjcost, int *mvcost[2],
-                                  int *distortion,
-                                  unsigned int *sse1) {
-  int bestmse = INT_MAX;
-  int_mv startmv;
-  int_mv this_mv;
-  uint8_t *z = x->plane[0].src.buf;
-  int src_stride = x->plane[0].src.stride;
-  int left, right, up, down, diag;
-  unsigned int sse;
-  int whichdir;
-  int thismse;
-  int y_stride;
-  MACROBLOCKD *xd = &x->e_mbd;
-
-  uint8_t *y = xd->plane[0].pre[0].buf +
-      (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col;
-  y_stride = xd->plane[0].pre[0].stride;
-
-  // central mv
-  bestmv->as_mv.row <<= 3;
-  bestmv->as_mv.col <<= 3;
-  startmv = *bestmv;
-
-  // calculate central point error
-  bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
-  *distortion = bestmse;
-  bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-
-  // go left then right and check error
-  this_mv.as_mv.row = startmv.as_mv.row;
-  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
-  thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
-  left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (left < bestmse) {
-    *bestmv = this_mv;
-    bestmse = left;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  this_mv.as_mv.col += 8;
-  thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
-  right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                                error_per_bit);
-
-  if (right < bestmse) {
-    *bestmv = this_mv;
-    bestmse = right;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  // go up then down and check error
-  this_mv.as_mv.col = startmv.as_mv.col;
-  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
-  thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
-  up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
-
-  if (up < bestmse) {
-    *bestmv = this_mv;
-    bestmse = up;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  this_mv.as_mv.row += 8;
-  thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
-  down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (down < bestmse) {
-    *bestmv = this_mv;
-    bestmse = down;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  // now check 1 more diagonal -
-  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-  this_mv = startmv;
-
-  switch (whichdir) {
-    case 0:
-      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
-      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
-      thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride,
-                                    z, src_stride, &sse);
-      break;
-    case 1:
-      this_mv.as_mv.col += 4;
-      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
-      thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride,
-                                    z, src_stride, &sse);
-      break;
-    case 2:
-      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
-      this_mv.as_mv.row += 4;
-      thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse);
-      break;
-    case 3:
-    default:
-      this_mv.as_mv.col += 4;
-      this_mv.as_mv.row += 4;
-      thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse);
-      break;
-  }
-
-  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                               error_per_bit);
-
-  if (diag < bestmse) {
-    *bestmv = this_mv;
-    bestmse = diag;
-    *distortion = thismse;
-    *sse1 = sse;
-  }
-
-  return bestmse;
-}
-
 #define CHECK_BOUNDS(range) \
   {\
     all_in = 1;\
index 5d0c0e8ba08c0649a077c34047a018b378c8a8c5..b91e6fd346e1a8fdd51a00132ede24870af65891 100644 (file)
@@ -67,12 +67,19 @@ int vp9_square_search(MACROBLOCK *x,
                       int_mv *center_mv,
                       int_mv *best_mv);
 
-typedef int (fractional_mv_step_fp) (MACROBLOCK *x, int_mv
-  *bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
-  int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse);
-extern fractional_mv_step_fp vp9_find_best_sub_pixel_step_iteratively;
-extern fractional_mv_step_fp vp9_find_best_sub_pixel_step;
-extern fractional_mv_step_fp vp9_find_best_half_pixel_step;
+typedef int (fractional_mv_step_fp) (
+    MACROBLOCK *x,
+    int_mv *bestmv,
+    int_mv *ref_mv,
+    int error_per_bit,
+    const vp9_variance_fn_ptr_t *vfp,
+    int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
+    int iters_per_step,
+    int *mvjcost,
+    int *mvcost[2],
+    int *distortion,
+    unsigned int *sse);
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_iterative;
 
 typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x,
                                     int_mv *ref_mv, int sad_per_bit,
@@ -95,14 +102,17 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x,
                                        int *mvjcost, int *mvcost[2],
                                        int_mv *center_mv);
 
-int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
-                                 int_mv *bestmv, int_mv *ref_mv,
-                                 int error_per_bit,
-                                 const vp9_variance_fn_ptr_t *vfp,
-                                 int *mvjcost, int *mvcost[2],
-                                 int *distortion, unsigned int *sse1,
-                                 const uint8_t *second_pred,
-                                 int w, int h);
+int vp9_find_best_sub_pixel_comp_iterative(
+    MACROBLOCK *x,
+    int_mv *bestmv, int_mv *ref_mv,
+    int error_per_bit,
+    const vp9_variance_fn_ptr_t *vfp,
+    int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
+    int iters_per_step,
+    int *mvjcost, int *mvcost[2],
+    int *distortion, unsigned int *sse1,
+    const uint8_t *second_pred,
+    int w, int h);
 
 int vp9_refining_search_8p_c(MACROBLOCK *x,
                              int_mv *ref_mv, int error_per_bit,
index d1db91086d389291ac1502afc36e80dcfb8ebc6e..cf5ae5252d10bbe5be29bb93f6b97f9e031fb2a9 100644 (file)
@@ -713,9 +713,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->search_method = NSTEP;
   sf->auto_filter = 1;
   sf->recode_loop = 1;
-  sf->quarter_pixel_search = 1;
-  sf->half_pixel_search = 1;
-  sf->iterative_sub_pixel = 1;
+  sf->subpel_search_method = SUBPEL_ITERATIVE;
+  sf->subpel_iters_per_step = 3;
   sf->optimize_coefficients = !cpi->oxcf.lossless;
   sf->reduce_first_step_size = 0;
   sf->auto_mv_step_size = 0;
@@ -831,6 +830,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
             (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
         sf->auto_mv_step_size = 1;
         sf->search_method = SQUARE;
+        sf->subpel_iters_per_step = 2;
       }
       if (speed == 3) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -851,6 +851,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->disable_splitmv = 1;
         sf->auto_mv_step_size = 1;
         sf->search_method = BIGDIA;
+        sf->subpel_iters_per_step = 1;
       }
       if (speed == 4) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -875,6 +876,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
         sf->disable_splitmv = 1;
         sf->search_method = HEX;
+        sf->subpel_iters_per_step = 1;
       }
       /*
       if (speed == 2) {
@@ -918,12 +920,11 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
   cpi->mb.quantize_b_4x4      = vp9_regular_quantize_b_4x4;
 
-  if (cpi->sf.iterative_sub_pixel == 1) {
-    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step_iteratively;
-  } else if (cpi->sf.quarter_pixel_search) {
-    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step;
-  } else if (cpi->sf.half_pixel_search) {
-    cpi->find_fractional_mv_step = vp9_find_best_half_pixel_step;
+  if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) {
+    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative;
+  } else {
+    // TODO(debargha): Other methods to come
+    assert(0);
   }
 
   cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
index 7eae7007fe7836e4f1d7cf8e2cee6d6857af1713..12491076987bbc5ce6fdf1dd20df0fa326222509 100644 (file)
@@ -232,14 +232,18 @@ typedef enum {
   FLAG_SKIP_INTRA_LOWVAR = 32,
 } MODE_SEARCH_SKIP_LOGIC;
 
+typedef enum {
+  SUBPEL_ITERATIVE = 0,
+  // Other methods to come
+} SUBPEL_SEARCH_METHODS;
+
 typedef struct {
   int RD;
   SEARCH_METHODS search_method;
   int auto_filter;
   int recode_loop;
-  int iterative_sub_pixel;
-  int half_pixel_search;
-  int quarter_pixel_search;
+  SUBPEL_SEARCH_METHODS subpel_search_method;
+  int subpel_iters_per_step;
   int thresh_mult[MAX_MODES];
   int max_step_search_steps;
   int reduce_first_step_size;
index 238c9815bcf3195e70abce07f6c68fdf6bfc6305..90d35f86a9e109079ab5f4166cd306d22ad6d4e0 100644 (file)
@@ -1967,6 +1967,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
             unsigned int sse;
             cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
                                          bsi->ref_mv, x->errorperbit, v_fn_ptr,
+                                         0, cpi->sf.subpel_iters_per_step,
                                          x->nmvjointcost, x->mvcost,
                                          &distortion, &sse);
 
@@ -2547,6 +2548,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
     cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
                                  x->errorperbit,
                                  &cpi->fn_ptr[block_size],
+                                 0, cpi->sf.subpel_iters_per_step,
                                  x->nmvjointcost, x->mvcost,
                                  &dis, &sse);
   }
@@ -2673,13 +2675,15 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
       int dis; /* TODO: use dis in distortion calculation later. */
       unsigned int sse;
 
-      bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
-                                             &ref_mv[id],
-                                             x->errorperbit,
-                                             &cpi->fn_ptr[block_size],
-                                             x->nmvjointcost, x->mvcost,
-                                             &dis, &sse, second_pred,
-                                             pw, ph);
+      bestsme = vp9_find_best_sub_pixel_comp_iterative(
+          x, &tmp_mv,
+          &ref_mv[id],
+          x->errorperbit,
+          &cpi->fn_ptr[block_size],
+          0, cpi->sf.subpel_iters_per_step,
+          x->nmvjointcost, x->mvcost,
+          &dis, &sse, second_pred,
+          pw, ph);
     }
 
     if (id)
index 11d601bb36910120de8bc1004300bdb80f30c100..3052e8f70f07281cf58afa383344eadb881a3fad 100644 (file)
@@ -170,6 +170,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
                                            &best_ref_mv1,
                                            x->errorperbit,
                                            &cpi->fn_ptr[BLOCK_16X16],
+                                           0, cpi->sf.subpel_iters_per_step,
                                            NULL, NULL,
                                            &distortion, &sse);
   }