Speed up motion estimation using small partitions' results (experiment)

author Yunqing Wang <yunqingwang@google.com>

Wed, 3 Jul 2013 21:43:23 +0000 (14:43 -0700)

committer Yunqing Wang <yunqingwang@google.com>

Wed, 17 Jul 2013 16:11:47 +0000 (09:11 -0700)
author Yunqing Wang <yunqingwang@google.com>
Wed, 3 Jul 2013 21:43:23 +0000 (14:43 -0700)
committer Yunqing Wang <yunqingwang@google.com>
Wed, 17 Jul 2013 16:11:47 +0000 (09:11 -0700)
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h

index ae9f0aaa7b8d60d8b884468d84d1c2d2cf06390e..b4c06f5dd5c195c316a9f594453fce5a43f5bfb2 100644 (file)
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -143,6 +143,11 @@ struct macroblock {
    int rd_search;
    int skip_encode;
  
+  // Used to store sub partition's choices.
+  int fast_ms;
+  int_mv pred_mv;
+  int subblock_ref;
+
    // TODO(jingning): Need to refactor the structure arrays that buffers the
    // coding mode decisions of each partition type.
    PICK_MODE_CONTEXT ab4x4_context[4][4][4];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c

index 48c1b3361e650758f4e4dc39c08409557669d2e1..3dd235a1f9b1a7b41cc70944b731957aa3463077 100644 (file)
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1466,6 +1466,138 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
        restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
      }
    }
+
+  x->fast_ms = 0;
+  x->pred_mv.as_int = 0;
+  x->subblock_ref = 0;
+
+  // Use 4 subblocks' motion estimation results to speed up current
+  // partition's checking.
+  if (cpi->sf.using_small_partition_info) {
+    // Only use 8x8 result for non HD videos.
+    // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
+    int use_8x8 = 1;
+
+    if (cm->frame_type && !cpi->is_src_frame_alt_ref &&
+        ((use_8x8 && bsize == BLOCK_SIZE_MB16X16) ||
+        bsize == BLOCK_SIZE_SB32X32 || bsize == BLOCK_SIZE_SB64X64)) {
+      int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
+
+      if (bsize == BLOCK_SIZE_MB16X16) {
+        ref0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.
+            ref_frame[0];
+        ref1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.
+            ref_frame[0];
+        ref2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.
+            ref_frame[0];
+        ref3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.
+            ref_frame[0];
+      } else if (bsize == BLOCK_SIZE_SB32X32) {
+        ref0 = x->mb_context[xd->sb_index][0].mic.mbmi.ref_frame[0];
+        ref1 = x->mb_context[xd->sb_index][1].mic.mbmi.ref_frame[0];
+        ref2 = x->mb_context[xd->sb_index][2].mic.mbmi.ref_frame[0];
+        ref3 = x->mb_context[xd->sb_index][3].mic.mbmi.ref_frame[0];
+      } else if (bsize == BLOCK_SIZE_SB64X64) {
+        ref0 = x->sb32_context[0].mic.mbmi.ref_frame[0];
+        ref1 = x->sb32_context[1].mic.mbmi.ref_frame[0];
+        ref2 = x->sb32_context[2].mic.mbmi.ref_frame[0];
+        ref3 = x->sb32_context[3].mic.mbmi.ref_frame[0];
+      }
+
+      // Currently, only consider 4 inter ref frames.
+      if (ref0 && ref1 && ref2 && ref3) {
+        int16_t mvr0 = 0, mvc0 = 0, mvr1 = 0, mvc1 = 0, mvr2 = 0, mvc2 = 0,
+            mvr3 = 0, mvc3 = 0;
+        int d01, d23, d02, d13;  // motion vector distance between 2 blocks
+
+        // Get each subblock's motion vectors.
+        if (bsize == BLOCK_SIZE_MB16X16) {
+          mvr0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+              as_mv.col;
+        } else if (bsize == BLOCK_SIZE_SB32X32) {
+          mvr0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.row;
+          mvc0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.col;
+          mvr1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.row;
+          mvc1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.col;
+          mvr2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.row;
+          mvc2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.col;
+          mvr3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.row;
+          mvc3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.col;
+        } else if (bsize == BLOCK_SIZE_SB64X64) {
+          mvr0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.row;
+          mvc0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.col;
+          mvr1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.row;
+          mvc1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.col;
+          mvr2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.row;
+          mvc2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.col;
+          mvr3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.row;
+          mvc3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.col;
+        }
+
+        // Adjust sign if ref is alt_ref
+        if (cm->ref_frame_sign_bias[ref0]) {
+          mvr0 *= -1;
+          mvc0 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref1]) {
+          mvr1 *= -1;
+          mvc1 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref2]) {
+          mvr2 *= -1;
+          mvc2 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref3]) {
+          mvr3 *= -1;
+          mvc3 *= -1;
+        }
+
+        // Calculate mv distances.
+        d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
+        d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
+        d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
+        d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
+
+        if (d01 < 24 && d23 < 24 && d02 < 24 && d13 < 24) {
+          // Set fast motion search level.
+          x->fast_ms = 1;
+
+          // Calculate prediction MV
+          x->pred_mv.as_mv.row = (mvr0 + mvr1 + mvr2 + mvr3) >> 2;
+          x->pred_mv.as_mv.col = (mvc0 + mvc1 + mvc2 + mvc3) >> 2;
+
+          if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
+              d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
+            // Set fast motion search level.
+            x->fast_ms = 2;
+
+            if (!d01 && !d23 && !d02 && !d13) {
+              x->fast_ms = 3;
+              x->subblock_ref = ref0;
+            }
+          }
+        }
+      }
+    }
+  }
+
    if (!cpi->sf.use_partitions_less_than
        || (cpi->sf.use_partitions_less_than
            && bsize <= cpi->sf.less_than_block_size)) {
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c

index 5ba9a41de450c815c0de1946e5cf391e50411b06..9042111f8dc278587234dfe77a56d87753b867df 100644 (file)
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -723,7 +723,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
    sf->use_rd_breakout = 0;
    sf->skip_encode_sb = 0;
    sf->use_uv_intra_rd_estimate = 0;
-
+  sf->using_small_partition_info = 0;
    // Skip any mode not chosen at size < X for all sizes > X
    // Hence BLOCK_SIZE_SB64X64 (skip is off)
    sf->unused_mode_skip_lvl = BLOCK_SIZE_SB64X64;
@@ -795,6 +795,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
          sf->use_rd_breakout = 1;
          sf->skip_encode_sb = 1;
          sf->use_uv_intra_rd_estimate = 1;
+        sf->using_small_partition_info = 1;
        }
        if (speed == 3) {
          sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h

index f63800c8d95df5e2af395af458532f7779e0b3ea..19b1e3af5484f65000bd2098694c73e6bde191df 100644 (file)
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -268,6 +268,7 @@ typedef struct {
    int adjust_partitioning_from_last_frame;
    int last_partitioning_redo_frequency;
    int disable_splitmv;
+  int using_small_partition_info;
  
    // Implements various heuristics to skip searching modes
    // The heuristics selected are based on  flags
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c

index 7d44250a21e3cc89b615c42eba875fcfe6b440ac..eac7b2a19829cb3a7cfefc087b9eb7547cae3e8c 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2334,6 +2334,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                   int mi_row, int mi_col,
                                   int_mv *tmp_mv, int *rate_mv) {
    MACROBLOCKD *xd = &x->e_mbd;
+  VP9_COMMON *cm = &cpi->common;
    MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
    struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
    int bestsme = INT_MAX;
@@ -2364,18 +2365,37 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
  
    vp9_clamp_mv_min_max(x, &ref_mv);
  
-  // Work out the size of the first step in the mv step search.
-  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
-  if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
-    step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
+  // Adjust search parameters based on small partitions' result.
+  if (x->fast_ms) {
+    // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
+    // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
+    // adjust search range
+    step_param = 6;
+    if (x->fast_ms > 1)
+      step_param = 8;
+
+    // Get prediction MV.
+    mvp_full.as_int = x->pred_mv.as_int;
+
+    // Adjust MV sign if needed.
+    if (cm->ref_frame_sign_bias[ref]) {
+      mvp_full.as_mv.col *= -1;
+      mvp_full.as_mv.row *= -1;
+    }
    } else {
-    step_param = vp9_init_search_range(
-                   cpi, MIN(cpi->common.width, cpi->common.height));
-  }
+    // Work out the size of the first step in the mv step search.
+    // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+    if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+      step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
+    } else {
+      step_param = vp9_init_search_range(
+                     cpi, MIN(cpi->common.width, cpi->common.height));
+    }
  
-  // mvp_full.as_int = ref_mv[0].as_int;
-  mvp_full.as_int =
-      mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
+    // mvp_full.as_int = ref_mv[0].as_int;
+    mvp_full.as_int =
+        mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
+  }
  
    mvp_full.as_mv.col >>= 3;
    mvp_full.as_mv.row >>= 3;
@@ -3113,9 +3133,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
    }
  
    // If intra is not masked off then get uv intra mode rd.
-  if (!cpi->sf.use_avoid_tested_higherror
+  if (x->fast_ms < 2 && (!cpi->sf.use_avoid_tested_higherror
        || (cpi->sf.use_avoid_tested_higherror
-          && (ref_frame_mask & (1 << INTRA_FRAME)))) {
+          && (ref_frame_mask & (1 << INTRA_FRAME))))) {
      // Note that the enumerator TXFM_MODE "matches" TX_SIZE.
      // Eg. ONLY_4X4 = TX_4X4, ALLOW_8X8 = TX_8X8 etc such that the MIN
      // operation below correctly constrains max_uvtxfm_size.
@@ -3194,6 +3214,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
  
      x->skip = 0;
  
+    // Skip some checking based on small partitions' result.
+    if (x->fast_ms > 1 && !ref_frame)
+      continue;
+    if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
+      continue;
+
      if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_SIZE_SB8X8) {
        if (!(ref_frame_mask & (1 << ref_frame))) {
          continue;
author	Yunqing Wang <yunqingwang@google.com>
	Wed, 3 Jul 2013 21:43:23 +0000 (14:43 -0700)
committer	Yunqing Wang <yunqingwang@google.com>
	Wed, 17 Jul 2013 16:11:47 +0000 (09:11 -0700)
vp9/encoder/vp9_block.h		patch \| blob \| history
vp9/encoder/vp9_encodeframe.c		patch \| blob \| history
vp9/encoder/vp9_onyx_if.c		patch \| blob \| history
vp9/encoder/vp9_onyx_int.h		patch \| blob \| history
vp9/encoder/vp9_rdopt.c		patch \| blob \| history