granicus.if.org Git - libvpx/commitdiff
Extra round of subpel MV search around second best full-pixel MV
author hui su <huisu@google.com>
Mon, 11 Jul 2016 17:46:17 +0000 (10:46 -0700)
committer hui su <huisu@google.com>
Mon, 18 Jul 2016 19:25:24 +0000 (12:25 -0700)
Keep track of the best and second best full pixel motion vector
candidates, and do subpel search around both of them.

Compression improvement:
lowres 0.22%   midres 0.23%   hdres 0.18%

No noticeable encoding speed changes observed on lowres test clips.

Change-Id: I5f4df2a03d1db061cfdfdba6138b27e9ea91f089

vp10/common/vp10_rtcd_defs.pl
vp10/encoder/block.h
vp10/encoder/mcomp.c
vp10/encoder/mcomp.h
vp10/encoder/rdopt.c

index 51b674b8d97ba050e11c0773928f23231aacb70c..8123f6ff79fc0f9bd49732216c9c3747bc8024bf 100644 (file)
@@ -683,7 +683,7 @@ specialize qw/vp10_full_search_sad sse3 sse4_1/;
 $vp10_full_search_sad_sse3=vp10_full_search_sadx3;
 $vp10_full_search_sad_sse4_1=vp10_full_search_sadx8;
 
-add_proto qw/int vp10_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp10_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp10_diamond_search_sad/;
 
 add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv";
index c39aa21c4ee2a5d4b9954489695052423ba1ac96..41d78fcf5d7c535c9e49e99c312a68b672813cfa 100644 (file)
@@ -164,6 +164,8 @@ struct macroblock {
 
   // Store the best motion vector during motion search
   int_mv best_mv;
+  // Store the second best motion vector during full-pixel motion search
+  int_mv second_best_mv;
 
   // Strong color activity detection. Used in RTC coding mode to enhance
   // the visual quality at the boundary of moving color objects.
index afbf3e9fd8a869341e0762bc2a80f655959cc418..fade67fa0e9b810ecf3a817cd047e1879341b5cb 100644 (file)
@@ -1479,7 +1479,7 @@ static int fast_dia_search(MACROBLOCK *x,
 
 // Exhuastive motion search around a given centre position with a given
 // step size.
-static int exhuastive_mesh_search(const MACROBLOCK *x,
+static int exhuastive_mesh_search(MACROBLOCK *x,
                                   MV *ref_mv, MV *best_mv,
                                   int range, int step, int sad_per_bit,
                                   const vp10_variance_fn_ptr_t *fn_ptr,
@@ -1517,6 +1517,7 @@ static int exhuastive_mesh_search(const MACROBLOCK *x,
           sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
           if (sad < best_sad) {
             best_sad = sad;
+            x->second_best_mv.as_mv = *best_mv;
             *best_mv = mv;
           }
         }
@@ -1539,6 +1540,7 @@ static int exhuastive_mesh_search(const MACROBLOCK *x,
                   mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
               if (sad < best_sad) {
                 best_sad = sad;
+                x->second_best_mv.as_mv = *best_mv;
                 *best_mv = mv;
               }
             }
@@ -1552,6 +1554,7 @@ static int exhuastive_mesh_search(const MACROBLOCK *x,
               sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
               if (sad < best_sad) {
                 best_sad = sad;
+                x->second_best_mv.as_mv = *best_mv;
                 *best_mv = mv;
               }
             }
@@ -1564,7 +1567,7 @@ static int exhuastive_mesh_search(const MACROBLOCK *x,
   return best_sad;
 }
 
-int vp10_diamond_search_sad_c(const MACROBLOCK *x,
+int vp10_diamond_search_sad_c(MACROBLOCK *x,
                               const search_site_config *cfg,
                               MV *ref_mv, MV *best_mv, int search_param,
                               int sad_per_bit, int *num00,
@@ -1673,6 +1676,7 @@ int vp10_diamond_search_sad_c(const MACROBLOCK *x,
       }
     }
     if (best_site != last_site) {
+      x->second_best_mv.as_mv = *best_mv;
       best_mv->row += ss[best_site].mv.row;
       best_mv->col += ss[best_site].mv.col;
       best_address += ss[best_site].offset;
@@ -2234,11 +2238,11 @@ int vp10_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
   return best_sad;
 }
 
-int vp10_refining_search_sad(const MACROBLOCK *x,
-                            MV *ref_mv, int error_per_bit,
-                            int search_range,
-                            const vp10_variance_fn_ptr_t *fn_ptr,
-                            const MV *center_mv) {
+int vp10_refining_search_sad(MACROBLOCK *x,
+                             MV *ref_mv, int error_per_bit,
+                             int search_range,
+                             const vp10_variance_fn_ptr_t *fn_ptr,
+                             const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   const struct buf_2d *const what = &x->plane[0].src;
@@ -2302,6 +2306,7 @@ int vp10_refining_search_sad(const MACROBLOCK *x,
     if (best_site == -1) {
       break;
     } else {
+      x->second_best_mv.as_mv = *ref_mv;
       ref_mv->row += neighbors[best_site].row;
       ref_mv->col += neighbors[best_site].col;
       best_address = get_buf_from_mv(in_what, ref_mv);
index f97f6c72bfff5684805084b2ec46d430aa501667..98e2b189a7988fb5701caabb5cd6a5c9b56e40cc 100644 (file)
@@ -66,11 +66,11 @@ struct SPEED_FEATURES;
 
 int vp10_init_search_range(int size);
 
-int vp10_refining_search_sad(const struct macroblock *x,
-                            struct mv *ref_mv,
-                            int sad_per_bit, int distance,
-                            const vp10_variance_fn_ptr_t *fn_ptr,
-                            const struct mv *center_mv);
+int vp10_refining_search_sad(struct macroblock *x,
+                             struct mv *ref_mv,
+                             int sad_per_bit, int distance,
+                             const vp10_variance_fn_ptr_t *fn_ptr,
+                             const struct mv *center_mv);
 
 // Runs sequence of diamond searches in smaller steps for RD.
 int vp10_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
@@ -122,7 +122,7 @@ typedef int (*vp10_full_search_fn_t)(const MACROBLOCK *x,
                                      const vp10_variance_fn_ptr_t *fn_ptr,
                                      const MV *center_mv, MV *best_mv);
 
-typedef int (*vp10_diamond_search_fn_t)(const MACROBLOCK *x,
+typedef int (*vp10_diamond_search_fn_t)(MACROBLOCK *x,
                                         const search_site_config *cfg,
                                         MV *ref_mv, MV *best_mv,
                                         int search_param, int sad_per_bit,
index 52dcf351b59bedd97acc2739221fc10d575203b5..042bd0cbcb1a29dcca7f26a2c9d0f994f38d4fd2 100644 (file)
@@ -5237,6 +5237,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
 
           vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
 
+          x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
+
 #if CONFIG_REF_MV
           vp10_set_mvcost(x, mbmi->ref_frame[0]);
 #endif
@@ -5248,6 +5250,10 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
           if (bestsme < INT_MAX) {
             int distortion;
             if (cpi->sf.use_upsampled_references) {
+              int best_mv_var;
+              const int try_second =
+                  x->second_best_mv.as_int != INVALID_MV &&
+                  x->second_best_mv.as_int != x->best_mv.as_int;
               const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
               const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
               // Use up-sampled reference frames.
@@ -5267,17 +5273,52 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
                   &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, i,
                   pd->pre[0].stride)) << 3];
 
-              cpi->find_fractional_mv_step(
-                  x, &bsi->ref_mv[0]->as_mv,
-                  cm->allow_high_precision_mv,
-                  x->errorperbit, &cpi->fn_ptr[bsize],
-                  cpi->sf.mv.subpel_force_stop,
-                  cpi->sf.mv.subpel_iters_per_step,
-                  cond_cost_list(cpi, cost_list),
-                  x->nmvjointcost, x->mvcost,
-                  &distortion,
-                  &x->pred_sse[mbmi->ref_frame[0]],
-                  NULL, pw, ph, 1);
+              best_mv_var =
+                  cpi->find_fractional_mv_step(x, &bsi->ref_mv[0]->as_mv,
+                                               cm->allow_high_precision_mv,
+                                               x->errorperbit,
+                                               &cpi->fn_ptr[bsize],
+                                               cpi->sf.mv.subpel_force_stop,
+                                               cpi->sf.mv.subpel_iters_per_step,
+                                               cond_cost_list(cpi, cost_list),
+                                               x->nmvjointcost, x->mvcost,
+                                               &distortion,
+                                               &x->pred_sse[mbmi->ref_frame[0]],
+                                               NULL, pw, ph, 1);
+
+              if (try_second) {
+                int this_var;
+                MV best_mv = x->best_mv.as_mv;
+                const MV ref_mv = bsi->ref_mv[0]->as_mv;
+                const int minc = VPXMAX(x->mv_col_min * 8, ref_mv.col - MV_MAX);
+                const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv.col + MV_MAX);
+                const int minr = VPXMAX(x->mv_row_min * 8, ref_mv.row - MV_MAX);
+                const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv.row + MV_MAX);
+
+                x->best_mv = x->second_best_mv;
+                if (x->best_mv.as_mv.row * 8 <= maxr &&
+                    x->best_mv.as_mv.row * 8 >= minr &&
+                    x->best_mv.as_mv.col * 8 <= maxc &&
+                    x->best_mv.as_mv.col * 8 >= minc) {
+                  this_var =
+                      cpi->find_fractional_mv_step(x, &bsi->ref_mv[0]->as_mv,
+                                                   cm->allow_high_precision_mv,
+                                                   x->errorperbit,
+                                                   &cpi->fn_ptr[bsize],
+                                                   cpi->sf.mv.subpel_force_stop,
+                                                   cpi->
+                                                   sf.mv.subpel_iters_per_step,
+                                                   cond_cost_list(cpi,
+                                                                  cost_list),
+                                                   x->nmvjointcost, x->mvcost,
+                                                   &distortion,
+                                                   &x->pred_sse[mbmi->
+                                                                ref_frame[0]],
+                                                   NULL, pw, ph, 1);
+                  if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
+                  x->best_mv.as_mv = best_mv;
+                }
+              }
 
               // Restore the reference frames.
               pd->pre[0] = backup_pred;
@@ -5979,6 +6020,8 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
   mvp_full.col >>= 3;
   mvp_full.row >>= 3;
 
+  x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
+
   bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                                    cond_cost_list(cpi, cost_list),
                                    &ref_mv, INT_MAX, 1);
@@ -5991,6 +6034,10 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
   if (bestsme < INT_MAX) {
     int dis;  /* TODO: use dis in distortion calculation later. */
     if (cpi->sf.use_upsampled_references) {
+      int best_mv_var;
+      const int try_second =
+          x->second_best_mv.as_int != INVALID_MV &&
+          x->second_best_mv.as_int != x->best_mv.as_int;
       const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
       const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
       // Use up-sampled reference frames.
@@ -6003,16 +6050,46 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                        upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
                        NULL, pd->subsampling_x, pd->subsampling_y);
 
-      bestsme = cpi->find_fractional_mv_step(x, &ref_mv,
-                                             cm->allow_high_precision_mv,
-                                             x->errorperbit,
-                                             &cpi->fn_ptr[bsize],
-                                             cpi->sf.mv.subpel_force_stop,
-                                             cpi->sf.mv.subpel_iters_per_step,
-                                             cond_cost_list(cpi, cost_list),
-                                             x->nmvjointcost, x->mvcost,
-                                             &dis, &x->pred_sse[ref], NULL,
-                                             pw, ph, 1);
+      best_mv_var =
+          cpi->find_fractional_mv_step(x, &ref_mv,
+                                       cm->allow_high_precision_mv,
+                                       x->errorperbit,
+                                       &cpi->fn_ptr[bsize],
+                                       cpi->sf.mv.subpel_force_stop,
+                                       cpi->sf.mv.subpel_iters_per_step,
+                                       cond_cost_list(cpi, cost_list),
+                                       x->nmvjointcost, x->mvcost,
+                                       &dis, &x->pred_sse[ref], NULL,
+                                       pw, ph, 1);
+
+      if (try_second) {
+        const int minc = VPXMAX(x->mv_col_min * 8, ref_mv.col - MV_MAX);
+        const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv.col + MV_MAX);
+        const int minr = VPXMAX(x->mv_row_min * 8, ref_mv.row - MV_MAX);
+        const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv.row + MV_MAX);
+        int this_var;
+        MV best_mv = x->best_mv.as_mv;
+
+        x->best_mv = x->second_best_mv;
+        if (x->best_mv.as_mv.row * 8 <= maxr &&
+            x->best_mv.as_mv.row * 8 >= minr &&
+            x->best_mv.as_mv.col * 8 <= maxc &&
+            x->best_mv.as_mv.col * 8 >= minc) {
+          this_var =
+              cpi->find_fractional_mv_step(x, &ref_mv,
+                                           cm->allow_high_precision_mv,
+                                           x->errorperbit,
+                                           &cpi->fn_ptr[bsize],
+                                           cpi->sf.mv.subpel_force_stop,
+                                           cpi->sf.mv.subpel_iters_per_step,
+                                           cond_cost_list(cpi, cost_list),
+                                           x->nmvjointcost, x->mvcost,
+                                           &dis, &x->pred_sse[ref], NULL,
+                                           pw, ph, 1);
+          if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
+          x->best_mv.as_mv = best_mv;
+        }
+      }
 
       // Restore the reference frames.
       pd->pre[ref_idx] = backup_pred;