From 33cc1bd21dd56ad72241830bab760c558ecc3870 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Tue, 12 Jan 2016 15:06:59 -0800 Subject: [PATCH] Generate compound reference motion vector This commit allows the codec to add motion vector pairs into the candidate list. It further improves the compression performance by 0.1% across derf, hevcmr, stdhd, and hevchr sets without adding encode/decode time. Change-Id: I88d36da25a2a89bb506d411844af667081eba98b --- vp10/common/blockd.h | 6 ++- vp10/common/mvref_common.c | 30 ++++++------ vp10/common/mvref_common.h | 10 ++++ vp10/decoder/decodemv.c | 30 ++++++++++-- vp10/encoder/block.h | 8 ++-- vp10/encoder/encoder.c | 1 + vp10/encoder/rdopt.c | 94 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 153 insertions(+), 26 deletions(-) diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 7b951486b..de54141b1 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -85,6 +85,8 @@ typedef int8_t MV_REFERENCE_FRAME; #if CONFIG_REF_MV #define MODE_CTX_REF_FRAMES (MAX_REF_FRAMES + (ALTREF_FRAME - LAST_FRAME)) +#else +#define MODE_CTX_REF_FRAMES MAX_REF_FRAMES #endif typedef struct { @@ -263,8 +265,8 @@ typedef struct macroblockd { uint8_t n8_w, n8_h; #if CONFIG_REF_MV - uint8_t ref_mv_count[MAX_REF_FRAMES]; - CANDIDATE_MV ref_mv_stack[MAX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; + uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]; + CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; uint8_t is_sec_rect; #endif diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c index cd1ea38b6..e6db8187a 100644 --- a/vp10/common/mvref_common.c +++ b/vp10/common/mvref_common.c @@ -313,7 +313,8 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, nearest_refmv_count = *refmv_count; - if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame) { + if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame + && rf[1] == NONE) { int ref; int blk_row, blk_col; @@ -460,10 +461,19 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, } } - for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *refmv_count); ++idx) { - mv_ref_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int; - clamp_mv_ref(&mv_ref_list[idx].as_mv, - xd->n8_w << 3, xd->n8_h << 3, xd); + if (rf[1] > NONE) { + for (idx = 0; idx < *refmv_count; ++idx) { + clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, + xd->n8_w << 3 , xd->n8_h << 3, xd); + clamp_mv_ref(&ref_mv_stack[idx].comp_mv.as_mv, + xd->n8_w << 3 , xd->n8_h << 3, xd); + } + } else { + for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *refmv_count); ++idx) { + mv_ref_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int; + clamp_mv_ref(&mv_ref_list[idx].as_mv, + xd->n8_w << 3, xd->n8_h << 3, xd); + } } } #endif @@ -632,16 +642,6 @@ void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd, #endif } -static void lower_mv_precision(MV *mv, int allow_hp) { - const int use_hp = allow_hp && vp10_use_mv_hp(mv); - if (!use_hp) { - if (mv->row & 1) - mv->row += (mv->row > 0 ? -1 : 1); - if (mv->col & 1) - mv->col += (mv->col > 0 ? 
-1 : 1); - } -} - void vp10_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv) { diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h index 1f388c19e..24bde6cbc 100644 --- a/vp10/common/mvref_common.h +++ b/vp10/common/mvref_common.h @@ -195,6 +195,16 @@ static INLINE int is_inside(const TileInfo *const tile, mi_col + mi_pos->col >= tile->mi_col_end); } +static INLINE void lower_mv_precision(MV *mv, int allow_hp) { + const int use_hp = allow_hp && vp10_use_mv_hp(mv); + if (!use_hp) { + if (mv->row & 1) + mv->row += (mv->row > 0 ? -1 : 1); + if (mv->col & 1) + mv->col += (mv->col > 0 ? -1 : 1); + } +} + #if CONFIG_REF_MV static int8_t vp10_ref_frame_type(const MV_REFERENCE_FRAME *const rf) { if (rf[1] > INTRA_FRAME) diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index f4386e488..997c19e67 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c @@ -824,9 +824,9 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi, const BLOCK_SIZE bsize = mbmi->sb_type; const int allow_hp = cm->allow_high_precision_mv; int_mv nearestmv[2], nearmv[2]; - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; + int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; int ref, is_compound; - int16_t inter_mode_ctx[MAX_REF_FRAMES]; + int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES]; int16_t mode_ctx = 0; MV_REFERENCE_FRAME ref_frame; @@ -845,7 +845,7 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi, &ref_buf->sf); } - for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) { + for (ref_frame = LAST_FRAME; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) { vp10_find_mv_refs(cm, xd, mi, ref_frame, #if CONFIG_REF_MV &xd->ref_mv_count[ref_frame], @@ -855,11 +855,11 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi, mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx); } - mode_ctx = inter_mode_ctx[mbmi->ref_frame[0]]; - #if CONFIG_REF_MV mode_ctx = vp10_mode_context_analyzer(inter_mode_ctx, mbmi->ref_frame, bsize, -1); +#else + mode_ctx = inter_mode_ctx[mbmi->ref_frame[0]]; #endif if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { @@ -881,6 +881,26 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi, } } +#if CONFIG_REF_MV + if (is_compound && bsize >= BLOCK_8X8 && mbmi->mode != NEWMV && + mbmi->mode != ZEROMV) { + uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); + + if (xd->ref_mv_count[ref_frame_type] > 1) { + int i; + nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv; + nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv; + nearmv[0] = xd->ref_mv_stack[ref_frame_type][1].this_mv; + nearmv[1] = xd->ref_mv_stack[ref_frame_type][1].comp_mv; + + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { + lower_mv_precision(&nearestmv[i].as_mv, allow_hp); + lower_mv_precision(&nearmv[i].as_mv, allow_hp); + } + } + } +#endif + #if !CONFIG_EXT_INTERP mbmi->interp_filter = (cm->interp_filter == SWITCHABLE) ? 
read_switchable_interp_filter(cm, xd, r) diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h index 1383c19e2..597f0d7d6 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h @@ -51,11 +51,11 @@ typedef unsigned int vp10_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][2] [COEFF_CONTEXTS][ENTROPY_TOKENS]; typedef struct { - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; - int16_t mode_context[MAX_REF_FRAMES]; + int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; + int16_t mode_context[MODE_CTX_REF_FRAMES]; #if CONFIG_REF_MV - uint8_t ref_mv_count[MAX_REF_FRAMES]; - CANDIDATE_MV ref_mv_stack[MAX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; + uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]; + CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; #endif } MB_MODE_INFO_EXT; diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 6d0fd197f..a3982e3ce 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -3340,6 +3340,7 @@ static void encode_with_recode_loop(VP10_COMP *cpi, // update_base_skip_probs(cpi); vpx_clear_system_state(); + // Dummy pack of the bitstream using up to date stats to get an // accurate estimate of output frame size to determine if we need // to recode. diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index d457199cc..f98382595 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -4461,6 +4461,40 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, mbmi->mv[i].as_int = cur_mv[i].as_int; } +#if CONFIG_REF_MV + if (this_mode == NEARESTMV && is_comp_pred) { + uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); + if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { + cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv; + cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv; + + for (i = 0; i < 2; ++i) { + lower_mv_precision(&cur_mv[i].as_mv, cm->allow_high_precision_mv); + clamp_mv2(&cur_mv[i].as_mv, xd); + if (mv_check_bounds(x, &cur_mv[i].as_mv)) + return INT64_MAX; + mbmi->mv[i].as_int = cur_mv[i].as_int; + } + } + } + + if (this_mode == NEARMV && is_comp_pred) { + uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); + if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { + cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][1].this_mv; + cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][1].comp_mv; + + for (i = 0; i < 2; ++i) { + lower_mv_precision(&cur_mv[i].as_mv, cm->allow_high_precision_mv); + clamp_mv2(&cur_mv[i].as_mv, xd); + if (mv_check_bounds(x, &cur_mv[i].as_mv)) + return INT64_MAX; + mbmi->mv[i].as_int = cur_mv[i].as_int; + } + } + } +#endif + // do first prediction into the destination buffer. Do the next // prediction into a temporary buffer. 
Then keep track of which one // of these currently holds the best predictor, and use the other @@ -5035,6 +5069,21 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, frame_mv[ZEROMV][ref_frame].as_int = 0; } +#if CONFIG_REF_MV + for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) { + MODE_INFO *const mi = xd->mi[0]; + int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; + x->mbmi_ext->mode_context[ref_frame] = 0; + vp10_find_mv_refs(cm, xd, mi, ref_frame, +#if CONFIG_REF_MV + &mbmi_ext->ref_mv_count[ref_frame], + mbmi_ext->ref_mv_stack[ref_frame], +#endif + candidates, mi_row, mi_col, + NULL, NULL, mbmi_ext->mode_context); + } +#endif + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { if (!(cpi->ref_frame_flags & flag_list[ref_frame])) { // Skip checking missing references in both single and compound reference @@ -5625,7 +5674,51 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0], best_mbmode.ref_frame[1]}; int comp_pred_mode = refs[1] > INTRA_FRAME; +#if CONFIG_REF_MV + if (!comp_pred_mode) { + if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int) + best_mbmode.mode = NEARESTMV; + else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int) + best_mbmode.mode = NEARMV; + else if (best_mbmode.mv[0].as_int == 0) + best_mbmode.mode = ZEROMV; + } else { + uint8_t rf_type = vp10_ref_frame_type(best_mbmode.ref_frame); + if (mbmi_ext->ref_mv_count[rf_type] > 1) { + int i; + int_mv nearestmv[2], nearmv[2]; + const int allow_hp = cm->allow_high_precision_mv; + + nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv; + nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv; + nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv; + nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv; + + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { + lower_mv_precision(&nearestmv[i].as_mv, allow_hp); + lower_mv_precision(&nearmv[i].as_mv, allow_hp); + } + if (nearestmv[0].as_int == best_mbmode.mv[0].as_int && + nearestmv[1].as_int == best_mbmode.mv[1].as_int) + best_mbmode.mode = NEARESTMV; + else if (nearmv[0].as_int == best_mbmode.mv[0].as_int && + nearmv[1].as_int == best_mbmode.mv[1].as_int) + best_mbmode.mode = NEARMV; + else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0) + best_mbmode.mode = ZEROMV; + } else { + if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && + (frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int)) + best_mbmode.mode = NEARESTMV; + else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int && + (frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int)) + best_mbmode.mode = NEARMV; + else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0) + best_mbmode.mode = ZEROMV; + } + } +#else if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || !comp_pred_mode)) @@ -5637,6 +5730,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, else if (best_mbmode.mv[0].as_int == 0 && ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode)) best_mbmode.mode = ZEROMV; +#endif } #if CONFIG_REF_MV -- 2.40.0
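
[Editor's note, not part of the patch] For readers unfamiliar with the CONFIG_REF_MV candidate list, the sketch below illustrates the idea the decodemv.c and rdopt.c hunks implement: for a compound (two-reference) block, each ref_mv_stack entry carries a motion vector pair (this_mv for the first reference, comp_mv for the second), and when the stack holds at least two candidates the pair at index 0 becomes NEARESTMV and the pair at index 1 becomes NEARMV, after rounding to the allowed precision. This is a standalone, simplified sketch, not library code: the type names mirror the libvpx ones but are redefined locally, and the vp10_use_mv_hp() test and border clamping (clamp_mv_ref/clamp_mv2) are omitted.

#include <stdint.h>

typedef struct { int16_t row, col; } MV;
typedef union { uint32_t as_int; MV as_mv; } int_mv;

/* One candidate: a motion vector per reference frame of the compound pair. */
typedef struct {
  int_mv this_mv;  /* MV for the first reference frame */
  int_mv comp_mv;  /* MV for the second reference frame */
} CANDIDATE_MV;

/* Same rounding as the lower_mv_precision() helper this patch moves into
 * mvref_common.h, minus the vp10_use_mv_hp() test. */
static void round_mv_precision(MV *mv, int allow_hp) {
  if (!allow_hp) {
    if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
    if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
  }
}

/* Map the first two stack entries onto the NEARESTMV/NEARMV pairs, as the
 * decoder does for a compound block when ref_mv_count > 1.  Returns 0 when
 * the stack is too short and the caller should keep the legacy
 * mv_ref_list-based values. */
static int pick_compound_near_mvs(const CANDIDATE_MV *stack, int count,
                                  int allow_hp,
                                  int_mv nearestmv[2], int_mv nearmv[2]) {
  int i;
  if (count < 2) return 0;
  nearestmv[0] = stack[0].this_mv;
  nearestmv[1] = stack[0].comp_mv;
  nearmv[0] = stack[1].this_mv;
  nearmv[1] = stack[1].comp_mv;
  for (i = 0; i < 2; ++i) {
    round_mv_precision(&nearestmv[i].as_mv, allow_hp);
    round_mv_precision(&nearmv[i].as_mv, allow_hp);
  }
  return 1;
}

The rdopt.c change at the end of the patch applies the same pairing on the encoder side: after the RD search, best_mbmode's MV pair is compared against the index-0 and index-1 pairs (and against zero), so a result that coincides with an existing candidate pair is relabelled NEARESTMV, NEARMV, or ZEROMV and signalled with the cheaper mode.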