joint bidirectional motion refinement (--bime)

author Loren Merritt <pengvado@videolan.org>

Fri, 30 Dec 2005 04:56:49 +0000 (04:56 +0000)

committer Loren Merritt <pengvado@videolan.org>

Fri, 30 Dec 2005 04:56:49 +0000 (04:56 +0000)
author Loren Merritt <pengvado@videolan.org>
Fri, 30 Dec 2005 04:56:49 +0000 (04:56 +0000)
committer Loren Merritt <pengvado@videolan.org>
Fri, 30 Dec 2005 04:56:49 +0000 (04:56 +0000)
diff --git a/common/common.c b/common/common.c

index 73878ebe7f43bf64f5431b9bcf235d3b7534a848..2f7b73fd2f456b13fda4c8df2f285b71b30d1ceb 100644 (file)
--- a/common/common.c
+++ b/common/common.c
@@ -462,9 +462,10 @@ char *x264_param2string( x264_param_t *p, int b_res )
      s += sprintf( s, " bframes=%d", p->i_bframe );
      if( p->i_bframe )
      {
-        s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d wpredb=%d",
+        s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d wpredb=%d bime=%d",
                        p->b_bframe_pyramid, p->b_bframe_adaptive, p->i_bframe_bias,
-                      p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred );
+                      p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred,
+                      p->analyse.b_bidir_me );
      }
  
      s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
diff --git a/encoder/analyse.c b/encoder/analyse.c

index c0ab8f133cccc800b116cc6b71e4cbd6f1a763a6..2381a2ed73ce78ea0337c4eeb40455fff951074c 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -1700,6 +1700,35 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
      }
  }
  
+/* Apply joint L0/L1 motion refinement to the macroblock mode that analysis
+ * has settled on.
+ *
+ * For the current h->mb.i_partition, every partition flagged as bidirectional
+ * (B_BI_BI for 16x16, D_BI_8x8 for the 16x8 / 8x16 / 8x8 splits) has its
+ * list0 and list1 ME results handed to x264_me_refine_bidir(). The bipred
+ * weight is looked up once from a->l0.i_ref / a->l1.i_ref and shared by all
+ * partitions of the macroblock. Any other partition value is left untouched. */
+static void refine_bidir( x264_t *h, x264_mb_analysis_t *a )
+{
+    const int i_partition = h->mb.i_partition;
+    const int i_biweight = h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref];
+    int part;
+
+    if( i_partition == D_16x16 )
+    {
+        /* a single partition: only refine when the whole MB is bi-predicted */
+        if( h->mb.i_type == B_BI_BI )
+            x264_me_refine_bidir( h, &a->l0.me16x16, &a->l1.me16x16, i_biweight );
+    }
+    else if( i_partition == D_16x8 )
+    {
+        for( part = 0; part < 2; part++ )
+        {
+            if( a->i_mb_partition16x8[part] != D_BI_8x8 )
+                continue;
+            x264_me_refine_bidir( h, &a->l0.me16x8[part], &a->l1.me16x8[part], i_biweight );
+        }
+    }
+    else if( i_partition == D_8x16 )
+    {
+        for( part = 0; part < 2; part++ )
+        {
+            if( a->i_mb_partition8x16[part] != D_BI_8x8 )
+                continue;
+            x264_me_refine_bidir( h, &a->l0.me8x16[part], &a->l1.me8x16[part], i_biweight );
+        }
+    }
+    else if( i_partition == D_8x8 )
+    {
+        for( part = 0; part < 4; part++ )
+        {
+            if( h->mb.i_sub_partition[part] != D_BI_8x8 )
+                continue;
+            x264_me_refine_bidir( h, &a->l0.me8x8[part], &a->l1.me8x8[part], i_biweight );
+        }
+    }
+}
+
  static inline void x264_mb_analyse_transform( x264_t *h )
  {
      h->mb.cache.b_transform_8x8_allowed =
@@ -2230,6 +2259,9 @@ void x264_macroblock_analyse( x264_t *h )
              }
  
              h->mb.i_type = i_type;
+
+            if( h->param.analyse.b_bidir_me )
+                refine_bidir( h, &analysis );
          }
      }
  
diff --git a/encoder/me.c b/encoder/me.c

index f976d067e1684d0187b87b752455ae0908404fd7..26ec3f476b22bbbfa011deef4f7ecc5a07516e48 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -451,3 +451,112 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];
  }
  
+/* Helper macros for x264_me_refine_bidir(). They expand inside that function
+ * and use its locals directly: h, m0, m1, pix, pix0, pix1, bw, bh, bs,
+ * i_pixel, i_weight, pass, visited, bcost, the om*/bm* vectors and the
+ * p_cost_* tables. Every macro parameter is parenthesized so that an
+ * expression argument cannot change the arithmetic it is spliced into. */
+
+/* Interpolate the luma prediction of both references at offset (dx,dy) from
+ * the current search centre (om0x,om0y) / (om1x,om1y) and keep the results in
+ * slot 4 + 3*dx + dy of pix0 / pix1 (a 3x3 grid, slot 4 being the centre). */
+#define BIME_CACHE( dx, dy ) \
+{ \
+    int i = 4 + 3*(dx) + (dy); \
+    h->mc.mc_luma( m0->p_fref, m0->i_stride[0], pix0[i], bw, om0x+(dx), om0y+(dy), bw, bh ); \
+    h->mc.mc_luma( m1->p_fref, m1->i_stride[0], pix1[i], bw, om1x+(dx), om1y+(dy), bw, bh ); \
+}
+
+/* Cache the predictions for an offset and for its negation. */
+#define BIME_CACHE2(a,b) \
+    BIME_CACHE(  (a),  (b) ) \
+    BIME_CACHE( -(a), -(b) )
+
+/* Score the candidate pair (m0x,m0y) / (m1x,m1y).
+ * After pass 0 a pair is skipped if its slot in visited[] is already set; the
+ * slot is selected by the low 3 bits of each of the four components.
+ * The two cached predictions are combined into pix (plain average when
+ * i_weight == 32, weighted average otherwise), compared against m0->p_fenc[0]
+ * with h->pixf.mbcmp, and the four mv component costs are added. A cost lower
+ * than bcost replaces bcost and the best vectors bm0x/bm0y/bm1x/bm1y.
+ * Both vectors must lie within +-1 of the current centre, since the
+ * predictions are read from the 3x3 caches filled by BIME_CACHE. */
+#define COST_BIMV_SATD( m0x, m0y, m1x, m1y ) \
+if( pass == 0 || !visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] ) \
+{ \
+    int cost; \
+    int i0 = 4 + 3*((m0x)-om0x) + ((m0y)-om0y); \
+    int i1 = 4 + 3*((m1x)-om1x) + ((m1y)-om1y); \
+    visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] = 1; \
+    memcpy( pix, pix0[i0], bs ); \
+    if( i_weight == 32 ) \
+        h->mc.avg[i_pixel]( pix, bw, pix1[i1], bw ); \
+    else \
+        h->mc.avg_weight[i_pixel]( pix, bw, pix1[i1], bw, i_weight ); \
+    cost = h->pixf.mbcmp[i_pixel]( m0->p_fenc[0], m0->i_stride[0], pix, bw ) \
+         + p_cost_m0x[ (m0x) ] + p_cost_m0y[ (m0y) ] \
+         + p_cost_m1x[ (m1x) ] + p_cost_m1y[ (m1y) ]; \
+    if( cost < bcost ) \
+    {                  \
+        bcost = cost;  \
+        bm0x = (m0x);  \
+        bm0y = (m0y);  \
+        bm1x = (m1x);  \
+        bm1y = (m1y);  \
+    } \
+}
+
+/* Score the pair at offsets (a,b) / (c,d) from the current centre. */
+#define CHECK_BIDIR(a,b,c,d) \
+    COST_BIMV_SATD( om0x+(a), om0y+(b), om1x+(c), om1y+(d) )
+
+/* Score an offset tuple and its negation. */
+#define CHECK_BIDIR2(a,b,c,d) \
+    CHECK_BIDIR(  (a),  (b),  (c),  (d) ) \
+    CHECK_BIDIR( -(a), -(b), -(c), -(d) )
+
+/* Score the four cyclic rotations of (a,b,c,d), each with its negation:
+ * eight candidates in total. */
+#define CHECK_BIDIR8(a,b,c,d) \
+    CHECK_BIDIR2(a,b,c,d) \
+    CHECK_BIDIR2(b,c,d,a) \
+    CHECK_BIDIR2(c,d,a,b) \
+    CHECK_BIDIR2(d,a,b,c)
+
+
+/* Jointly refine the two motion vectors of a bidirectionally predicted block.
+ *
+ * Starting from the current m0->mv / m1->mv, each pass scores the pairs that
+ * differ from the current centre in at most two of the four components, each
+ * by one mv unit, and then re-centres on the cheapest pair found so far. The
+ * search stops after 8 passes or as soon as a pass leaves the centre unchanged.
+ *
+ * h         encoder context; provides the mc / pixel-compare functions and
+ *           the mv range used to clip the predictors
+ * m0, m1    ME state for the two references; their mv[] fields are
+ *           overwritten with the best pair found
+ * i_weight  bipred weight: 32 selects the plain average, any other value the
+ *           weighted average
+ *
+ * Returns the cost of the best pair: the mbcmp score of the combined
+ * prediction against m0->p_fenc[0] plus the four mv component costs. */
+int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight )
+{
+    const int i_pixel = m0->i_pixel;
+    const int bw = x264_pixel_size[i_pixel].w;
+    const int bh = x264_pixel_size[i_pixel].h;
+    const int bs = bw*bh;
+    /* mv cost tables, rebased on the clipped predictor of each component.
+     * Horizontal components are clipped against range index [0] and vertical
+     * components against index [1] (assumes mv_min / mv_max are indexed x,y
+     * in the same way as mv[] and mvp[]). */
+    const int16_t *p_cost_m0x = m0->p_cost_mv - x264_clip3( m0->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
+    const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
+    const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
+    const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
+    /* 3x3 caches of interpolated predictions around the current centre,
+     * one set per reference, plus a scratch block for the combined result */
+    DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 );
+    DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 );
+    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
+    /* bm* = best vectors so far, om* = centre of the current pass */
+    int bm0x = m0->mv[0], om0x = bm0x;
+    int bm0y = m0->mv[1], om0y = bm0y;
+    int bm1x = m1->mv[0], om1x = bm1x;
+    int bm1y = m1->mv[1], om1y = bm1y;
+    int bcost = COST_MAX;
+    int pass = 0;
+    /* pairs already scored, indexed by the low 3 bits of each component */
+    uint8_t visited[8][8][8][8];
+    memset( visited, 0, sizeof(visited) );
+
+    /* score the starting pair so that bcost is meaningful */
+    BIME_CACHE( 0, 0 );
+    CHECK_BIDIR( 0, 0, 0, 0 );
+
+    for( pass = 0; pass < 8; pass++ )
+    {
+        /* check all mv pairs that differ in at most 2 components from the current mvs. */
+        /* doesn't do chroma ME. this probably doesn't matter, as the gains
+         * from bidir ME are the same with and without chroma ME. */
+
+        /* fill the eight non-centre cache slots */
+        BIME_CACHE2( 1, 0 );
+        BIME_CACHE2( 0, 1 );
+        BIME_CACHE2( 1, 1 );
+        BIME_CACHE2( 1,-1 );
+
+        CHECK_BIDIR8( 0, 0, 0, 1 );
+        CHECK_BIDIR8( 0, 0, 1, 1 );
+        CHECK_BIDIR2( 0, 1, 0, 1 );
+        CHECK_BIDIR2( 1, 0, 1, 0 );
+        CHECK_BIDIR8( 0, 0,-1, 1 );
+        CHECK_BIDIR2( 0,-1, 0, 1 );
+        CHECK_BIDIR2(-1, 0, 1, 0 );
+
+        /* no candidate beat the centre: converged */
+        if( om0x == bm0x && om0y == bm0y && om1x == bm1x && om1y == bm1y )
+            break;
+
+        /* re-centre on the best pair and refresh the centre cache slot */
+        om0x = bm0x;
+        om0y = bm0y;
+        om1x = bm1x;
+        om1y = bm1y;
+        BIME_CACHE( 0, 0 );
+    }
+
+    m0->mv[0] = bm0x;
+    m0->mv[1] = bm0y;
+    m1->mv[0] = bm1x;
+    m1->mv[1] = bm1y;
+    return bcost;
+}
diff --git a/encoder/me.h b/encoder/me.h

index 18ad9078a23ab18bc79919163f0b948b848322a0..03678c827d3d6b0004541e461755ccacc32fa65d 100644 (file)
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -52,5 +52,6 @@ static inline void x264_me_search( x264_t *h, x264_me_t *m, int (*mvc)[2], int i
      { x264_me_search_ref( h, m, mvc, i_mvc, NULL ); }
  
  void x264_me_refine_qpel( x264_t *h, x264_me_t *m );
+int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
  
  #endif
diff --git a/x264.c b/x264.c

index 58898a4f9ded61391f14acc62a850e047e3fc45d..0bc496af66dd45fb62cf38a02d5bcef8426405a6 100644 (file)
--- a/x264.c
+++ b/x264.c
@@ -256,6 +256,7 @@ static void Help( x264_param_t *defaults )
               "      --b-rdo                 RD based mode decision for B-frames. Requires subme 6.\n"
               "      --mixed-refs            Decide references on a per partition basis\n"
               "      --no-chroma-me          Ignore chroma in motion estimation\n"
+             "      --bime                  Jointly optimize both MVs in B-frames\n"
               "  -8, --8x8dct                Adaptive spatial transform size\n"
               "  -t, --trellis <integer>     Trellis RD quantization. Requires CABAC. [%d]\n"
               "                                  - 0: disabled\n"
@@ -480,6 +481,7 @@ static int  Parse( int argc, char **argv,
  #define OPT_CRF 315
  #define OPT_B_RDO 316
  #define OPT_NO_FAST_PSKIP 317
+#define OPT_BIME 318
  
          static struct option long_options[] =
          {
@@ -513,9 +515,10 @@ static int  Parse( int argc, char **argv,
              { "me",      required_argument, NULL, OPT_ME },
              { "merange", required_argument, NULL, OPT_MERANGE },
              { "subme",   required_argument, NULL, 'm' },
-            { "b-rdo", no_argument,    NULL, OPT_B_RDO },
+            { "b-rdo",   no_argument,       NULL, OPT_B_RDO },
              { "mixed-refs", no_argument,    NULL, OPT_MIXED_REFS },
              { "no-chroma-me", no_argument,  NULL, OPT_NO_CHROMA_ME },
+            { "bime",    no_argument,       NULL, OPT_BIME },
              { "8x8dct",  no_argument,       NULL, '8' },
              { "trellis", required_argument, NULL, 't' },
              { "no-fast-pskip", no_argument, NULL, OPT_NO_FAST_PSKIP },
@@ -745,6 +748,9 @@ static int  Parse( int argc, char **argv,
              case OPT_NO_CHROMA_ME:
                  param->analyse.b_chroma_me = 0;
                  break;
+            case OPT_BIME:
+                param->analyse.b_bidir_me = 1;
+                break;
              case '8':
                  param->analyse.b_transform_8x8 = 1;
                  break;
diff --git a/x264.h b/x264.h

index 1de65f7f0610e3745a70b36bc630fcc57fa465b7..993b8e3aef39df9e8d87f72c216d1779c2623ed0 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@
  
  #include <stdarg.h>
  
-#define X264_BUILD 42
+#define X264_BUILD 43
  
  /* x264_t:
   *      opaque handler for decoder and encoder */
@@ -196,6 +196,7 @@ typedef struct
          int          i_me_range; /* integer pixel motion estimation search range (from predicted mv) */
          int          i_mv_range; /* maximum length of a mv (in pixels) */
          int          i_subpel_refine; /* subpixel motion estimation quality */
+        int          b_bidir_me; /* jointly optimize both MVs in B-frames */
          int          b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */
          int          b_bframe_rdo; /* RD based mode decision for B-frames */
          int          b_mixed_references; /* allow each mb partition in P-frames to have it's own reference number */
author	Loren Merritt <pengvado@videolan.org>
	Fri, 30 Dec 2005 04:56:49 +0000 (04:56 +0000)
committer	Loren Merritt <pengvado@videolan.org>
	Fri, 30 Dec 2005 04:56:49 +0000 (04:56 +0000)
common/common.c		patch \| blob \| history
encoder/analyse.c		patch \| blob \| history
encoder/me.c		patch \| blob \| history
encoder/me.h		patch \| blob \| history
x264.c		patch \| blob \| history
x264.h		patch \| blob \| history