RD mode decision for B-frames (--b-rdo)

author Loren Merritt <pengvado@videolan.org>

Fri, 18 Nov 2005 11:20:07 +0000 (11:20 +0000)

committer Loren Merritt <pengvado@videolan.org>

Fri, 18 Nov 2005 11:20:07 +0000 (11:20 +0000)
author Loren Merritt <pengvado@videolan.org>
Fri, 18 Nov 2005 11:20:07 +0000 (11:20 +0000)
committer Loren Merritt <pengvado@videolan.org>
Fri, 18 Nov 2005 11:20:07 +0000 (11:20 +0000)
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 493228dc93714f9c7382e3c4a3a65d651ee36ca9..2fa528548e2d6cd804e11461e26f514b55449dad 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -188,7 +188,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
      a->i_qp = h->mb.i_qp = i_qp;
      a->i_lambda = i_qp0_cost_table[i_qp];
      a->i_lambda2 = i_qp0_cost2_table[i_qp];
-    a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 && h->sh.i_type != SLICE_TYPE_B;
+    a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 &&
+                ( h->sh.i_type != SLICE_TYPE_B || h->param.analyse.b_bframe_rdo );
  
      h->mb.i_me_method = h->param.analyse.i_me_method;
      h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
@@ -460,8 +461,12 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_cost_
      int i, idx;
      int i_max;
      int predict_mode[9];
+    int i_satd_thresh;
  
-    const int i_satd_thresh = a->i_best_satd * 5/4 + a->i_lambda * 10;
+    if( h->sh.i_type == SLICE_TYPE_B )
+        i_satd_thresh = a->i_best_satd * 9/8;
+    else
+        i_satd_thresh = a->i_best_satd * 5/4 + a->i_lambda * 10;
  
      /*---------------- Try all mode and calculate their score ---------------*/
  
@@ -1143,8 +1148,16 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
          /* mb type cost */
          a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
      }
-
      a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
+
+    if( a->b_mbrd )
+    {
+        if( a->i_cost16x16direct < a->i_best_satd )
+            a->i_best_satd = a->i_cost16x16direct;
+
+        h->mb.i_type = B_DIRECT;
+        a->i_cost16x16direct = x264_rd_cost_mb( h, a->i_lambda2 );
+    }
  }
  
  #define WEIGHTED_AVG( size, pix1, stride1, src2, stride2 ) \
@@ -1278,6 +1291,31 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      a->i_cost16x16bi   += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
      a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
      a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
+
+    if( a->b_mbrd )
+    {
+        if( a->l0.me16x16.cost < a->i_best_satd )
+            a->i_best_satd = a->l0.me16x16.cost;
+        if( a->l1.me16x16.cost < a->i_best_satd )
+            a->i_best_satd = a->l1.me16x16.cost;
+        if( a->i_cost16x16bi < a->i_best_satd )
+            a->i_best_satd = a->i_cost16x16bi;
+
+        h->mb.i_partition = D_16x16;
+        /* L0 */
+        h->mb.i_type = B_L0_L0;
+        x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+        a->l0.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 );
+
+        /* L1 */
+        h->mb.i_type = B_L1_L1;
+        x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
+        a->l1.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 );
+
+        /* BI */
+        h->mb.i_type = B_BI_BI;
+        a->i_cost16x16bi = x264_rd_cost_mb( h, a->i_lambda2 );
+    }
  }
  
  static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i )
@@ -1439,6 +1477,16 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
  
      /* mb type cost */
      a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
+
+    if( a->b_mbrd )
+    {
+        if( a->i_cost8x8bi < a->i_best_satd )
+            a->i_best_satd = a->i_cost8x8bi;
+
+        h->mb.i_type = B_8x8;
+        h->mb.i_partition = D_8x8;
+        a->i_cost8x8bi = x264_rd_cost_mb( h, a->i_lambda2 );
+    }
  }
  
  static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
@@ -1502,8 +1550,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
          }
          a->i_cost16x8bi += i_part_cost;
  
-        if( i == 0 )
-            x264_mb_cache_mv_b16x8( h, a, i, 0 );
+        x264_mb_cache_mv_b16x8( h, a, i, 0 );
      }
  
      /* mb type cost */
@@ -1511,6 +1558,16 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
          + (a->i_mb_partition16x8[0]>>2) * 3
          + (a->i_mb_partition16x8[1]>>2);
      a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
+
+    if( a->b_mbrd )
+    {
+        if( a->i_cost16x8bi < a->i_best_satd )
+            a->i_best_satd = a->i_cost16x8bi;
+
+        h->mb.i_type = a->i_mb_type16x8;
+        h->mb.i_partition = D_16x8;
+        a->i_cost16x8bi = x264_rd_cost_mb( h, a->i_lambda2 );
+    }
  }
  static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
  {
@@ -1572,8 +1629,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
          }
          a->i_cost8x16bi += i_part_cost;
  
-        if( i == 0 )
-            x264_mb_cache_mv_b8x16( h, a, i, 0 );
+        x264_mb_cache_mv_b8x16( h, a, i, 0 );
      }
  
      /* mb type cost */
@@ -1581,6 +1637,16 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
          + (a->i_mb_partition8x16[0]>>2) * 3
          + (a->i_mb_partition8x16[1]>>2);
      a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
+
+    if( a->b_mbrd )
+    {
+        if( a->i_cost8x16bi < a->i_best_satd )
+            a->i_best_satd = a->i_cost8x16bi;
+
+        h->mb.i_type = a->i_mb_type8x16;
+        h->mb.i_partition = D_8x16;
+        a->i_cost8x16bi = x264_rd_cost_mb( h, a->i_lambda2 );
+    }
  }
  
  static inline void x264_mb_analyse_transform( x264_t *h )
@@ -1621,6 +1687,10 @@ static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *
          {
              if( *i_cost > 0 )
                  a->i_best_satd = (int64_t)a->i_best_satd * i_cost8 / *i_cost;
+            /* prevent a rare division by zero in x264_mb_analyse_intra */
+            if( a->i_best_satd == 0 )
+                a->i_best_satd = 1;
+
              *i_cost = i_cost8;
          }
          else
@@ -1892,6 +1962,7 @@ void x264_macroblock_analyse( x264_t *h )
      }
      else if( h->sh.i_type == SLICE_TYPE_B )
      {
+        int i_bskip_cost = COST_MAX;
          int b_skip = 0;
  
          analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h );
@@ -1899,15 +1970,34 @@ void x264_macroblock_analyse( x264_t *h )
          {
              h->mb.i_type = B_SKIP;
              x264_mb_mc( h );
-
-            /* Conditioning the probe on neighboring block types
-             * doesn't seem to help speed or quality. */
-            b_skip = !h->mb.b_lossless && x264_macroblock_probe_bskip( h );
+            if( analysis.b_mbrd )
+            {
+                i_bskip_cost = h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
+                                                         h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] )
+                             + h->pixf.ssd[PIXEL_8x8](   h->mb.pic.p_fenc[1], h->mb.pic.i_stride[1],
+                                                         h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] )
+                             + h->pixf.ssd[PIXEL_8x8](   h->mb.pic.p_fenc[2], h->mb.pic.i_stride[2],
+                                                         h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
+
+                if( i_bskip_cost == 0 )
+                {
+                    h->mb.i_type = B_SKIP;
+                    x264_analyse_update_cache( h, &analysis );
+                    return;
+                }
+            }
+            else
+            {
+                /* Conditioning the probe on neighboring block types
+                 * doesn't seem to help speed or quality. */
+                b_skip = !h->mb.b_lossless && x264_macroblock_probe_bskip( h );
+            }
          }
  
          if( !b_skip )
          {
              const unsigned int flags = h->param.analyse.inter;
+            int i_type;
              int i_partition;
  
              x264_mb_analyse_load_costs( h, &analysis );
@@ -1919,22 +2009,22 @@ void x264_macroblock_analyse( x264_t *h )
  
              x264_mb_analyse_inter_b16x16( h, &analysis );
  
-            h->mb.i_type = B_L0_L0;
+            i_type = B_L0_L0;
              i_partition = D_16x16;
              i_cost = analysis.l0.me16x16.cost;
              if( analysis.l1.me16x16.cost < i_cost )
              {
-                h->mb.i_type = B_L1_L1;
+                i_type = B_L1_L1;
                  i_cost = analysis.l1.me16x16.cost;
              }
              if( analysis.i_cost16x16bi < i_cost )
              {
-                h->mb.i_type = B_BI_BI;
+                i_type = B_BI_BI;
                  i_cost = analysis.i_cost16x16bi;
              }
              if( analysis.i_cost16x16direct < i_cost )
              {
-                h->mb.i_type = B_DIRECT;
+                i_type = B_DIRECT;
                  i_cost = analysis.i_cost16x16direct;
              }
              
@@ -1943,7 +2033,7 @@ void x264_macroblock_analyse( x264_t *h )
                  x264_mb_analyse_inter_b8x8( h, &analysis );
                  if( analysis.i_cost8x8bi < i_cost )
                  {
-                    h->mb.i_type = B_8x8;
+                    i_type = B_8x8;
                      i_partition = D_8x8;
                      i_cost = analysis.i_cost8x8bi;
  
@@ -1955,7 +2045,7 @@ void x264_macroblock_analyse( x264_t *h )
                          {
                              i_partition = D_16x8;
                              i_cost = analysis.i_cost16x8bi;
-                            h->mb.i_type = analysis.i_mb_type16x8;
+                            i_type = analysis.i_mb_type16x8;
                          }
                      }
                      if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] ||
@@ -1966,7 +2056,7 @@ void x264_macroblock_analyse( x264_t *h )
                          {
                              i_partition = D_8x16;
                              i_cost = analysis.i_cost8x16bi;
-                            h->mb.i_type = analysis.i_mb_type8x16;
+                            i_type = analysis.i_mb_type8x16;
                          }
                      }
                  }
@@ -1974,24 +2064,38 @@ void x264_macroblock_analyse( x264_t *h )
  
              h->mb.i_partition = i_partition;
  
+            if( analysis.b_mbrd )
+            {
+                if( i_bskip_cost <= i_cost )
+                {
+                    h->mb.i_type = B_SKIP;
+                    x264_analyse_update_cache( h, &analysis );
+                    return;
+                }
+                else
+                {
+                    h->mb.i_type = i_type;
+                    x264_mb_analyse_transform_rd( h, &analysis, &i_cost );
+                }
+            }
              /* refine qpel */
-            if( i_partition == D_16x16 )
+            else if( i_partition == D_16x16 )
              {
                  analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
                  analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
-                if( h->mb.i_type == B_L0_L0 )
+                if( i_type == B_L0_L0 )
                  {
                      x264_me_refine_qpel( h, &analysis.l0.me16x16 );
                      i_cost = analysis.l0.me16x16.cost
                             + analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
                  }
-                else if( h->mb.i_type == B_L1_L1 )
+                else if( i_type == B_L1_L1 )
                  {
                      x264_me_refine_qpel( h, &analysis.l1.me16x16 );
                      i_cost = analysis.l1.me16x16.cost
                             + analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
                  }
-                else if( h->mb.i_type == B_BI_BI )
+                else if( i_type == B_BI_BI )
                  {
                      x264_me_refine_qpel( h, &analysis.l0.me16x16 );
                      x264_me_refine_qpel( h, &analysis.l1.me16x16 );
@@ -2058,19 +2162,21 @@ void x264_macroblock_analyse( x264_t *h )
  
              if( analysis.i_sad_i16x16 < i_cost )
              {
-                h->mb.i_type = I_16x16;
+                i_type = I_16x16;
                  i_cost = analysis.i_sad_i16x16;
              }
              if( analysis.i_sad_i8x8 < i_cost )
              {
-                h->mb.i_type = I_8x8;
+                i_type = I_8x8;
                  i_cost = analysis.i_sad_i8x8;
              }
              if( analysis.i_sad_i4x4 < i_cost )
              {
-                h->mb.i_type = I_4x4;
+                i_type = I_4x4;
                  i_cost = analysis.i_sad_i4x4;
              }
+
+            h->mb.i_type = i_type;
          }
      }
  
@@ -2159,8 +2265,6 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
          }
  
          case B_SKIP:
-            /* nothing has changed since x264_macroblock_probe_bskip */
-            break;
          case B_DIRECT:
              x264_mb_load_mv_direct8x8( h, 0 );
              x264_mb_load_mv_direct8x8( h, 1 );
diff --git a/x264.c b/x264.c

index 95573936a5e21543ea2ae3d35ca949c70fd96148..50f503f7403060b00cfc5708bb7e7377409dcfdd 100644 (file)
--- a/x264.c
+++ b/x264.c
@@ -253,6 +253,7 @@ static void Help( x264_param_t *defaults )
               "      --merange <integer>     Maximum motion vector search range [%d]\n"
               "  -m, --subme <integer>       Subpixel motion estimation and partition\n"
               "                                  decision quality: 1=fast, 6=best. [%d]\n"
+             "      --b-rdo                 RD based mode decision for B-frames. Requires subme 6.\n"
               "      --mixed-refs            Decide references on a per partition basis\n"
               "      --no-chroma-me          Ignore chroma in motion estimation\n"
               "  -8, --8x8dct                Adaptive spatial transform size\n"
@@ -476,6 +477,7 @@ static int  Parse( int argc, char **argv,
  #define OPT_CHROMALOC 313
  #define OPT_MIXED_REFS 314
  #define OPT_CRF 315
+#define OPT_B_RDO 316
  
          static struct option long_options[] =
          {
@@ -509,6 +511,7 @@ static int  Parse( int argc, char **argv,
              { "me",      required_argument, NULL, OPT_ME },
              { "merange", required_argument, NULL, OPT_MERANGE },
              { "subme",   required_argument, NULL, 'm' },
+            { "b-rdo", no_argument,    NULL, OPT_B_RDO },
              { "mixed-refs", no_argument,    NULL, OPT_MIXED_REFS },
              { "no-chroma-me", no_argument,  NULL, OPT_NO_CHROMA_ME },
              { "8x8dct",  no_argument,       NULL, '8' },
@@ -730,6 +733,9 @@ static int  Parse( int argc, char **argv,
              case 'm':
                  param->analyse.i_subpel_refine = atoi(optarg);
                  break;
+            case OPT_B_RDO:
+                param->analyse.b_bframe_rdo = 1;
+                break;
              case OPT_MIXED_REFS:
                  param->analyse.b_mixed_references = 1;
                  break;
diff --git a/x264.h b/x264.h

index b63b844e937448f5306da3aac22723b508fbe942..1b914121accbb9283fdef4d445dd2896b107adaa 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -197,6 +197,7 @@ typedef struct
          int          i_mv_range; /* maximum length of a mv (in pixels) */
          int          i_subpel_refine; /* subpixel motion estimation quality */
          int          b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */
+        int          b_bframe_rdo; /* RD based mode decision for B-frames */
          int          b_mixed_references; /* allow each mb partition in P-frames to have it's own reference number */
          int          i_trellis;  /* trellis RD quantization */
author	Loren Merritt <pengvado@videolan.org>
	Fri, 18 Nov 2005 11:20:07 +0000 (11:20 +0000)
committer	Loren Merritt <pengvado@videolan.org>
	Fri, 18 Nov 2005 11:20:07 +0000 (11:20 +0000)
encoder/analyse.c		patch | blob | history
x264.c		patch | blob | history
x264.h		patch | blob | history