granicus.if.org Git - libx264/commitdiff
Add support for MB-tree + B-pyramid
author Fiona Glaser <fiona@x264.com>
Tue, 8 Dec 2009 11:08:17 +0000 (03:08 -0800)
committer Fiona Glaser <fiona@x264.com>
Wed, 9 Dec 2009 12:47:57 +0000 (04:47 -0800)
Modify B-adapt 2 to consider pyramid in its calculations.
Generally results in many more B-frames being used when pyramid is on.
Modify MB-tree statsfile reading to handle the reordering necessary.
Make differing keyint or pyramid between passes into a fatal error.

encoder/encoder.c
encoder/ratecontrol.c
encoder/slicetype.c

index 82d1f02efe7e8d449b6c00fb193e58600dec88be..e2f01ae0ea13aae010c79d79e5be25117ab800bc 100644 (file)
@@ -649,11 +649,6 @@ static int x264_validate_parameters( x264_t *h )
         h->param.rc.i_aq_mode = 1;
         h->param.rc.f_aq_strength = 0;
     }
-    if( h->param.rc.b_mb_tree && h->param.i_bframe_pyramid )
-    {
-        x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" );
-        h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
-    }
     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
     if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
         h->param.analyse.i_subpel_refine = 9;
index ef23f2323d22a3d71a4e80a510f01d673b085658..ee932bc87af24f8844882589950f541fb5f6f734 100644 (file)
@@ -36,6 +36,7 @@
 typedef struct
 {
     int pict_type;
+    int frame_type;
     int kept_as_ref;
     double qscale;
     int mv_bits;
@@ -128,7 +129,9 @@ struct x264_ratecontrol_t
     double lmin[5];             /* min qscale by frame type */
     double lmax[5];
     double lstep;               /* max change (multiply) in qscale per frame */
-    uint16_t *qp_buffer;        /* Global buffer for converting MB-tree quantizer data. */
+    uint16_t *qp_buffer[2];     /* Global buffers for converting MB-tree quantizer data. */
+    int qpbuf_pos;              /* In order to handle pyramid reordering, QP buffer acts as a stack.
+                                 * This value is the current position (0 or 1). */
 
     /* MBRC stuff */
     double frame_size_estimated;
@@ -278,8 +281,8 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
 void x264_adaptive_quant( x264_t *h )
 {
     x264_emms();
-    /* MB-tree currently doesn't adjust quantizers in B-frames. */
-    float qp_offset = h->sh.i_type == SLICE_TYPE_B ? h->fenc->f_qp_offset_aq[h->mb.i_mb_xy] : h->fenc->f_qp_offset[h->mb.i_mb_xy];
+    /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
+    float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
     h->mb.i_qp = x264_clip3( h->rc->f_qpm + qp_offset + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
 }
 
@@ -289,28 +292,35 @@ int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame )
     uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type;
     int i;
 
-    if( i_type_actual != SLICE_TYPE_B )
+    if( rc->entry[frame->i_frame].kept_as_ref )
     {
         uint8_t i_type;
+        if( rc->qpbuf_pos < 0 )
+        {
+            do
+            {
+                rc->qpbuf_pos++;
 
-        if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) )
-            goto fail;
+                if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) )
+                    goto fail;
+                if( fread( rc->qp_buffer[rc->qpbuf_pos], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
+                    goto fail;
 
-        if( i_type != i_type_actual )
-        {
-            x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type,i_type_actual);
-            return -1;
+                if( i_type != i_type_actual && rc->qpbuf_pos == 1 )
+                {
+                    x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type, i_type_actual);
+                    return -1;
+                }
+            } while( i_type != i_type_actual );
         }
 
-        if( fread( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
-            goto fail;
-
         for( i = 0; i < h->mb.i_mb_count; i++ )
         {
-            frame->f_qp_offset[i] = ((float)(int16_t)endian_fix16( rc->qp_buffer[i] )) * (1/256.0);
+            frame->f_qp_offset[i] = ((float)(int16_t)endian_fix16( rc->qp_buffer[rc->qpbuf_pos][i] )) * (1/256.0);
             if( h->frames.b_have_lowres )
                 frame->i_inv_qscale_factor[i] = x264_exp2fix8(frame->f_qp_offset[i]);
         }
+        rc->qpbuf_pos--;
     }
     else
         x264_adaptive_quant_frame( h, frame );
@@ -581,20 +591,20 @@ int x264_ratecontrol_new( x264_t *h )
                 return -1;
             }
 
-            /* since B-adapt doesn't (yet) take into account B-pyramid,
-             * the converse is not a problem */
-            if( h->param.i_bframe )
+            if( h->param.i_bframe && ( p = strstr( opts, "b_pyramid=" ) ) &&
+                sscanf( p, "b_pyramid=%d", &i ) && h->param.i_bframe_pyramid != i )
             {
-                char buf[12];
-                sprintf( buf, "b_pyramid=%d", h->param.i_bframe_pyramid );
-                if( !strstr( opts, buf ) )
-                    x264_log( h, X264_LOG_WARNING, "different B-pyramid setting than 1st pass\n" );
+                x264_log( h, X264_LOG_ERROR, "different B-pyramid setting than 1st pass (%d vs %d)\n", h->param.i_bframe_pyramid, i );
+                return -1;
             }
 
             if( ( p = strstr( opts, "keyint=" ) ) && sscanf( p, "keyint=%d", &i )
                 && h->param.i_keyint_max != i )
-                x264_log( h, X264_LOG_WARNING, "different keyint than 1st pass (%d vs %d)\n",
+            {
+                x264_log( h, X264_LOG_ERROR, "different keyint than 1st pass (%d vs %d)\n",
                           h->param.i_keyint_max, i );
+                return -1;
+            }
 
             if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
                 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
@@ -706,13 +716,30 @@ int x264_ratecontrol_new( x264_t *h )
                 if( sscanf( w, "w:%hd,%hd,%hd", &rce->i_weight_denom, &rce->weight[0], &rce->weight[1] ) != 3 )
                     rce->i_weight_denom = -1;
 
-            switch(pict_type)
+            if( pict_type != 'b' )
+                rce->kept_as_ref = 1;
+            switch( pict_type )
             {
-                case 'I': rce->kept_as_ref = 1;
-                case 'i': rce->pict_type = SLICE_TYPE_I; break;
-                case 'P': rce->pict_type = SLICE_TYPE_P; break;
-                case 'B': rce->kept_as_ref = 1;
-                case 'b': rce->pict_type = SLICE_TYPE_B; break;
+                case 'I':
+                    rce->frame_type = X264_TYPE_IDR;
+                    rce->pict_type  = SLICE_TYPE_I;
+                    break;
+                case 'i':
+                    rce->frame_type = X264_TYPE_I;
+                    rce->pict_type  = SLICE_TYPE_I;
+                    break;
+                case 'P':
+                    rce->frame_type = X264_TYPE_P;
+                    rce->pict_type  = SLICE_TYPE_P;
+                    break;
+                case 'B':
+                    rce->frame_type = X264_TYPE_BREF;
+                    rce->pict_type  = SLICE_TYPE_B;
+                    break;
+                case 'b':
+                    rce->frame_type = X264_TYPE_B;
+                    rce->pict_type  = SLICE_TYPE_B;
+                    break;
                 default:  e = -1; break;
             }
             if(e < 10)
@@ -771,7 +798,12 @@ parse_error:
     }
 
     if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) )
-        CHECKED_MALLOC( rc->qp_buffer, h->mb.i_mb_count * sizeof(uint16_t) );
+    {
+        CHECKED_MALLOC( rc->qp_buffer[0], h->mb.i_mb_count * sizeof(uint16_t) );
+        if( h->param.i_bframe_pyramid && h->param.rc.b_stat_read )
+            CHECKED_MALLOC( rc->qp_buffer[1], h->mb.i_mb_count * sizeof(uint16_t) );
+        rc->qpbuf_pos = -1;
+    }
 
     for( i=0; i<h->param.i_threads; i++ )
     {
@@ -959,7 +991,8 @@ void x264_ratecontrol_delete( x264_t *h )
     x264_free( rc->pred );
     x264_free( rc->pred_b_from_p );
     x264_free( rc->entry );
-    x264_free( rc->qp_buffer );
+    x264_free( rc->qp_buffer[0] );
+    x264_free( rc->qp_buffer[1] );
     if( rc->zones )
     {
         x264_free( rc->zones[0].param );
@@ -1275,23 +1308,10 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
             }
             return X264_TYPE_AUTO;
         }
-        switch( rc->entry[frame_num].pict_type )
-        {
-            case SLICE_TYPE_I:
-                return rc->entry[frame_num].kept_as_ref ? X264_TYPE_IDR : X264_TYPE_I;
-
-            case SLICE_TYPE_B:
-                return rc->entry[frame_num].kept_as_ref ? X264_TYPE_BREF : X264_TYPE_B;
-
-            case SLICE_TYPE_P:
-            default:
-                return X264_TYPE_P;
-        }
+        return rc->entry[frame_num].frame_type;
     }
     else
-    {
         return X264_TYPE_AUTO;
-    }
 }
 
 void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm )
@@ -1373,10 +1393,10 @@ int x264_ratecontrol_end( x264_t *h, int bits )
             int i;
             /* Values are stored as big-endian FIX8.8 */
             for( i = 0; i < h->mb.i_mb_count; i++ )
-                rc->qp_buffer[i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 );
+                rc->qp_buffer[0][i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 );
             if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 )
                 goto fail;
-            if( fwrite( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count )
+            if( fwrite( rc->qp_buffer[0], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count )
                 goto fail;
         }
     }
@@ -2025,6 +2045,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
         COPY(short_term_cplxcount);
         COPY(bframes);
         COPY(prev_zone);
+        COPY(qpbuf_pos);
 #undef COPY
     }
     if( cur != next )
index 68b5ee290a45935fef48a4cce7ce3bf38770c8b9..63c3dcb6e4daf116696cd4f5641765e72a7a779b 100644 (file)
@@ -643,7 +643,7 @@ static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, int ref
     }
 }
 
-static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
+static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b, int referenced )
 {
     uint16_t *ref_costs[2] = {frames[p0]->i_propagate_cost,frames[p1]->i_propagate_cost};
     int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
@@ -651,13 +651,20 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
     int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
     int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
     int *buf = h->scratch_buffer;
+    uint16_t *propagate_cost = frames[b]->i_propagate_cost;
+
+    /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
+    if( !referenced )
+        memset( frames[b]->i_propagate_cost, 0, h->sps->i_mb_width * sizeof(uint16_t) );
 
     for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
     {
         int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
-        h->mc.mbtree_propagate_cost( buf, frames[b]->i_propagate_cost+mb_index,
+        h->mc.mbtree_propagate_cost( buf, propagate_cost,
             frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
             frames[b]->i_inv_qscale_factor+mb_index, h->sps->i_mb_width );
+        if( referenced )
+            propagate_cost += h->sps->i_mb_width;
         for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++, mb_index++ )
         {
             int propagate_amount = buf[h->mb.i_mb_x];
@@ -718,14 +725,15 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
         }
     }
 
-    if( h->param.rc.i_vbv_buffer_size && b == p1 )
-        x264_macroblock_tree_finish( h, frames[b], b-p0 );
+    if( h->param.rc.i_vbv_buffer_size && referenced )
+        x264_macroblock_tree_finish( h, frames[b], b == p1 ? b - p0 : 0 );
 }
 
 static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
 {
     int i, idx = !b_intra;
     int last_nonb, cur_nonb = 1;
+    int bframes = 0;
     if( b_intra )
         x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
 
@@ -747,18 +755,41 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
             break;
         x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 );
         memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
-        while( i > cur_nonb )
+        bframes = last_nonb - cur_nonb - 1;
+        if( h->param.i_bframe_pyramid && bframes > 1 )
         {
-            x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
-            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
-            x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
-            i--;
+            int middle = (bframes + 1)/2 + cur_nonb;
+            x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, middle, 0 );
+            memset( frames[middle]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
+            while( i > cur_nonb )
+            {
+                int p0 = i > middle ? middle : cur_nonb;
+                int p1 = i < middle ? middle : last_nonb;
+                if( i != middle )
+                {
+                    x264_slicetype_frame_cost( h, a, frames, p0, p1, i, 0 );
+                    x264_macroblock_tree_propagate( h, frames, p0, p1, i, 0 );
+                }
+                i--;
+            }
+            x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, middle, 1 );
         }
-        x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
+        else
+        {
+            while( i > cur_nonb )
+            {
+                x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
+                x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i, 0 );
+                i--;
+            }
+        }
+        x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb, 1 );
         last_nonb = cur_nonb;
     }
 
     x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
+    if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
+        x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
 }
 
 static int x264_vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int b )
@@ -828,8 +859,18 @@ static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_fram
         if( cost > threshold )
             break;
 
-        for( next_b = loc; next_b < next_p && cost < threshold; next_b++ )
-            cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, next_b, 0 );
+        if( h->param.i_bframe_pyramid && next_p - cur_p > 2 )
+        {
+            int middle = cur_p + (next_p - cur_p)/2;
+            cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, middle, 0 );
+            for( next_b = loc; next_b < middle && cost < threshold; next_b++ )
+                cost += x264_slicetype_frame_cost( h, a, frames, cur_p, middle, next_b, 0 );
+            for( next_b = middle+1; next_b < next_p && cost < threshold; next_b++ )
+                cost += x264_slicetype_frame_cost( h, a, frames, middle, next_p, next_b, 0 );
+        }
+        else
+            for( next_b = loc; next_b < next_p && cost < threshold; next_b++ )
+                cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, next_b, 0 );
 
         loc = next_p + 1;
         cur_p = next_p;