granicus.if.org Git - libx264/commitdiff
Multi-slice encoding support
author Fiona Glaser <fiona@x264.com>
Mon, 31 Aug 2009 13:36:41 +0000 (06:36 -0700)
committer Fiona Glaser <fiona@x264.com>
Mon, 31 Aug 2009 21:26:52 +0000 (14:26 -0700)
Slicing support is available through three methods (which can be mixed):
--slices sets a number of slices per frame and ensures rectangular slices (required for Blu-ray).  Overridden by either of the following options:
--slice-max-mbs sets a maximum number of macroblocks per slice.
--slice-max-size sets a maximum slice size, in bytes (includes NAL overhead).
Implement macroblock re-encoding support to allow highly accurate slice size limitation.  Might be useful for other things in the future, too.

Makefile
common/common.c
common/common.h
common/macroblock.c
encoder/analyse.c
encoder/encoder.c
encoder/ratecontrol.c
encoder/slicetype.c
x264.c
x264.h

diff --git a/Makefile b/Makefile
index 2243775da59d7ebe20639ccb7d00934d965e171b..0f34736ee1a981c6629a9ba0555e1e206d5d065d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -119,10 +119,10 @@ endif
 SRC2 = $(SRCS) $(SRCCLI)
 # These should cover most of the important codepaths
 OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb
-OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0
-OPT2 = --crf 26 -b4 -m5 -r2 --me hex --cqm jvt --nr 100 --psnr --no-mixed-refs --b-adapt 2
+OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0  --slice-max-mbs 50
+OPT2 = --crf 26 -b4 -m5 -r2 --me hex --cqm jvt --nr 100 --psnr --no-mixed-refs --b-adapt 2 --slice-max-size 1500
 OPT3 = --crf 18 -b3 -m9 -r5 --me umh -t1 -A all --b-pyramid --direct auto --no-fast-pskip --no-mbtree
-OPT4 = --crf 22 -b3 -m7 -r4 --me esa -t2 -A all --psy-rd 1.0:1.0
+OPT4 = --crf 22 -b3 -m7 -r4 --me esa -t2 -A all --psy-rd 1.0:1.0 --slices 4
 OPT5 = --frames 50 --crf 24 -b3 -m10 -r3 --me tesa -t2
 OPT6 = --frames 50 -q0 -m9 -r2 --me hex -Aall
 OPT7 = --frames 50 -q0 -m2 -r1 --me hex --no-cabac
diff --git a/common/common.c b/common/common.c
index 1f0103066bc148ddf66708fab426516910dcec95..c0a56e3d79bf7774913a76a5e1ce48dc48da0f50 100644 (file)
--- a/common/common.c
+++ b/common/common.c
@@ -62,6 +62,9 @@ void    x264_param_default( x264_param_t *param )
     param->i_fps_num       = 25;
     param->i_fps_den       = 1;
     param->i_level_idc     = -1;
+    param->i_slice_max_size = 0;
+    param->i_slice_max_mbs = 0;
+    param->i_slice_count = 0;
 
     /* Encoder parameters */
     param->i_frame_reference = 3;
@@ -370,6 +373,12 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
         else
             p->b_deblocking_filter = atobool(value);
     }
+    OPT("slice-max-size")
+        p->i_slice_max_size = atoi(value);
+    OPT("slice-max-mbs")
+        p->i_slice_max_mbs = atoi(value);
+    OPT("slices")
+        p->i_slice_count = atoi(value);
     OPT("cabac")
         p->b_cabac = atobool(value);
     OPT("cabac-idc")
@@ -773,9 +782,9 @@ void x264_reduce_fraction( int *n, int *d )
     c = a % b;
     while(c)
     {
-       a = b;
-       b = c;
-       c = a % b;
+        a = b;
+        b = c;
+        c = a % b;
     }
     *n /= b;
     *d /= b;
@@ -851,6 +860,12 @@ char *x264_param2string( x264_param_t *p, int b_res )
     s += sprintf( s, " deadzone=%d,%d", p->analyse.i_luma_deadzone[0], p->analyse.i_luma_deadzone[1] );
     s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset );
     s += sprintf( s, " threads=%d", p->i_threads );
+    if( p->i_slice_count )
+        s += sprintf( s, " slices=%d", p->i_slice_count );
+    if( p->i_slice_max_size )
+        s += sprintf( s, " slice_max_size=%d", p->i_slice_max_size );
+    if( p->i_slice_max_mbs )
+        s += sprintf( s, " slice_max_mbs=%d", p->i_slice_max_mbs );
     s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
     s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );
     s += sprintf( s, " mbaff=%d", p->b_interlaced );
diff --git a/common/common.h b/common/common.h
index fa0f9c83f6b7f8354136f3e7d825fe1735b1c821..574040d0f6dff90900403548aae5be3dd06c34c5 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -50,8 +50,6 @@ do {\
 
 #define X264_BFRAME_MAX 16
 #define X264_THREAD_MAX 128
-#define X264_SLICE_MAX 4
-#define X264_NAL_MAX (4 + X264_SLICE_MAX)
 #define X264_PCM_COST (386*8)
 #define X264_LOOKAHEAD_MAX 250
 
@@ -68,6 +66,7 @@ do {\
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
+#include <limits.h>
 #include "x264.h"
 #include "bs.h"
 #include "set.h"
@@ -293,7 +292,8 @@ struct x264_t
     struct
     {
         int         i_nal;
-        x264_nal_t  nal[X264_NAL_MAX];
+        int         i_nals_allocated;
+        x264_nal_t  *nal;
         int         i_bitstream;    /* size of p_bitstream */
         uint8_t     *p_bitstream;   /* will hold data for all nal */
         bs_t        bs;
@@ -488,6 +488,8 @@ struct x264_t
         /* skip flag for motion compensation */
         /* if we've already done MC, we don't need to do it again */
         int b_skip_mc;
+        /* set to true if we are re-encoding a macroblock. */
+        int b_reencode_mb;
 
         struct
         {
@@ -623,9 +625,9 @@ struct x264_t
         /* Cumulated stats */
 
         /* per slice info */
-        int     i_slice_count[5];
-        int64_t i_slice_size[5];
-        double  f_slice_qp[5];
+        int     i_frame_count[5];
+        int64_t i_frame_size[5];
+        double  f_frame_qp[5];
         int     i_consecutive_bframes[X264_BFRAME_MAX+1];
         /* */
         int64_t i_ssd_global[5];
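diff --git a/common/macroblock.c b/common/macroblock.c
index f8b20c793e670e03c6ba1e03162c938c9bc7bee0..790dde22b82e499da96b377198670c99b9ff52ed 100644 (file)
--- a/common/macroblock.c
+++ b/common/macroblock.c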
@@ -850,7 +850,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb
     h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,
         h->mb.pic.p_fenc_plane[i], i_stride2, w );
     memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 );
-    if( h->mb.b_interlaced )
+    if( h->mb.b_interlaced || h->mb.b_reencode_mb )
     {
         const uint8_t *plane_fdec = &h->fdec->plane[i][i_pix_offset];
         for( j = 0; j < w; j++ )
@@ -1016,7 +1016,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
           + !!(h->mb.i_neighbour & MB_TOP);
     }
 
-    if( !h->mb.b_interlaced )
+    if( !h->mb.b_interlaced && !h->mb.b_reencode_mb )
     {
         copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
         copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 7661031d8d3c82398673561b01a15772d98978d4..deca43b00b5e3c8371b600622a851827c68f468e 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -24,7 +24,6 @@
 
 #define _ISOC99_SOURCE
 #include <math.h>
-#include <limits.h>
 #ifndef _MSC_VER
 #include <unistd.h>
 #endif
@@ -457,23 +456,30 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
 /* Max = 4 */
 static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 {
-    if( i_neighbour & MB_TOPLEFT )
+    int b_top = i_neighbour & MB_TOP;
+    int b_left = i_neighbour & MB_LEFT;
+    if( b_top && b_left )
     {
         /* top and left available */
         *mode++ = I_PRED_16x16_V;
         *mode++ = I_PRED_16x16_H;
         *mode++ = I_PRED_16x16_DC;
-        *mode++ = I_PRED_16x16_P;
-        *pi_count = 4;
+        *pi_count = 3;
+        if( i_neighbour & MB_TOPLEFT )
+        {
+            /* top left available*/
+            *mode++ = I_PRED_16x16_P;
+            *pi_count = 4;
+        }
     }
-    else if( i_neighbour & MB_LEFT )
+    else if( b_left )
     {
         /* left available*/
         *mode++ = I_PRED_16x16_DC_LEFT;
         *mode++ = I_PRED_16x16_H;
         *pi_count = 2;
     }
-    else if( i_neighbour & MB_TOP )
+    else if( b_top )
     {
         /* top available*/
         *mode++ = I_PRED_16x16_DC_TOP;
@@ -491,23 +497,30 @@ static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, i
 /* Max = 4 */
 static void predict_8x8chroma_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
 {
-    if( i_neighbour & MB_TOPLEFT )
+    int b_top = i_neighbour & MB_TOP;
+    int b_left = i_neighbour & MB_LEFT;
+    if( b_top && b_left )
     {
         /* top and left available */
         *mode++ = I_PRED_CHROMA_V;
         *mode++ = I_PRED_CHROMA_H;
         *mode++ = I_PRED_CHROMA_DC;
-        *mode++ = I_PRED_CHROMA_P;
-        *pi_count = 4;
+        *pi_count = 3;
+        if( i_neighbour & MB_TOPLEFT )
+        {
+            /* top left available */
+            *mode++ = I_PRED_CHROMA_P;
+            *pi_count = 4;
+        }
     }
-    else if( i_neighbour & MB_LEFT )
+    else if( b_left )
     {
         /* left available*/
         *mode++ = I_PRED_CHROMA_DC_LEFT;
         *mode++ = I_PRED_CHROMA_H;
         *pi_count = 2;
     }
-    else if( i_neighbour & MB_TOP )
+    else if( b_top )
     {
         /* top available*/
         *mode++ = I_PRED_CHROMA_DC_TOP;
@@ -526,10 +539,9 @@ static void predict_8x8chroma_mode_available( unsigned int i_neighbour, int *mod
 static void predict_4x4_mode_available( unsigned int i_neighbour,
                                         int *mode, int *pi_count )
 {
-    int b_l = i_neighbour & MB_LEFT;
-    int b_t = i_neighbour & MB_TOP;
-
-    if( b_l && b_t )
+    int b_top = i_neighbour & MB_TOP;
+    int b_left = i_neighbour & MB_LEFT;
+    if( b_top && b_left )
     {
         *pi_count = 6;
         *mode++ = I_PRED_4x4_DC;
@@ -546,14 +558,14 @@ static void predict_4x4_mode_available( unsigned int i_neighbour,
         *mode++ = I_PRED_4x4_VL;
         *mode++ = I_PRED_4x4_HU;
     }
-    else if( b_l )
+    else if( b_left )
     {
         *mode++ = I_PRED_4x4_DC_LEFT;
         *mode++ = I_PRED_4x4_H;
         *mode++ = I_PRED_4x4_HU;
         *pi_count = 3;
     }
-    else if( b_t )
+    else if( b_top )
     {
         *mode++ = I_PRED_4x4_DC_TOP;
         *mode++ = I_PRED_4x4_V;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 841bfe9d58506d52343a839b94ca6dcad72bd104..8cec1552a39c0baf631d3b927728cfdf695f8326 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -88,7 +88,7 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
     sh->pps = pps;
 
     sh->i_first_mb  = 0;
-    sh->i_last_mb   = h->sps->i_mb_width * h->sps->i_mb_height;
+    sh->i_last_mb   = h->mb.i_mb_count - 1;
     sh->i_pps_id    = pps->i_id;
 
     sh->i_frame_num = i_frame;
@@ -180,7 +180,7 @@ static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal
     if( !sh->sps->b_frame_mbs_only )
     {
         bs_write1( s, sh->b_field_pic );
-        if ( sh->b_field_pic )
+        if( sh->b_field_pic )
             bs_write1( s, sh->b_bottom_field );
     }
 
@@ -466,6 +466,23 @@ static int x264_validate_parameters( x264_t *h )
                   h->param.i_width, h->param.i_height );
     }
 
+    int max_slices = (h->param.i_height+((16<<h->param.b_interlaced)-1))/(16<<h->param.b_interlaced);
+    h->param.i_slice_count = x264_clip3( h->param.i_slice_count, 0, max_slices );
+    h->param.i_slice_max_size = X264_MAX( h->param.i_slice_max_size, 0 );
+    h->param.i_slice_max_mbs = X264_MAX( h->param.i_slice_max_mbs, 0 );
+    if( h->param.b_interlaced && h->param.i_slice_max_size )
+    {
+        x264_log( h, X264_LOG_WARNING, "interlaced + slice-max-size is not implemented\n" );
+        h->param.i_slice_max_size = 0;
+    }
+    if( h->param.b_interlaced && h->param.i_slice_max_mbs )
+    {
+        x264_log( h, X264_LOG_WARNING, "interlaced + slice-max-mbs is not implemented\n" );
+        h->param.i_slice_max_mbs = 0;
+    }
+    if( h->param.i_slice_max_mbs || h->param.i_slice_max_size )
+        h->param.i_slice_count = 0;
+
     h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
     if( h->param.i_keyint_max <= 0 )
         h->param.i_keyint_max = 1;
@@ -855,6 +872,9 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
         if( !h->thread[i]->fdec )
             goto fail;
         CHECKED_MALLOC( h->thread[i]->out.p_bitstream, h->out.i_bitstream );
+        /* Start each thread with room for 8 NAL units; it'll realloc later if needed. */
+        CHECKED_MALLOC( h->thread[i]->out.nal, 8*sizeof(x264_nal_t) );
+        h->thread[i]->out.i_nals_allocated = 8;
         if( x264_macroblock_cache_init( h->thread[i] ) < 0 )
             goto fail;
     }
@@ -931,6 +951,9 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
         COPY( analyse.b_transform_8x8 );
     if( h->frames.i_max_ref1 > 1 )
         COPY( b_bframe_pyramid );
+    COPY( i_slice_max_size );
+    COPY( i_slice_max_mbs );
+    COPY( i_slice_count );
 #undef COPY
 
     mbcmp_init( h );
@@ -949,11 +972,24 @@ static void x264_nal_start( x264_t *h, int i_type, int i_ref_idc )
     nal->i_payload= 0;
     nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
 }
-static void x264_nal_end( x264_t *h )
+static int x264_nal_end( x264_t *h )
 {
     x264_nal_t *nal = &h->out.nal[h->out.i_nal];
     nal->i_payload = &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8] - nal->p_payload;
     h->out.i_nal++;
+
+    /* if number of allocated nals is not enough, re-allocate a larger one. */
+    if( h->out.i_nal >= h->out.i_nals_allocated )
+    {
+        x264_nal_t *new_out = x264_malloc( sizeof(x264_nal_t) * (h->out.i_nals_allocated*2) );
+        if( !new_out )
+            return -1;
+        memcpy( new_out, h->out.nal, sizeof(x264_nal_t) * (h->out.i_nals_allocated) );
+        x264_free( h->out.nal );
+        h->out.nal = new_out;
+        h->out.i_nals_allocated *= 2;
+    }
+    return 0;
 }
 
 /****************************************************************************
@@ -972,17 +1008,20 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
         x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
         if( x264_sei_version_write( h, &h->out.bs ) )
             return -1;
-        x264_nal_end( h );
+        if( x264_nal_end( h ) )
+            return -1;
 
         /* generate sequence parameters */
         x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST );
         x264_sps_write( &h->out.bs, h->sps );
-        x264_nal_end( h );
+        if( x264_nal_end( h ) )
+            return -1;
 
         /* generate picture parameters */
         x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
         x264_pps_write( &h->out.bs, h->pps );
-        x264_nal_end( h );
+        if( x264_nal_end( h ) )
+            return -1;
         bs_flush( &h->out.bs );
     }
     /* now set output*/
@@ -1231,10 +1270,13 @@ static int x264_slice_write( x264_t *h )
 {
     int i_skip;
     int mb_xy, i_mb_x, i_mb_y;
-    int i, i_list, i_ref;
-
-    /* init stats */
-    memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
+    int i, i_list, i_ref, i_skip_bak = 0; /* Shut up GCC. */
+    bs_t bs_bak;
+    x264_cabac_t cabac_bak;
+    uint8_t cabac_prevbyte_bak = 0; /* Shut up GCC. */
+    /* Assume no more than 3 bytes of NALU escaping. */
+    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-3-NALU_OVERHEAD)*8 : INT_MAX;
+    int starting_bits = bs_pos(&h->out.bs);
 
     /* Slice */
     x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
@@ -1257,11 +1299,29 @@ static int x264_slice_write( x264_t *h )
     i_mb_x = h->sh.i_first_mb % h->sps->i_mb_width;
     i_skip = 0;
 
-    while( (mb_xy = i_mb_x + i_mb_y * h->sps->i_mb_width) < h->sh.i_last_mb )
+    while( (mb_xy = i_mb_x + i_mb_y * h->sps->i_mb_width) <= h->sh.i_last_mb )
     {
         int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
+        if( h->param.i_slice_max_size > 0 )
+        {
+            /* We don't need the contexts because flushing the CABAC encoder has no context
+             * dependency and macroblocks are only re-encoded in the case where a slice is
+             * ended (and thus the content of all contexts are thrown away). */
+            if( h->param.b_cabac )
+            {
+                memcpy( &cabac_bak, &h->cabac, offsetof(x264_cabac_t, f8_bits_encoded) );
+                /* x264's CABAC writer modifies the previous byte during carry, so it has to be
+                 * backed up. */
+                cabac_prevbyte_bak = h->cabac.p[-1];
+            }
+            else
+            {
+                bs_bak = h->out.bs;
+                i_skip_bak = i_skip;
+            }
+        }
 
-        if( i_mb_x == 0 )
+        if( i_mb_x == 0 && !h->mb.b_reencode_mb )
             x264_fdec_filter_row( h, i_mb_y );
 
         /* load cache */
@@ -1309,6 +1369,37 @@ static int x264_slice_write( x264_t *h )
             }
         }
 
+        int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
+        int mb_size = total_bits - mb_spos;
+
+        /* We'll just re-encode this last macroblock if we go over the max slice size. */
+        if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
+        {
+            if( mb_xy != h->sh.i_first_mb )
+            {
+                if( h->param.b_cabac )
+                {
+                    memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
+                    h->cabac.p[-1] = cabac_prevbyte_bak;
+                }
+                else
+                {
+                    h->out.bs = bs_bak;
+                    i_skip = i_skip_bak;
+                }
+                h->mb.b_reencode_mb = 1;
+                h->sh.i_last_mb = mb_xy-1;
+                break;
+            }
+            else
+            {
+                h->sh.i_last_mb = mb_xy;
+                h->mb.b_reencode_mb = 0;
+            }
+        }
+        else
+            h->mb.b_reencode_mb = 0;
+
 #if VISUALIZE
         if( h->param.b_visualize )
             x264_visualize_mb( h );
@@ -1350,7 +1441,7 @@ static int x264_slice_write( x264_t *h )
             h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
         }
 
-        x264_ratecontrol_mb( h, bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac) - mb_spos );
+        x264_ratecontrol_mb( h, mb_size );
 
         if( h->sh.b_mbaff )
         {
@@ -1359,7 +1450,7 @@ static int x264_slice_write( x264_t *h )
         }
         else
             i_mb_x++;
-        if(i_mb_x == h->sps->i_mb_width)
+        if( i_mb_x == h->sps->i_mb_width )
         {
             i_mb_y++;
             i_mb_x = 0;
@@ -1379,16 +1470,18 @@ static int x264_slice_write( x264_t *h )
         bs_rbsp_trailing( &h->out.bs );
         bs_flush( &h->out.bs );
     }
+    if( x264_nal_end( h ) )
+        return -1;
 
-    x264_nal_end( h );
-
-    x264_fdec_filter_row( h, h->sps->i_mb_height );
+    if( h->sh.i_last_mb == h->mb.i_mb_count-1 )
+    {
+        h->stat.frame.i_misc_bits = bs_pos( &h->out.bs )
+                                  + (h->out.i_nal*NALU_OVERHEAD * 8)
+                                  - h->stat.frame.i_tex_bits
+                                  - h->stat.frame.i_mv_bits;
+        x264_fdec_filter_row( h, h->sps->i_mb_height );
+    }
 
-    /* Compute misc bits */
-    h->stat.frame.i_misc_bits = bs_pos( &h->out.bs )
-                              + NALU_OVERHEAD * 8
-                              - h->stat.frame.i_tex_bits
-                              - h->stat.frame.i_mv_bits;
     return 0;
 }
 
@@ -1416,12 +1509,13 @@ static void x264_thread_sync_stat( x264_t *dst, x264_t *src )
 {
     if( dst == src )
         return;
-    memcpy( &dst->stat.i_slice_count, &src->stat.i_slice_count, sizeof(dst->stat) - sizeof(dst->stat.frame) );
+    memcpy( &dst->stat.i_frame_count, &src->stat.i_frame_count, sizeof(dst->stat) - sizeof(dst->stat.frame) );
 }
 
 static void *x264_slices_write( x264_t *h )
 {
-    int i_frame_size;
+    int i_frame_size = 0;
+    int i_slice_num = 0;
 
 #ifdef HAVE_MMX
     /* Misalign mask has to be set separately for each thread. */
@@ -1435,9 +1529,28 @@ static void *x264_slices_write( x264_t *h )
             return (void *)-1;
 #endif
 
-    if( x264_stack_align( x264_slice_write, h ) )
-        return (void *)-1;
-    i_frame_size = h->out.nal[h->out.i_nal-1].i_payload;
+    /* init stats */
+    memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
+    h->mb.b_reencode_mb = 0;
+    while( h->sh.i_first_mb < h->mb.i_mb_count )
+    {
+        h->sh.i_last_mb = h->mb.i_mb_count - 1;
+        if( h->param.i_slice_max_mbs )
+            h->sh.i_last_mb = h->sh.i_first_mb + h->param.i_slice_max_mbs - 1;
+        else if( h->param.i_slice_count )
+        {
+            x264_emms();
+            i_slice_num++;
+            double height = h->sps->i_mb_height >> h->param.b_interlaced;
+            int width = h->sps->i_mb_width << h->param.b_interlaced;
+            h->sh.i_last_mb = (int)(height * i_slice_num / h->param.i_slice_count + 0.5) * width - 1;
+        }
+        h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, h->mb.i_mb_count - 1 );
+        if( x264_stack_align( x264_slice_write, h ) )
+            return (void *)-1;
+        h->sh.i_first_mb = h->sh.i_last_mb + 1;
+        i_frame_size += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+    }
 
 #if VISUALIZE
     if( h->param.b_visualize )
@@ -1678,7 +1791,8 @@ int     x264_encoder_encode( x264_t *h,
         x264_nal_start(h, NAL_AUD, NAL_PRIORITY_DISPOSABLE);
         bs_write(&h->out.bs, 3, pic_type);
         bs_rbsp_trailing(&h->out.bs);
-        x264_nal_end(h);
+        if( x264_nal_end( h ) )
+            return -1;
     }
 
     h->i_nal_type = i_nal_type;
@@ -1693,18 +1807,21 @@ int     x264_encoder_encode( x264_t *h,
             x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
             if( x264_sei_version_write( h, &h->out.bs ) )
                 return -1;
-            x264_nal_end( h );
+            if( x264_nal_end( h ) )
+                return -1;
         }
 
         /* generate sequence parameters */
         x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST );
         x264_sps_write( &h->out.bs, h->sps );
-        x264_nal_end( h );
+        if( x264_nal_end( h ) )
+            return -1;
 
         /* generate picture parameters */
         x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
         x264_pps_write( &h->out.bs, h->pps );
-        x264_nal_end( h );
+        if( x264_nal_end( h ) )
+            return -1;
     }
 
     /* Write frame */
@@ -1781,9 +1898,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
     x264_thread_sync_stat( h, h->thread[0] );
 
     /* Slice stat */
-    h->stat.i_slice_count[h->sh.i_type]++;
-    h->stat.i_slice_size[h->sh.i_type] += h->out.i_frame_size + NALU_OVERHEAD;
-    h->stat.f_slice_qp[h->sh.i_type] += h->fdec->f_qp_avg_aq;
+    h->stat.i_frame_count[h->sh.i_type]++;
+    h->stat.i_frame_size[h->sh.i_type] += h->out.i_frame_size + h->out.i_nal*NALU_OVERHEAD;
+    h->stat.f_frame_qp[h->sh.i_type] += h->fdec->f_qp_avg_aq;
 
     for( i = 0; i < X264_MBTYPE_MAX; i++ )
         h->stat.i_mb_count[h->sh.i_type][i] += h->stat.frame.i_mb_count[i];
@@ -1930,17 +2047,17 @@ void    x264_encoder_close  ( x264_t *h )
         static const char *slice_name[] = { "P", "B", "I", "SP", "SI" };
         int i_slice = slice_order[i];
 
-        if( h->stat.i_slice_count[i_slice] > 0 )
+        if( h->stat.i_frame_count[i_slice] > 0 )
         {
-            const int i_count = h->stat.i_slice_count[i_slice];
+            const int i_count = h->stat.i_frame_count[i_slice];
             if( h->param.analyse.b_psnr )
             {
                 x264_log( h, X264_LOG_INFO,
-                          "slice %s:%-5d Avg QP:%5.2f  size:%6.0f  PSNR Mean Y:%5.2f U:%5.2f V:%5.2f Avg:%5.2f Global:%5.2f\n",
+                          "frame %s:%-5d Avg QP:%5.2f  size:%6.0f  PSNR Mean Y:%5.2f U:%5.2f V:%5.2f Avg:%5.2f Global:%5.2f\n",
                           slice_name[i_slice],
                           i_count,
-                          h->stat.f_slice_qp[i_slice] / i_count,
-                          (double)h->stat.i_slice_size[i_slice] / i_count,
+                          h->stat.f_frame_qp[i_slice] / i_count,
+                          (double)h->stat.i_frame_size[i_slice] / i_count,
                           h->stat.f_psnr_mean_y[i_slice] / i_count, h->stat.f_psnr_mean_u[i_slice] / i_count, h->stat.f_psnr_mean_v[i_slice] / i_count,
                           h->stat.f_psnr_average[i_slice] / i_count,
                           x264_psnr( h->stat.i_ssd_global[i_slice], i_count * i_yuv_size ) );
@@ -1948,15 +2065,15 @@ void    x264_encoder_close  ( x264_t *h )
             else
             {
                 x264_log( h, X264_LOG_INFO,
-                          "slice %s:%-5d Avg QP:%5.2f  size:%6.0f\n",
+                          "frame %s:%-5d Avg QP:%5.2f  size:%6.0f\n",
                           slice_name[i_slice],
                           i_count,
-                          h->stat.f_slice_qp[i_slice] / i_count,
-                          (double)h->stat.i_slice_size[i_slice] / i_count );
+                          h->stat.f_frame_qp[i_slice] / i_count,
+                          (double)h->stat.i_frame_size[i_slice] / i_count );
             }
         }
     }
-    if( h->param.i_bframe && h->stat.i_slice_count[SLICE_TYPE_P] )
+    if( h->param.i_bframe && h->stat.i_frame_count[SLICE_TYPE_P] )
     {
         char *p = buf;
         int den = 0;
@@ -1976,17 +2093,17 @@ void    x264_encoder_close  ( x264_t *h )
         }
 
     /* MB types used */
-    if( h->stat.i_slice_count[SLICE_TYPE_I] > 0 )
+    if( h->stat.i_frame_count[SLICE_TYPE_I] > 0 )
     {
         int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I];
-        double i_count = h->stat.i_slice_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0;
+        double i_count = h->stat.i_frame_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0;
         x264_print_intra( i_mb_count, i_count, b_print_pcm, buf );
         x264_log( h, X264_LOG_INFO, "mb I  %s\n", buf );
     }
-    if( h->stat.i_slice_count[SLICE_TYPE_P] > 0 )
+    if( h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
     {
         int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_P];
-        double i_count = h->stat.i_slice_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0;
+        double i_count = h->stat.i_frame_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0;
         int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_P];
         x264_print_intra( i_mb_count, i_count, b_print_pcm, buf );
         x264_log( h, X264_LOG_INFO,
@@ -1999,10 +2116,10 @@ void    x264_encoder_close  ( x264_t *h )
                   i_mb_size[PIXEL_4x4] / (i_count*4),
                   i_mb_count[P_SKIP] / i_count );
     }
-    if( h->stat.i_slice_count[SLICE_TYPE_B] > 0 )
+    if( h->stat.i_frame_count[SLICE_TYPE_B] > 0 )
     {
         int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_B];
-        double i_count = h->stat.i_slice_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0;
+        double i_count = h->stat.i_frame_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0;
         double i_mb_list_count;
         int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_B];
         int64_t list_count[3] = {0}; /* 0 == L0, 1 == L1, 2 == BI */
@@ -2035,7 +2152,7 @@ void    x264_encoder_close  ( x264_t *h )
 
     x264_ratecontrol_summary( h );
 
-    if( h->stat.i_slice_count[SLICE_TYPE_I] + h->stat.i_slice_count[SLICE_TYPE_P] + h->stat.i_slice_count[SLICE_TYPE_B] > 0 )
+    if( h->stat.i_frame_count[SLICE_TYPE_I] + h->stat.i_frame_count[SLICE_TYPE_P] + h->stat.i_frame_count[SLICE_TYPE_B] > 0 )
     {
 #define SUM3(p) (p[SLICE_TYPE_I] + p[SLICE_TYPE_P] + p[SLICE_TYPE_B])
 #define SUM3b(p,o) (p[SLICE_TYPE_I][o] + p[SLICE_TYPE_P][o] + p[SLICE_TYPE_B][o])
@@ -2043,12 +2160,12 @@ void    x264_encoder_close  ( x264_t *h )
         int64_t i_intra = i_i8x8 + SUM3b( h->stat.i_mb_count, I_4x4 )
                                  + SUM3b( h->stat.i_mb_count, I_16x16 );
         int64_t i_all_intra = i_intra + SUM3b( h->stat.i_mb_count, I_PCM);
-        const int i_count = h->stat.i_slice_count[SLICE_TYPE_I] +
-                            h->stat.i_slice_count[SLICE_TYPE_P] +
-                            h->stat.i_slice_count[SLICE_TYPE_B];
+        const int i_count = h->stat.i_frame_count[SLICE_TYPE_I] +
+                            h->stat.i_frame_count[SLICE_TYPE_P] +
+                            h->stat.i_frame_count[SLICE_TYPE_B];
         int64_t i_mb_count = i_count * h->mb.i_mb_count;
         float fps = (float) h->param.i_fps_num / h->param.i_fps_den;
-        float f_bitrate = fps * SUM3(h->stat.i_slice_size) / i_count / 125;
+        float f_bitrate = fps * SUM3(h->stat.i_frame_size) / i_count / 125;
 
         if( h->pps->b_transform_8x8_mode )
         {
@@ -2058,11 +2175,11 @@ void    x264_encoder_close  ( x264_t *h )
         }
 
         if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
-            && h->stat.i_slice_count[SLICE_TYPE_B] )
+            && h->stat.i_frame_count[SLICE_TYPE_B] )
         {
             x264_log( h, X264_LOG_INFO, "direct mvs  spatial:%.1f%%  temporal:%.1f%%\n",
-                      h->stat.i_direct_frames[1] * 100. / h->stat.i_slice_count[SLICE_TYPE_B],
-                      h->stat.i_direct_frames[0] * 100. / h->stat.i_slice_count[SLICE_TYPE_B] );
+                      h->stat.i_direct_frames[1] * 100. / h->stat.i_frame_count[SLICE_TYPE_B],
+                      h->stat.i_direct_frames[0] * 100. / h->stat.i_frame_count[SLICE_TYPE_B] );
         }
 
         x264_log( h, X264_LOG_INFO, "coded y,uvDC,uvAC intra:%.1f%% %.1f%% %.1f%% inter:%.1f%% %.1f%% %.1f%%\n",
@@ -2168,6 +2285,7 @@ void    x264_encoder_close  ( x264_t *h )
 
         x264_macroblock_cache_end( h->thread[i] );
         x264_free( h->thread[i]->out.p_bitstream );
+        x264_free( h->thread[i]->out.nal);
         x264_free( h->thread[i] );
     }
 }
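diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index d33b00b6b8b24095dee1ed9acee44517df63f576..ca19d64a82460870158794fe1ea0a4f4f1b6323c 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c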
@@ -27,8 +27,6 @@
 #define _ISOC99_SOURCE
 #undef NDEBUG // always check asserts, the speed effect is far too small to disable them
 #include <math.h>
-#include <limits.h>
-#include <assert.h>
 
 #include "common/common.h"
 #include "common/cpu.h"
@@ -1052,7 +1050,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
         update_predictor( rc->row_pred, qp2qscale(rc->qpm), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
 
         /* tweak quality based on difference from predicted size */
-        if( y < h->sps->i_mb_height-1 && h->stat.i_slice_count[h->sh.i_type] > 0 )
+        if( y < h->sps->i_mb_height-1 && h->stat.i_frame_count[h->sh.i_type] > 0 )
         {
             int prev_row_qp = h->fdec->i_row_qp[y];
             int b0 = predict_row_size_sum( h, y, rc->qpm );
@@ -1127,8 +1125,8 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
              * So just calculate the average QP used so far. */
             int i;
 
-            h->param.rc.i_qp_constant = (h->stat.i_slice_count[SLICE_TYPE_P] == 0) ? 24
-                                      : 1 + h->stat.f_slice_qp[SLICE_TYPE_P] / h->stat.i_slice_count[SLICE_TYPE_P];
+            h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 24
+                                      : 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P];
             rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
             rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
             rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
@@ -1572,9 +1570,9 @@ static float rate_estimate_qscale( x264_t *h )
     int pict_type = h->sh.i_type;
     double lmin = rcc->lmin[pict_type];
     double lmax = rcc->lmax[pict_type];
-    int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
-                          + h->stat.i_slice_size[SLICE_TYPE_P]
-                          + h->stat.i_slice_size[SLICE_TYPE_B]);
+    int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I]
+                          + h->stat.i_frame_size[SLICE_TYPE_P]
+                          + h->stat.i_frame_size[SLICE_TYPE_B]);
 
     if( rcc->b_2pass )
     {
index 570d2aaa549b52ade316be27674abf8a3fd3846e..af74427de5655644b76c914960072218c0bcdbb4 100644 (file)
@@ -22,7 +22,6 @@
  *****************************************************************************/
 
 #include <math.h>
-#include <limits.h>
 
 #include "common/common.h"
 #include "common/cpu.h"
diff --git a/x264.c b/x264.c
index 32e65705129ddd857f58e8d72763ab0d9f475629..b7083c0293d44e98c372fd90bbaf2d118a4c21a4 100644 (file)
--- a/x264.c
+++ b/x264.c
@@ -196,6 +196,10 @@ static void Help( x264_param_t *defaults, int b_longhelp )
     H1( "      --no-deblock            Disable loop filter\n" );
     H0( "  -f, --deblock <alpha:beta>  Loop filter AlphaC0 and Beta parameters [%d:%d]\n",
                                        defaults->i_deblocking_filter_alphac0, defaults->i_deblocking_filter_beta );
+    H1( "      --slices <integer>      Number of slices per frame; forces rectangular\n"
+        "                              slices and is overridden by other slicing options\n" );
+    H1( "      --slice-max-size <integer> Limit the size of each slice in bytes\n");
+    H1( "      --slice-max-mbs <integer> Limit the size of each slice in macroblocks\n");
     H0( "      --interlaced            Enable pure-interlaced mode\n" );
     H0( "\n" );
     H0( "Ratecontrol:\n" );
@@ -459,6 +463,9 @@ static struct option long_options[] =
     { "zones",       required_argument, NULL, 0 },
     { "qpfile",      required_argument, NULL, OPT_QPFILE },
     { "threads",     required_argument, NULL, 0 },
+    { "slice-max-size",    required_argument, NULL, 0 },
+    { "slice-max-mbs",     required_argument, NULL, 0 },
+    { "slices",            required_argument, NULL, 0 },
     { "thread-input",      no_argument, NULL, OPT_THREAD_INPUT },
     { "non-deterministic", no_argument, NULL, 0 },
     { "psnr",              no_argument, NULL, 0 },
@@ -1030,13 +1037,13 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
     {
         file_pos = ftell( opt->qpfile );
         ret = fscanf( opt->qpfile, "%d %c %d\n", &num, &type, &qp );
-               if( num > i_frame || ret == EOF )
-               {
-                       pic->i_type = X264_TYPE_AUTO;
-                       pic->i_qpplus1 = 0;
-                       fseek( opt->qpfile , file_pos , SEEK_SET );
-                       break;
-               }
+        if( num > i_frame || ret == EOF )
+        {
+            pic->i_type = X264_TYPE_AUTO;
+            pic->i_qpplus1 = 0;
+            fseek( opt->qpfile , file_pos , SEEK_SET );
+            break;
+        }
         if( num < i_frame && ret == 3 )
             continue;
         pic->i_qpplus1 = qp+1;
diff --git a/x264.h b/x264.h
index 7fa508d1eec21675957039497d2232b6ff52d1d6..6609a32b3874d150071b66608e1b5febd097b1ff 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@
 
 #include <stdarg.h>
 
-#define X264_BUILD 72
+#define X264_BUILD 73
 
 /* x264_t:
  *      opaque handler for encoder */
@@ -298,6 +298,11 @@ typedef struct x264_param_t
     int b_repeat_headers;       /* put SPS/PPS before each keyframe */
     int i_sps_id;               /* SPS and PPS id number */
 
+    /* Slicing parameters */
+    int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
+    int i_slice_max_mbs;     /* Max number of MBs per slice; overrides i_slice_count. */
+    int i_slice_count;       /* Number of slices per frame: forces rectangular slices. */
+
     /* Optional callback for freeing this x264_param_t when it is done being used.
      * Only used when the x264_param_t sits in memory for an indefinite period of time,
      * i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.