From: Fiona Glaser Date: Thu, 17 Sep 2009 03:00:00 +0000 (-0700) Subject: Major API change: encapsulate NALs within libx264 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7a0fbed78235a63bf8008d282f5db64ef1f3f2ec;p=libx264 Major API change: encapsulate NALs within libx264 libx264 now returns NAL units instead of raw data. x264_nal_encode is no longer a public function. See x264.h for full documentation of changes. New parameter: b_annexb, on by default. If disabled, startcodes are replaced by sizes as in mp4. x264's VBV now works on a NAL level, taking into account escape codes. VBV will also take into account the bit cost of SPS/PPS, but only if b_repeat_headers is set. Add an overhead tracking system to VBV to better predict the constant overhead of frames (headers, NALU overhead, etc). --- diff --git a/common/common.c b/common/common.c index 46173870..648c75a2 100644 --- a/common/common.c +++ b/common/common.c @@ -151,6 +151,7 @@ void x264_param_default( x264_param_t *param ) memset( param->cqm_8py, 16, 64 ); param->b_repeat_headers = 1; + param->b_annexb = 1; param->b_aud = 0; } @@ -603,6 +604,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) p->b_repeat_headers = !atobool(value); OPT("repeat-headers") p->b_repeat_headers = atobool(value); + OPT("annexb") + p->b_annexb = atobool(value); else return X264_PARAM_BAD_NAME; #undef OPT @@ -695,23 +698,23 @@ void x264_picture_clean( x264_picture_t *pic ) /**************************************************************************** * x264_nal_encode: ****************************************************************************/ -int x264_nal_encode( void *p_data, int *pi_data, int b_annexeb, x264_nal_t *nal ) +int x264_nal_encode( uint8_t *dst, int b_annexb, x264_nal_t *nal ) { - uint8_t *dst = p_data; uint8_t *src = nal->p_payload; - uint8_t *end = &nal->p_payload[nal->i_payload]; - int i_count = 0; + uint8_t *end = nal->p_payload + nal->i_payload; + uint8_t *orig_dst = dst; + int i_count = 0, size; - /* FIXME this code doesn't check overflow */ - - if( b_annexeb ) + /* long nal start code (we always use long ones) */ + if( b_annexb ) { - /* long nal start code (we always use long ones)*/ *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x01; } + else /* save room for size later */ + dst += 4; /* nal header */ *dst++ = ( 0x00 << 7 ) | ( nal->i_ref_idc << 5 ) | nal->i_type; @@ -729,9 +732,19 @@ int x264_nal_encode( void *p_data, int *pi_data, int b_annexeb, x264_nal_t *nal i_count = 0; *dst++ = *src++; } - *pi_data = dst - (uint8_t*)p_data; + size = (dst - orig_dst) - 4; + + /* Write the size header for mp4/etc */ + if( !b_annexb ) + { + /* Size doesn't include the size of the header we're writing now. */ + orig_dst[0] = size>>24; + orig_dst[1] = size>>16; + orig_dst[2] = size>> 8; + orig_dst[3] = size>> 0; + } - return *pi_data; + return size+4; } diff --git a/common/common.h b/common/common.h index 6271bc59..d75a7d8a 100644 --- a/common/common.h +++ b/common/common.h @@ -81,7 +81,7 @@ do {\ #include "quant.h" /**************************************************************************** - * Generals functions + * General functions ****************************************************************************/ /* x264_malloc : will do or emulate a memalign * you have to use x264_free for buffers allocated with x264_malloc */ @@ -98,6 +98,8 @@ int64_t x264_mdate( void ); * the encoding options */ char *x264_param2string( x264_param_t *p, int b_res ); +int x264_nal_encode( uint8_t *dst, int b_annexb, x264_nal_t *nal ); + /* log */ void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... ); @@ -312,9 +314,11 @@ struct x264_t int i_bitstream; /* size of p_bitstream */ uint8_t *p_bitstream; /* will hold data for all nal */ bs_t bs; - int i_frame_size; } out; + uint8_t *nal_buffer; + int nal_buffer_size; + /**** thread synchronization starts here ****/ /* frame number/poc */ diff --git a/encoder/encoder.c b/encoder/encoder.c index 8e614a2b..132e9de6 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -880,6 +880,9 @@ x264_t *x264_encoder_open( x264_param_t *param ) * ( h->param.rc.i_rc_method == X264_RC_ABR ? pow( 0.95, h->param.rc.i_qp_min ) : pow( 0.95, h->param.rc.i_qp_constant ) * X264_MAX( 1, h->param.rc.f_ip_factor ))); + CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 ); + h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4; + h->thread[0] = h; h->i_thread_num = 0; for( i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ ) @@ -1013,44 +1016,73 @@ static int x264_nal_end( x264_t *h ) return 0; } +static int x264_encoder_encapsulate_nals( x264_t *h ) +{ + int nal_size = 0, i; + for( i = 0; i < h->out.i_nal; i++ ) + nal_size += h->out.nal[i].i_payload; + + /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */ + if( h->nal_buffer_size < nal_size * 3/2 + h->out.i_nal * 4 ) + { + uint8_t *buf = x264_malloc( nal_size * 2 + h->out.i_nal * 4 ); + if( !buf ) + return -1; + x264_free( h->nal_buffer ); + h->nal_buffer = buf; + } + + uint8_t *nal_buffer = h->nal_buffer; + + for( i = 0; i < h->out.i_nal; i++ ) + { + int size = x264_nal_encode( nal_buffer, h->param.b_annexb, &h->out.nal[i] ); + h->out.nal[i].i_payload = size; + h->out.nal[i].p_payload = nal_buffer; + nal_buffer += size; + } + + return nal_buffer - h->nal_buffer; +} + /**************************************************************************** * x264_encoder_headers: ****************************************************************************/ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal ) { + int frame_size = 0; /* init bitstream context */ h->out.i_nal = 0; bs_init( &h->out.bs, h->out.p_bitstream, h->out.i_bitstream ); - /* Put SPS and PPS */ - if( h->i_frame == 0 ) - { - /* identify ourself */ - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); - if( x264_sei_version_write( h, &h->out.bs ) ) - return -1; - if( x264_nal_end( h ) ) - return -1; + /* Write SEI, SPS and PPS. */ + x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + if( x264_sei_version_write( h, &h->out.bs ) ) + return -1; + if( x264_nal_end( h ) ) + return -1; - /* generate sequence parameters */ - x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST ); - x264_sps_write( &h->out.bs, h->sps ); - if( x264_nal_end( h ) ) - return -1; + /* generate sequence parameters */ + x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST ); + x264_sps_write( &h->out.bs, h->sps ); + if( x264_nal_end( h ) ) + return -1; + + /* generate picture parameters */ + x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); + x264_pps_write( &h->out.bs, h->pps ); + if( x264_nal_end( h ) ) + return -1; + bs_flush( &h->out.bs ); + + frame_size = x264_encoder_encapsulate_nals( h ); - /* generate picture parameters */ - x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); - x264_pps_write( &h->out.bs, h->pps ); - if( x264_nal_end( h ) ) - return -1; - bs_flush( &h->out.bs ); - } /* now set output*/ *pi_nal = h->out.i_nal; *pp_nal = &h->out.nal[0]; h->out.i_nal = 0; - return 0; + return frame_size; } static inline void x264_reference_build_list( x264_t *h, int i_poc ) @@ -1517,7 +1549,6 @@ static void x264_thread_sync_stat( x264_t *dst, x264_t *src ) static void *x264_slices_write( x264_t *h ) { - int i_frame_size = 0; int i_slice_num = 0; if( h->param.i_sync_lookahead ) x264_lower_thread_priority( 10 ); @@ -1554,7 +1585,6 @@ static void *x264_slices_write( x264_t *h ) if( x264_stack_align( x264_slice_write, h ) ) return (void *)-1; h->sh.i_first_mb = h->sh.i_last_mb + 1; - i_frame_size += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; } #if VISUALIZE @@ -1565,7 +1595,6 @@ static void *x264_slices_write( x264_t *h ) } #endif - h->out.i_frame_size = i_frame_size; return (void *)0; } @@ -1737,19 +1766,9 @@ int x264_encoder_encode( x264_t *h, /* build ref list 0/1 */ x264_reference_build_list( h, h->fdec->i_poc ); - /* Init the rate control */ - x264_ratecontrol_start( h, h->fenc->i_qpplus1 ); - i_global_qp = x264_ratecontrol_qp( h ); - - pic_out->i_qpplus1 = - h->fdec->i_qpplus1 = i_global_qp + 1; - if( h->sh.i_type == SLICE_TYPE_B ) x264_macroblock_bipred_init( h ); - /* ------------------------ Create slice header ----------------------- */ - x264_slice_init( h, i_nal_type, i_global_qp ); - if( i_nal_ref_idc != NAL_PRIORITY_DISPOSABLE ) h->i_frame_num++; @@ -1780,6 +1799,8 @@ int x264_encoder_encode( x264_t *h, h->i_nal_type = i_nal_type; h->i_nal_ref_idc = i_nal_ref_idc; + int overhead = NALU_OVERHEAD; + /* Write SPS and PPS */ if( i_nal_type == NAL_SLICE_IDR && h->param.b_repeat_headers ) { @@ -1791,6 +1812,7 @@ int x264_encoder_encode( x264_t *h, return -1; if( x264_nal_end( h ) ) return -1; + overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; } /* generate sequence parameters */ @@ -1798,14 +1820,27 @@ int x264_encoder_encode( x264_t *h, x264_sps_write( &h->out.bs, h->sps ); if( x264_nal_end( h ) ) return -1; + overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; /* generate picture parameters */ x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); x264_pps_write( &h->out.bs, h->pps ); if( x264_nal_end( h ) ) return -1; + overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; } + /* Init the rate control */ + /* FIXME: Include slice header bit cost. */ + x264_ratecontrol_start( h, h->fenc->i_qpplus1, overhead*8 ); + i_global_qp = x264_ratecontrol_qp( h ); + + pic_out->i_qpplus1 = + h->fdec->i_qpplus1 = i_global_qp + 1; + + /* ------------------------ Create slice header ----------------------- */ + x264_slice_init( h, i_nal_type, i_global_qp ); + /* Write frame */ if( h->param.i_threads > 1 ) { @@ -1824,7 +1859,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_out ) { - int i, i_list; + int i, i_list, frame_size; char psz_message[80]; if( h->b_thread_active ) @@ -1846,6 +1881,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, /* End bitstream, set output */ *pi_nal = h->out.i_nal; *pp_nal = h->out.nal; + + frame_size = x264_encoder_encapsulate_nals( h ); + h->out.i_nal = 0; /* Set output picture properties */ @@ -1868,12 +1906,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, /* update rc */ x264_emms(); - if( x264_ratecontrol_end( h, h->out.i_frame_size * 8 ) < 0 ) + if( x264_ratecontrol_end( h, frame_size * 8 ) < 0 ) return -1; - /* restore CPU state (before using float again) */ - x264_emms(); - x264_noise_reduction_update( thread_current ); /* ---------------------- Compute/Print statistics --------------------- */ @@ -1881,7 +1916,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, /* Slice stat */ h->stat.i_frame_count[h->sh.i_type]++; - h->stat.i_frame_size[h->sh.i_type] += h->out.i_frame_size + h->out.i_nal*NALU_OVERHEAD; + h->stat.i_frame_size[h->sh.i_type] += frame_size; h->stat.f_frame_qp[h->sh.i_type] += h->fdec->f_qp_avg_aq; for( i = 0; i < X264_MBTYPE_MAX; i++ ) @@ -1955,7 +1990,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, h->stat.frame.i_mb_count_i, h->stat.frame.i_mb_count_p, h->stat.frame.i_mb_count_skip, - h->out.i_frame_size, + frame_size, psz_message ); // keep stats all in one place @@ -1984,7 +2019,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, if( h->param.psz_dump_yuv ) x264_frame_dump( h ); - return 0; + return frame_size; } static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra ) @@ -2214,7 +2249,7 @@ void x264_encoder_close ( x264_t *h ) f_bitrate ); } else - x264_log( h, X264_LOG_INFO, "kb/s:%.1f\n", f_bitrate ); + x264_log( h, X264_LOG_INFO, "kb/s:%.2f\n", f_bitrate ); } /* rc */ diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index 6944a50d..c6b9f6d2 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -141,9 +141,9 @@ struct x264_ratecontrol_t static int parse_zones( x264_t *h ); static int init_pass2(x264_t *); -static float rate_estimate_qscale( x264_t *h ); +static float rate_estimate_qscale( x264_t *h, int overhead ); static void update_vbv( x264_t *h, int bits ); -static void update_vbv_plan( x264_t *h ); +static void update_vbv_plan( x264_t *h, int overhead ); static double predict_size( predictor_t *p, double q, double var ); static void update_predictor( predictor_t *p, double q, double var, double bits ); @@ -887,7 +887,7 @@ static void accum_p_qp_update( x264_t *h, float qp ) } /* Before encoding a frame, choose a QP for it */ -void x264_ratecontrol_start( x264_t *h, int i_force_qp ) +void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead ) { x264_ratecontrol_t *rc = h->rc; ratecontrol_entry_t *rce = NULL; @@ -920,7 +920,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp ) { memset( h->fdec->i_row_bits, 0, h->sps->i_mb_height * sizeof(int) ); rc->row_pred = &rc->row_preds[h->sh.i_type]; - update_vbv_plan( h ); + update_vbv_plan( h, overhead ); } if( h->sh.i_type != SLICE_TYPE_B ) @@ -932,11 +932,11 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp ) } else if( rc->b_abr ) { - q = qscale2qp( rate_estimate_qscale( h ) ); + q = qscale2qp( rate_estimate_qscale( h, overhead ) ); } else if( rc->b_2pass ) { - rce->new_qscale = rate_estimate_qscale( h ); + rce->new_qscale = rate_estimate_qscale( h, overhead ); q = qscale2qp( rce->new_qscale ); } else /* CQP */ @@ -1414,10 +1414,10 @@ static void update_vbv( x264_t *h, int bits ) } // provisionally update VBV according to the planned size of all frames currently in progress -static void update_vbv_plan( x264_t *h ) +static void update_vbv_plan( x264_t *h, int overhead ) { x264_ratecontrol_t *rcc = h->rc; - rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final; + rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;// - overhead; if( h->param.i_threads > 1 ) { int j = h->rc - h->thread[0]->rc; @@ -1562,7 +1562,7 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) } // update qscale for 1 frame based on actual bits used so far -static float rate_estimate_qscale( x264_t *h ) +static float rate_estimate_qscale( x264_t *h, int overhead ) { float q; x264_ratecontrol_t *rcc = h->rc; @@ -1615,7 +1615,7 @@ static float rate_estimate_qscale( x264_t *h ) else q += rcc->pb_offset; - rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, q, h->fref1[h->i_ref1-1]->i_satd ); + rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, q, h->fref1[h->i_ref1-1]->i_satd ) + overhead; x264_ratecontrol_set_estimated_size(h, rcc->frame_size_planned); rcc->last_satd = 0; return qp2qscale(q); @@ -1778,7 +1778,7 @@ static float rate_estimate_qscale( x264_t *h ) if( rcc->b_2pass && rcc->b_vbv ) rcc->frame_size_planned = qscale2bits(&rce, q); else - rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); + rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ) + overhead; x264_ratecontrol_set_estimated_size(h, rcc->frame_size_planned); return q; } diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h index ed8abab7..36a174dd 100644 --- a/encoder/ratecontrol.h +++ b/encoder/ratecontrol.h @@ -31,7 +31,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame ); void x264_adaptive_quant( x264_t * ); int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame ); void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next ); -void x264_ratecontrol_start( x264_t *, int i_force_qp ); +void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead ); int x264_ratecontrol_slice_type( x264_t *, int i_frame ); void x264_ratecontrol_mb( x264_t *, int bits ); int x264_ratecontrol_qp( x264_t * ); diff --git a/x264.c b/x264.c index 99c56829..3ef1b980 100644 --- a/x264.c +++ b/x264.c @@ -1112,20 +1112,7 @@ static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic ) for( i = 0; i < i_nal; i++ ) { - int i_size; - - if( mux_buffer_size < nal[i].i_payload * 3/2 + 4 ) - { - mux_buffer_size = nal[i].i_payload * 2 + 4; - x264_free( mux_buffer ); - mux_buffer = x264_malloc( mux_buffer_size ); - if( !mux_buffer ) - return -1; - } - - i_size = mux_buffer_size; - x264_nal_encode( mux_buffer, &i_size, 1, &nal[i] ); - i_nalu_size = p_write_nalu( hout, mux_buffer, i_size ); + i_nalu_size = p_write_nalu( hout, nal[i].p_payload, nal[i].i_payload ); if( i_nalu_size < 0 ) return -1; i_file += i_nalu_size; diff --git a/x264.h b/x264.h index 66f4f282..ec9321c2 100644 --- a/x264.h +++ b/x264.h @@ -35,7 +35,7 @@ #include -#define X264_BUILD 75 +#define X264_BUILD 76 /* x264_t: * opaque handler for encoder */ @@ -139,7 +139,7 @@ static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "" /* Threading */ #define X264_THREADS_AUTO 0 /* Automatically select optimal number of threads */ -#define X264_SYNC_LOOKAHEAD_AUTO -1 /* Automatically select optimal lookahead thread buffer size */ +#define X264_SYNC_LOOKAHEAD_AUTO (-1) /* Automatically select optimal lookahead thread buffer size */ /* Zones: override ratecontrol or other options for specific sections of the video. * See x264_encoder_reconfig() for which options can be changed. @@ -298,6 +298,8 @@ typedef struct x264_param_t /* Muxing parameters */ int b_aud; /* generate access unit delimiters */ int b_repeat_headers; /* put SPS/PPS before each keyframe */ + int b_annexb; /* if set, place start codes (4 bytes) before NAL units, + * otherwise place size (4 bytes) before NAL units. */ int i_sps_id; /* SPS and PPS id number */ /* Slicing parameters */ @@ -418,22 +420,24 @@ enum nal_priority_e NAL_PRIORITY_HIGHEST = 3, }; +/* The data within the payload is already NAL-encapsulated; the ref_idc and type + * are merely in the struct for easy access by the calling application. + * All data returned in an x264_nal_t, including the data in p_payload, is no longer + * valid after the next call to x264_encoder_encode. Thus it must be used or copied + * before calling x264_encoder_encode or x264_encoder_headers again. */ typedef struct { int i_ref_idc; /* nal_priority_e */ int i_type; /* nal_unit_type_e */ - /* This data are raw payload */ + /* Size of payload in bytes. */ int i_payload; + /* If param->b_annexb is set, Annex-B bytestream with 4-byte startcode. + * Otherwise, startcode is replaced with a 4-byte size. + * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */ uint8_t *p_payload; } x264_nal_t; -/* x264_nal_encode: - * encode a nal into a buffer, setting the size. - * if b_annexeb then a long synch work is added - * XXX: it currently doesn't check for overflow */ -int x264_nal_encode( void *, int *, int b_annexeb, x264_nal_t *nal ); - /**************************************************************************** * Encoder functions: ****************************************************************************/ @@ -453,13 +457,20 @@ x264_t *x264_encoder_open( x264_param_t * ); * analysis-related parameters from x264_param_t are copied. * this takes effect immediately, on whichever frame is encoded next; * due to delay, this may not be the next frame passed to encoder_encode. - * if the change should apply to some particular frame, use x264_picture_t->param instead. */ + * if the change should apply to some particular frame, use x264_picture_t->param instead. + * returns 0 on success, negative on parameter validation error. */ int x264_encoder_reconfig( x264_t *, x264_param_t * ); /* x264_encoder_headers: - * return the SPS and PPS that will be used for the whole stream */ + * return the SPS and PPS that will be used for the whole stream. + * if i_nal > 0, returns the total size of all NAL payloads. + * returns negative on error. + * the payloads of all output NALs are guaranteed to be sequential in memory. */ int x264_encoder_headers( x264_t *, x264_nal_t **, int * ); /* x264_encoder_encode: - * encode one picture */ + * encode one picture. + * if i_nal > 0, returns the total size of all NAL payloads. + * returns negative on error, zero if no NAL units returned. + * the payloads of all output NALs are guaranteed to be sequential in memory. */ int x264_encoder_encode ( x264_t *, x264_nal_t **, int *, x264_picture_t *, x264_picture_t * ); /* x264_encoder_close: * close an encoder handler */