libhb: Allow control of audio mix normalization
author jstebbins <jstebbins.hb@gmail.com>
Wed, 1 Aug 2012 23:05:00 +0000 (23:05 +0000)
committer jstebbins <jstebbins.hb@gmail.com>
Wed, 1 Aug 2012 23:05:00 +0000 (23:05 +0000)
Since switching to libavresample for audio mixing, our output
volume levels have been reduced because libavresample
does mix level normalization by default.  This change
applies a patch to libav to allow us to disable this behavior
and adds a new field to hb_audio_config_t to allow the
hb frontends to control this feature.

git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@4884 b64f7644-9d1e-0410-96f1-a4d463321fa5

contrib/ffmpeg/A06-mix-normalization.patch [new file with mode: 0644]
libhb/audio_resample.c
libhb/audio_resample.h
libhb/common.c
libhb/common.h
libhb/decavcodec.c
libhb/declpcm.c
libhb/encavcodecaudio.c
test/test.c

diff --git a/contrib/ffmpeg/A06-mix-normalization.patch b/contrib/ffmpeg/A06-mix-normalization.patch
new file mode 100644 (file)
index 0000000..06fb275
--- /dev/null
@@ -0,0 +1,39 @@
+diff --git a/libavresample/audio_mix.c b/libavresample/audio_mix.c
+index 2c2a356..25f9f98 100644
+--- a/libavresample/audio_mix.c
++++ b/libavresample/audio_mix.c
+@@ -327,7 +327,9 @@ int ff_audio_mix_init(AVAudioResampleContext *avr)
+                                       avr->out_channel_layout,
+                                       avr->center_mix_level,
+                                       avr->surround_mix_level,
+-                                      avr->lfe_mix_level, 1, matrix_dbl,
++                                      avr->lfe_mix_level,
++                                      avr->normalize_mix_level,
++                                      matrix_dbl,
+                                       avr->in_channels,
+                                       avr->matrix_encoding);
+         if (ret < 0) {
+diff --git a/libavresample/internal.h b/libavresample/internal.h
+index 7b7648f..006b6fd 100644
+--- a/libavresample/internal.h
++++ b/libavresample/internal.h
+@@ -45,6 +45,7 @@ struct AVAudioResampleContext {
+     double center_mix_level;                    /**< center mix level       */
+     double surround_mix_level;                  /**< surround mix level     */
+     double lfe_mix_level;                       /**< lfe mix level          */
++    int normalize_mix_level;                    /**< enable mix level normalization */
+     int force_resampling;                       /**< force resampling       */
+     int filter_size;                            /**< length of each FIR filter in the resampling filterbank relative to the cutoff frequency */
+     int phase_shift;                            /**< log2 of the number of entries in the resampling polyphase filterbank */
+diff --git a/libavresample/options.c b/libavresample/options.c
+index 02e1f86..e7e0c27 100644
+--- a/libavresample/options.c
++++ b/libavresample/options.c
+@@ -47,6 +47,7 @@ static const AVOption options[] = {
+     { "center_mix_level",       "Center Mix Level",         OFFSET(center_mix_level),       AV_OPT_TYPE_DOUBLE, { M_SQRT1_2             }, -32.0,                32.0,                   PARAM },
+     { "surround_mix_level",     "Surround Mix Level",       OFFSET(surround_mix_level),     AV_OPT_TYPE_DOUBLE, { M_SQRT1_2             }, -32.0,                32.0,                   PARAM },
+     { "lfe_mix_level",          "LFE Mix Level",            OFFSET(lfe_mix_level),          AV_OPT_TYPE_DOUBLE, { 0.0                   }, -32.0,                32.0,                   PARAM },
++    { "normalize_mix_level",    "Normalize Mix Level",      OFFSET(normalize_mix_level),    AV_OPT_TYPE_INT,    { 1                     }, 0,                    1,                      PARAM },
+     { "force_resampling",       "Force Resampling",         OFFSET(force_resampling),       AV_OPT_TYPE_INT,    { 0                     }, 0,                    1,                      PARAM },
+     { "filter_size",            "Resampling Filter Size",   OFFSET(filter_size),            AV_OPT_TYPE_INT,    { 16                    }, 0,                    32, /* ??? */           PARAM },
+     { "phase_shift",            "Resampling Phase Shift",   OFFSET(phase_shift),            AV_OPT_TYPE_INT,    { 10                    }, 0,                    30, /* ??? */           PARAM },
diff --git a/libhb/audio_resample.c b/libhb/audio_resample.c
index 3f67da87c476260addf601ca73b76bfe8b329ce1..ea09b38a236c16c94073d5b73d167f26e209cccb 100644 (file)
 
 hb_audio_resample_t* hb_audio_resample_init(enum AVSampleFormat output_sample_fmt,
                                             uint64_t output_channel_layout,
-                                            enum AVMatrixEncoding matrix_encoding)
+                                            enum AVMatrixEncoding matrix_encoding,
+                                            int normalize_mix_level)
 {
     hb_audio_resample_t *resample = malloc(sizeof(hb_audio_resample_t));
     if (resample == NULL)
         return NULL;
 
-    resample->out.sample_fmt      = output_sample_fmt;
-    resample->out.sample_size     = av_get_bytes_per_sample(output_sample_fmt);
-    resample->out.channel_layout  = output_channel_layout;
-    resample->out.channels        =
+    resample->out.sample_fmt          = output_sample_fmt;
+    resample->out.sample_size         =
+        av_get_bytes_per_sample(output_sample_fmt);
+    resample->out.channel_layout      = output_channel_layout;
+    resample->out.channels            =
         av_get_channel_layout_nb_channels(output_channel_layout);
-    resample->out.matrix_encoding = matrix_encoding;
-    resample->resample_needed     = 0;
-    resample->avresample          = NULL;
+    resample->out.matrix_encoding     = matrix_encoding;
+    resample->out.normalize_mix_level = normalize_mix_level;
+    resample->resample_needed         = 0;
+    resample->avresample              = NULL;
 
     return resample;
 }
@@ -72,6 +75,8 @@ int hb_audio_resample_update(hb_audio_resample_t *resample,
                            resample->out.channel_layout, 0);
             av_opt_set_int(resample->avresample, "matrix_encoding",
                            resample->out.matrix_encoding, 0);
+            av_opt_set_int(resample->avresample, "normalize_mix_level",
+                           resample->out.normalize_mix_level, 0);
         }
         else if (resample_changed)
         {
diff --git a/libhb/audio_resample.h b/libhb/audio_resample.h
index 5982905e619412ae5d00827cbc3483dca1b3c474..4b1586e73b92f287f3fa4061a3f0742408589a24 100644 (file)
@@ -39,6 +39,7 @@ typedef struct
         int channels;
         int linesize;
         int sample_size;
+        int normalize_mix_level;
         uint64_t channel_layout;
         enum AVSampleFormat sample_fmt;
         enum AVMatrixEncoding matrix_encoding;
@@ -52,7 +53,8 @@ typedef struct
  */
 hb_audio_resample_t* hb_audio_resample_init(enum AVSampleFormat output_sample_fmt,
                                             uint64_t output_channel_layout,
-                                            enum AVMatrixEncoding matrix_encoding);
+                                            enum AVMatrixEncoding matrix_encoding,
+                                            int normalize_mix_level);
 
 /* Update an hb_audio_resample_t, setting the input sample characteristics.
  *
diff --git a/libhb/common.c b/libhb/common.c
index 13a2e1d9536796cab0a7f554d3386f5f3ac98813..9ff9f44a3c2c78d156352dbcc18687f3d502b266 100644 (file)
@@ -1637,6 +1637,7 @@ void hb_audio_config_init(hb_audio_config_t * audiocfg)
     audiocfg->out.mixdown = HB_INVALID_AMIXDOWN;
     audiocfg->out.dynamic_range_compression = 0;
     audiocfg->out.gain = 0;
+    audiocfg->out.normalize_mix_level = 0;
     audiocfg->out.name = NULL;
 }
 
@@ -1684,6 +1685,7 @@ int hb_audio_add(const hb_job_t * job, const hb_audio_config_t * audiocfg)
         audio->config.out.mixdown = HB_AMIXDOWN_NONE;
         audio->config.out.dynamic_range_compression = 0;
         audio->config.out.gain = 0;
+        audio->config.out.normalize_mix_level = 0;
         audio->config.out.compression_level = -1;
         audio->config.out.quality = HB_INVALID_AUDIO_QUALITY;
     }
@@ -1698,6 +1700,7 @@ int hb_audio_add(const hb_job_t * job, const hb_audio_config_t * audiocfg)
         audio->config.out.dynamic_range_compression = audiocfg->out.dynamic_range_compression;
         audio->config.out.mixdown = audiocfg->out.mixdown;
         audio->config.out.gain = audiocfg->out.gain;
+        audio->config.out.normalize_mix_level = audiocfg->out.normalize_mix_level;
     }
     if (audiocfg->out.name && *audiocfg->out.name)
     {
diff --git a/libhb/common.h b/libhb/common.h
index 5b2fc08fa43fee109d3161902fd4d44089e91533..1650f8002a96b9dd6f854b3f9609de22c7343964 100644 (file)
@@ -452,6 +452,7 @@ struct hb_audio_config_s
         float    compression_level;  /* Output compression level (encoder-specific) */
         double   dynamic_range_compression; /* Amount of DRC applied to this track */
         double   gain; /* Gain (in dB), negative is quieter */
+        int      normalize_mix_level; /* mix level normalization (boolean) */
         char *   name; /* Output track name */
     } out;
 
diff --git a/libhb/decavcodec.c b/libhb/decavcodec.c
index 17f814d619217b72618aeffc3a534c7f8872f550..934f7611e453ae462162c9505d3c10522c8b3c43 100644 (file)
@@ -192,7 +192,8 @@ static int decavcodecaInit( hb_work_object_t * w, hb_job_t * job )
         int mode;
         uint64_t layout = hb_ff_mixdown_xlat(w->audio->config.out.mixdown,
                                              &mode);
-        pv->resample = hb_audio_resample_init(AV_SAMPLE_FMT_FLT, layout, mode);
+        pv->resample = hb_audio_resample_init(AV_SAMPLE_FMT_FLT, layout, mode,
+                                              w->audio->config.out.normalize_mix_level);
         if (pv->resample == NULL)
         {
             hb_error("decavcodecaInit: hb_audio_resample_init() failed");
diff --git a/libhb/declpcm.c b/libhb/declpcm.c
index e48bf5db7ad188b5aabe26b7400e280eae9efbf6..0d4e449d11c7b63b46f654a41f55be8fdcc3c026 100644 (file)
@@ -164,7 +164,8 @@ static int declpcmInit( hb_work_object_t * w, hb_job_t * job )
 
     int mode;
     uint64_t layout = hb_ff_mixdown_xlat(w->audio->config.out.mixdown, &mode);
-    pv->resample = hb_audio_resample_init(AV_SAMPLE_FMT_FLT, layout, mode);
+    pv->resample = hb_audio_resample_init(AV_SAMPLE_FMT_FLT, layout, mode,
+                                          w->audio->config.out.normalize_mix_level);
     if (pv->resample == NULL)
     {
         hb_error("declpcmInit: hb_audio_resample_init() failed");
diff --git a/libhb/encavcodecaudio.c b/libhb/encavcodecaudio.c
index 28eb0d9ae93429f452f77ecc141818ad3dad2ee1..ef5297ccab22e3e2be68f8eb9606ad4414acc271 100644 (file)
@@ -119,7 +119,7 @@ static int encavcodecaInit(hb_work_object_t *w, hb_job_t *job)
     // sample_fmt conversion
     pv->resample = hb_audio_resample_init(context->sample_fmt,
                                           context->channel_layout,
-                                          AV_MATRIX_ENCODING_NONE);
+                                          AV_MATRIX_ENCODING_NONE, 0);
     if (hb_audio_resample_update(pv->resample, AV_SAMPLE_FMT_FLT,
                                  context->channel_layout, context->channels))
     {
diff --git a/test/test.c b/test/test.c
index 5d61b579c34c017bfb0e42f5052a065d7d0628eb..b59534d448b00189c62591e8f0b912a11e0d927b 100644 (file)
@@ -71,6 +71,7 @@ static int    allowed_audio_copy = -1;
 static char * mixdowns    = NULL;
 static char * dynamic_range_compression = NULL;
 static char * audio_gain  = NULL;
+static char ** normalize_mix_level  = NULL;
 static char * atracks     = NULL;
 static char * arates      = NULL;
 static char ** abitrates  = NULL;
@@ -2096,6 +2097,41 @@ static int HandleEvents( hb_handle_t * h )
             }
             /* Audio Gain */
 
+            /* Audio Mix Normalization */
+            i = 0;
+            int norm = 0;
+            if( normalize_mix_level )
+            {
+                for ( i = 0; normalize_mix_level[i] != NULL && i < num_audio_tracks; i++ )
+                {
+                    char * token = normalize_mix_level[i];
+                    norm = atoi(token);
+                    audio = hb_list_audio_config_item(job->list_audio, i);
+
+                    if( audio != NULL )
+                    {
+                        audio->out.normalize_mix_level = norm;
+                    }
+                    else
+                    {
+                        fprintf(stderr, "Ignoring normalization %d, no audio tracks\n", norm);
+                    }
+                }
+            }
+            if (i < num_audio_tracks && i == 1)
+            {
+                /* We have fewer inputs than audio tracks,
+                 * and we only have one input, use
+                 * that for all tracks.
+                 */
+                for (; i < num_audio_tracks; i++)
+                {
+                    audio = hb_list_audio_config_item(job->list_audio, i);
+                    audio->out.normalize_mix_level = norm;
+                }
+            }
+            /* Audio Mix Normalization */
+
             /* Audio Track Names */
             if ( anames )
             {
@@ -2883,6 +2919,10 @@ static void ShowHelp()
     "                            Separated by commas for more than one audio track.\n"
     "                            (mono/stereo/dpl1/dpl2/6ch, default: up to 6ch for ac3,\n"
     "                            up to dpl2 for other encoders)\n"
+    "        --normalize-mix     Normalize audio mix levels to prevent clipping.\n"
+    "               <string>     Separated by commas for more than one audio track.\n"
+    "                            0 = Disable Normalization (default)\n"
+    "                            1 = Enable Normalization\n"
     "    -R, --arate             Set audio samplerate(s) (" );
     for( i = 0; i < hb_audio_rates_count; i++ )
     {
@@ -3186,6 +3226,7 @@ static int ParseOptions( int argc, char ** argv )
     #define X264_PRESET         284
     #define X264_TUNE           285
     #define H264_LEVEL          286
+    #define NORMALIZE_MIX       287
     
     for( ;; )
     {
@@ -3212,6 +3253,7 @@ static int ParseOptions( int argc, char ** argv )
             { "markers",     optional_argument, NULL,    'm' },
             { "audio",       required_argument, NULL,    'a' },
             { "mixdown",     required_argument, NULL,    '6' },
+            { "normalize-mix", required_argument, NULL,  NORMALIZE_MIX },
             { "drc",         required_argument, NULL,    'D' },
             { "gain",        required_argument, NULL,    AUDIO_GAIN },
             { "subtitle",    required_argument, NULL,    's' },
@@ -3430,6 +3472,12 @@ static int ParseOptions( int argc, char ** argv )
                     audio_gain = strdup( optarg );
                 }
                 break;
+            case NORMALIZE_MIX:
+                if( optarg != NULL )
+                {
+                    normalize_mix_level = str_split( optarg, ',' );
+                }
+                break;
             case 's':
                 subtracks = str_split( optarg, ',' );
                 break;