From 32cdefd04b29ffdaa602e44eeee87c7757f470cd Mon Sep 17 00:00:00 2001
From: Rodeo <tdskywalker@gmail.com>
Date: Sat, 5 Apr 2014 01:11:10 +0000
Subject: [PATCH] enc_qsv: clean up encoding loop, part 1.

Mainly:

 - allocate frame surface data in qsv_enc_init
 - set up the input frame surface outside the loop
 - remove an unnecessary variable
 - handle errors via a common path


git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@6148 b64f7644-9d1e-0410-96f1-a4d463321fa5
---
 libhb/enc_qsv.c | 386 +++++++++++++++++++++---------------------------
 1 file changed, 172 insertions(+), 214 deletions(-)

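Note on the first hunk: for the system-memory path, frame surface data is now
pre-allocated in qsv_enc_init from the configured frame dimensions, i.e. an
NV12 layout with Pitch = Width, a Width x Height luma (Y) plane and a
half-size interleaved chroma (VU) plane. Below is a minimal sketch of that
layout; hb_enc_qsv_alloc_nv12_surface() is a hypothetical helper name (the
patch does this inline), it keeps NULL checks on the allocations that the hunk
itself omits, and it assumes the Media SDK and libavutil headers are on the
include path.

    #include <libavutil/mem.h>   /* av_mallocz, av_freep */
    #include "mfxstructures.h"   /* mfxFrameSurface1, mfxFrameInfo */

    static mfxFrameSurface1 *hb_enc_qsv_alloc_nv12_surface(const mfxFrameInfo *info)
    {
        mfxFrameSurface1 *surface = av_mallocz(sizeof(*surface));
        if (surface == NULL)
        {
            return NULL;
        }

        /* NV12 (4:2:0): full-size luma plane followed by one interleaved
         * U/V plane at half the luma size, both with a pitch of Width bytes */
        surface->Info       = *info;
        surface->Data.Pitch = info->Width;
        surface->Data.Y     = av_mallocz(info->Width * info->Height);
        surface->Data.VU    = av_mallocz(info->Width * info->Height / 2);
        if (surface->Data.Y == NULL || surface->Data.VU == NULL)
        {
            av_freep(&surface->Data.Y);
            av_freep(&surface->Data.VU);
            av_freep(&surface);
            return NULL;
        }
        return surface;
    }

The new common error path at the end of encqsvWork (the 'fail' label) sets
*job->done_error to HB_ERROR_UNKNOWN if it is still HB_ERROR_NONE, raises
*job->die, clears *buf_out and returns HB_WORK_ERROR, so every failure inside
the work function exits the same way.
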
diff --git a/libhb/enc_qsv.c b/libhb/enc_qsv.c
index 7f85420f7..1afedffca 100644
--- a/libhb/enc_qsv.c
+++ b/libhb/enc_qsv.c
@@ -287,10 +287,13 @@ int qsv_enc_init(hb_work_private_t *pv)
         }
         for (i = 0; i < qsv_encode->surface_num; i++)
         {
-            qsv_encode->p_surfaces[i] = av_mallocz(sizeof(mfxFrameSurface1));
-            AV_QSV_CHECK_POINTER(qsv_encode->p_surfaces[i], MFX_ERR_MEMORY_ALLOC);
-            memcpy(&(qsv_encode->p_surfaces[i]->Info),
-                   &(qsv_encode->request[0].Info), sizeof(mfxFrameInfo));
+            mfxFrameSurface1 *surface = av_mallocz(sizeof(mfxFrameSurface1));
+            mfxFrameInfo info         = pv->param.videoParam->mfx.FrameInfo;
+            surface->Info             = info;
+            surface->Data.Pitch       = info.Width;
+            surface->Data.Y           = av_mallocz(info.Width * info.Height);
+            surface->Data.VU          = av_mallocz(info.Width * info.Height / 2);
+            qsv_encode->p_surfaces[i] = surface;
         }
     }
     else
@@ -1212,10 +1215,8 @@ void encqsvClose(hb_work_object_t *w)
                 {
                     if (pv->is_sys_mem)
                     {
-                        free(qsv_enc_space->p_surfaces[i]->Data.Y);
-                        qsv_enc_space->p_surfaces[i]->Data.Y  = NULL;
-                        free(qsv_enc_space->p_surfaces[i]->Data.VU);
-                        qsv_enc_space->p_surfaces[i]->Data.VU = NULL;
+                        av_freep(&qsv_enc_space->p_surfaces[i]->Data.VU);
+                        av_freep(&qsv_enc_space->p_surfaces[i]->Data.Y);
                     }
                     av_freep(&qsv_enc_space->p_surfaces[i]);
                 }
@@ -1284,160 +1285,144 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out)
 {
     hb_work_private_t *pv = w->private_data;
     hb_job_t *job = pv->job;
-    hb_buffer_t *in = *buf_in, *buf;
-    hb_buffer_t *last_buf = NULL;
+    hb_buffer_t *in = *buf_in;
+    hb_buffer_t *buf, *last_buf = NULL;
     mfxStatus sts = MFX_ERR_NONE;
-    int is_end = 0;
-    av_qsv_list *received_item = NULL;
-    av_qsv_stage *stage = NULL;
 
     while (qsv_enc_init(pv) >= 2)
     {
         av_qsv_sleep(1); // encoding not initialized, wait and repeat the call
     }
-    *buf_out = NULL;
 
     if (*job->die)
     {
-        return HB_WORK_DONE; // unrecoverable error, don't attempt to encode
+        goto fail; // unrecoverable error, don't attempt to encode
     }
 
-    av_qsv_context *qsv        = job->qsv.ctx;
-    av_qsv_space   *qsv_encode = job->qsv.ctx->enc_space;
+    av_qsv_list      *received_item = NULL;
+    mfxEncodeCtrl    *work_control  = NULL;
+    mfxFrameSurface1 *work_surface  = NULL;
+    av_qsv_context   *qsv           = job->qsv.ctx;
+    av_qsv_space     *qsv_encode    = job->qsv.ctx->enc_space;
 
+    /*
+     * EOF on input. Flush the encoder, then send the
+     * EOF downstream to let the muxer know we're done.
+     */
     if (in->size <= 0)
     {
-        // do delayed frames yet
-        *buf_in = NULL;
-        is_end = 1;
+        *buf_in = NULL; // don't let 'work_loop' close this buffer
     }
-
-    // input from decode, as called - we always have some to proceed with
-    while (1)
+    else
     {
-        mfxEncodeCtrl    *work_control = NULL;
-        mfxFrameSurface1 *work_surface = NULL;
-
-        if (!is_end)
+        if (pv->is_sys_mem)
         {
-            if (pv->is_sys_mem)
+            mfxFrameInfo *info = &pv->param.videoParam->mfx.FrameInfo;
+            int surface_index  = av_qsv_get_free_surface(qsv_encode, qsv, info,
+                                                         QSV_PART_ANY);
+            if (surface_index == -1)
             {
-                int surface_idx = av_qsv_get_free_surface(qsv_encode, qsv,
-                                                          &qsv_encode->request[0].Info, QSV_PART_ANY);
-                work_surface = qsv_encode->p_surfaces[surface_idx];
-
-                if (work_surface->Data.Y == NULL)
-                {
-                    // if nv12 and 422 12bits per pixel
-                    work_surface->Data.Pitch = pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width;
-                    work_surface->Data.Y     = calloc(1,
-                                                      pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width *
-                                                      pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Height);
-                    work_surface->Data.VU    = calloc(1,
-                                                      pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Width *
-                                                      pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.Height / 2);
-                }
-                qsv_yuv420_to_nv12(pv->sws_context_to_nv12, work_surface, in);
-            }
-            else
-            {
-                received_item = in->qsv_details.qsv_atom;
-                stage = av_qsv_get_last_stage(received_item);
-                work_surface = stage->out.p_surface;
-
-                // don't let qsv->dts_seq grow needlessly
-                av_qsv_dts_pop(qsv);
+                hb_error("encqsv: av_qsv_get_free_surface failed");
+                goto fail;
             }
 
-            work_surface->Data.TimeStamp = in->s.start;
+            work_surface = qsv_encode->p_surfaces[surface_index];
+            qsv_yuv420_to_nv12(pv->sws_context_to_nv12, work_surface, in);
+        }
+        else
+        {
+            received_item = in->qsv_details.qsv_atom;
+            work_surface  = av_qsv_get_last_stage(received_item)->out.p_surface;
 
             /*
-             * Debugging code to check that the upstream modules have generated
-             * a continuous, self-consistent frame stream.
+             * QSV decoding fills the QSV context's dts_seq list; we need to
+             * pop this surface's DTS so dts_seq doesn't grow unnecessarily.
              */
-            int64_t start = work_surface->Data.TimeStamp;
-            if (pv->last_start > start)
+            av_qsv_dts_pop(qsv);
+        }
+
+        work_surface->Data.TimeStamp = in->s.start;
+
+        /*
+         * Debugging code to check that the upstream modules have generated
+         * a continuous, self-consistent frame stream.
+         */
+        int64_t start = work_surface->Data.TimeStamp;
+        if (pv->last_start > start)
+        {
+            hb_log("encqsv: input continuity error, last start %"PRId64" start %"PRId64"",
+                   pv->last_start, start);
+        }
+        pv->last_start = start;
+
+        /* for DTS generation (when MSDK API < 1.6 or VFR) */
+        if (pv->bfrm_delay && pv->bfrm_workaround)
+        {
+            if (pv->frames_in <= BFRM_DELAY_MAX)
             {
-                hb_log("encqsvWork: input continuity error, last start %"PRId64" start %"PRId64"",
-                       pv->last_start, start);
+                pv->init_pts[pv->frames_in] = work_surface->Data.TimeStamp;
             }
-            pv->last_start = start;
-
-            // for DTS generation (when MSDK API < 1.6 or VFR)
-            if (pv->bfrm_delay && pv->bfrm_workaround)
+            if (pv->frames_in)
             {
-                if (pv->frames_in <= BFRM_DELAY_MAX)
-                {
-                    pv->init_pts[pv->frames_in] = work_surface->Data.TimeStamp;
-                }
-                if (pv->frames_in)
-                {
-                    hb_qsv_add_new_dts(pv->list_dts,
-                                       work_surface->Data.TimeStamp);
-                }
+                hb_qsv_add_new_dts(pv->list_dts,
+                                   work_surface->Data.TimeStamp);
             }
+        }
+        pv->frames_in++;
 
-            /*
-             * Chapters have to start with a keyframe so request that this
-             * frame be coded as IDR. Since there may be several frames
-             * buffered in the encoder, remember the timestamp so when this
-             * frame finally pops out of the encoder we'll mark its buffer
-             * as the start of a chapter.
-             */
-            if (in->s.new_chap > 0 && job->chapter_markers)
+        if (in->s.new_chap > 0 && job->chapter_markers)
+        {
+            if (pv->next_chapter_pts == AV_NOPTS_VALUE)
             {
-                if (pv->next_chapter_pts == AV_NOPTS_VALUE)
-                {
-                    pv->next_chapter_pts = work_surface->Data.TimeStamp;
-                }
-                /* insert an IDR */
-                work_control = &pv->force_keyframe;
-                /*
-                 * Chapter markers are sometimes so close we can get a new
-                 * one before the previous goes through the encoding queue.
-                 *
-                 * Dropping markers can cause weird side-effects downstream,
-                 * including but not limited to missing chapters in the
-                 * output, so we need to save it somehow.
-                 */
-                struct chapter_s *item = malloc(sizeof(struct chapter_s));
-                if (item != NULL)
-                {
-                    item->index = in->s.new_chap;
-                    item->start = work_surface->Data.TimeStamp;
-                    hb_list_add(pv->delayed_chapters, item);
-                }
-                /* don't let 'work_loop' put a chapter mark on the wrong buffer */
-                in->s.new_chap = 0;
+                pv->next_chapter_pts = work_surface->Data.TimeStamp;
             }
 
+            /* Chapters have to start with a keyframe, so request an IDR */
+            work_control = &pv->force_keyframe;
+
             /*
-             * If interlaced encoding is requested during encoder initialization,
-             * but the input mfxFrameSurface1 is flagged as progressive here,
-             * the output bitstream will be progressive (according to MediaInfo).
+             * Chapter markers are sometimes so close we can get a new
+             * one before the previous goes through the encoding queue.
              *
-             * Assume the user knows what he's doing (say he is e.g. encoding a
-             * progressive-flagged source using interlaced compression - he may
-             * well have a good reason to do so; mis-flagged sources do exist).
+             * Dropping markers can cause weird side-effects downstream,
+             * including but not limited to missing chapters in the
+             * output, so we need to save it somehow.
              */
-            work_surface->Info.PicStruct = pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.PicStruct;
-        }
-        else
-        {
-            work_surface = NULL;
-            received_item = NULL;
+            struct chapter_s *item = malloc(sizeof(struct chapter_s));
+            if (item != NULL)
+            {
+                item->index = in->s.new_chap;
+                item->start = work_surface->Data.TimeStamp;
+                hb_list_add(pv->delayed_chapters, item);
+            }
+            /* don't let 'work_loop' put a chapter mark on the wrong buffer */
+            in->s.new_chap = 0;
         }
+
+        /*
+         * If interlaced encoding is requested during encoder initialization,
+         * but the input mfxFrameSurface1 is flagged as progressive here,
+         * the output bitstream will be progressive (according to MediaInfo).
+         *
+         * Assume the user knows what he's doing (say he is e.g. encoding a
+         * progressive-flagged source using interlaced compression - he may
+         * well have a good reason to do so; mis-flagged sources do exist).
+         */
+        work_surface->Info.PicStruct = pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.PicStruct;
+    }
+
+    do
+    {
         int sync_idx = av_qsv_get_free_sync(qsv_encode, qsv);
         if (sync_idx == -1)
         {
-            hb_error("qsv: Not enough resources allocated for QSV encode");
-            return 0;
+            hb_error("encqsv: av_qsv_get_free_sync failed");
+            goto fail;
         }
         av_qsv_task *task = av_qsv_list_item(qsv_encode->tasks, pv->async_depth);
 
-        for (;;)
+        do
         {
-            // Encode a frame asychronously (returns immediately)
             sts = MFXVideoENCODE_EncodeFrameAsync(qsv->mfx_session,
                                                   work_control, work_surface, task->bs,
                                                   qsv_encode->p_syncp[sync_idx]->p_sync);
@@ -1453,32 +1438,31 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out)
 
             if (sts == MFX_ERR_MORE_DATA)
             {
-                ff_qsv_atomic_dec(&qsv_encode->p_syncp[sync_idx]->in_use);
                 if (work_surface != NULL && received_item != NULL)
                 {
                     hb_list_add(pv->delayed_processing, received_item);
                 }
+                ff_qsv_atomic_dec(&qsv_encode->p_syncp[sync_idx]->in_use);
                 break;
             }
-
-            AV_QSV_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
-
-            if (sts >= MFX_ERR_NONE /*&& !syncpE*/) // repeat the call if warning and no output
+            else if (sts < MFX_ERR_NONE)
+            {
+                hb_error("encqsv: MFXVideoENCODE_EncodeFrameAsync failed (%d)", sts);
+                goto fail;
+            }
+            else if (sts == MFX_WRN_DEVICE_BUSY)
+            {
+                av_qsv_sleep(10); // device is busy, wait then repeat the call
+                continue;
+            }
+            else
             {
-                if (sts == MFX_WRN_DEVICE_BUSY)
-                {
-                    av_qsv_sleep(10); // wait if device is busy
-                    continue;
-                }
-
                 av_qsv_stage *new_stage = av_qsv_stage_init();
-                new_stage->type = AV_QSV_ENCODE;
+                new_stage->type         = AV_QSV_ENCODE;
                 new_stage->in.p_surface = work_surface;
-                new_stage->out.sync = qsv_encode->p_syncp[sync_idx];
-
-                new_stage->out.p_bs = task->bs;//qsv_encode->bs;
-                task->stage = new_stage;
-
+                new_stage->out.sync     = qsv_encode->p_syncp[sync_idx];
+                new_stage->out.p_bs     = task->bs;
+                task->stage             = new_stage;
                 pv->async_depth++;
 
                 if (received_item != NULL)
@@ -1487,17 +1471,16 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out)
                 }
                 else
                 {
-                    // flushing the end
-                    int pipe_idx = av_qsv_list_add(qsv->pipes, av_qsv_list_init(HAVE_THREADS));
-                    av_qsv_list *list_item = av_qsv_list_item(qsv->pipes, pipe_idx);
-                    av_qsv_add_stagee(&list_item, new_stage, HAVE_THREADS);
+                    /* encode-only or flushing */
+                    av_qsv_list *new_atom = av_qsv_list_init(HAVE_THREADS);
+                    av_qsv_add_stagee(&new_atom,  new_stage, HAVE_THREADS);
+                    av_qsv_list_add  (qsv->pipes, new_atom);
                 }
 
-                int i = 0;
-                for (i = hb_list_count(pv->delayed_processing); i > 0; i--)
+                int i = hb_list_count(pv->delayed_processing);
+                while (--i >= 0)
                 {
-                    av_qsv_list *item = hb_list_item(pv->delayed_processing,
-                                                     i - 1);
+                    av_qsv_list *item = hb_list_item(pv->delayed_processing, i);
 
                     if (item != NULL)
                     {
@@ -1505,19 +1488,13 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out)
                         av_qsv_flush_stages(qsv->pipes,    &item);
                     }
                 }
-
                 break;
             }
 
             ff_qsv_atomic_dec(&qsv_encode->p_syncp[sync_idx]->in_use);
-
-            if (sts == MFX_ERR_NOT_ENOUGH_BUFFER)
-            {
-                HB_DEBUG_ASSERT(1, "The bitstream buffer size is insufficient.");
-            }
-
             break;
         }
+        while (sts >= MFX_ERR_NONE);
 
         buf = NULL;
 
@@ -1525,58 +1502,42 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out)
         {
             if (pv->async_depth == 0) break;
 
-            // working properly with sync depth approach of MediaSDK OR flushing, if at the end
-            if (pv->async_depth >= pv->max_async_depth || is_end)
+            /* we've done enough asynchronous operations or we're flushing */
+            if (pv->async_depth >= pv->max_async_depth || work_surface == NULL)
             {
-                pv->async_depth--;
-
                 av_qsv_task *task = av_qsv_list_item(qsv_encode->tasks, 0);
-                av_qsv_stage *stage = task->stage;
-                av_qsv_list *this_pipe = av_qsv_pipe_by_stage(qsv->pipes, stage);
-                sts = MFX_ERR_NONE;
+                pv->async_depth--;
 
-                // only here we need to wait on operation been completed, therefore SyncOperation is used,
-                // after this step - we continue to work with bitstream, muxing ...
-                av_qsv_wait_on_sync(qsv, stage);
+                /* perform a sync operation to get the output bitstream */
+                av_qsv_wait_on_sync(qsv, task->stage);
 
                 if (task->bs->DataLength > 0)
                 {
-                    av_qsv_flush_stages(qsv->pipes, &this_pipe);
+                    av_qsv_list *pipe = av_qsv_pipe_by_stage(qsv->pipes, task->stage);
+                    av_qsv_flush_stages(qsv->pipes, &pipe);
 
-                    // see nal_encode
-                    buf = hb_video_buffer_init(job->width, job->height);
+                    /* allocate additional data for parse_nalus */
+                    buf = hb_buffer_init(task->bs->DataLength * 2);
                     buf->size = 0;
 
-                    // map Media SDK's FrameType to our internal representation
-                    buf->s.frametype = hb_qsv_frametype_xlat(task->bs->FrameType,
-                                                             &buf->s.flags);
-
                     /*
                      * we need to convert the encoder's Annex B output
                      * to an MP4-compatible format (ISO/IEC 14496-15).
                      */
                     parse_nalus(task->bs->Data + task->bs->DataOffset,
                                 task->bs->DataLength, buf);
+                    task->bs->DataLength = task->bs->DataOffset = 0;
+                    task->bs->MaxLength  = qsv_encode->p_buf_max_size;
 
-                    if (last_buf == NULL)
-                    {
-                        *buf_out = buf;
-                    }
-                    else
-                    {
-                        last_buf->next = buf;
-                    }
-                    last_buf = buf;
+                    /* frame duration (based on average frame rate) */
+                    int64_t duration  = ((double)job->vrate_base /
+                                         (double)job->vrate * 90000.);
 
-                    // simple for now but check on TimeStampCalc from MSDK
-                    int64_t duration  = ((double)pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.FrameRateExtD /
-                                         (double)pv->enc_space.m_mfxVideoParam.mfx.FrameInfo.FrameRateExtN) * 90000.;
+                    buf->s.frametype = hb_qsv_frametype_xlat(task->bs->FrameType, &buf->s.flags);
+                    buf->s.start     = buf->s.renderOffset = task->bs->TimeStamp;
+                    buf->s.stop      = buf->s.start + duration;
+                    buf->s.duration  = duration;
 
-                    // start        -> PTS
-                    // renderOffset -> DTS
-                    buf->s.start    = buf->s.renderOffset = task->bs->TimeStamp;
-                    buf->s.stop     = buf->s.start + duration;
-                    buf->s.duration = duration;
                     if (pv->bfrm_delay)
                     {
                         if ((pv->frames_out == 0)                                  &&
@@ -1627,14 +1588,14 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out)
                             (task->bs->FrameType & MFX_FRAMETYPE_B) &&
                             (task->bs->FrameType & MFX_FRAMETYPE_REF))
                         {
-                            hb_log("encqsvWork: BPyramid off not respected (delay: %d)",
+                            hb_log("encqsv: BPyramid off not respected (delay: %d)",
                                    pv->bfrm_delay);
                         }
 
                         // check for PTS < DTS
                         if (buf->s.start < buf->s.renderOffset)
                         {
-                            hb_log("encqsvWork: PTS %"PRId64" < DTS %"PRId64" for frame %d with type '%s' (bfrm_workaround: %d)",
+                            hb_log("encqsv: PTS %"PRId64" < DTS %"PRId64" for frame %d with type '%s' (bfrm_workaround: %d)",
                                    buf->s.start, buf->s.renderOffset, pv->frames_out + 1,
                                    hb_qsv_frametype_name(task->bs->FrameType),
                                    pv->bfrm_workaround);
@@ -1693,44 +1654,32 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out)
                         }
                     }
 
-                    // shift for fifo
+                    /* shift for fifo */
                     if (pv->async_depth)
                     {
                         av_qsv_list_rem(qsv_encode->tasks, task);
                         av_qsv_list_add(qsv_encode->tasks, task);
                     }
+                    task->stage = NULL;
 
-                    task->bs->DataLength = 0;
-                    task->bs->DataOffset = 0;
-                    task->bs->MaxLength  = qsv_encode->p_buf_max_size;
-                    task->stage          = NULL;
+                    if (last_buf == NULL)
+                    {
+                        *buf_out = last_buf = buf;
+                    }
+                    else
+                    {
+                        last_buf->next = buf;
+                        last_buf       = buf;
+                    }
                     pv->frames_out++;
                 }
             }
         }
-        while (is_end);
-
-
-        if (is_end)
-        {
-            if (buf == NULL && sts == MFX_ERR_MORE_DATA)
-            {
-                break;
-            }
-        }
-        else
-        {
-            break;
-        }
-
-    }
-
-    if (!is_end)
-    {
-        ++pv->frames_in;
+        while (work_surface == NULL);
     }
+    while (work_surface == NULL && sts != MFX_ERR_MORE_DATA);
 
-    if (is_end)
+    if (work_surface == NULL)
     {
         if (last_buf != NULL)
         {
@@ -1746,6 +1695,15 @@ int encqsvWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out)
     {
         return HB_WORK_OK;
     }
+
+fail:
+    if (*job->done_error == HB_ERROR_NONE)
+    {
+        *job->done_error  = HB_ERROR_UNKNOWN;
+    }
+    *job->die = 1;
+    *buf_out  = NULL;
+    return HB_WORK_ERROR;
 }
 
 int nal_find_start_code(uint8_t **pb, size_t *size)
-- 
2.40.0