]> granicus.if.org Git - handbrake/commitdiff
qsv: adding hevc10 support starting from KBL platform
author maxd <maxim.d33@gmail.com>
Wed, 8 Feb 2017 12:17:00 +0000 (13:17 +0100)
committer Bradley Sepos <bradley@bradleysepos.com>
Sun, 5 Mar 2017 17:06:11 +0000 (12:06 -0500)
contrib/ffmpeg/A05-p10-output-support.patch [new file with mode: 0644]
libhb/enc_qsv.c
libhb/qsv_common.c

diff --git a/contrib/ffmpeg/A05-p10-output-support.patch b/contrib/ffmpeg/A05-p10-output-support.patch
new file mode 100644 (file)
index 0000000..5656cfc
--- /dev/null
@@ -0,0 +1,149 @@
+diff -Naur ./libav-12.org/libswscale/output.c ./libav-12/libswscale/output.c
+--- ./libav-12.org/libswscale/output.c 2016-10-16 23:10:02.000000000 +0200
++++ ./libav-12/libswscale/output.c     2017-02-07 23:37:28.150180400 +0100
+@@ -295,6 +295,98 @@
+         }
+ }
++
++#define output_pixel(pos, val) /* stores clip-to-10-bit(val >> shift) << 6 as a 16-bit word at pos; reads big_endian and shift from the expanding scope */ \
++    if (big_endian) { \
++        AV_WB16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
++    } else { \
++        AV_WL16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
++    }
++
++static void yuv2p010l1_c(const int16_t *src, /* unfiltered path: one output word in dest per sample in src, dstW samples */
++                         uint16_t *dest, int dstW,
++                         int big_endian) /* non-zero: output_pixel stores with AV_WB16, otherwise AV_WL16 */
++{
++    int i;
++    int shift = 5; /* right shift applied inside output_pixel before clipping to 10 bits */
++
++    for (i = 0; i < dstW; i++) {
++        int val = src[i] + (1 << (shift - 1)); /* add half of the shift step so the shift rounds instead of truncating */
++        output_pixel(&dest[i], val); /* writes the clipped 10-bit value shifted left by 6 */
++    }
++}
++
++static void yuv2p010lX_c(const int16_t *filter, int filterSize, /* filtered path: each output is a weighted sum over filterSize source rows */
++                         const int16_t **src, uint16_t *dest, int dstW,
++                         int big_endian) /* non-zero: output_pixel stores with AV_WB16, otherwise AV_WL16 */
++{
++    int i, j;
++    int shift = 17; /* right shift applied inside output_pixel before clipping to 10 bits */
++
++    for (i = 0; i < dstW; i++) {
++        int val = 1 << (shift - 1); /* start the accumulator at the rounding bias */
++
++        for (j = 0; j < filterSize; j++)
++            val += src[j][i] * filter[j]; /* row j, column i, scaled by coefficient j */
++
++        output_pixel(&dest[i], val); /* writes the clipped 10-bit value shifted left by 6 */
++    }
++}
++
++static void yuv2p010cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize, /* filtered chroma path: writes U and V interleaved into one destination */
++                         const int16_t **chrUSrc, const int16_t **chrVSrc,
++                         uint8_t *dest8, int chrDstW)
++{
++    uint16_t *dest = (uint16_t*)dest8; /* destination is addressed as 16-bit words */
++    int shift = 17; /* right shift applied inside output_pixel before clipping to 10 bits */
++    int big_endian = c->dstFormat == AV_PIX_FMT_P010BE; /* consumed by the output_pixel macro */
++    int i, j;
++
++    for (i = 0; i < chrDstW; i++) {
++        int u = 1 << (shift - 1); /* both accumulators start at the rounding bias */
++        int v = 1 << (shift - 1);
++
++        for (j = 0; j < chrFilterSize; j++) {
++            u += chrUSrc[j][i] * chrFilter[j]; /* the same coefficient is applied to U and V */
++            v += chrVSrc[j][i] * chrFilter[j];
++        }
++
++        output_pixel(&dest[2*i]  , u); /* U goes to the even word, V to the following odd word */
++        output_pixel(&dest[2*i+1], v);
++    }
++}
++
++static void yuv2p010l1_LE_c(const int16_t *src, /* little-endian wrapper around yuv2p010l1_c */
++                            uint8_t *dest, int dstW,
++                            const uint8_t *dither, int offset) /* dither and offset are not used by this function */
++{
++    yuv2p010l1_c(src, (uint16_t*)dest, dstW, 0); /* big_endian = 0 */
++}
++
++static void yuv2p010l1_BE_c(const int16_t *src, /* big-endian wrapper around yuv2p010l1_c */
++                            uint8_t *dest, int dstW,
++                            const uint8_t *dither, int offset) /* dither and offset are not used by this function */
++{
++    yuv2p010l1_c(src, (uint16_t*)dest, dstW, 1); /* big_endian = 1 */
++}
++
++static void yuv2p010lX_LE_c(const int16_t *filter, int filterSize, /* little-endian wrapper around yuv2p010lX_c */
++                            const int16_t **src, uint8_t *dest, int dstW,
++                            const uint8_t *dither, int offset) /* dither and offset are not used by this function */
++{
++    yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 0); /* big_endian = 0 */
++}
++
++static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize, /* big-endian wrapper around yuv2p010lX_c */
++                            const int16_t **src, uint8_t *dest, int dstW,
++                            const uint8_t *dither, int offset) /* dither and offset are not used by this function */
++{
++    yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 1); /* big_endian = 1 */
++}
++
++#undef output_pixel
++
++
+ #define accumulate_bit(acc, val) \
+     acc <<= 1; \
+     acc |= (val) >= (128 + 110)
+@@ -1361,7 +1453,11 @@
+     enum AVPixelFormat dstFormat = c->dstFormat;
+     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
+-    if (is16BPS(dstFormat)) {
++    if (dstFormat == AV_PIX_FMT_P010LE || dstFormat == AV_PIX_FMT_P010BE) {
++        *yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
++        *yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
++        *yuv2nv12cX = yuv2p010cX_c;
++    } else if (is16BPS(dstFormat)) {
+         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
+         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
+     } else if (is9_OR_10BPS(dstFormat)) {
+diff -Naur ./libav-12.org/libswscale/utils.c ./libav-12/libswscale/utils.c
+--- ./libav-12.org/libswscale/utils.c  2016-10-16 23:10:02.000000000 +0200
++++ ./libav-12/libswscale/utils.c      2017-02-07 23:20:09.617945500 +0100
+@@ -185,8 +185,8 @@
+     [AV_PIX_FMT_GBRAP16BE]   = { 1, 0 },
+     [AV_PIX_FMT_XYZ12BE]     = { 0, 0, 1 },
+     [AV_PIX_FMT_XYZ12LE]     = { 0, 0, 1 },
+-    [AV_PIX_FMT_P010LE]      = { 1, 0 },
+-    [AV_PIX_FMT_P010BE]      = { 1, 0 },
++    [AV_PIX_FMT_P010LE]      = { 1, 1 },
++    [AV_PIX_FMT_P010BE]      = { 1, 1 },
+ };
+ int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
+diff -Naur ./libav-12.org/libswscale/x86/swscale.c ./libav-12/libswscale/x86/swscale.c
+--- ./libav-12.org/libswscale/x86/swscale.c    2016-10-16 23:10:02.000000000 +0200
++++ ./libav-12/libswscale/x86/swscale.c        2017-02-07 23:15:14.000000000 +0100
+@@ -338,14 +338,14 @@
+ #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
+ switch(c->dstBpc){ \
+     case 16:                          do_16_case;                          break; \
+-    case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
++    case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
+     case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; break; \
+     default: if (condition_8bit)      vscalefn = ff_yuv2planeX_8_  ## opt; break; \
+     }
+ #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
+     switch(c->dstBpc){ \
+     case 16: if (!isBE(c->dstFormat))            vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
+-    case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
++    case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
+     case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_  ## opt2;  break; \
+     default:                                     vscalefn = ff_yuv2plane1_8_  ## opt1;  break; \
+     }
index b6e80687e4fd7420e4048bc602ac1253048b532e..a419767eebe343eff9252d40e3359fddc2b0b4aa 100644 (file)
@@ -302,8 +302,14 @@ static int qsv_hevc_make_header(hb_work_object_t *w, mfxSession session)
         ret = -1;
         goto end;
     }
+
+    /* need more space for 10bits */
+    if (pv->param.videoParam->mfx.FrameInfo.FourCC == MFX_FOURCC_P010)
+    {
+         hb_buffer_realloc(bitstream_buf,bitstream_buf->size*2);
+    }
     bitstream.Data      = bitstream_buf->data;
-    bitstream.MaxLength = bitstream_buf->size;
+    bitstream.MaxLength = bitstream_buf->alloc;
 
     /* We only need to encode one frame, so we only need one surface */
     mfxU16 Height            = pv->param.videoParam->mfx.FrameInfo.Height;
@@ -521,13 +527,26 @@ int qsv_enc_init(hb_work_private_t *pv)
     }
     else
     {
-        pv->sws_context_to_nv12 = hb_sws_get_context(
-                                    job->width, job->height,
-                                    AV_PIX_FMT_YUV420P,
-                                    job->width, job->height,
-                                    AV_PIX_FMT_NV12,
-                                    SWS_LANCZOS|SWS_ACCURATE_RND,
-                                    SWS_CS_DEFAULT);
+        if (pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_HEVC_MAIN10)
+        {
+            pv->sws_context_to_nv12 = hb_sws_get_context(
+                                        job->width, job->height,
+                                        AV_PIX_FMT_YUV420P,
+                                        job->width, job->height,
+                                        AV_PIX_FMT_P010LE,
+                                        SWS_LANCZOS|SWS_ACCURATE_RND,
+                                        SWS_CS_DEFAULT);
+        }
+        else
+        {
+            pv->sws_context_to_nv12 = hb_sws_get_context(
+                                        job->width, job->height,
+                                        AV_PIX_FMT_YUV420P,
+                                        job->width, job->height,
+                                        AV_PIX_FMT_NV12,
+                                        SWS_LANCZOS|SWS_ACCURATE_RND,
+                                        SWS_CS_DEFAULT);
+        }
     }
 
     // allocate tasks
@@ -589,14 +608,17 @@ int qsv_enc_init(hb_work_private_t *pv)
         {
             qsv_encode->surface_num = HB_QSV_SURFACE_NUM;
         }
+
+        /* should have 15bpp/AV_PIX_FMT_YUV420P10LE (almost x2) instead of 12bpp/AV_PIX_FMT_NV12 */
+        int bpp12 = (pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_HEVC_MAIN10) ? 6 : 3;
         for (i = 0; i < qsv_encode->surface_num; i++)
         {
             mfxFrameSurface1 *surface = av_mallocz(sizeof(mfxFrameSurface1));
             mfxFrameInfo info         = pv->param.videoParam->mfx.FrameInfo;
             surface->Info             = info;
-            surface->Data.Pitch       = info.Width;
-            surface->Data.Y           = av_mallocz(info.Width * info.Height * 3 / 2);
-            surface->Data.VU          = surface->Data.Y + info.Width * info.Height;
+            surface->Data.Pitch       = info.Width * (bpp12 == 6 ? 2 : 1); /* two bytes per sample when bpp12 == 6 */
+            surface->Data.Y           = av_mallocz((size_t)info.Width * info.Height * bpp12 / 2); /* integer size math: bpp12 / 2 bytes per pixel (1.5 or 3) */
+            surface->Data.VU          = surface->Data.Y + info.Width * info.Height * (bpp12 == 6 ? 2 : 1); /* VU plane starts right after the Y plane */
             qsv_encode->p_surfaces[i] = surface;
         }
     }
@@ -847,12 +869,21 @@ int encqsvInit(hb_work_object_t *w, hb_job_t *job)
         hb_error("encqsvInit: bad profile %s", job->encoder_profile);
         return -1;
     }
+
     if (hb_qsv_level_parse(&pv->param, pv->qsv_info, job->encoder_level))
     {
         hb_error("encqsvInit: bad level %s", job->encoder_level);
         return -1;
     }
 
+    if (pv->param.videoParam->mfx.CodecProfile == MFX_PROFILE_HEVC_MAIN10)
+    {
+        pv->param.videoParam->mfx.FrameInfo.FourCC         = MFX_FOURCC_P010;
+        pv->param.videoParam->mfx.FrameInfo.BitDepthLuma   = 10;
+        pv->param.videoParam->mfx.FrameInfo.BitDepthChroma = 10;
+        pv->param.videoParam->mfx.FrameInfo.Shift          = 0;
+    }
+
     // interlaced encoding is not always possible
     if (pv->param.videoParam->mfx.CodecId             == MFX_CODEC_AVC &&
         pv->param.videoParam->mfx.FrameInfo.PicStruct != MFX_PICSTRUCT_PROGRESSIVE)
index 011c94e10828931eb3bc22e2773e72e76b31ee9b..cef1990924aae0a7302882965e66f26825d6c0dd 100644 (file)
@@ -148,7 +148,7 @@ static int qsv_implementation_is_hardware(mfxIMPL implementation)
 int hb_qsv_available()
 {
     return ((hb_qsv_video_encoder_is_enabled(HB_VCODEC_QSV_H264) ? HB_VCODEC_QSV_H264 : 0) |
-            (hb_qsv_video_encoder_is_enabled(HB_VCODEC_QSV_H265) ? HB_VCODEC_QSV_H265 : 0)); 
+            (hb_qsv_video_encoder_is_enabled(HB_VCODEC_QSV_H265) ? HB_VCODEC_QSV_H265 : 0));
 }
 
 int hb_qsv_video_encoder_is_enabled(int encoder)
@@ -781,14 +781,14 @@ void hb_qsv_info_print()
                    qsv_hardware_version.Major, qsv_hardware_version.Minor,
                    HB_QSV_MINVERSION_MAJOR,    HB_QSV_MINVERSION_MINOR);
         }
-        
+
         if (qsv_software_version.Version)
         {
             hb_log(" - Intel Media SDK software: API %"PRIu16".%"PRIu16" (minimum: %"PRIu16".%"PRIu16")",
                    qsv_software_version.Major, qsv_software_version.Minor,
                    HB_QSV_MINVERSION_MAJOR,    HB_QSV_MINVERSION_MINOR);
         }
-    
+
         if (hb_qsv_info_avc != NULL && hb_qsv_info_avc->available)
         {
             hb_log(" - H.264 encoder: yes");
@@ -1542,6 +1542,15 @@ int hb_qsv_profile_parse(hb_qsv_param_t *param, hb_qsv_info_t *info, const char
 
             case MFX_CODEC_HEVC:
                 profile = hb_triplet4key(hb_qsv_h265_profiles, profile_key);
+
+                /* HEVC10 supported starting from KBL/G6; skip the check when the lookup above found no profile */
+                if (profile != NULL && profile->value == MFX_PROFILE_HEVC_MAIN10 &&
+                    qsv_hardware_generation(hb_get_cpu_platform()) < QSV_G6)
+                {
+                    hb_log("HEVC Main10 is not supported on this platform");
+                    profile = NULL;
+                }
+
                 break;
 
             default: