From ab39134445f79f1684a3d8ab814085e04ade27d7 Mon Sep 17 00:00:00 2001 From: handbrake Date: Tue, 18 Dec 2012 08:40:09 +0000 Subject: [PATCH] add hardware decoder support for H264(vob/m2ts), mpeg2(vob/m2ts), VC1(WMV3/m2ts) decoding modify the GUI for adding some user options about enable/disable OpenCL or GPU add some test cases for testing our modifications. git-svn-id: svn://svn.handbrake.fr/HandBrake/branches/opencl@5100 b64f7644-9d1e-0410-96f1-a4d463321fa5 --- contrib/ffmpeg/hb-libav-avformat.patch | 214 +++ contrib/ffmpeg/module.defs | 4 +- libhb/common.c | 31 +- libhb/common.h | 83 +- libhb/cropscale.c | 13 + libhb/cropscaleaccl.c | 262 ++++ libhb/decavcodecaccl.c | 1314 +++++++++++++++++ libhb/dxva2api.c | 36 + libhb/dxva2api.h | 822 +++++++++++ libhb/hb.c | 3 +- libhb/hbffmpeg.h | 1 + libhb/internal.h | 4 +- libhb/module.defs | 7 + libhb/oclnv12toyuv.c | 222 +++ libhb/oclnv12toyuv.h | 29 + libhb/oclscale.c | 301 ++++ libhb/openclkernels.h | 122 ++ libhb/openclwrapper.c | 934 ++++++++++++ libhb/openclwrapper.h | 79 + libhb/stream.c | 40 +- libhb/vadxva2.c | 827 +++++++++++ libhb/vadxva2.h | 194 +++ libhb/work.c | 30 +- make/configure.py | 5 +- test/module.defs | 4 + test/test case/handbrake_test.txt | 80 + test/test case/readme.txt | 43 + test/test case/run_handbrake.py | 126 ++ test/test case/sample_testresults.csv | 52 + test/test.c | 54 +- .../HandBrake.ApplicationServices.csproj | 1 + .../Model/EncodeTask.cs | 12 + .../Parsing/Title.cs | 15 + .../Utilities/GeneralUtilities.cs | 2 + .../Utilities/InteropModelCreator.cs | 2 + .../Utilities/PlistUtility.cs | 2 + .../Utilities/QueryGeneratorUtility.cs | 6 + .../Utilities/QueryParserUtility.cs | 4 + .../Utilities/SystemInfo.cs | 36 +- .../HandBrakeInterop/HandBrakeInstance.cs | 2 + .../HandBrakeInterop/HbLib/hb_job_s.cs | 6 + .../Model/Encoding/EncodingProfile.cs | 4 + .../HandBrakeWPF/ViewModels/MainViewModel.cs | 61 + win/CS/HandBrakeWPF/Views/MainView.xaml | 18 + 44 files changed, 6090 insertions(+), 17 
deletions(-) create mode 100644 contrib/ffmpeg/hb-libav-avformat.patch create mode 100644 libhb/cropscaleaccl.c create mode 100644 libhb/decavcodecaccl.c create mode 100644 libhb/dxva2api.c create mode 100644 libhb/dxva2api.h create mode 100644 libhb/oclnv12toyuv.c create mode 100644 libhb/oclnv12toyuv.h create mode 100644 libhb/oclscale.c create mode 100644 libhb/openclkernels.h create mode 100644 libhb/openclwrapper.c create mode 100644 libhb/openclwrapper.h create mode 100644 libhb/vadxva2.c create mode 100644 libhb/vadxva2.h create mode 100644 test/test case/handbrake_test.txt create mode 100644 test/test case/readme.txt create mode 100644 test/test case/run_handbrake.py create mode 100644 test/test case/sample_testresults.csv diff --git a/contrib/ffmpeg/hb-libav-avformat.patch b/contrib/ffmpeg/hb-libav-avformat.patch new file mode 100644 index 000000000..d9f978f73 --- /dev/null +++ b/contrib/ffmpeg/hb-libav-avformat.patch @@ -0,0 +1,214 @@ +diff -Ncr hb-libav-orig/libavformat/avformat.h hb-libav-opt/libavformat/avformat.h +*** hb-libav-orig/libavformat/avformat.h Sat Jul 28 23:29:46 2012 +--- hb-libav-opt/libavformat/avformat.h Fri Nov 23 09:54:19 2012 +*************** +*** 721,726 **** +--- 721,730 ---- + int64_t codec_info_duration; + int nb_decoded_frames; + int found_decoder; ++ int64_t fps_first_dts; ++ int fps_first_dts_idx; ++ int64_t fps_last_dts; ++ int fps_last_dts_idx; + } *info; + + int pts_wrap_bits; /**< number of bits in pts (used for wrapping control) */ +diff -Ncr hb-libav-orig/libavformat/utils.c hb-libav-opt/libavformat/utils.c +*** hb-libav-orig/libavformat/utils.c Sat Jul 28 23:29:46 2012 +--- hb-libav-opt/libavformat/utils.c Fri Nov 23 09:58:45 2012 +*************** +*** 2252,2258 **** + int orig_nb_streams = ic->nb_streams; // new streams might appear, no options for those + + for(i=0;inb_streams;i++) { +! 
AVCodec *codec; + AVDictionary *thread_opt = NULL; + st = ic->streams[i]; + +--- 2252,2258 ---- + int orig_nb_streams = ic->nb_streams; // new streams might appear, no options for those + + for(i=0;inb_streams;i++) { +! const AVCodec *codec; + AVDictionary *thread_opt = NULL; + st = ic->streams[i]; + +*************** +*** 2287,2293 **** +--- 2287,2297 ---- + } + + for (i=0; inb_streams; i++) { ++ #if FF_API_R_FRAME_RATE + ic->streams[i]->info->last_dts = AV_NOPTS_VALUE; ++ #endif ++ ic->streams[i]->info->fps_first_dts = AV_NOPTS_VALUE; ++ ic->streams[i]->info->fps_last_dts = AV_NOPTS_VALUE; + } + + count = 0; +*************** +*** 2314,2321 **** + if (ic->fps_probe_size >= 0) + fps_analyze_framecount = ic->fps_probe_size; + /* variable fps and no guess at the real fps */ +! if( tb_unreliable(st->codec) && !(st->r_frame_rate.num && st->avg_frame_rate.num) +! && st->info->duration_count < fps_analyze_framecount + && st->codec->codec_type == AVMEDIA_TYPE_VIDEO) + break; + if(st->parser && st->parser->parser->split && !st->codec->extradata) +--- 2318,2325 ---- + if (ic->fps_probe_size >= 0) + fps_analyze_framecount = ic->fps_probe_size; + /* variable fps and no guess at the real fps */ +! if( tb_unreliable(st->codec) && !st->avg_frame_rate.num +! && st->codec_info_nb_frames < fps_analyze_framecount + && st->codec->codec_type == AVMEDIA_TYPE_VIDEO) + break; + if(st->parser && st->parser->parser->split && !st->codec->extradata) +*************** +*** 2383,2402 **** + break; + } + +! pkt= add_to_pktbuf(&ic->packet_buffer, &pkt1, &ic->packet_buffer_end); + if ((ret = av_dup_packet(pkt)) < 0) + goto find_stream_info_err; + + read_size += pkt->size; + + st = ic->streams[pkt->stream_index]; +! if (st->codec_info_nb_frames>1) { +! 
if (av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q) >= ic->max_analyze_duration) { + av_log(ic, AV_LOG_WARNING, "max_analyze_duration reached\n"); + break; + } +- st->info->codec_info_duration += pkt->duration; + } + { + int64_t last = st->info->last_dts; + +--- 2387,2444 ---- + break; + } + +! if (ic->flags & 0x0040) { +! pkt = &pkt1; +! } else { +! pkt = add_to_pktbuf(&ic->packet_buffer, &pkt1, +! &ic->packet_buffer_end); + if ((ret = av_dup_packet(pkt)) < 0) + goto find_stream_info_err; ++ } + + read_size += pkt->size; + + st = ic->streams[pkt->stream_index]; +! if (pkt->dts != AV_NOPTS_VALUE && st->codec_info_nb_frames > 1) { +! /* check for non-increasing dts */ +! if (st->info->fps_last_dts != AV_NOPTS_VALUE && +! st->info->fps_last_dts >= pkt->dts) { +! av_log(ic, AV_LOG_WARNING, "Non-increasing DTS in stream %d: " +! "packet %d with DTS %"PRId64", packet %d with DTS " +! "%"PRId64"\n", st->index, st->info->fps_last_dts_idx, +! st->info->fps_last_dts, st->codec_info_nb_frames, pkt->dts); +! st->info->fps_first_dts = st->info->fps_last_dts = AV_NOPTS_VALUE; +! } +! /* check for a discontinuity in dts - if the difference in dts +! * is more than 1000 times the average packet duration in the sequence, +! * we treat it as a discontinuity */ +! if (st->info->fps_last_dts != AV_NOPTS_VALUE && +! st->info->fps_last_dts_idx > st->info->fps_first_dts_idx && +! (pkt->dts - st->info->fps_last_dts) / 1000 > +! (st->info->fps_last_dts - st->info->fps_first_dts) / (st->info->fps_last_dts_idx - st->info->fps_first_dts_idx)) { +! av_log(ic, AV_LOG_WARNING, "DTS discontinuity in stream %d: " +! "packet %d with DTS %"PRId64", packet %d with DTS " +! "%"PRId64"\n", st->index, st->info->fps_last_dts_idx, +! st->info->fps_last_dts, st->codec_info_nb_frames, pkt->dts); +! st->info->fps_first_dts = st->info->fps_last_dts = AV_NOPTS_VALUE; +! } +! +! /* update stored dts values */ +! if (st->info->fps_first_dts == AV_NOPTS_VALUE) { +! 
st->info->fps_first_dts = pkt->dts; +! st->info->fps_first_dts_idx = st->codec_info_nb_frames; +! } +! st->info->fps_last_dts = pkt->dts; +! st->info->fps_last_dts_idx = st->codec_info_nb_frames; +! +! /* check max_analyze_duration */ +! if (av_rescale_q(pkt->dts - st->info->fps_first_dts, st->time_base, +! AV_TIME_BASE_Q) >= ic->max_analyze_duration) { + av_log(ic, AV_LOG_WARNING, "max_analyze_duration reached\n"); + break; + } + } ++ #if FF_API_R_FRAME_RATE + { + int64_t last = st->info->last_dts; + +*************** +*** 2420,2425 **** +--- 2462,2468 ---- + if (last == AV_NOPTS_VALUE || st->info->duration_count <= 1) + st->info->last_dts = pkt->dts; + } ++ #endif + if(st->parser && st->parser->parser->split && !st->codec->extradata){ + int i= st->parser->parser->split(st->codec, pkt->data, pkt->size); + if (i > 0 && i < FF_MAX_EXTRADATA_SIZE) { +*************** +*** 2455,2464 **** + for(i=0;inb_streams;i++) { + st = ic->streams[i]; + if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) { +! if (st->codec_info_nb_frames>2 && !st->avg_frame_rate.num && st->info->codec_info_duration) + av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, +! (st->codec_info_nb_frames-2)*(int64_t)st->time_base.den, +! st->info->codec_info_duration*(int64_t)st->time_base.num, 60000); + // the check for tb_unreliable() is not completely correct, since this is not about handling + // a unreliable/inexact time base, but a time base that is finer than necessary, as e.g. + // ipmovie.c produces. +--- 2498,2528 ---- + for(i=0;inb_streams;i++) { + st = ic->streams[i]; + if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) { +! if (!st->avg_frame_rate.num && st->info->fps_last_dts != st->info->fps_first_dts) { +! int64_t delta_dts = st->info->fps_last_dts - st->info->fps_first_dts; +! int delta_packets = st->info->fps_last_dts_idx - st->info->fps_first_dts_idx; +! int best_fps = 0; +! double best_error = 0.01; + av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, +! 
delta_packets*(int64_t)st->time_base.den, +! delta_dts*(int64_t)st->time_base.num, 60000); +! +! /* round guessed framerate to a "standard" framerate if it's +! * within 1% of the original estimate*/ +! for (j = 1; j < MAX_STD_TIMEBASES; j++) { +! AVRational std_fps = { get_std_framerate(j), 12*1001 }; +! double error = fabs(av_q2d(st->avg_frame_rate) / av_q2d(std_fps) - 1); +! if (error < best_error) { +! best_error = error; +! best_fps = std_fps.num; +! } +! } +! if (best_fps) { +! av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, +! best_fps, 12*1001, INT_MAX); +! } +! } +! #if FF_API_R_FRAME_RATE + // the check for tb_unreliable() is not completely correct, since this is not about handling + // a unreliable/inexact time base, but a time base that is finer than necessary, as e.g. + // ipmovie.c produces. +*************** +*** 2481,2486 **** +--- 2545,2551 ---- + if (num && (!st->r_frame_rate.num || (double)num/(12*1001) < 1.01 * av_q2d(st->r_frame_rate))) + av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, num, 12*1001, INT_MAX); + } ++ #endif + }else if(st->codec->codec_type == AVMEDIA_TYPE_AUDIO) { + if(!st->codec->bits_per_coded_sample) + st->codec->bits_per_coded_sample= av_get_bits_per_sample(st->codec->codec_id); diff --git a/contrib/ffmpeg/module.defs b/contrib/ffmpeg/module.defs index 54ac675f7..26365ecba 100644 --- a/contrib/ffmpeg/module.defs +++ b/contrib/ffmpeg/module.defs @@ -17,8 +17,8 @@ FFMPEG.CONFIGURE.extra = \ --disable-avfilter \ --disable-muxers \ --disable-network \ - --disable-hwaccels \ - --disable-encoders \ + --disable-vaapi \ + --enable-dxva2 \ --enable-encoder=flac \ --enable-encoder=ac3 \ --enable-encoder=aac \ diff --git a/libhb/common.c b/libhb/common.c index 34c97da54..8ff5605b2 100644 --- a/libhb/common.c +++ b/libhb/common.c @@ -1939,7 +1939,11 @@ hb_filter_object_t * hb_filter_init( int filter_id ) case HB_FILTER_CROP_SCALE: filter = &hb_filter_crop_scale; break; - +#ifdef USE_OPENCL + case 
HB_FILTER_CROP_SCALE_ACCL: + filter = &hb_filter_crop_scale_accl; + break; +#endif case HB_FILTER_ROTATE: filter = &hb_filter_rotate; break; @@ -2903,3 +2907,28 @@ void hb_hexdump( hb_debug_level_t level, const char * label, const uint8_t * dat hb_deep_log( level, " %-50s%20s", line, ascii ); } } + +int hb_use_dxva( hb_title_t * title ) +{ + return ( (title->video_codec_param == CODEC_ID_MPEG2VIDEO + || title->video_codec_param == CODEC_ID_H264 + || title->video_codec_param == CODEC_ID_VC1 + || title->video_codec_param == CODEC_ID_WMV3 + || title->video_codec_param == CODEC_ID_MPEG4 ) + && title->opaque_priv ); +} +int hb_get_gui_info(hb_gui_t * gui, int option) +{ + if ( option == 1 ) + return gui->use_uvd; + else if ( option == 2 ) + return gui->use_opencl; + else + return gui->title_scan; +} +void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan) +{ + gui->use_uvd = uvd; + gui->use_opencl = opencl; + gui->title_scan = titlescan; +} diff --git a/libhb/common.h b/libhb/common.h index 796c3df38..f4c6d283a 100644 --- a/libhb/common.h +++ b/libhb/common.h @@ -20,7 +20,9 @@ #include #include #include - +#ifdef USE_OPENCL +#include +#endif /* * It seems WinXP doesn't align the stack of new threads to 16 bytes. 
* To prevent crashes in SSE functions, we need to force stack alignement @@ -92,6 +94,7 @@ typedef struct hb_filter_object_s hb_filter_object_t; typedef struct hb_buffer_s hb_buffer_t; typedef struct hb_fifo_s hb_fifo_t; typedef struct hb_lock_s hb_lock_t; +typedef struct hb_gui_s hb_gui_t; #include "ports.h" #ifdef __LIBHB__ @@ -140,6 +143,16 @@ int hb_subtitle_can_burn( int source ); int hb_subtitle_can_pass( int source, int mux ); hb_attachment_t *hb_attachment_copy(const hb_attachment_t *src); +int hb_get_gui_info(hb_gui_t *gui, int option); +void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan); +struct hb_gui_s +{ + int use_uvd; + int use_opencl; + int title_scan; +}; +hb_gui_t hb_gui; + hb_list_t *hb_attachment_list_copy(const hb_list_t *src); void hb_attachment_close(hb_attachment_t **attachment); @@ -418,6 +431,8 @@ struct hb_job_s uint32_t frames_to_skip; // decode but discard this many frames // initially (for frame accurate positioning // to non-I frames). + int use_opencl;/* 0 is disable use of opencl. 1 is enable use of opencl */ + int use_uvd; #ifdef __LIBHB__ /* Internal data */ @@ -760,6 +775,8 @@ struct hb_title_s uint32_t flags; // set if video stream doesn't have IDR frames + int opencl_support; + int uvd_support; #define HBTF_NO_IDR (1 << 0) #define HBTF_SCAN_COMPLETE (1 << 0) }; @@ -923,6 +940,7 @@ extern hb_work_object_t hb_encca_aac; extern hb_work_object_t hb_encca_haac; extern hb_work_object_t hb_encavcodeca; extern hb_work_object_t hb_reader; +extern hb_work_object_t hb_decavcodecv_accl; #define HB_FILTER_OK 0 #define HB_FILTER_DELAY 1 @@ -930,6 +948,60 @@ extern hb_work_object_t hb_reader; #define HB_FILTER_DROP 3 #define HB_FILTER_DONE 4 +typedef struct hb_oclscale_s +{ +#ifdef USE_OPENCL + // input buffer for running horizontal kernel. output buffer of running horizontal kernel. 
outpuf buffer of running vertiacla kernel + cl_mem h_in_buf; + cl_mem h_out_buf; + cl_mem v_out_buf; + // horizontal coefficent buffer for Y U and V plane, hroizontal source index for Y,U and V plane + cl_mem h_coeff_y; + cl_mem h_coeff_uv; + cl_mem h_index_y; + cl_mem h_index_uv; + // vertical coefficent buffer for Y U and V plane, vertical source index for Y,U and V plane + cl_mem v_coeff_y; + cl_mem v_coeff_uv; + cl_mem v_index_y; + cl_mem v_index_uv; + // horizontal scaling and vertical scaling kernel handle + cl_kernel h_kernel; + cl_kernel v_kernel; + int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem +#endif +} hb_oclscale_t; + +#ifdef USE_OPENCL +int hb_ocl_scale( cl_mem in_buf, uint8_t *in_data, uint8_t *out_data, int in_w, int in_h, int out_w, int out_h, hb_oclscale_t *os ); +int TestGPU(); +#endif + +#ifdef USE_OPENCL +int hb_use_dxva( hb_title_t * title ); +// create opencl buffer +#define CREATEBUF( out, flags, size )\ + {\ + out = clCreateBuffer( kenv->context, (flags), (size), NULL, &status );\ + if( CL_SUCCESS != status ) return -1;\ + } + +#define OCLCHECK( method, ... 
)\ + status = method( __VA_ARGS__ ); if( status != CL_SUCCESS ) {\ + printf( # method " error '%d'\n", status ); return status; } + +#define CL_FREE( buf )\ +{\ + if( buf )\ + {\ + { clReleaseMemObject( buf ); }\ + buf = NULL;\ + }\ +} + + +#endif + typedef struct hb_filter_init_s { hb_job_t * job; @@ -944,6 +1016,12 @@ typedef struct hb_filter_init_s int pfr_vrate_base; int pfr_vrate; int cfr; +#ifdef USE_OPENCL + int use_dxva; + int title_width; + int title_height; +#endif + } hb_filter_init_t; typedef struct hb_filter_info_s @@ -998,6 +1076,9 @@ enum HB_FILTER_DENOISE, HB_FILTER_RENDER_SUB, HB_FILTER_CROP_SCALE, +#ifdef USE_OPENCL + HB_FILTER_CROP_SCALE_ACCL, +#endif // Finally filters that don't care what order they are in, // except that they must be after the above filters HB_FILTER_ROTATE, diff --git a/libhb/cropscale.c b/libhb/cropscale.c index aa88f3a20..61aa85101 100644 --- a/libhb/cropscale.c +++ b/libhb/cropscale.c @@ -19,6 +19,7 @@ struct hb_filter_private_s int width_out; int height_out; int crop[4]; + int use_dxva; struct SwsContext * context; }; @@ -71,6 +72,9 @@ static int hb_crop_scale_init( hb_filter_object_t * filter, init->width = pv->width_out; init->height = pv->height_out; memcpy( init->crop, pv->crop, sizeof( int[4] ) ); +#ifdef USE_OPENCL + pv->use_dxva = init->use_dxva; +#endif return 0; } @@ -206,6 +210,15 @@ static int hb_crop_scale_work( hb_filter_object_t * filter, *buf_in = NULL; return HB_FILTER_OK; } +#ifdef USE_OPENCL + if ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_OK; + } +#endif + *buf_out = crop_scale( pv, in ); return HB_FILTER_OK; diff --git a/libhb/cropscaleaccl.c b/libhb/cropscaleaccl.c new file mode 100644 index 000000000..775e71b5c --- /dev/null +++ b/libhb/cropscaleaccl.c @@ -0,0 +1,262 @@ +/* cropscaleaccl.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . 
+ It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + */ +#ifdef USE_OPENCL +#include "hb.h" +#include "common.h" + +struct hb_filter_private_s +{ + int width_in; + int height_in; + int pix_fmt; + int pix_fmt_out; + int width_out; + int height_out; + int crop[4]; + int use_dxva; + + int title_width; + int title_height; + hb_oclscale_t * os; //ocl scaler handler + struct SwsContext * context; +}; + +static int hb_crop_scale_init( hb_filter_object_t * filter, + hb_filter_init_t * init ); + +static int hb_crop_scale_work( hb_filter_object_t * filter, + hb_buffer_t ** buf_in, + hb_buffer_t ** buf_out ); + +static int hb_crop_scale_info( hb_filter_object_t * filter, + hb_filter_info_t * info ); + +static void hb_crop_scale_close( hb_filter_object_t * filter ); + +hb_filter_object_t hb_filter_crop_scale_accl = +{ + .id = HB_FILTER_CROP_SCALE_ACCL, + .enforce_order = 1, + .name = "Hardware Acceleration Crop and Scale", + .settings = NULL, + .init = hb_crop_scale_init, + .work = hb_crop_scale_work, + .close = hb_crop_scale_close, + .info = hb_crop_scale_info, +}; + +static int hb_crop_scale_init( hb_filter_object_t * filter, + hb_filter_init_t * init ) +{ + filter->private_data = calloc( 1, sizeof(struct hb_filter_private_s) ); + hb_filter_private_t * pv = filter->private_data; + + // TODO: add pix format option to settings + pv->pix_fmt_out = init->pix_fmt; + pv->width_in = init->width; + pv->height_in = init->height; + pv->width_out = init->width; + pv->height_out = init->height; + pv->use_dxva = init->use_dxva; + pv->title_width = init->title_width; + pv->title_height = init->title_height; + memcpy( pv->crop, init->crop, sizeof( int[4] ) ); + if( filter->settings ) + { + sscanf( filter->settings, "%d:%d:%d:%d:%d:%d", + &pv->width_out, &pv->height_out, + &pv->crop[0], &pv->crop[1], &pv->crop[2], &pv->crop[3] ); + } + // Set init 
values so the next stage in the pipline + // knows what it will be getting + init->pix_fmt = pv->pix_fmt; + init->width = pv->width_out; + init->height = pv->height_out; + memcpy( init->crop, pv->crop, sizeof( int[4] ) ); + pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); + memset( pv->os, 0, sizeof( hb_oclscale_t ) ); + return 0; +} + +static int hb_crop_scale_info( hb_filter_object_t * filter, + hb_filter_info_t * info ) +{ + hb_filter_private_t * pv = filter->private_data; + + if( !pv ) + return 0; + + // Set init values so the next stage in the pipline + // knows what it will be getting + memset( info, 0, sizeof( hb_filter_info_t ) ); + info->out.pix_fmt = pv->pix_fmt; + info->out.width = pv->width_out; + info->out.height = pv->height_out; + memcpy( info->out.crop, pv->crop, sizeof( int[4] ) ); + + int cropped_width = pv->title_width - ( pv->crop[2] + pv->crop[3] ); + int cropped_height = pv->title_height - ( pv->crop[0] + pv->crop[1] ); + + sprintf( info->human_readable_desc, + "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d", + pv->title_width, pv->title_height, + pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], + cropped_width, cropped_height, pv->width_out, pv->height_out ); + + return 0; +} + +static void hb_crop_scale_close( hb_filter_object_t * filter ) +{ + hb_filter_private_t * pv = filter->private_data; + + if( !pv ) + { + return; + } + if ( pv->os ) + { + CL_FREE( pv->os->h_in_buf ); + CL_FREE( pv->os->h_out_buf ); + CL_FREE( pv->os->v_out_buf ); + CL_FREE( pv->os->h_coeff_y ); + CL_FREE( pv->os->h_coeff_uv ); + CL_FREE( pv->os->h_index_y ); + CL_FREE( pv->os->h_index_uv ); + CL_FREE( pv->os->v_coeff_y ); + CL_FREE( pv->os->v_coeff_uv ); + CL_FREE( pv->os->v_index_y ); + CL_FREE( pv->os->v_index_uv ); + free( pv->os ); + } + free( pv ); + filter->private_data = NULL; +} + +static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h ) +{ + if( dstride == sstride ) + { + memcpy( dst, src, 
dstride * h ); + return dst + dstride * h; + } + int lbytes = dstride <= sstride ? dstride : sstride; + while( --h >= 0 ) + { + memcpy( dst, src, lbytes ); + src += sstride; + dst += dstride; + } + return dst; +} + +static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) +{ + AVPicture pic_in; + AVPicture pic_out; + AVPicture pic_crop; + hb_buffer_t * out; + out = hb_video_buffer_init( pv->width_out, pv->height_out ); + + hb_avpicture_fill( &pic_in, in ); + hb_avpicture_fill( &pic_out, out ); + + // Crop; this alters the pointer to the data to point to the + // correct place for cropped frame + av_picture_crop( &pic_crop, &pic_in, in->f.fmt, + pv->crop[0], pv->crop[2] ); + + + int w = in->f.width - ( pv->crop[2] + pv->crop[3] ); + int h = in->f.height - ( pv->crop[0] + pv->crop[1] ); + uint8_t *tmp_in = malloc( w * h * 3 / 2 ); + uint8_t *tmp_out = malloc( pv->width_out * pv->height_out * 3 / 2 ); + if( pic_crop.data[0] || pic_crop.data[1] || pic_crop.data[2] || pic_crop.data[3] ) + { + int i; + for( i = 0; i< h>>1; i++ ) + { + memcpy( tmp_in + ( ( i<<1 ) + 0 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], w ); + memcpy( tmp_in + ( ( i<<1 ) + 1 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], w ); + memcpy( tmp_in + ( w * h ) + i * ( w>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], w >> 1 ); + memcpy( tmp_in + ( w * h ) + ( ( w * h )>>2 ) + i * ( w>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], w >> 1 ); + } + } + else + { + memcpy( tmp_in, pic_crop.data[0], w * h ); + memcpy( tmp_in + w * h, pic_crop.data[1], (w*h)>>2 ); + memcpy( tmp_in + w * h + ((w*h)>>2), pic_crop.data[2], (w*h)>>2 ); + } + hb_ocl_scale( NULL, tmp_in, tmp_out, w, h, out->f.width, out->f.height, pv->os ); + w = out->plane[0].stride; + h = out->plane[0].height; + uint8_t *dst = out->plane[0].data; + copy_plane( dst, tmp_out, w, pv->width_out, h ); + w = out->plane[1].stride; + h = out->plane[1].height; + dst = out->plane[1].data; + 
copy_plane( dst, tmp_out + pv->width_out * pv->height_out, w, pv->width_out>>1, h ); + w = out->plane[2].stride; + h = out->plane[2].height; + dst = out->plane[2].data; + copy_plane( dst, tmp_out + pv->width_out * pv->height_out +( ( pv->width_out * pv->height_out )>>2 ), w, pv->width_out>>1, h ); + free( tmp_out ); + free( tmp_in ); + out->s = in->s; + hb_buffer_move_subs( out, in ); + return out; +} + +static int hb_crop_scale_work( hb_filter_object_t * filter, + hb_buffer_t ** buf_in, + hb_buffer_t ** buf_out ) +{ + hb_filter_private_t * pv = filter->private_data; + hb_buffer_t * in = *buf_in; + + if( in->size <= 0 ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_DONE; + } + + if( !pv ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_OK; + } + + // If width or height were not set, set them now based on the + // input width & height + if( pv->width_out <= 0 || pv->height_out <= 0 ) + { + pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]); + pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]); + } + if( ( in->f.fmt == pv->pix_fmt_out && + !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && + in->f.width == pv->width_out && in->f.height == pv->height_out ) || + ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_OK; + } + *buf_out = crop_scale( pv, in ); + + + return HB_FILTER_OK; +} +#endif diff --git a/libhb/decavcodecaccl.c b/libhb/decavcodecaccl.c new file mode 100644 index 000000000..5b19ed688 --- /dev/null +++ b/libhb/decavcodecaccl.c @@ -0,0 +1,1314 @@ +/* decavcodecaccl.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + +/* This module is Handbrake's interface to the ffmpeg decoder library + (libavcodec & small parts of libavformat). It contains four Handbrake + "work objects": + + decavcodeca connects HB to an ffmpeg audio decoder + decavcodecvaccl connects HB to an ffmpeg video decoder + + (Two different routines are needed because the ffmpeg library + has different decoder calling conventions for audio & video. + These work objects are self-contained & follow all + of HB's conventions for a decoder module. They can be used like + any other HB decoder (deca52, decmpeg2, etc.). + + These decoders handle 2 kinds of input. Streams that are demuxed + by HandBrake and streams that are demuxed by libavformat. In the + case of streams that are demuxed by HandBrake, there is an extra + parse step required that happens in decodeVideo and decavcodecaWork. + In the case of streams that are demuxed by libavformat, there is context + information that we need from the libavformat. This information is + propagated from hb_stream_open to these decoders through title->opaque_priv. + + A consequence of the above is that the streams that are demuxed by HandBrake + *can't* use information from the AVStream because there isn't one - they + get their data from either the dvd reader or the mpeg reader, not the ffmpeg + stream reader. That means that they have to make up for deficiencies in the + AVCodecContext info by using stuff kept in the HB "title" struct. It + also means that ffmpeg codecs that randomly scatter state needed by + the decoder across both the AVCodecContext & the AVStream (e.g., the + VC1 decoder) can't easily be used by the HB mpeg stream reader. 
+ */ +#define HAVE_DXVA2 +#ifdef HAVE_DXVA2 +#include "hb.h" +#include "hbffmpeg.h" +#include "vadxva2.h" +#include "audio_remap.h" +#include "audio_resample.h" + +static void compute_frame_duration( hb_work_private_t *pv ); +static void flushDelayQueue( hb_work_private_t *pv ); + +#define HEAP_SIZE 8 +typedef struct { + // there are nheap items on the heap indexed 1..nheap (i.e., top of + // heap is 1). The 0th slot is unused - a marker is put there to check + // for overwrite errs. + int64_t h[HEAP_SIZE+1]; + int nheap; +} pts_heap_t; + +struct hb_work_private_s +{ + hb_job_t *job; + hb_title_t *title; + AVCodecContext *context; + AVCodecParserContext *parser; + int threads; + int video_codec_opened; + hb_list_t *list; + double duration; // frame duration (for video) + double field_duration; // field duration (for video) + int frame_duration_set; // Indicates valid timing was found in stream + double pts_next; // next pts we expect to generate + int64_t chap_time; // time of next chap mark (if new_chap != 0) + int new_chap; // output chapter mark pending + uint32_t nframes; + uint32_t ndrops; + uint32_t decode_errors; + int brokenByMicrosoft; // video stream may contain packed b-frames + hb_buffer_t* delayq[HEAP_SIZE]; + int queue_primed; + pts_heap_t pts_heap; + void* buffer; + struct SwsContext *sws_context; // if we have to rescale or convert color space + int sws_width; + int sws_height; + int sws_pix_fmt; + int cadence[12]; + int wait_for_keyframe; + hb_va_dxva2_t * dxva2; + uint8_t *dst_frame; + hb_oclscale_t *os; + hb_audio_resample_t *resample; +}; + +static hb_buffer_t *link_buf_list( hb_work_private_t *pv ); + + +static int64_t heap_pop( pts_heap_t *heap ) +{ + int64_t result; + + if( heap->nheap <= 0 ) + { + return -1; + } + + // return the top of the heap then put the bottom element on top, + // decrease the heap size by one & rebalence the heap. 
+ result = heap->h[1]; + + int64_t v = heap->h[heap->nheap--]; + int parent = 1; + int child = parent << 1; + while( child <= heap->nheap ) + { + // find the smallest of the two children of parent + if( child < heap->nheap && heap->h[child] > heap->h[child+1] ) + ++child; + + if( v <= heap->h[child] ) + // new item is smaller than either child so it's the new parent. + break; + + // smallest child is smaller than new item so move it up then + // check its children. + int64_t hp = heap->h[child]; + heap->h[parent] = hp; + parent = child; + child = parent << 1; + } + heap->h[parent] = v; + return result; +} + +static void heap_push( pts_heap_t *heap, int64_t v ) +{ + if( heap->nheap < HEAP_SIZE ) + { + ++heap->nheap; + } + + // stick the new value on the bottom of the heap then bubble it + // up to its correct spot. + int child = heap->nheap; + while( child > 1 ) { + int parent = child >> 1; + if( heap->h[parent] <= v ) + break; + // move parent down + int64_t hp = heap->h[parent]; + heap->h[child] = hp; + child = parent; + } + heap->h[child] = v; +} + +/*********************************************************************** + * Close + *********************************************************************** + * + **********************************************************************/ +static void closePrivData( hb_work_private_t ** ppv ) +{ + hb_work_private_t * pv = *ppv; + + if( pv ) + { + flushDelayQueue( pv ); + + if( pv->job && pv->context && pv->context->codec ) + { + hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops", + pv->context->codec->name, pv->nframes, pv->decode_errors, + pv->ndrops ); + } + if( pv->sws_context ) + { + sws_freeContext( pv->sws_context ); + } + if( pv->parser ) + { + av_parser_close( pv->parser ); + } + if( pv->context && pv->context->codec ) + { + hb_avcodec_close( pv->context ); + } + if( pv->context ) + { + av_free( pv->context ); + } + if( pv->list ) + { + hb_list_empty( &pv->list ); + } + + hb_audio_resample_free( 
pv->resample ); + if ( pv->os ) + { +#ifdef USE_OPENCL + CL_FREE( pv->os->h_in_buf ); + CL_FREE( pv->os->h_out_buf ); + CL_FREE( pv->os->v_out_buf ); + CL_FREE( pv->os->h_coeff_y ); + CL_FREE( pv->os->h_coeff_uv ); + CL_FREE( pv->os->h_index_y ); + CL_FREE( pv->os->h_index_uv ); + CL_FREE( pv->os->v_coeff_y ); + CL_FREE( pv->os->v_coeff_uv ); + CL_FREE( pv->os->v_index_y ); + CL_FREE( pv->os->v_index_uv ); +#endif + free( pv->os ); + } + if ( pv->dxva2 ) + { + +#ifdef USE_OPENCL + CL_FREE( pv->dxva2->cl_mem_nv12 ); +#endif + hb_va_close( pv->dxva2 ); + } + free( pv ); + } + *ppv = NULL; +} + +/* ------------------------------------------------------------- + * General purpose video decoder using libavcodec + */ + +static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, + int h ) +{ + if( dstride == sstride ) + { + memcpy( dst, src, dstride * h ); + return dst + dstride * h; + } + int lbytes = dstride <= sstride ? dstride : sstride; + while( --h >= 0 ) + { + memcpy( dst, src, lbytes ); + src += sstride; + dst += dstride; + } + return dst; +} + +// copy one video frame into an HB buf. If the frame isn't in our color space +// or at least one of its dimensions is odd, use sws_scale to convert/rescale it. +// Otherwise just copy the bits. +static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame ) +{ + AVCodecContext *context = pv->context; + + int w, h; + if( !pv->job ) + { + // HandBrake's video pipeline uses yuv420 color. This means all + // dimensions must be even. So we must adjust the dimensions + // of incoming video if not even. 
+ w = context->width & ~1; + h = context->height & ~1; + } + else + { + w = pv->job->title->width; + h = pv->job->title->height; + } + if( pv->dxva2 && pv->job ) + { + hb_buffer_t *buf; + int ww, hh; + if( (w > pv->job->width || h > pv->job->height) ) + { + buf = hb_video_buffer_init( pv->job->width, pv->job->height ); + ww = pv->job->width; + hh = pv->job->height; + } + else + { + buf = hb_video_buffer_init( w, h ); + ww = w; + hh = h; + } + if( !pv->dst_frame ) + { + pv->dst_frame = malloc( ww * hh * 3 / 2 ); + } + if( hb_va_extract( pv->dxva2, pv->dst_frame, frame, pv->job->width, pv->job->height, pv->job->title->crop, pv->os ) == HB_WORK_ERROR ) + { + hb_log( "hb_va_Extract failed!!!!!!" ); + } + + w = buf->plane[0].stride; + h = buf->plane[0].height; + uint8_t *dst = buf->plane[0].data; + copy_plane( dst, pv->dst_frame, w, ww, h ); + w = buf->plane[1].stride; + h = buf->plane[1].height; + dst = buf->plane[1].data; + copy_plane( dst, pv->dst_frame + ww * hh, w, ww>>1, h ); + w = buf->plane[2].stride; + h = buf->plane[2].height; + dst = buf->plane[2].data; + copy_plane( dst, pv->dst_frame + ww * hh +( ( ww * hh )>>2 ), w, ww>>1, h ); + return buf; + } + else + { + hb_buffer_t *buf = hb_video_buffer_init( w, h ); + uint8_t *dst = buf->data; + if( context->pix_fmt != PIX_FMT_YUV420P || w != context->width || + h != context->height ) + { + // have to convert to our internal color space and/or rescale + AVPicture dstpic; + hb_avpicture_fill( &dstpic, buf ); + if( !pv->sws_context || + pv->sws_width != context->width || + pv->sws_height != context->height || + pv->sws_pix_fmt != context->pix_fmt ) + { + if( pv->sws_context ) + sws_freeContext( pv->sws_context ); + pv->sws_context = hb_sws_get_context( + context->width, context->height, context->pix_fmt, + w, h, PIX_FMT_YUV420P, + SWS_LANCZOS|SWS_ACCURATE_RND ); + pv->sws_width = context->width; + pv->sws_height = context->height; + pv->sws_pix_fmt = context->pix_fmt; + } + sws_scale( pv->sws_context, (const 
uint8_t*const*)frame->data, + frame->linesize, 0, context->height, + dstpic.data, dstpic.linesize ); + } + else + { + w = buf->plane[0].stride; + h = buf->plane[0].height; + dst = buf->plane[0].data; + copy_plane( dst, frame->data[0], w, frame->linesize[0], h ); + w = buf->plane[1].stride; + h = buf->plane[1].height; + dst = buf->plane[1].data; + copy_plane( dst, frame->data[1], w, frame->linesize[1], h ); + w = buf->plane[2].stride; + h = buf->plane[2].height; + dst = buf->plane[2].data; + copy_plane( dst, frame->data[2], w, frame->linesize[2], h ); + } + return buf; + } + +} + + +static int get_frame_buf( AVCodecContext *context, AVFrame *frame ) +{ + int result = HB_WORK_ERROR; + hb_work_private_t *pv = (hb_work_private_t*)context->opaque; + if( pv->dxva2 ) + { + result = hb_va_get_frame_buf( pv->dxva2, context, frame ); + } + if( result==HB_WORK_ERROR ) + return avcodec_default_get_buffer( context, frame ); + return 0; +} + +static int reget_frame_buf( AVCodecContext *context, AVFrame *frame ) +{ + return avcodec_default_reget_buffer( context, frame ); +} + +static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts ) +{ + hb_chapter_t *c; + + if( !pv->job ) + return; + + c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 ); + if( c && c->title ) + { + hb_log( "%s: \"%s\" (%d) at frame %u time %" PRId64, + pv->context->codec->name, c->title, chap_num, pv->nframes, pts ); + } + else + { + hb_log( "%s: Chapter %d at frame %u time %" PRId64, + pv->context->codec->name, chap_num, pv->nframes, pts ); + } +} + +static void flushDelayQueue( hb_work_private_t *pv ) +{ + hb_buffer_t *buf; + int slot = pv->queue_primed ? 
pv->nframes & (HEAP_SIZE-1) : 0; + + // flush all the video packets left on our timestamp-reordering delay q + while( ( buf = pv->delayq[slot] ) != NULL ) + { + buf->s.start = heap_pop( &pv->pts_heap ); + hb_list_add( pv->list, buf ); + pv->delayq[slot] = NULL; + slot = ( slot + 1 ) & (HEAP_SIZE-1); + } +} + +#define TOP_FIRST PIC_FLAG_TOP_FIELD_FIRST +#define PROGRESSIVE PIC_FLAG_PROGRESSIVE_FRAME +#define REPEAT_FIRST PIC_FLAG_REPEAT_FIRST_FIELD +#define TB 8 +#define BT 16 +#define BT_PROG 32 +#define BTB_PROG 64 +#define TB_PROG 128 +#define TBT_PROG 256 + +static void checkCadence( int * cadence, uint16_t flags, int64_t start ) +{ + /* Rotate the cadence tracking. */ + int i = 0; + for( i = 11; i > 0; i-- ) + { + cadence[i] = cadence[i-1]; + } + + if( !(flags & PROGRESSIVE) && !(flags & TOP_FIRST) ) + { + /* Not progressive, not top first... + That means it's probably bottom + first, 2 fields displayed. + */ + //hb_log("MPEG2 Flag: Bottom field first, 2 fields displayed."); + cadence[0] = BT; + } + else if( !(flags & PROGRESSIVE) && (flags & TOP_FIRST) ) + { + /* Not progressive, top is first, + Two fields displayed. + */ + //hb_log("MPEG2 Flag: Top field first, 2 fields displayed."); + cadence[0] = TB; + } + else if( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) ) + { + /* Progressive, but noting else. + That means Bottom first, + 2 fields displayed. + */ + //hb_log("MPEG2 Flag: Progressive. Bottom field first, 2 fields displayed."); + cadence[0] = BT_PROG; + } + else if( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) ) + { + /* Progressive, and repeat. . + That means Bottom first, + 3 fields displayed. + */ + //hb_log("MPEG2 Flag: Progressive repeat. Bottom field first, 3 fields displayed."); + cadence[0] = BTB_PROG; + } + else if( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) ) + { + /* Progressive, top first. + That means top first, + 2 fields displayed. 
+ */ + //hb_log("MPEG2 Flag: Progressive. Top field first, 2 fields displayed."); + cadence[0] = TB_PROG; + } + else if( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) ) + { + /* Progressive, top, repeat. + That means top first, + 3 fields displayed. + */ + //hb_log("MPEG2 Flag: Progressive repeat. Top field first, 3 fields displayed."); + cadence[0] = TBT_PROG; + } + + if( (cadence[2] <= TB) && (cadence[1] <= TB) && (cadence[0] > TB) && (cadence[11]) ) + hb_log( "%fs: Video -> Film", (float)start / 90000 ); + if( (cadence[2] > TB) && (cadence[1] <= TB) && (cadence[0] <= TB) && (cadence[11]) ) + hb_log( "%fs: Film -> Video", (float)start / 90000 ); +} + +/* + * Decodes a video frame from the specified raw packet data + * ('data', 'size', 'sequence'). + * The output of this function is stored in 'pv->list', which contains a list + * of zero or more decoded packets. + * + * The returned packets are guaranteed to have their timestamps in the correct + * order, even if the original packets decoded by libavcodec have misordered + * timestamps, due to the use of 'packed B-frames'. + * + * Internally the set of decoded packets may be buffered in 'pv->delayq' + * until enough packets have been decoded so that the timestamps can be + * correctly rewritten, if this is necessary. + */ +static int decodeFrame( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype ) +{ + + hb_work_private_t *pv = w->private_data; + int got_picture, oldlevel = 0; + AVFrame frame; + AVPacket avp; + if( global_verbosity_level <= 1 ) + { + oldlevel = av_log_get_level(); + av_log_set_level( AV_LOG_QUIET ); + } + + av_init_packet( &avp ); + + avp.data = data; + avp.size = size; + avp.pts = pts; + avp.dts = dts; + + /* + * libav avcodec_decode_video2() needs AVPacket flagged with AV_PKT_FLAG_KEY + * for some codecs. For example, sequence of PNG in a mov container. 
+ */ + if ( frametype & HB_FRAME_KEY ) + { + avp.flags |= AV_PKT_FLAG_KEY; + } + if( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 ) + { + ++pv->decode_errors; + } + if( global_verbosity_level <= 1 ) + { + av_log_set_level( oldlevel ); + } + + if( got_picture && pv->wait_for_keyframe > 0 ) + { + // Libav is inconsistant about how it flags keyframes. For many + // codecs it simply sets frame.key_frame. But for others, it only + // sets frame.pict_type. And for yet others neither gets set at all + // (qtrle). + int key = frame.key_frame || + ( w->codec_param != CODEC_ID_H264 && + ( frame.pict_type == AV_PICTURE_TYPE_I || + frame.pict_type == 0 ) ); + if( !key ) + { + pv->wait_for_keyframe--; + return 0; + } + pv->wait_for_keyframe = 0; + } + + if( got_picture ) + { + + uint16_t flags = 0; + + // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES + // packet had a pts we handed it to av_parser_parse (if the packet had + // no pts we set it to AV_NOPTS_VALUE, but before the parse we can't + // distinguish between the start of a video frame with no pts & an + // intermediate packet of some frame which never has a pts). we hope + // that when parse returns the frame to us the pts we originally + // handed it will be in parser->pts. we put this pts into avp.pts so + // that when avcodec_decode_video finally gets around to allocating an + // AVFrame to hold the decoded frame, avcodec_default_get_buffer can + // stuff that pts into the it. if all of these relays worked at this + // point frame.pts should hold the frame's pts from the original data + // stream or AV_NOPTS_VALUE if it didn't have one. in the latter case + // we generate the next pts in sequence for it. 
+ if( !pv->frame_duration_set ) + compute_frame_duration( pv ); + + double frame_dur = pv->duration; + if( frame.repeat_pict ) + { + frame_dur += frame.repeat_pict * pv->field_duration; + } + + + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + if( avp.pts>0 ) + { + if( pv->dxva2->input_pts[0]!=0 && pv->dxva2->input_pts[1]==0 ) + frame.pkt_pts = pv->dxva2->input_pts[0]; + else + frame.pkt_pts = pv->dxva2->input_pts[0]dxva2->input_pts[1] ? pv->dxva2->input_pts[0] : pv->dxva2->input_pts[1]; + } + } + // If there was no pts for this frame, assume constant frame rate + // video & estimate the next frame time from the last & duration. + double pts; + if( frame.pkt_pts == AV_NOPTS_VALUE ) + { + pts = pv->pts_next; + } + else + { + pts = frame.pkt_pts; + } + pv->pts_next = pts + frame_dur; + + if( frame.top_field_first ) + { + flags |= PIC_FLAG_TOP_FIELD_FIRST; + } + if( !frame.interlaced_frame ) + { + flags |= PIC_FLAG_PROGRESSIVE_FRAME; + } + if( frame.repeat_pict == 1 ) + { + flags |= PIC_FLAG_REPEAT_FIRST_FIELD; + } + if( frame.repeat_pict == 2 ) + { + flags |= PIC_FLAG_REPEAT_FRAME; + } + + + + hb_buffer_t *buf; + + // if we're doing a scan or this content couldn't have been broken + // by Microsoft we don't worry about timestamp reordering + if( !pv->job || !pv->brokenByMicrosoft ) + { + buf = copy_frame( pv, &frame ); + buf->s.start = pts; + buf->sequence = sequence; + + buf->s.flags = flags; + + if( pv->new_chap && buf->s.start >= pv->chap_time ) + { + buf->s.new_chap = pv->new_chap; + log_chapter( pv, pv->new_chap, buf->s.start ); + pv->new_chap = 0; + pv->chap_time = 0; + } + else if( pv->nframes == 0 && pv->job ) + { + log_chapter( pv, pv->job->chapter_start, buf->s.start ); + } + checkCadence( pv->cadence, flags, buf->s.start ); + hb_list_add( pv->list, buf ); + ++pv->nframes; + return got_picture; + } + + // XXX This following probably addresses a libavcodec bug but I don't + // see an easy fix so we workaround it here. 
+ // + // The M$ 'packed B-frames' atrocity results in decoded frames with + // the wrong timestamp. E.g., if there are 2 b-frames the timestamps + // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6". + // The frames are actually delivered in the right order but with + // the wrong timestamp. To get the correct timestamp attached to + // each frame we have a delay queue (longer than the max number of + // b-frames) & a sorting heap for the timestamps. As each frame + // comes out of the decoder the oldest frame in the queue is removed + // and associated with the smallest timestamp. Then the new frame is + // added to the queue & its timestamp is pushed on the heap. + // This does nothing if the timestamps are correct (i.e., the video + // uses a codec that Micro$oft hasn't broken yet) but the frames + // get timestamped correctly even when M$ has munged them. + + // remove the oldest picture from the frame queue (if any) & + // give it the smallest timestamp from our heap. The queue size + // is a power of two so we get the slot of the oldest by masking + // the frame count & this will become the slot of the newest + // once we've removed & processed the oldest. 
+ int slot = pv->nframes & (HEAP_SIZE-1); + if( ( buf = pv->delayq[slot] ) != NULL ) + { + pv->queue_primed = 1; + buf->s.start = heap_pop( &pv->pts_heap ); + + if( pv->new_chap && buf->s.start >= pv->chap_time ) + { + buf->s.new_chap = pv->new_chap; + log_chapter( pv, pv->new_chap, buf->s.start ); + pv->new_chap = 0; + pv->chap_time = 0; + } + else if( pv->nframes == 0 && pv->job ) + { + log_chapter( pv, pv->job->chapter_start, buf->s.start ); + } + checkCadence( pv->cadence, buf->s.flags, buf->s.start ); + hb_list_add( pv->list, buf ); + } + + // add the new frame to the delayq & push its timestamp on the heap + buf = copy_frame( pv, &frame ); + buf->sequence = sequence; + /* Store picture flags for later use by filters */ + buf->s.flags = flags; + pv->delayq[slot] = buf; + heap_push( &pv->pts_heap, pts ); + + ++pv->nframes; + } + + return got_picture; +} +static void decodeVideo( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype ) +{ + hb_work_private_t *pv = w->private_data; + + /* + * The following loop is a do..while because we need to handle both + * data & the flush at the end (signaled by size=0). At the end there's + * generally a frame in the parser & one or more frames in the decoder + * (depending on the bframes setting). 
+ */ + int pos = 0; + do { + uint8_t *pout; + int pout_len, len; + int64_t parser_pts, parser_dts; + if( pv->parser ) + { + len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len, + data + pos, size - pos, pts, dts, 0 ); + parser_pts = pv->parser->pts; + parser_dts = pv->parser->dts; + } + else + { + pout = data; + len = pout_len = size; + parser_pts = pts; + parser_dts = dts; + } + pos += len; + + if( pout_len > 0 ) + { + decodeFrame( w, pout, pout_len, sequence, parser_pts, parser_dts, frametype ); + } + } while( pos < size ); + + /* the stuff above flushed the parser, now flush the decoder */ + if( size <= 0 ) + { + while( decodeFrame( w, NULL, 0, sequence, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0 ) ) + { + } + flushDelayQueue( pv ); + } + return; +} + +/* + * Removes all packets from 'pv->list', links them together into + * a linked-list, and returns the first packet in the list. + */ +static hb_buffer_t *link_buf_list( hb_work_private_t *pv ) +{ + hb_buffer_t *head = hb_list_item( pv->list, 0 ); + + if( head ) + { + hb_list_rem( pv->list, head ); + + hb_buffer_t *last = head, *buf; + + while( ( buf = hb_list_item( pv->list, 0 ) ) != NULL ) + { + hb_list_rem( pv->list, buf ); + last->next = buf; + last = buf; + } + } + return head; +} +static void hb_ffmpeg_release_frame_buf( struct AVCodecContext *p_context, AVFrame *frame ) +{ + hb_work_private_t *p_dec = (hb_work_private_t*)p_context->opaque; + int i; + if( p_dec->dxva2 ) + { + hb_va_release( p_dec->dxva2, frame ); + } + else if( !frame->opaque ) + { + if( frame->type == FF_BUFFER_TYPE_INTERNAL ) + avcodec_default_release_buffer( p_context, frame ); + } + for( i = 0; i < 4; i++ ) + frame->data[i] = NULL; +} + +static void init_video_avcodec_context( hb_work_private_t *pv ) +{ + /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) 
*/ + pv->context->opaque = pv; + pv->context->get_buffer = get_frame_buf; + pv->context->reget_buffer = reget_frame_buf; + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + pv->context->release_buffer = hb_ffmpeg_release_frame_buf; +} + +static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job ) +{ + + hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) ); + + w->private_data = pv; + pv->wait_for_keyframe = 60; + pv->job = job; + if( job ) + pv->title = job->title; + else + pv->title = w->title; + pv->list = hb_list_init(); + + if( pv->job && pv->job->title ) + { + if( !pv->job->title->has_resolution_change && w->codec_param != CODEC_ID_PRORES ) + { + pv->threads = HB_FFMPEG_THREADS_AUTO; + } + } + + if( pv->title->opaque_priv ) + { + + AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv; + AVCodec *codec = avcodec_find_decoder( w->codec_param ); + if( codec == NULL ) + { + hb_log( "decavcodecvInit: failed to find codec for id (%d)", w->codec_param ); + return 1; + } + pv->context = avcodec_alloc_context3( codec ); + avcodec_copy_context( pv->context, ic->streams[pv->title->video_id]->codec ); + pv->context->workaround_bugs = FF_BUG_AUTODETECT; + // Depricated but still used by Libav (twits!) 
+ pv->context->err_recognition = AV_EF_CRCCHECK; + pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; + if( ((w->codec_param==CODEC_ID_H264) + || (w->codec_param==CODEC_ID_MPEG2VIDEO) + || (w->codec_param==CODEC_ID_VC1) + || (w->codec_param==CODEC_ID_WMV3) + || (w->codec_param==CODEC_ID_MPEG4)) + && pv->job ) + { + pv->dxva2 = hb_va_create_dxva2( pv->dxva2, w->codec_param ); + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + hb_va_new_dxva2( pv->dxva2, pv->context ); + init_video_avcodec_context( pv ); + pv->context->get_format = hb_ffmpeg_get_format; + pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); + memset( pv->os, 0, sizeof( hb_oclscale_t ) ); + pv->threads = 1; + + } + } + if( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) + { + hb_log( "decavcodecvInit: avcodec_open failed" ); + return 1; + } + pv->video_codec_opened = 1; + // avi, mkv and possibly mp4 containers can contain the M$ VFW packed + // b-frames abortion that messes up frame ordering and timestamps. + // XXX ffmpeg knows which streams are broken but doesn't expose the + // info externally. We should patch ffmpeg to add a flag to the + // codec context for this but until then we mark all ffmpeg streams + // as suspicious. + pv->brokenByMicrosoft = 1; + } + else + { + AVCodec *codec = avcodec_find_decoder( w->codec_param ); + pv->parser = av_parser_init( w->codec_param ); + pv->context = avcodec_alloc_context3( codec ); + pv->context->workaround_bugs = FF_BUG_AUTODETECT; + // Depricated but still used by Libav (twits!) 
+ pv->context->err_recognition = AV_EF_CRCCHECK; + pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; + init_video_avcodec_context( pv ); + } + return 0; +} + + +static int next_hdr( hb_buffer_t *in, int offset ) +{ + uint8_t *dat = in->data; + uint16_t last2 = 0xffff; + for( ; in->size - offset > 1; ++offset ) + { + if( last2 == 0 && dat[offset] == 0x01 ) + // found an mpeg start code + return offset - 2; + + last2 = ( last2 << 8 ) | dat[offset]; + } + + return -1; +} + +static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type ) +{ + if( in->size - offset < 4 ) + // not enough room for an mpeg start code + return -1; + + for( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset ) + { + if( in->data[offset+3] == hdr_type ) + // found it + break; + } + return offset; +} + +static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in ) +{ + hb_work_private_t *pv = w->private_data; + + // we can't call the avstream funcs but the read_header func in the + // AVInputFormat may set up some state in the AVContext. In particular + // vc1t_read_header allocates 'extradata' to deal with header issues + // related to Microsoft's bizarre engineering notions. We alloc a chunk + // of space to make vc1 work then associate the codec with the context. + if( w->codec_param != CODEC_ID_VC1 ) + { + // we haven't been inflicted with M$ - allocate a little space as + // a marker and return success. + pv->context->extradata_size = 0; + // av_malloc uses posix_memalign which is allowed to + // return NULL when allocating 0 bytes. We use extradata == NULL + // to trigger initialization of extradata and the decoder, so + // we can not set it to NULL here. So allocate a small + // buffer instead. 
+ pv->context->extradata = av_malloc( 1 ); + return 0; + } + + // find the start and and of the sequence header + int shdr, shdr_end; + if( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 ) + { + // didn't find start of seq hdr + return 1; + } + if( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 ) + { + shdr_end = in->size; + } + shdr_end -= shdr; + + // find the start and and of the entry point header + int ehdr, ehdr_end; + if( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 ) + { + // didn't find start of entry point hdr + return 1; + } + if( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 ) + { + ehdr_end = in->size; + } + ehdr_end -= ehdr; + + // found both headers - allocate an extradata big enough to hold both + // then copy them into it. + pv->context->extradata_size = shdr_end + ehdr_end; + pv->context->extradata = av_malloc( pv->context->extradata_size + 8 ); + memcpy( pv->context->extradata, in->data + shdr, shdr_end ); + memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end ); + memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8 ); + return 0; +} + +static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in, + hb_buffer_t ** buf_out ) +{ + hb_work_private_t *pv = w->private_data; + hb_buffer_t *in = *buf_in; + int64_t pts = AV_NOPTS_VALUE; + int64_t dts = pts; + *buf_in = NULL; + *buf_out = NULL; + + /* if we got an empty buffer signaling end-of-stream send it downstream */ + if( in->size == 0 ) + { + if( pv->context->codec != NULL ) + { + decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); + } + hb_list_add( pv->list, in ); + *buf_out = link_buf_list( pv ); + return HB_WORK_DONE; + } + + // if this is the first frame open the codec (we have to wait for the + // first frame because of M$ VC1 braindamage). 
+ if( !pv->video_codec_opened ) + { + AVCodec *codec = avcodec_find_decoder( w->codec_param ); + if( codec == NULL ) + { + hb_log( "decavcodecvWork: failed to find codec for id (%d)", w->codec_param ); + *buf_out = hb_buffer_init( 0 );; + return HB_WORK_DONE; + } + avcodec_get_context_defaults3( pv->context, codec ); + init_video_avcodec_context( pv ); + if( setup_extradata( w, in ) ) + { + // we didn't find the headers needed to set up extradata. + // the codec will abort if we open it so just free the buf + // and hope we eventually get the info we need. + hb_buffer_close( &in ); + return HB_WORK_OK; + } + // disable threaded decoding for scan, can cause crashes + if( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) + { + hb_log( "decavcodecvWork: avcodec_open failed" ); + *buf_out = hb_buffer_init( 0 );; + return HB_WORK_DONE; + } + pv->video_codec_opened = 1; + } + + if( in->s.start >= 0 ) + { + pts = in->s.start; + dts = in->s.renderOffset; + } + if( in->s.new_chap ) + { + pv->new_chap = in->s.new_chap; + pv->chap_time = pts >= 0 ? pts : pv->pts_next; + } + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + if( pv->dxva2->input_pts[0]<=pv->dxva2->input_pts[1] ) + pv->dxva2->input_pts[0] = pts; + else if( pv->dxva2->input_pts[0]>pv->dxva2->input_pts[1] ) + pv->dxva2->input_pts[1] = pts; + pv->dxva2->input_dts = dts; + } + decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); + hb_buffer_close( &in ); + *buf_out = link_buf_list( pv ); + return HB_WORK_OK; +} +static void compute_frame_duration( hb_work_private_t *pv ) +{ + double duration = 0.; + int64_t max_fps = 64L; + + // context->time_base may be in fields, so set the max *fields* per second + if( pv->context->ticks_per_frame > 1 ) + max_fps *= pv->context->ticks_per_frame; + + if( pv->title->opaque_priv ) + { + // If ffmpeg is demuxing for us, it collects some additional + // information about framerates that is often more accurate + // than context->time_base. 
+ AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv; + AVStream *st = ic->streams[pv->title->video_id]; + if( st->nb_frames && st->duration ) + { + // compute the average frame duration from the total number + // of frames & the total duration. + duration = ( (double)st->duration * (double)st->time_base.num ) / + ( (double)st->nb_frames * (double)st->time_base.den ); + } + else + { + // XXX We don't have a frame count or duration so try to use the + // far less reliable time base info in the stream. + // Because the time bases are so screwed up, we only take values + // in the range 8fps - 64fps. + AVRational *tb = NULL; + if( st->avg_frame_rate.den * 64L > st->avg_frame_rate.num && + st->avg_frame_rate.num > st->avg_frame_rate.den * 8L ) + { + tb = &(st->avg_frame_rate); + duration = (double)tb->den / (double)tb->num; + } + else if( st->time_base.num * 64L > st->time_base.den && + st->time_base.den > st->time_base.num * 8L ) + { + tb = &(st->time_base); + duration = (double)tb->num / (double)tb->den; + } + else if( st->r_frame_rate.den * 64L > st->r_frame_rate.num && + st->r_frame_rate.num > st->r_frame_rate.den * 8L ) + { + tb = &(st->r_frame_rate); + duration = (double)tb->den / (double)tb->num; + } + } + if( !duration && + pv->context->time_base.num * max_fps > pv->context->time_base.den && + pv->context->time_base.den > pv->context->time_base.num * 8L ) + { + duration = (double)pv->context->time_base.num / + (double)pv->context->time_base.den; + if( pv->context->ticks_per_frame > 1 ) + { + // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is + // field rate rather than frame rate so convert back to frames. 
+ duration *= pv->context->ticks_per_frame; + } + } + } + else + { + if( pv->context->time_base.num * max_fps > pv->context->time_base.den && + pv->context->time_base.den > pv->context->time_base.num * 8L ) + { + duration = (double)pv->context->time_base.num / + (double)pv->context->time_base.den; + if( pv->context->ticks_per_frame > 1 ) + { + // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is + // field rate rather than frame rate so convert back to frames. + duration *= pv->context->ticks_per_frame; + } + } + } + if( duration == 0 ) + { + // No valid timing info found in the stream, so pick some value + duration = 1001. / 24000.; + } + else + { + pv->frame_duration_set = 1; + } + pv->duration = duration * 90000.; + pv->field_duration = pv->duration; + if( pv->context->ticks_per_frame > 1 ) + { + pv->field_duration /= pv->context->ticks_per_frame; + } +} + +static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info ) +{ + hb_work_private_t *pv = w->private_data; + + memset( info, 0, sizeof(*info) ); + + info->bitrate = pv->context->bit_rate; + // HandBrake's video pipeline uses yuv420 color. This means all + // dimensions must be even. So we must adjust the dimensions + // of incoming video if not even. 
+ info->width = pv->context->width & ~1; + info->height = pv->context->height & ~1; + + info->pixel_aspect_width = pv->context->sample_aspect_ratio.num; + info->pixel_aspect_height = pv->context->sample_aspect_ratio.den; + + compute_frame_duration( pv ); + info->rate = 27000000; + info->rate_base = pv->duration * 300.; + + info->profile = pv->context->profile; + info->level = pv->context->level; + info->name = pv->context->codec->name; + + switch( pv->context->color_primaries ) + { + case AVCOL_PRI_BT709: + info->color_prim = HB_COLR_PRI_BT709; + break; + case AVCOL_PRI_BT470BG: + info->color_prim = HB_COLR_PRI_EBUTECH; + break; + case AVCOL_PRI_BT470M: + case AVCOL_PRI_SMPTE170M: + case AVCOL_PRI_SMPTE240M: + info->color_prim = HB_COLR_PRI_SMPTEC; + break; + default: + { + if( ( info->width >= 1280 || info->height >= 720 ) || + ( info->width > 720 && info->height > 576 ) ) + // ITU BT.709 HD content + info->color_prim = HB_COLR_PRI_BT709; + else if( info->rate_base == 1080000 ) + // ITU BT.601 DVD or SD TV content (PAL) + info->color_prim = HB_COLR_PRI_EBUTECH; + else + // ITU BT.601 DVD or SD TV content (NTSC) + info->color_prim = HB_COLR_PRI_SMPTEC; + break; + } + } + + /* AVCOL_TRC_BT709 -> HB_COLR_TRA_BT709 + * AVCOL_TRC_GAMMA22 (bt470m) -> HB_COLR_TRA_BT709 + * AVCOL_TRC_GAMMA28 (bt470bg) -> HB_COLR_TRA_BT709 + * AVCOL_TRC_UNSPECIFIED, AVCOL_TRC_NB: + * -> ITU BT.709 -> HB_COLR_TRA_BT709 + * -> ITU BT.601 -> HB_COLR_TRA_BT709 + * TODO: AVCOL_TRC_SMPTE240M -> HB_COLR_TRA_SMPTE240M but it's not yet in Libav */ + info->color_transfer = HB_COLR_TRA_BT709; + + switch( pv->context->colorspace ) + { + case AVCOL_SPC_BT709: + info->color_matrix = HB_COLR_MAT_BT709; + break; + case AVCOL_SPC_FCC: + case AVCOL_SPC_BT470BG: + case AVCOL_SPC_SMPTE170M: + case AVCOL_SPC_RGB: // libswscale rgb2yuv + info->color_matrix = HB_COLR_MAT_SMPTE170M; + break; + case AVCOL_SPC_SMPTE240M: + info->color_matrix = HB_COLR_MAT_SMPTE240M; + break; + default: + { + if( ( info->width >= 
1280 || info->height >= 720 ) || + ( info->width > 720 && info->height > 576 ) ) + // ITU BT.709 HD content + info->color_matrix = HB_COLR_MAT_BT709; + else + // ITU BT.601 DVD or SD TV content (PAL) + // ITU BT.601 DVD or SD TV content (NTSC) + info->color_matrix = HB_COLR_MAT_SMPTE170M; + break; + } + } + + return 1; +} + +static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf, + hb_work_info_t *info ) +{ + return 0; +} + +static void decavcodecvFlush( hb_work_object_t *w ) +{ + hb_work_private_t *pv = w->private_data; + + if( pv->context->codec ) + { + flushDelayQueue( pv ); + hb_buffer_t *buf = link_buf_list( pv ); + hb_buffer_close( &buf ); + if( pv->title->opaque_priv == NULL ) + { + pv->video_codec_opened = 0; + hb_avcodec_close( pv->context ); + if( pv->parser ) + { + av_parser_close( pv->parser ); + } + pv->parser = av_parser_init( w->codec_param ); + } + else + { + avcodec_flush_buffers( pv->context ); + } + } + pv->wait_for_keyframe = 60; +} + +static void decavcodecClose( hb_work_object_t * w ) +{ + hb_work_private_t * pv = w->private_data; + if( pv->dst_frame ) free( pv->dst_frame ); + if( pv ) + { + closePrivData( &pv ); + w->private_data = NULL; + } +} + +hb_work_object_t hb_decavcodecv_accl = +{ + .id = WORK_DECAVCODECVACCL, + .name = "Video hardware decoder (libavcodec)", + .init = decavcodecvInit, + .work = decavcodecvWork, + .close = decavcodecClose, + .flush = decavcodecvFlush, + .info = decavcodecvInfo, + .bsinfo = decavcodecvBSInfo +}; + +#endif diff --git a/libhb/dxva2api.c b/libhb/dxva2api.c new file mode 100644 index 000000000..04011c0c5 --- /dev/null +++ b/libhb/dxva2api.c @@ -0,0 +1,36 @@ +/* dxva2api.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ +#include "dxva2api.h" + +__inline float hb_dx_fixedtofloat( const DXVA2_Fixed32 _fixed_ ) +{ + return (FLOAT)_fixed_.Value + (FLOAT)_fixed_.Fraction / 0x10000; +} + +__inline const DXVA2_Fixed32 hb_dx_fixed32_opaque_alpha() +{ + DXVA2_Fixed32 _fixed_; + _fixed_.Fraction = 0; + _fixed_.Value = 0; + _fixed_.ll = 1; + return _fixed_; +} + + +__inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ ) +{ + DXVA2_Fixed32 _fixed_; + _fixed_.Fraction = LOWORD( _float_ * 0x10000 ); + _fixed_.Value = HIWORD( _float_ * 0x10000 ); + return _fixed_; +} diff --git a/libhb/dxva2api.h b/libhb/dxva2api.h new file mode 100644 index 000000000..dc5909477 --- /dev/null +++ b/libhb/dxva2api.h @@ -0,0 +1,822 @@ +/* dxva2api.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + + +#ifndef _DXVA2API_H +#define _DXVA2API_H + +#define MINGW_DXVA2API_H_VERSION (2) + +#if __GNUC__ >=3 +#pragma GCC system_header +#endif + +#include +#include + +/* Define it to allow using nameless struct/union (non C99 compliant) to match + * the official documentation. 
*/ +//#define DXVA2API_USE_BITFIELDS + +/****************STRUCTURES******************/ +#pragma pack(push, 1) + +#define DXVA2API_USE_BITFIELDS + +typedef struct _DXVA2_ExtendedFormat { +#ifdef DXVA2API_USE_BITFIELDS + union { + struct { + UINT SampleFormat : 8; + UINT VideoChromaSubsampling : 4; + UINT NominalRange : 3; + UINT VideoTransferMatrix : 3; + UINT VideoLighting : 4; + UINT VideoPrimaries : 5; + UINT VideoTransferFunction : 5; + }; + UINT value; + }; +#else + UINT value; +#endif +} DXVA2_ExtendedFormat; + +typedef struct _DXVA2_Frequency { + UINT Numerator; + UINT Denominator; +} DXVA2_Frequency; + +typedef struct _DXVA2_VideoDesc { + UINT SampleWidth; + UINT SampleHeight; + DXVA2_ExtendedFormat SampleFormat; + D3DFORMAT Format; + DXVA2_Frequency InputSampleFreq; + DXVA2_Frequency OutputFrameFreq; + UINT UABProtectionLevel; + UINT Reserved; +} DXVA2_VideoDesc; + +typedef struct _DXVA2_ConfigPictureDecode { + GUID guidConfigBitstreamEncryption; + GUID guidConfigMBcontrolEncryption; + GUID guidConfigResidDiffEncryption; + UINT ConfigBitstreamRaw; + UINT ConfigMBcontrolRasterOrder; + UINT ConfigResidDiffHost; + UINT ConfigSpatialResid8; + UINT ConfigResid8Subtraction; + UINT ConfigSpatialHost8or9Clipping; + UINT ConfigSpatialResidInterleaved; + UINT ConfigIntraResidUnsigned; + UINT ConfigResidDiffAccelerator; + UINT ConfigHostInverseScan; + UINT ConfigSpecificIDCT; + UINT Config4GroupedCoefs; + USHORT ConfigMinRenderTargetBuffCount; + USHORT ConfigDecoderSpecific; +} DXVA2_ConfigPictureDecode; + +typedef struct _DXVA2_DecodeBufferDesc { + DWORD CompressedBufferType; + UINT BufferIndex; + UINT DataOffset; + UINT DataSize; + UINT FirstMBaddress; + UINT NumMBsInBuffer; + UINT Width; + UINT Height; + UINT Stride; + UINT ReservedBits; + PVOID pvPVPState; +} DXVA2_DecodeBufferDesc; + +typedef struct _DXVA2_DecodeExtensionData { + UINT Function; + PVOID pPrivateInputData; + UINT PrivateInputDataSize; + PVOID pPrivateOutputData; + UINT PrivateOutputDataSize; +} 
DXVA2_DecodeExtensionData; + +typedef struct _DXVA2_DecodeExecuteParams { + UINT NumCompBuffers; + DXVA2_DecodeBufferDesc *pCompressedBuffers; + DXVA2_DecodeExtensionData *pExtensionData; +} DXVA2_DecodeExecuteParams; + +enum { + DXVA2_VideoDecoderRenderTarget = 0, + DXVA2_VideoProcessorRenderTarget = 1, + DXVA2_VideoSoftwareRenderTarget = 2 +}; + +enum { + DXVA2_PictureParametersBufferType = 0, + DXVA2_MacroBlockControlBufferType = 1, + DXVA2_ResidualDifferenceBufferType = 2, + DXVA2_DeblockingControlBufferType = 3, + DXVA2_InverseQuantizationMatrixBufferType = 4, + DXVA2_SliceControlBufferType = 5, + DXVA2_BitStreamDateBufferType = 6, + DXVA2_MotionVectorBuffer = 7, + DXVA2_FilmGrainBuffer = 8 +}; + +/* DXVA MPEG-I/II and VC-1 */ +typedef struct _DXVA_PictureParameters { + USHORT wDecodedPictureIndex; + USHORT wDeblockedPictureIndex; + USHORT wForwardRefPictureIndex; + USHORT wBackwardRefPictureIndex; + USHORT wPicWidthInMBminus1; + USHORT wPicHeightInMBminus1; + UCHAR bMacroblockWidthMinus1; + UCHAR bMacroblockHeightMinus1; + UCHAR bBlockWidthMinus1; + UCHAR bBlockHeightMinus1; + UCHAR bBPPminus1; + UCHAR bPicStructure; + UCHAR bSecondField; + UCHAR bPicIntra; + UCHAR bPicBackwardPrediction; + UCHAR bBidirectionalAveragingMode; + UCHAR bMVprecisionAndChromaRelation; + UCHAR bChromaFormat; + UCHAR bPicScanFixed; + UCHAR bPicScanMethod; + UCHAR bPicReadbackRequests; + UCHAR bRcontrol; + UCHAR bPicSpatialResid8; + UCHAR bPicOverflowBlocks; + UCHAR bPicExtrapolation; + UCHAR bPicDeblocked; + UCHAR bPicDeblockConfined; + UCHAR bPic4MVallowed; + UCHAR bPicOBMC; + UCHAR bPicBinPB; + UCHAR bMV_RPS; + UCHAR bReservedBits; + USHORT wBitstreamFcodes; + USHORT wBitstreamPCEelements; + UCHAR bBitstreamConcealmentNeed; + UCHAR bBitstreamConcealmentMethod; +} DXVA_PictureParameters, *LPDXVA_PictureParameters; + +typedef struct _DXVA_QmatrixData { + BYTE bNewQmatrix[4]; + WORD Qmatrix[4][8 * 8]; +} DXVA_QmatrixData, *LPDXVA_QmatrixData; + +typedef struct _DXVA_SliceInfo { + 
USHORT wHorizontalPosition; + USHORT wVerticalPosition; + UINT dwSliceBitsInBuffer; + UINT dwSliceDataLocation; + UCHAR bStartCodeBitOffset; + UCHAR bReservedBits; + USHORT wMBbitOffset; + USHORT wNumberMBsInSlice; + USHORT wQuantizerScaleCode; + USHORT wBadSliceChopping; +} DXVA_SliceInfo, *LPDXVA_SliceInfo; + +/* DXVA H264 */ +typedef struct { +#ifdef DXVA2API_USE_BITFIELDS + union { + struct { + UCHAR Index7Bits : 7; + UCHAR AssociatedFlag : 1; + }; + UCHAR bPicEntry; + }; +#else + UCHAR bPicEntry; +#endif +} DXVA_PicEntry_H264; + + +typedef struct { + USHORT wFrameWidthInMbsMinus1; + USHORT wFrameHeightInMbsMinus1; + DXVA_PicEntry_H264 CurrPic; + UCHAR num_ref_frames; +#ifdef DXVA2API_USE_BITFIELDS + union { + struct { + USHORT field_pic_flag : 1; + USHORT MbaffFrameFlag : 1; + USHORT residual_colour_transform_flag : 1; + USHORT sp_for_switch_flag : 1; + USHORT chroma_format_idc : 2; + USHORT RefPicFlag : 1; + USHORT constrained_intra_pred_flag : 1; + USHORT weighted_pred_flag : 1; + USHORT weighted_bipred_idc : 2; + USHORT MbsConsecutiveFlag : 1; + USHORT frame_mbs_only_flag : 1; + USHORT transform_8x8_mode_flag : 1; + USHORT MinLumaBipredSize8x8Flag : 1; + USHORT IntraPicFlag : 1; + }; + USHORT wBitFields; + }; +#else + USHORT wBitFields; +#endif + UCHAR bit_depth_luma_minus8; + UCHAR bit_depth_chroma_minus8; + USHORT Reserved16Bits; + UINT StatusReportFeedbackNumber; + DXVA_PicEntry_H264 RefFrameList[16]; + INT CurrFieldOrderCnt[2]; + INT FieldOrderCntList[16][2]; + CHAR pic_init_qs_minus26; + CHAR chroma_qp_index_offset; + CHAR second_chroma_qp_index_offset; + UCHAR ContinuationFlag; + CHAR pic_init_qp_minus26; + UCHAR num_ref_idx_l0_active_minus1; + UCHAR num_ref_idx_l1_active_minus1; + UCHAR Reserved8BitsA; + USHORT FrameNumList[16]; + + UINT UsedForReferenceFlags; + USHORT NonExistingFrameFlags; + USHORT frame_num; + UCHAR log2_max_frame_num_minus4; + UCHAR pic_order_cnt_type; + UCHAR log2_max_pic_order_cnt_lsb_minus4; + UCHAR 
delta_pic_order_always_zero_flag; + UCHAR direct_8x8_inference_flag; + UCHAR entropy_coding_mode_flag; + UCHAR pic_order_present_flag; + UCHAR num_slice_groups_minus1; + UCHAR slice_group_map_type; + UCHAR deblocking_filter_control_present_flag; + UCHAR redundant_pic_cnt_present_flag; + UCHAR Reserved8BitsB; + USHORT slice_group_change_rate_minus1; + UCHAR SliceGroupMap[810]; +} DXVA_PicParams_H264; + +typedef struct { + UCHAR bScalingLists4x4[6][16]; + UCHAR bScalingLists8x8[2][64]; +} DXVA_Qmatrix_H264; + + +typedef struct { + UINT BSNALunitDataLocation; + UINT SliceBytesInBuffer; + USHORT wBadSliceChopping; + USHORT first_mb_in_slice; + USHORT NumMbsForSlice; + USHORT BitOffsetToSliceData; + UCHAR slice_type; + UCHAR luma_log2_weight_denom; + UCHAR chroma_log2_weight_denom; + + UCHAR num_ref_idx_l0_active_minus1; + UCHAR num_ref_idx_l1_active_minus1; + CHAR slice_alpha_c0_offset_div2; + CHAR slice_beta_offset_div2; + UCHAR Reserved8Bits; + DXVA_PicEntry_H264 RefPicList[2][32]; + SHORT Weights[2][32][3][2]; + CHAR slice_qs_delta; + CHAR slice_qp_delta; + UCHAR redundant_pic_cnt; + UCHAR direct_spatial_mv_pred_flag; + UCHAR cabac_init_idc; + UCHAR disable_deblocking_filter_idc; + USHORT slice_id; +} DXVA_Slice_H264_Long; + +typedef struct { + UINT BSNALunitDataLocation; + UINT SliceBytesInBuffer; + USHORT wBadSliceChopping; +} DXVA_Slice_H264_Short; + +typedef struct { + USHORT wFrameWidthInMbsMinus1; + USHORT wFrameHeightInMbsMinus1; + DXVA_PicEntry_H264 InPic; + DXVA_PicEntry_H264 OutPic; + USHORT PicOrderCnt_offset; + INT CurrPicOrderCnt; + UINT StatusReportFeedbackNumber; + UCHAR model_id; + UCHAR separate_colour_description_present_flag; + UCHAR film_grain_bit_depth_luma_minus8; + UCHAR film_grain_bit_depth_chroma_minus8; + UCHAR film_grain_full_range_flag; + UCHAR film_grain_colour_primaries; + UCHAR film_grain_transfer_characteristics; + UCHAR film_grain_matrix_coefficients; + UCHAR blending_mode_id; + UCHAR log2_scale_factor; + UCHAR 
comp_model_present_flag[4]; + UCHAR num_intensity_intervals_minus1[4]; + UCHAR num_model_values_minus1[4]; + UCHAR intensity_interval_lower_bound[3][16]; + UCHAR intensity_interval_upper_bound[3][16]; + SHORT comp_model_value[3][16][8]; +} DXVA_FilmGrainChar_H264; + +typedef struct { + union { + struct { + USHORT Fraction; + SHORT Value; + }; + LONG ll; + }; +}DXVA2_Fixed32; + +typedef struct { + UCHAR Cr; + UCHAR Cb; + UCHAR Y; + UCHAR Alpha; +}DXVA2_AYUVSample8; + +typedef struct { + USHORT Cr; + USHORT Cb; + USHORT Y; + USHORT Alpha; +}DXVA2_AYUVSample16; + +typedef struct { + DXVA2_Fixed32 MinValue; + DXVA2_Fixed32 MaxValue; + DXVA2_Fixed32 DefaultValue; + DXVA2_Fixed32 StepSize; +}DXVA2_ValueRange; + +typedef struct { + DXVA2_Fixed32 Brightness; + DXVA2_Fixed32 Contrast; + DXVA2_Fixed32 Hue; + DXVA2_Fixed32 Saturation; +}DXVA2_ProcAmpValues; + +typedef struct { + DXVA2_Fixed32 Level; + DXVA2_Fixed32 Threshold; + DXVA2_Fixed32 Radius; +}DXVA2_FilterValues; + +typedef struct { + UINT DeviceCaps; + D3DPOOL InputPool; + UINT NumForwardRefSamples; + UINT NumBackwardRefSamples; + UINT Reserved; + UINT DeinterlaceTechnology; + UINT ProcAmpControlCaps; + UINT VideoProcessorOperations; + UINT NoiseFilterTechnology; + UINT DetailFilterTechnology; +}DXVA2_VideoProcessorCaps; + +#ifndef _REFERENCE_TIME_ +#define _REFERENCE_TIME_ +typedef long long int64_t; +typedef int64_t REFERENCE_TIME; +#endif + +typedef struct { + REFERENCE_TIME Start; + REFERENCE_TIME End; + DXVA2_ExtendedFormat SampleFormat; + IDirect3DSurface9 *SrcSurface; + RECT SrcRect; + RECT DstRect; + DXVA2_AYUVSample8 Pal[16]; + DXVA2_Fixed32 PlanarAlpha; + DWORD SampleData; +}DXVA2_VideoSample; + + +typedef struct { + REFERENCE_TIME TargetFrame; + RECT TargetRect; + SIZE ConstrictionSize; + UINT StreamingFlags; + DXVA2_AYUVSample16 BackgroundColor; + DXVA2_ExtendedFormat DestFormat; + DXVA2_ProcAmpValues ProcAmpValues; + DXVA2_Fixed32 Alpha; + DXVA2_FilterValues NoiseFilterLuma; + DXVA2_FilterValues 
NoiseFilterChroma; + DXVA2_FilterValues DetailFilterLuma; + DXVA2_FilterValues DetailFilterChroma; + DWORD DestData; +} DXVA2_VideoProcessBltParams; + +#pragma pack(pop) + +/*************INTERFACES************/ +#ifdef __cplusplus +extern "C" { +#endif +#define _COM_interface struct +typedef _COM_interface IDirectXVideoDecoderService IDirectXVideoDecoderService; +typedef _COM_interface IDirectXVideoDecoder IDirectXVideoDecoder; + +#undef INTERFACE +#define INTERFACE IDirectXVideoDecoder +DECLARE_INTERFACE_( IDirectXVideoDecoder, IUnknown ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( GetVideoDecoderService ) ( THIS_ IDirectXVideoDecoderService** ) PURE; + STDMETHOD( GetCreationParameters ) ( THIS_ GUID*, DXVA2_VideoDesc*, DXVA2_ConfigPictureDecode*, IDirect3DSurface9***, UINT* ) PURE; + STDMETHOD( GetBuffer ) ( THIS_ UINT, void**, UINT* ) PURE; + STDMETHOD( ReleaseBuffer ) ( THIS_ UINT ) PURE; + STDMETHOD( BeginFrame ) ( THIS_ IDirect3DSurface9 *, void* ) PURE; + STDMETHOD( EndFrame ) ( THIS_ HANDLE * ) PURE; + STDMETHOD( Execute ) ( THIS_ const DXVA2_DecodeExecuteParams* ) PURE; + + +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoDecoder_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoDecoder_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoDecoder_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoDecoder_BeginFrame( p, a, b ) (p)->lpVtbl->BeginFrame( p, a, b ) +#define IDirectXVideoDecoder_EndFrame( p, a ) (p)->lpVtbl->EndFrame( p, a ) +#define IDirectXVideoDecoder_Execute( p, a ) (p)->lpVtbl->Execute( p, a ) +#define IDirectXVideoDecoder_GetBuffer( p, a, b, c ) (p)->lpVtbl->GetBuffer( p, a, b, c ) +#define IDirectXVideoDecoder_GetCreationParameters( p, a, b, c, d, e ) (p)->lpVtbl->GetCreationParameters( p, a, b, c, d, e ) +#define 
IDirectXVideoDecoder_GetVideoDecoderService( p, a ) (p)->lpVtbl->GetVideoDecoderService( p, a ) +#define IDirectXVideoDecoder_ReleaseBuffer( p, a ) (p)->lpVtbl->ReleaseBuffer( p, a ) +#else +#define IDirectXVideoDecoder_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirectXVideoDecoder_AddRef( p ) (p)->AddRef() +#define IDirectXVideoDecoder_Release( p ) (p)->Release() +#define IDirectXVideoDecoder_BeginFrame( p, a, b ) (p)->BeginFrame( a, b ) +#define IDirectXVideoDecoder_EndFrame( p, a ) (p)->EndFrame( a ) +#define IDirectXVideoDecoder_Execute( p, a ) (p)->Execute( a ) +#define IDirectXVideoDecoder_GetBuffer( p, a, b, c ) (p)->GetBuffer( a, b, c ) +#define IDirectXVideoDecoder_GetCreationParameters( p, a, b, c, d, e ) (p)->GetCreationParameters( a, b, c, d, e ) +#define IDirectXVideoDecoder_GetVideoDecoderService( p, a ) (p)->GetVideoDecoderService( a ) +#define IDirectXVideoDecoder_ReleaseBuffer( p, a ) (p)->ReleaseBuffer( a ) +#endif + +#undef INTERFACE +#define INTERFACE IDirectXVideoAccelerationService +DECLARE_INTERFACE_( IDirectXVideoAccelerationService, IUnknown ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( CreateSurface ) ( THIS_ UINT, UINT, UINT, D3DFORMAT, D3DPOOL, DWORD, DWORD, IDirect3DSurface9**, HANDLE* ) PURE; + +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoAccelerationService_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoAccelerationService_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoAccelerationService_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoAccelerationService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->lpVtbl->CreateSurface( p, a, b, c, d, e, f, g, h, i ) +#else +#define IDirectXVideoAccelerationService_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define 
IDirectXVideoAccelerationService_AddRef( p ) (p)->AddRef() +#define IDirectXVideoAccelerationService_Release( p ) (p)->Release() +#define IDirectXVideoAccelerationService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->CreateSurface( a, b, c, d, e, f, g, h, i ) +#endif + +#undef INTERFACE +#define INTERFACE IDirectXVideoDecoderService +DECLARE_INTERFACE_( IDirectXVideoDecoderService, IDirectXVideoAccelerationService ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( CreateSurface ) ( THIS_ UINT, UINT, UINT, D3DFORMAT, D3DPOOL, DWORD, DWORD, IDirect3DSurface9**, HANDLE* ) PURE; + STDMETHOD( GetDecoderDeviceGuids ) ( THIS_ UINT*, GUID ** ) PURE; + STDMETHOD( GetDecoderRenderTargets ) ( THIS_ REFGUID, UINT*, D3DFORMAT** ) PURE; + STDMETHOD( GetDecoderConfigurations ) ( THIS_ REFGUID, const DXVA2_VideoDesc*, IUnknown*, UINT*, DXVA2_ConfigPictureDecode** ) PURE; + STDMETHOD( CreateVideoDecoder ) ( THIS_ REFGUID, const DXVA2_VideoDesc*, DXVA2_ConfigPictureDecode*, IDirect3DSurface9**, UINT, IDirectXVideoDecoder** ) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoDecoderService_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoDecoderService_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoDecoderService_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoDecoderService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->lpVtbl->CreateSurface( p, a, b, c, d, e, f, g, h, i ) +#define IDirectXVideoDecoderService_CreateVideoDecoder( p, a, b, c, d, e, f ) (p)->lpVtbl->CreateVideoDecoder( p, a, b, c, d, e, f ) +#define IDirectXVideoDecoderService_GetDecoderConfigurations( p, a, b, c, d, e ) (p)->lpVtbl->GetDecoderConfigurations( p, a, b, c, d, e ) +#define IDirectXVideoDecoderService_GetDecoderDeviceGuids( p, a, b ) (p)->lpVtbl->GetDecoderDeviceGuids( p, a, b ) 
+#define IDirectXVideoDecoderService_GetDecoderRenderTargets( p, a, b, c ) (p)->lpVtbl->GetDecoderRenderTargets( p, a, b, c ) +#else +#define IDirectXVideoDecoderService_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirectXVideoDecoderService_AddRef( p ) (p)->AddRef() +#define IDirectXVideoDecoderService_Release( p ) (p)->Release() +#define IDirectXVideoDecoderService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->CreateSurface( a, b, c, d, e, f, g, h, i ) +#define IDirectXVideoDecoderService_CreateVideoDecoder( p, a, b, c, d, e, f ) (p)->CreateVideoDecoder( a, b, c, d, e, f ) +#define IDirectXVideoDecoderService_GetDecoderConfigurations( p, a, b, c, d, e ) (p)->GetDecoderConfigurations( a, b, c, d, e ) +#define IDirectXVideoDecoderService_GetDecoderDeviceGuids( p, a, b ) (p)->GetDecoderDeviceGuids( a, b ) +#define IDirectXVideoDecoderService_GetDecoderRenderTargets( p, a, b, c ) (p)->GetDecoderRenderTargets( a, b, c ) +#endif + +#undef INTERFACE +#define INTERFACE IDirect3DDeviceManager9 +DECLARE_INTERFACE_( IDirect3DDeviceManager9, IUnknown ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( ResetDevice ) ( THIS_ IDirect3DDevice9*, UINT ) PURE; + STDMETHOD( OpenDeviceHandle ) ( THIS_ HANDLE* ) PURE; + STDMETHOD( CloseDeviceHandle ) ( THIS_ HANDLE ) PURE; + STDMETHOD( TestDevice ) ( THIS_ HANDLE ) PURE; + STDMETHOD( LockDevice ) ( THIS_ HANDLE, IDirect3DDevice9**, BOOL ) PURE; + STDMETHOD( UnlockDevice ) ( THIS_ HANDLE, BOOL ) PURE; + STDMETHOD( GetVideoService ) ( THIS_ HANDLE, REFIID, void** ) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirect3DDeviceManager9_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirect3DDeviceManager9_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirect3DDeviceManager9_Release( p ) (p)->lpVtbl->Release( p ) +#define 
IDirect3DDeviceManager9_ResetDevice( p, a, b ) (p)->lpVtbl->ResetDevice( p, a, b ) +#define IDirect3DDeviceManager9_OpenDeviceHandle( p, a ) (p)->lpVtbl->OpenDeviceHandle( p, a ) +#define IDirect3DDeviceManager9_CloseDeviceHandle( p, a ) (p)->lpVtbl->CloseDeviceHandle( p, a ) +#define IDirect3DDeviceManager9_TestDevice( p, a ) (p)->lpVtbl->TestDevice( p, a ) +#define IDirect3DDeviceManager9_LockDevice( p, a, b, c ) (p)->lpVtbl->LockDevice( p, a, b, c ) +#define IDirect3DDeviceManager9_UnlockDevice( p, a, b ) (p)->lpVtbl->UnlockDevice( p, a, b ) +#define IDirect3DDeviceManager9_GetVideoService( p, a, b, c ) (p)->lpVtbl->GetVideoService( p, a, b, c ) +#else +#define IDirect3DDeviceManager9_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirect3DDeviceManager9_AddRef( p ) (p)->AddRef() +#define IDirect3DDeviceManager9_Release( p ) (p)->Release() +#define IDirect3DDeviceManager9_ResetDevice( p, a, b ) (p)->ResetDevice( a, b ) +#define IDirect3DDeviceManager9_OpenDeviceHandle( p, a ) (p)->OpenDeviceHandle( a ) +#define IDirect3DDeviceManager9_CloseDeviceHandle( p, a ) (p)->CloseDeviceHandle( a ) +#define IDirect3DDeviceManager9_TestDevice( p, a ) (p)->TestDevice( a ) +#define IDirect3DDeviceManager9_LockDevice( p, a, b, c ) (p)->LockDevice( a, b, c ) +#define IDirect3DDeviceManager9_UnlockDevice( p, a, b ) (p)->UnlockDevice( a, b ) +#define IDirect3DDeviceManager9_GetVideoService( p, a, b, c ) (p)->GetVideoService( a, b, c ) +#endif + +typedef _COM_interface IDirectXVideoProcessorService IDirectXVideoProcessorService; +typedef _COM_interface IDirectXVideoProcessor IDirectXVideoProcessor; + +#undef INTERFACE +#define INTERFACE IDirectXVideoProcessor +DECLARE_INTERFACE_( IDirectXVideoProcessor, IUnknown ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( GetVideoProcessorService ) ( THIS_ IDirectXVideoProcessorService** ) PURE; + STDMETHOD( 
GetCreationParameters ) ( THIS_ GUID*, DXVA2_VideoDesc*, D3DFORMAT*, UINT* ) PURE; + STDMETHOD( GetVideoProcessorCaps ) ( THIS_ DXVA2_VideoProcessorCaps* ) PURE; + STDMETHOD( GetProcAmpRange ) ( THIS_ UINT, DXVA2_ValueRange* ) PURE; + STDMETHOD( GetFilterPropertyRange ) ( THIS_ UINT, DXVA2_ValueRange* ) PURE; + STDMETHOD( VideoProcessBlt ) ( THIS_ IDirect3DSurface9*, DXVA2_VideoProcessBltParams*, DXVA2_VideoSample*, UINT, HANDLE* ) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoProcessor_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoProcessor_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoProcessor_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoProcessor_GetVideoProcessorService( p, a ) (p)->lpVtbl->GetVideoProcessorService( p, a ) +#define IDirectXVideoProcessor_GetCreationParameters( p, a, b, c, d ) (p)->lpVtbl->GetCreationParameters( p, a, b, c, d ) +#define IDirectXVideoProcessor_GetVideoProcessorCaps( p, a ) (p)->lpVtbl->GetVideoProcessorCaps( p, a ) +#define IDirectXVideoProcessor_GetProcAmpRange( p, a, b ) (p)->lpVtbl->GetProcAmpRange( p, a, b ) +#define IDirectXVideoProcessor_GetFilterPropertyRange( p, a, b ) (p)->lpVtbl->GetFilterPropertyRange( p, a, b ) +#define IDirectXVideoProcessor_VideoProcessBlt( p, a, b, c, d, e ) (p)->lpVtbl->VideoProcessBlt( p, a, b, c, d, e ) +#else +#define IDirectXVideoProcessor_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirectXVideoProcessor_AddRef( p ) (p)->AddRef() +#define IDirectXVideoProcessor_Release( p ) (p)->Release() +#define IDirectXVideoProcessor_GetVideoProcessorService( p, a ) (p)->GetVideoProcessorService( a ) +#define IDirectXVideoProcessor_GetCreationParameters( p, a, b, c, d ) (p)->GetCreationParameters( a, b, c, d ) +#define IDirectXVideoProcessor_GetVideoProcessorCaps( p, a ) (p)->GetVideoProcessorCaps( a ) +#define IDirectXVideoProcessor_GetProcAmpRange( p, a, b ) 
(p)->GetProcAmpRange( a, b ) +#define IDirectXVideoProcessor_GetFilterPropertyRange( p, a, b ) (p)->GetFilterPropertyRange( a, b ) +#define IDirectXVideoProcessor_VideoProcessBlt( p, a, b, c, d, e ) (p)->VideoProcessBlt( a, b, c, d, e ) +#endif + + +#undef INTERFACE +#define INTERFACE IDirectXVideoProcessorService +DECLARE_INTERFACE_( IDirectXVideoProcessorService, IDirectXVideoAccelerationService ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( CreateSurface ) ( THIS_ UINT, UINT, UINT, D3DFORMAT, D3DPOOL, DWORD, DWORD, IDirect3DSurface9**, HANDLE* ) PURE; + STDMETHOD( RegisterVideoProcessorSoftwareDevice ) ( THIS_ void* ) PURE; + STDMETHOD( GetVideoProcessorDeviceGuids ) ( THIS_ DXVA2_VideoDesc*, UINT, GUID** ) PURE; + STDMETHOD( GetVideoProcessorRenderTargets ) ( THIS_ REFGUID, DXVA2_VideoDesc*, UINT*, D3DFORMAT** ) PURE; + STDMETHOD( GetVideoProcessorSubStreamFormats ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, UINT*, D3DFORMAT** ) PURE; + STDMETHOD( GetVideoProcessorCaps ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, DXVA2_VideoProcessorCaps* ) PURE; + STDMETHOD( GetProcAmpRange ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, UINT, DXVA2_ValueRange* ) PURE; + STDMETHOD( GetFilterPropertyRange ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, UINT, DXVA2_ValueRange* ) PURE; + STDMETHOD( CreateVideoProcessor ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, UINT, IDirectXVideoProcessor** ) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoProcessorService_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoProcessorService_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoProcessorService_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoProcessorService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->lpVtbl->CreateSurface( p, a, b, c, d, e, f, g, h, i ) 
+#define IDirectXVideoProcessorService_RegisterVideoProcessorSoftwareDevice( p, a ) (p)->lpVtbl->RegisterVideoProcessorSoftwareDevice( p, a ) +#define IDirectXVideoProcessorService_GetVideoProcessorDeviceGuids( p, a, b, c ) (p)->lpVtbl->GetVideoProcessorDeviceGuids( p, a, b, c ) +#define IDirectXVideoProcessorService_GetVideoProcessorRenderTargets( p, a, b, c, d ) (p)->lpVtbl->GetVideoProcessorRenderTargets( p, a, b, c, d ) +#define IDirectXVideoProcessorService_GetVideoProcessorSubStreamFormats( p, a, b, c, d, e ) (p)->lpVtbl->GetVideoProcessorSubStreamFormats( p, a, b, c, d, e ) +#define IDirectXVideoProcessorService_GetVideoProcessorCaps( p, a, b, c, d ) (p)->lpVtbl->GetVideoProcessorCaps( p, a, b, c, d ) +#define IDirectXVideoProcessorService_GetProcAmpRange( p, a, b, c, d, e ) (p)->lpVtbl->GetProcAmpRange( p, a, b, c, d, e ) +#define IDirectXVideoProcessorService_GetFilterPropertyRange( p, a, b, c, d, e ) (p)->lpVtbl->GetFilterPropertyRange( p, a, b, c, d, e ) +#define IDirectXVideoProcessorService_CreateVideoProcessor( p, a, b, c, d, e ) (p)->lpVtbl->CreateVideoProcessor( p, a, b, c, d, e ) +#else +#define IDirectXVideoProcessorService_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirectXVideoProcessorService_AddRef( p ) (p)->AddRef() +#define IDirectXVideoProcessorService_Release( p ) (p)->Release() +#define IDirectXVideoProcessorService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->CreateSurface( a, b, c, d, e, f, g, h, i ) +#define IDirectXVideoProcessorService_RegisterVideoProcessorSoftwareDevice( p, a ) (p)->RegisterVideoProcessorSoftwareDevice( a ) +#define IDirectXVideoProcessorService_GetVideoProcessorDeviceGuids( p, a, b, c ) (p)->GetVideoProcessorDeviceGuids( a, b, c ) +#define IDirectXVideoProcessorService_GetVideoProcessorRenderTargets( p, a, b, c, d ) (p)->GetVideoProcessorRenderTargets( a, b, c, d ) +#define IDirectXVideoProcessorService_GetVideoProcessorSubStreamFormats( p, a, b, c, d, e ) 
(p)->GetVideoProcessorSubStreamFormats( a, b, c, d, e ) +#define IDirectXVideoProcessorService_GetVideoProcessorCaps( p, a, b, c, d ) (p)->GetVideoProcessorCaps( a, b, c, d ) +#define IDirectXVideoProcessorService_GetProcAmpRange( p, a, b, c, d, e ) (p)->GetProcAmpRange( a, b, c, d, e ) +#define IDirectXVideoProcessorService_GetFilterPropertyRange( p, a, b, c, d, e ) (p)->GetFilterPropertyRange( a, b, c, d, e ) +#define IDirectXVideoProcessorService_CreateVideoProcessor( p, a, b, c, d, e ) (p)->CreateVideoProcessor( a, b, c, d, e ) +#endif + + +/***************************************************************************************************** +************************DXVA Video Processor******************************************************** +*******************************************************************************************************/ + + + +/*#undef INTERFACE +#define INTERFACE IDirectXVideoService +DECLARE_INTERFACE_(IDirectXVideoService,IUnknown) +{ + STDMETHOD(DXVA2CreateVideoService)(IDirect3DDevice9*, REFIID, void**) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoService_DXVA2CreateVideoService(a,b,c) DXVA2CreateVideoService(a,b,c) +#else +#define IDirectXVideoService_DXVA2CreateVideoService(a,b,c) DXVA2CreateVideoService(a,b,c) +#endif*/ + + +#ifdef __cplusplus +}; +#endif + +#ifdef __cplusplus +extern "C" HRESULT WINAPI DXVA2CreateVideoService( IDirect3DDevice9 *, + REFIID riid, + void **ppService ); +#else +extern HRESULT WINAPI DXVA2CreateVideoService( IDirect3DDevice9 *, + REFIID riid, + void **ppService ); +#endif + +typedef +enum _DXVA2_VideoChromaSubSampling +{ DXVA2_VideoChromaSubsamplingMask = 0xf, + DXVA2_VideoChromaSubsampling_Unknown = 0, + DXVA2_VideoChromaSubsampling_ProgressiveChroma = 0x8, + DXVA2_VideoChromaSubsampling_Horizontally_Cosited = 0x4, + DXVA2_VideoChromaSubsampling_Vertically_Cosited = 0x2, + DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes = 0x1, + 
DXVA2_VideoChromaSubsampling_MPEG2 = ( DXVA2_VideoChromaSubsampling_Horizontally_Cosited | DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes ), + DXVA2_VideoChromaSubsampling_MPEG1 = DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes, + DXVA2_VideoChromaSubsampling_DV_PAL = ( DXVA2_VideoChromaSubsampling_Horizontally_Cosited | DXVA2_VideoChromaSubsampling_Vertically_Cosited ), + DXVA2_VideoChromaSubsampling_Cosited = ( ( DXVA2_VideoChromaSubsampling_Horizontally_Cosited | DXVA2_VideoChromaSubsampling_Vertically_Cosited ) | DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes )} DXVA2_VideoChromaSubSampling; + +typedef +enum _DXVA2_NominalRange +{ DXVA2_NominalRangeMask = 0x7, + DXVA2_NominalRange_Unknown = 0, + DXVA2_NominalRange_Normal = 1, + DXVA2_NominalRange_Wide = 2, + DXVA2_NominalRange_0_255 = 1, + DXVA2_NominalRange_16_235 = 2, + DXVA2_NominalRange_48_208 = 3} DXVA2_NominalRange; + +typedef +enum _DXVA2_VideoLighting +{ DXVA2_VideoLightingMask = 0xf, + DXVA2_VideoLighting_Unknown = 0, + DXVA2_VideoLighting_bright = 1, + DXVA2_VideoLighting_office = 2, + DXVA2_VideoLighting_dim = 3, + DXVA2_VideoLighting_dark = 4} DXVA2_VideoLighting; + +typedef +enum _DXVA2_VideoPrimaries +{ DXVA2_VideoPrimariesMask = 0x1f, + DXVA2_VideoPrimaries_Unknown = 0, + DXVA2_VideoPrimaries_reserved = 1, + DXVA2_VideoPrimaries_BT709 = 2, + DXVA2_VideoPrimaries_BT470_2_SysM = 3, + DXVA2_VideoPrimaries_BT470_2_SysBG = 4, + DXVA2_VideoPrimaries_SMPTE170M = 5, + DXVA2_VideoPrimaries_SMPTE240M = 6, + DXVA2_VideoPrimaries_EBU3213 = 7, + DXVA2_VideoPrimaries_SMPTE_C = 8} DXVA2_VideoPrimaries; + +typedef +enum _DXVA2_VideoTransferFunction +{ DXVA2_VideoTransFuncMask = 0x1f, + DXVA2_VideoTransFunc_Unknown = 0, + DXVA2_VideoTransFunc_10 = 1, + DXVA2_VideoTransFunc_18 = 2, + DXVA2_VideoTransFunc_20 = 3, + DXVA2_VideoTransFunc_22 = 4, + DXVA2_VideoTransFunc_709 = 5, + DXVA2_VideoTransFunc_240M = 6, + DXVA2_VideoTransFunc_sRGB = 7, + DXVA2_VideoTransFunc_28 = 8} 
DXVA2_VideoTransferFunction; + +typedef +enum _DXVA2_SampleFormat +{ DXVA2_SampleFormatMask = 0xff, + DXVA2_SampleUnknown = 0, + DXVA2_SampleProgressiveFrame = 2, + DXVA2_SampleFieldInterleavedEvenFirst = 3, + DXVA2_SampleFieldInterleavedOddFirst = 4, + DXVA2_SampleFieldSingleEven = 5, + DXVA2_SampleFieldSingleOdd = 6, + DXVA2_SampleSubStream = 7} DXVA2_SampleFormat; + +typedef +enum _DXVA2_VideoTransferMatrix +{ DXVA2_VideoTransferMatrixMask = 0x7, + DXVA2_VideoTransferMatrix_Unknown = 0, + DXVA2_VideoTransferMatrix_BT709 = 1, + DXVA2_VideoTransferMatrix_BT601 = 2, + DXVA2_VideoTransferMatrix_SMPTE240M = 3} DXVA2_VideoTransferMatrix; + +enum __MIDL___MIDL_itf_dxva2api_0000_0000_0004 +{ DXVA2_NoiseFilterLumaLevel = 1, + DXVA2_NoiseFilterLumaThreshold = 2, + DXVA2_NoiseFilterLumaRadius = 3, + DXVA2_NoiseFilterChromaLevel = 4, + DXVA2_NoiseFilterChromaThreshold = 5, + DXVA2_NoiseFilterChromaRadius = 6, + DXVA2_DetailFilterLumaLevel = 7, + DXVA2_DetailFilterLumaThreshold = 8, + DXVA2_DetailFilterLumaRadius = 9, + DXVA2_DetailFilterChromaLevel = 10, + DXVA2_DetailFilterChromaThreshold = 11, + DXVA2_DetailFilterChromaRadius = 12}; + +enum __MIDL___MIDL_itf_dxva2api_0000_0000_0008 +{ DXVA2_VideoProcess_None = 0, + DXVA2_VideoProcess_YUV2RGB = 0x1, + DXVA2_VideoProcess_StretchX = 0x2, + DXVA2_VideoProcess_StretchY = 0x4, + DXVA2_VideoProcess_AlphaBlend = 0x8, + DXVA2_VideoProcess_SubRects = 0x10, + DXVA2_VideoProcess_SubStreams = 0x20, + DXVA2_VideoProcess_SubStreamsExtended = 0x40, + DXVA2_VideoProcess_YUV2RGBExtended = 0x80, + DXVA2_VideoProcess_AlphaBlendExtended = 0x100, + DXVA2_VideoProcess_Constriction = 0x200, + DXVA2_VideoProcess_NoiseFilter = 0x400, + DXVA2_VideoProcess_DetailFilter = 0x800, + DXVA2_VideoProcess_PlanarAlpha = 0x1000, + DXVA2_VideoProcess_LinearScaling = 0x2000, + DXVA2_VideoProcess_GammaCompensated = 0x4000, + DXVA2_VideoProcess_MaintainsOriginalFieldData = 0x8000, + DXVA2_VideoProcess_Mask = 0xffff}; + + + +__inline float hb_dx_fixedtofloat( 
const DXVA2_Fixed32 _fixed_ ); + +__inline const DXVA2_Fixed32 hb_dx_fixed32_opaque_alpha(); + +__inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ ); + +#endif //_DXVA2API_H diff --git a/libhb/hb.c b/libhb/hb.c index 8a1ec3b1e..438e92fb6 100644 --- a/libhb/hb.c +++ b/libhb/hb.c @@ -477,6 +477,7 @@ hb_handle_t * hb_init( int verbose, int update_check ) #endif hb_register( &hb_encavcodeca ); hb_register( &hb_reader ); + hb_register( &hb_decavcodecv_accl ); return h; } @@ -575,7 +576,7 @@ hb_handle_t * hb_init_dl( int verbose, int update_check ) #endif hb_register( &hb_encavcodeca ); hb_register( &hb_reader ); - + hb_register( &hb_decavcodecv_accl ); return h; } diff --git a/libhb/hbffmpeg.h b/libhb/hbffmpeg.h index 2de771505..a44fc8d02 100644 --- a/libhb/hbffmpeg.h +++ b/libhb/hbffmpeg.h @@ -15,6 +15,7 @@ #include "libavutil/opt.h" #include "libswscale/swscale.h" #include "libavresample/avresample.h" +#include "common.h" #define HB_FFMPEG_THREADS_AUTO (-1) // let hb_avcodec_open() decide thread_count diff --git a/libhb/internal.h b/libhb/internal.h index 1c02ffe85..76845a3cc 100644 --- a/libhb/internal.h +++ b/libhb/internal.h @@ -418,7 +418,8 @@ enum WORK_ENCAVCODEC_AUDIO, WORK_MUX, WORK_READER, - WORK_DECPGSSUB + WORK_DECPGSSUB, + WORK_DECAVCODECVACCL }; extern hb_filter_object_t hb_filter_detelecine; @@ -428,6 +429,7 @@ extern hb_filter_object_t hb_filter_denoise; extern hb_filter_object_t hb_filter_decomb; extern hb_filter_object_t hb_filter_rotate; extern hb_filter_object_t hb_filter_crop_scale; +extern hb_filter_object_t hb_filter_crop_scale_accl; extern hb_filter_object_t hb_filter_render_sub; extern hb_filter_object_t hb_filter_vfr; diff --git a/libhb/module.defs b/libhb/module.defs index e5c5650fa..d3d409513 100644 --- a/libhb/module.defs +++ b/libhb/module.defs @@ -37,8 +37,12 @@ LIBHB.out += $(LIBHB.a) ifeq (1,$(FEATURE.ff.mpeg2)) LIBHB.GCC.D += USE_FF_MPEG2 endif +ifeq (1,$(FEATURE.opencl)) +LIBHB.GCC.D += USE_OPENCL +endif LIBHB.GCC.D += 
__LIBHB__ USE_PTHREAD LIBHB.GCC.I += $(LIBHB.build/) $(CONTRIB.build/)include +LIBHB.GCC.I += $(AMDAPPSDKROOT)/include ifeq ($(BUILD.system),cygwin) LIBHB.GCC.D += SYS_CYGWIN @@ -131,6 +135,9 @@ LIBHB.GCC.l += ws2_32 ifeq ($(HAS.dlfcn),1) LIBHB.GCC.l += dl endif +ifeq (1,$(FEATURE.opencl)) + LIBHB.GCC.l += OpenCL +endif LIBHB.out += $(LIBHB.dll) $(LIBHB.lib) endif diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c new file mode 100644 index 000000000..8d49563bf --- /dev/null +++ b/libhb/oclnv12toyuv.c @@ -0,0 +1,222 @@ +/* oclnv12toyuv.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + */ + +#ifdef USE_OPENCL +#include "vadxva2.h" +#include "oclnv12toyuv.h" + +/** + * It creates are opencl bufs w is input frame width, h is input frame height +*/ +static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); + +/** + * It creates are opencl kernel. kernel name is nv12toyuv +*/ +static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 ); + +/** + * It set opencl arg, input data,output data, input width, output height +*/ +static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); + +/** + * It initialize nv12 to yuv kernel. +*/ +static int hb_init_nv12toyuv_ocl( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); + +/** + * Run nv12 to yuv kernel. + */ +static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ); + +/** + * register nv12 to yuv kernel. 
+ */ +static int hb_nv12toyuv_reg_kernel( void ); + + +/** + * It creates are opencl bufs w is input frame width, h is input frame height + */ +static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) +{ + cl_int status = CL_SUCCESS; + int in_bytes = w*h*3/2; + CREATEBUF( dxva2->cl_mem_nv12, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, in_bytes ); + CREATEBUF( dxva2->cl_mem_yuv, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, in_bytes ); + return 0; +} +/** + * It creates are opencl kernel. kernel name is nv12toyuv + */ +static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 ) +{ + int ret; + dxva2->nv12toyuv = clCreateKernel( kenv->program, "nv12toyuv", &ret ); + return ret; +} +/** + * It set opencl arg, input data,output data, input width, output height + */ +static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) +{ + int arg = 0, status; + kenv->kernel = dxva2->nv12toyuv; + OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_nv12 ); + OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_yuv ); + OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &w ); + OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h ); + return 0; +} +/** + * It initialize nv12 to yuv kernel. + */ +static int hb_init_nv12toyuv_ocl( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) +{ + if( !dxva2->nv12toyuv ) + { + if( hb_nv12toyuv_create_cl_buf( kenv, w, h, dxva2 ) ) + { + hb_log( "nv12toyuv_create_cl_buf fial" ); + return -1; + } + if (!dxva2->nv12toyuv_tmp_in) + dxva2->nv12toyuv_tmp_in = malloc (w*h*3/2); + if (!dxva2->nv12toyuv_tmp_out) + dxva2->nv12toyuv_tmp_out = malloc (w*h*3/2); + hb_nv12toyuv_create_cl_kernel( kenv, dxva2 ); + } + return 0; +} + +/** + * Run nv12 to yuv kernel. 
+ */ +static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ) +{ + int status; + int w = (int)userdata[0]; + int h = (int)userdata[1]; + uint8_t *bufi1 = userdata[2]; + int *crop = userdata[3]; + hb_va_dxva2_t *dxva2 = userdata[4]; + + uint8_t *bufi2 = userdata[5]; + int p = (int)userdata[6]; + int i; + if( hb_init_nv12toyuv_ocl( kenv, w, h, dxva2 ) ) + return -1; + + if( hb_nv12toyuv_setkernelarg( kenv, w, h, dxva2 ) ) + return -1; + + int in_bytes = w*h*3/2; + if( kenv->isAMD ) + { + void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_nv12, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL ); + //memcpy( data, bufi, in_bytes ); + for ( i = 0; i < dxva2->height; i++ ) + { + memcpy( data+i*dxva2->width, bufi1+i*p, dxva2->width ); + if ( iheight>>1 ) + memcpy( data+(dxva2->width*dxva2->height)+i*dxva2->width, bufi2+i*p, dxva2->width ); + } + clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_nv12, data, 0, NULL, NULL ); + } + else + { + uint8_t *tmp = (uint8_t*)malloc( dxva2->width*dxva2->height*3/2 ); + for( i = 0; i < dxva2->height; i++ ) + { + memcpy( tmp+i*dxva2->width, bufi1+i*p, dxva2->width ); + if( iheight>>1 ) + memcpy( tmp+(dxva2->width*dxva2->height)+i*dxva2->width, bufi2+i*p, dxva2->width ); + } + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL ); + free( tmp ); + } + + size_t gdim[2] = {w>>1, h>>1}; + OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, gdim, NULL, 0, NULL, NULL ); + + if( crop[0] || crop[1] || crop[2] || crop[3] ) + { + AVPicture pic_in; + AVPicture pic_crop; + clEnqueueReadBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_out, 0, NULL, NULL ); + hb_buffer_t *in = hb_video_buffer_init( w, h ); + memcpy( in->plane[0].data, dxva2->nv12toyuv_tmp_out, w * h ); + memcpy( in->plane[1].data, dxva2->nv12toyuv_tmp_out + w * h, ( w * h )>>2 ); + memcpy( 
in->plane[2].data, dxva2->nv12toyuv_tmp_out + w * h + ( ( w * h )>>2 ), ( w * h )>>2 ); + hb_avpicture_fill( &pic_in, in ); + av_picture_crop( &pic_crop, &pic_in, in->f.fmt, crop[0], crop[2] ); + int i, ww = w - ( crop[2] + crop[3] ), hh = h - ( crop[0] + crop[1] ); + for( i = 0; i< hh >> 1; i++ ) + { + memcpy( dxva2->nv12toyuv_tmp_in + ( ( i<<1 ) + 0 ) * ww, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], ww ); + memcpy( dxva2->nv12toyuv_tmp_in + ( ( i<<1 ) + 1 ) * ww, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], ww ); + memcpy( dxva2->nv12toyuv_tmp_in + ( ww * hh ) + i * ( ww>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], ww >> 1 ); + memcpy( dxva2->nv12toyuv_tmp_in + ( ww * hh ) + ( ( ww * hh )>>2 ) + i * ( ww>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], ww >> 1 ); + } + if( kenv->isAMD ) + { + void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, ww * hh * 3 / 2, 0, NULL, NULL, NULL ); + memcpy( data, dxva2->nv12toyuv_tmp_in, ww * hh * 3 / 2 ); + clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_yuv, data, 0, NULL, NULL ); + } + else + { + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_in, 0, NULL, NULL ); + } + hb_buffer_close( &in ); + } + return 0; +} +/** + * register nv12 to yuv kernel. 
+ */ +static int hb_nv12toyuv_reg_kernel( void ) +{ + int st = hb_register_kernel_wrapper( "nv12toyuv", hb_nv12toyuv ); + if( !st ) + { + hb_log( "register kernel[%s] faild\n", "nv12toyuv" ); + return -1; + } + return 0; +} +/** + * nv12 to yuv interface + * bufi is input frame of nv12, w is input frame width, h is input frame height + */ +int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ) +//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ) +{ + void *userdata[7]; + userdata[0] = (void*)w; + userdata[1] = (void*)h; + userdata[2] = bufi[0]; + userdata[3] = crop; + userdata[4] = dxva2; + userdata[5] = bufi[1]; + userdata[6] = (void*)p; + if( hb_nv12toyuv_reg_kernel() ) + return -1; + if( hb_run_kernel( "nv12toyuv", userdata ) ) + { + printf( "run kernel[nv12toyuv] faild\n" ); + return -1; + } + return 0; +} +#endif diff --git a/libhb/oclnv12toyuv.h b/libhb/oclnv12toyuv.h new file mode 100644 index 000000000..3307b8efe --- /dev/null +++ b/libhb/oclnv12toyuv.h @@ -0,0 +1,29 @@ +/* oclnv12toyuv.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + +#ifdef USE_OPENCL +#ifndef RENDER_CL_H +#define RENDER_CL_H +#include "CL/cl.h" +#include "common.h" +#include "openclwrapper.h" + +/** + * nv12 to yuv interface + * bufi is input frame of nv12, w is input frame width, h is input frame height + */ +//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ); +int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ); + +#endif +#endif diff --git a/libhb/oclscale.c b/libhb/oclscale.c new file mode 100644 index 000000000..084e04b13 --- /dev/null +++ b/libhb/oclscale.c @@ -0,0 +1,301 @@ +/* oclscale.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + +#ifdef USE_OPENCL + +#include +#include "common.h" +#include "openclwrapper.h" +#define MaxFilterLength 16 +#define FILTER_LEN 4 + +inline double hb_fit_gauss_kernel( double x ) +{ + double powNum = -1 * M_PI; + + powNum *= x; + + powNum *= x; + + return exp( powNum ); +} +/** + * Using gaussian algorithm to calculate the scale filter + */ +static void hb_set_gauss_interpolation( float *pcoeff, int *pmappedindex, int targetdatalength, int srcdatalength, int filterLength, float bias ) +{ + int i, j; + + float gausskernel[MaxFilterLength]; + + int half = filterLength / 2; + + float scalerate = (float)(srcdatalength) / targetdatalength; + + for( i = 0; i < targetdatalength; ++i ) + { + float flindex = i * scalerate + bias; + + if( flindex > (srcdatalength - 1)) + { + flindex -= (int)(flindex - (srcdatalength - 1)); + } + + int srcindex = (int)(flindex); + + float t = flindex - srcindex; + + for( j = 0; j < (int)half; j++ ) + 
{ + gausskernel[j] = (float)hb_fit_gauss_kernel((half - j) - 1 + t ); + } + + for( j = 0; j < (int)half; j++ ) + { + gausskernel[half + j] = (float)hb_fit_gauss_kernel( j + 1 - t ); + } + + while( srcindex < (int)half - 1 ) + { + /* -1 0 1 2 + * M1 S P1 P2 + * + * if srcindex is 0, M1 and S will be the same sample. To keep the + * convolution kernel from having to check for edge conditions, move + * srcindex to 1, then slide down the coefficients + */ + srcindex += 1; + + gausskernel[0] += gausskernel[1]; + + for( j = 1; j < filterLength - 1; j++ ) + { + gausskernel[j] = gausskernel[j + 1]; + } + + gausskernel[filterLength - 1] = 0; + } + + while( srcindex >= srcdatalength - half ) + { + /* If srcindex is near the edge, shift down srcindex and slide up + * the coefficients + */ + srcindex -= 1; + + gausskernel[3] += gausskernel[2]; + + for( j = filterLength - 2; j > 0; j-- ) + { + gausskernel[j] = gausskernel[j - 1]; + } + + gausskernel[0] = 0; + } + + *pmappedindex++ = srcindex - half + 1; + + // Store normalized Gauss kernel + + float sumtemp = 0; + + for( j = 0; j < filterLength; ++j ) + { + sumtemp += gausskernel[j]; + } + + for( j = 0; j < filterLength; ++j ) + { + pcoeff[targetdatalength * j + i] = gausskernel[j] / sumtemp; + } + } +} +/** +* executive scale using opencl +* get filter args +* create output buffer +* create horizontal filter buffer +* create vertical filter buffer +* create kernels +*/ +int hb_ocl_scale_func( void **data, KernelEnv *kenv ) +{ + cl_int status; + + uint8_t *in_frame = data[0]; + uint8_t *out_frame = data[1]; + int in_frame_w = (int)data[2]; + int in_frame_h = (int)data[3]; + int out_frame_w = (int)data[4]; + int out_frame_h = (int)data[5]; + hb_oclscale_t *os = data[6]; + + if( os->use_ocl_mem ) + os->h_in_buf = data[0]; + int h_filter_len = FILTER_LEN; + int v_filter_len = FILTER_LEN; + //it will make the psnr lower when filter length is 4 in the condition that the video width is shorter than 960 and width is shorter than 
544,so we set the filter length to 2 + if( out_frame_w <= 960 && out_frame_h <= 544 ) + { + h_filter_len>>=1; + v_filter_len>>=1; + } + if( !os->h_out_buf ) + { + hb_log( "Scaling With OpenCL\n" ); + //malloc filter args + float *hf_y, *hf_uv, *vf_y, *vf_uv; + int *hi_y, *hi_uv, *vi_y, *vi_uv; + hf_y = (float*)malloc( sizeof(float)*out_frame_w * h_filter_len ); + hf_uv = (float*)malloc( sizeof(float)*(out_frame_w>>1) * h_filter_len ); + hi_y = (int*)malloc( sizeof(int)*out_frame_w ); + hi_uv = (int*)malloc( sizeof(int)*(out_frame_w>>1)); + vf_y = (float*)malloc( sizeof(float)*out_frame_h * v_filter_len ); + vf_uv = (float*)malloc( sizeof(float)*(out_frame_h>>1) * v_filter_len ); + vi_y = (int*)malloc( sizeof(int)*out_frame_h ); + vi_uv = (int*)malloc( sizeof(int)*(out_frame_h>>1) ); + //get filter args + hb_set_gauss_interpolation( hf_y, hi_y, out_frame_w, in_frame_w, h_filter_len, 0 ); + hb_set_gauss_interpolation( hf_uv, hi_uv, out_frame_w>>1, in_frame_w>>1, h_filter_len, 0 ); + hb_set_gauss_interpolation( vf_y, vi_y, out_frame_h, in_frame_h, v_filter_len, 0 ); + hb_set_gauss_interpolation( vf_uv, vi_uv, out_frame_h>>1, in_frame_h>>1, v_filter_len, 0 ); + //create output buffer + if( !os->use_ocl_mem ) + { + CREATEBUF( os->h_in_buf, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, sizeof(uint8_t)* in_frame_w * in_frame_h*3/2 ); + } + CREATEBUF( os->h_out_buf, CL_MEM_WRITE_ONLY, sizeof(uint8_t) * out_frame_w * in_frame_h*3/2 ); + CREATEBUF( os->v_out_buf, CL_MEM_WRITE_ONLY, sizeof(uint8_t) * out_frame_w * out_frame_h*3/2 ); + //create horizontal filter buffer + CREATEBUF( os->h_coeff_y, CL_MEM_READ_ONLY, sizeof(float) * out_frame_w * h_filter_len ); + CREATEBUF( os->h_coeff_uv, CL_MEM_READ_ONLY, sizeof(float) * (out_frame_w>>1) * h_filter_len ); + CREATEBUF( os->h_index_y, CL_MEM_READ_ONLY, sizeof(int) * out_frame_w ); + CREATEBUF( os->h_index_uv, CL_MEM_READ_ONLY, sizeof(int) * (out_frame_w>>1) ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_coeff_y, 
CL_TRUE, 0, sizeof(float) * out_frame_w * h_filter_len, hf_y, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_coeff_uv, CL_TRUE, 0, sizeof(float) * (out_frame_w>>1) * h_filter_len, hf_uv, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_index_y, CL_TRUE, 0, sizeof(int) * out_frame_w, hi_y, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_index_uv, CL_TRUE, 0, sizeof(int) * (out_frame_w>>1), hi_uv, 0, NULL, NULL ); + //create vertical filter buffer + CREATEBUF( os->v_coeff_y, CL_MEM_READ_ONLY, sizeof(float) * out_frame_h * v_filter_len ); + CREATEBUF( os->v_coeff_uv, CL_MEM_READ_ONLY, sizeof(float) * (out_frame_h>>1) * v_filter_len ); + CREATEBUF( os->v_index_y, CL_MEM_READ_ONLY, sizeof(int) * out_frame_h ); + CREATEBUF( os->v_index_uv, CL_MEM_READ_ONLY, sizeof(int) * (out_frame_h>>1) ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->v_coeff_y, CL_TRUE, 0, sizeof(float) * out_frame_h * v_filter_len, vf_y, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->v_coeff_uv, CL_TRUE, 0, sizeof(float) * (out_frame_h>>1) * v_filter_len, vf_uv, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->v_index_y, CL_TRUE, 0, sizeof(int) * out_frame_h, vi_y, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->v_index_uv, CL_TRUE, 0, sizeof(int) * (out_frame_h>>1), vi_uv, 0, NULL, NULL ); + //create horizontal kernel + os->h_kernel = clCreateKernel( kenv->program, "frame_h_scale", NULL ); + OCLCHECK( clSetKernelArg, os->h_kernel, 1, sizeof(cl_mem), &os->h_coeff_y ); + OCLCHECK( clSetKernelArg, os->h_kernel, 2, sizeof(cl_mem), &os->h_coeff_uv ); + OCLCHECK( clSetKernelArg, os->h_kernel, 3, sizeof(cl_mem), &os->h_index_y ); + OCLCHECK( clSetKernelArg, os->h_kernel, 4, sizeof(cl_mem), &os->h_index_uv ); + OCLCHECK( clSetKernelArg, os->h_kernel, 6, sizeof(int), &in_frame_w ); + OCLCHECK( clSetKernelArg, os->h_kernel, 7, 
sizeof(int), &h_filter_len ); + //create vertical kernel + os->v_kernel = clCreateKernel( kenv->program, "frame_v_scale", NULL ); + OCLCHECK( clSetKernelArg, os->v_kernel, 1, sizeof(cl_mem), &os->v_coeff_y ); + OCLCHECK( clSetKernelArg, os->v_kernel, 2, sizeof(cl_mem), &os->v_coeff_uv ); + OCLCHECK( clSetKernelArg, os->v_kernel, 3, sizeof(cl_mem), &os->v_index_y ); + OCLCHECK( clSetKernelArg, os->v_kernel, 4, sizeof(cl_mem), &os->v_index_uv ); + OCLCHECK( clSetKernelArg, os->v_kernel, 6, sizeof(int), &in_frame_h ); + OCLCHECK( clSetKernelArg, os->v_kernel, 7, sizeof(int), &v_filter_len ); + free( hf_y ); + free( hf_uv ); + free( vf_y ); + free( vf_uv ); + free( hi_y ); + free( hi_uv ); + free( vi_y ); + free( vi_uv ); + } + //start horizontal scaling kernel + + if( !os->use_ocl_mem ) + { + if( kenv->isAMD ) + { + char *mapped = clEnqueueMapBuffer( kenv->command_queue, os->h_in_buf, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, sizeof(uint8_t) * in_frame_w * in_frame_h*3/2, 0, NULL, NULL, NULL ); + memcpy( mapped, in_frame, sizeof(uint8_t) * in_frame_w * in_frame_h*3/2 ); + clEnqueueUnmapMemObject( kenv->command_queue, os->h_in_buf, mapped, 0, NULL, NULL ); + } + else + { + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_in_buf, CL_TRUE, 0, sizeof(uint8_t) * in_frame_w * in_frame_h * 3/2, in_frame, 0, NULL, NULL ); + } + } + + kenv->kernel = os->h_kernel; + size_t dims[2]; + dims[0] = out_frame_w; + dims[1] = in_frame_h; + OCLCHECK( clSetKernelArg, kenv->kernel, 0, sizeof(cl_mem), &os->h_in_buf ); + OCLCHECK( clSetKernelArg, kenv->kernel, 5, sizeof(cl_mem), &os->h_out_buf ); + OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, dims, NULL, 0, NULL, NULL ); + //start vertical scaling kernel + + kenv->kernel = os->v_kernel; + dims[0] = out_frame_w; + dims[1] = out_frame_h; + OCLCHECK( clSetKernelArg, kenv->kernel, 0, sizeof(cl_mem), &os->h_out_buf ); + OCLCHECK( clSetKernelArg, kenv->kernel, 5, sizeof(cl_mem), &os->v_out_buf ); + 
OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, dims, NULL, 0, NULL, NULL ); + OCLCHECK( clEnqueueReadBuffer, kenv->command_queue, os->v_out_buf, CL_TRUE, 0, sizeof(uint8_t) * out_frame_w * out_frame_h * 3/2, out_frame, 0, NULL, NULL ); + + return 1; +} +/** +* function describe: this function is used to scaling video frame. it uses the gausi scaling algorithm +* parameter: +* inputFrameBuffer: the source video frame opencl buffer +* outputdata: the destination video frame buffer +* inputWidth: the width of the source video frame +* inputHeight: the height of the source video frame +* outputWidth: the width of destination video frame +* outputHeight: the height of destination video frame +*/ +int hb_ocl_scale( cl_mem in_buf, uint8_t *in_data, uint8_t *out_data, int in_w, int in_h, int out_w, int out_h, hb_oclscale_t *os ) +{ + void *data[7]; + static int init_flag = 0; + if( init_flag==0 ) + { + int st = hb_register_kernel_wrapper( "frame_h_scale", hb_ocl_scale_func ); + if( !st ) + { + printf( "register kernel[%s] faild\n", "frame_h_scale" ); + return 0; + } + init_flag++; + } + if( in_data==NULL ) + { + data[0] = in_buf; + os->use_ocl_mem = 1; + } + else + { + data[0] = in_data; + os->use_ocl_mem = 0; + } + data[1] = out_data; + data[2] = (void*)in_w; + data[3] = (void*)in_h; + data[4] = (void*)out_w; + data[5] = (void*)out_h; + data[6] = os; + if( !hb_run_kernel( "frame_h_scale", data ) ) + printf( "run kernel[%s] faild\n", "frame_scale" ); + return 0; +} +#endif diff --git a/libhb/openclkernels.h b/libhb/openclkernels.h new file mode 100644 index 000000000..0ab3014ab --- /dev/null +++ b/libhb/openclkernels.h @@ -0,0 +1,122 @@ +#ifndef USE_EXTERNAL_KERNEL + +#define KERNEL( ... 
)# __VA_ARGS__ + +char *kernel_src_hscale = KERNEL( + + typedef unsigned char fixed8; + + kernel void frame_h_scale( + global fixed8 *src, + global float *hf_Y, + global float *hf_UV, + global int *hi_Y, + global int *hi_UV, + global fixed8 *dst, + int stride, //src_width + int filter_len + ) + { + int x = get_global_id( 0 ); + int y = get_global_id( 1 ); + int width = get_global_size( 0 ); + int height = get_global_size( 1 ); + float result_Y = 0, result_U = 0, result_V = 0; + int i = 0; + + global fixed8 *src_Y = src; + global fixed8 *src_U = src_Y+stride*height; + global fixed8 *src_V = src_U+(stride>>1)*(height>>1); + + global fixed8 *dst_Y = dst; + global fixed8 *dst_U = dst_Y+width*height; + global fixed8 *dst_V = dst_U+(width>>1)*(height>>1); + + int xy = y * width + x; + global fixed8 *rowdata_Y = src_Y+(y * stride); + for( int i = 0; i>1) && x<(width>>1) ) + { + int xy = y * (width>>1) + x; + global fixed8 *rowdata_U = src_U+(y * (stride>>1)); + global fixed8 *rowdata_V = src_V+(y * (stride>>1)); + for( i = 0; i>1)] * rowdata_U[hi_UV[x] + i]); + result_V += ( hf_UV[x+i*(width>>1)] * rowdata_V[hi_UV[x] + i]); + } + dst_U[xy] = result_U; + dst_V[xy] = result_V; + } + } + ); + +char *kernel_src_vscale = KERNEL( + + kernel void frame_v_scale( + global fixed8 *src, + global float *vf_Y, + global float *vf_UV, + global int *vi_Y, + global int *vi_UV, + global fixed8 *dst, + int src_height, + int filter_len + ) + { + int x = get_global_id( 0 ); + int y = get_global_id( 1 ); + int width = get_global_size( 0 ); + int height = get_global_size( 1 ); + float result_Y = 0, result_U = 0, result_V = 0; + int i = 0; + + global fixed8 *src_Y = src; + global fixed8 *src_U = src_Y+src_height*width; + global fixed8 *src_V = src_U+(src_height>>1)*(width>>1); + + global fixed8 *dst_Y = dst; + global fixed8 *dst_U = dst_Y+height*width; + global fixed8 *dst_V = dst_U+(height>>1)*(width>>1); + + int xy = y * width + x; + for( i = 0; i>1) && x<(width>>1) ) + { + int xy = y * 
(width>>1) + x; + for( i = 0; i>1)] * src_U[(vi_UV[y] + i) * (width>>1) + x]; + result_V += vf_UV[y+i*(height>>1)] * src_V[(vi_UV[y] + i) * (width>>1) + x]; + } + dst_U[xy] = result_U; + dst_V[xy] = result_V; + } + } + ); + +char *kernel_src_nvtoyuv = KERNEL( + + kernel void nv12toyuv( global char *input, global char* output, int w, int h ) + { + int x = get_global_id( 0 ); + int y = get_global_id( 1 ); + int idx = y * (w>>1) + x; + vstore4((vload4( 0, input+(idx<<2))), 0, output+(idx<<2)); //Y + char2 uv = vload2( 0, input+(idx<<1)+w*h ); + output[idx+w*h] = uv.s0; + output[idx+w*h+((w*h)>>2)] = uv.s1; + } + ); + +#endif diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c new file mode 100644 index 000000000..7e0195a63 --- /dev/null +++ b/libhb/openclwrapper.c @@ -0,0 +1,934 @@ + +/* openclwrapper.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + */ +#ifdef USE_OPENCL + +#include +#include +#include +#include +#include "openclwrapper.h" +#include "openclkernels.h" + +//#define USE_EXTERNAL_KERNEL + +#if defined(__APPLE__) +#include +#else +#include +#endif + +#if defined(_MSC_VER) +#define strcasecmp strcmpi +#endif + +#define MAX_KERNEL_STRING_LEN 64 +#define MAX_CLFILE_NUM 50 +#define MAX_CLKERNEL_NUM 200 +#define MAX_CLFILE_PATH 255 +#define MAX_KERNEL_NUM 50 +#define MAX_KERNEL_NAME_LEN 64 + +#ifndef INVALID_HANDLE_VALUE +#define INVALID_HANDLE_VALUE NULL +#endif + +//#define THREAD_PRIORITY_TIME_CRITICAL 15 + +enum VENDOR{ + AMD = 0, + Intel, + NVIDIA, + others +}; +typedef struct _GPUEnv +{ + //share vb in all modules in hb library + cl_platform_id platform; + cl_device_type dType; + cl_context context; + cl_device_id * devices; + cl_device_id dev; + cl_command_queue command_queue; + cl_kernel 
kernels[MAX_CLFILE_NUM]; + cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file + char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256 + int file_count; // only one kernel file + + char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1]; + cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM]; + int kernel_count; + int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper + enum VENDOR vendor; +}GPUEnv; + +typedef struct +{ + char kernelName[MAX_KERNEL_NAME_LEN+1]; + char * kernelStr; +}hb_kernel_node; + +static GPUEnv gpu_env; +static int isInited = 0; +static hb_kernel_node gKernels[MAX_KERNEL_NUM]; + +#define ADD_KERNEL_CFG( idx, s, p ){\ + strcpy( gKernels[idx].kernelName, s );\ + gKernels[idx].kernelStr = p;\ + strcpy( gpu_env.kernel_names[idx], s );\ + gpu_env.kernel_count++; } + + +int hb_regist_opencl_kernel() +{ + if( !gpu_env.isUserCreated ) + memset( &gpu_env, 0, sizeof(gpu_env)); + + gpu_env.file_count = 0; //argc; + gpu_env.kernel_count = 0UL; + + ADD_KERNEL_CFG( 0, "frame_h_scale", NULL ) + ADD_KERNEL_CFG( 1, "frame_v_scale", NULL ) + ADD_KERNEL_CFG( 2, "nv12toyuv", NULL ) + + return 0; +} + +int hb_convert_to_string( + const char *filename, + char **source, + GPUEnv *gpu_info, + int idx ) +{ + int file_size; + size_t result; + FILE * file = NULL; + file_size = 0; + result = 0; + file = fopen( filename, "rb+" ); + + + if( file!=NULL ) + { + fseek( file, 0, SEEK_END ); + + file_size = ftell( file ); + rewind( file ); + *source = (char*)malloc( sizeof(char) * file_size + 1 ); + if( *source == (char*)NULL ) + { + return(0); + } + result = fread( *source, 1, file_size, file ); + if( result != file_size ) + { + free( *source ); + return(0); + } + (*source)[file_size] = '\0'; + fclose( file ); + + return(1); + } + return(0); +} + + + +int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle ) +{ + int i = 0; + cl_int status; + size_t 
numDevices; + cl_device_id *devices; + char * str = NULL; + FILE * fd = NULL; + + status = clGetContextInfo( context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_binary_generated: Get context info failed\n" ); + return 0; + } + + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + { + hb_log( "hb_binary_generated: No device found\n" ); + return 0; + } + + /* grab the handles to all of the devices in the context. */ + status = clGetContextInfo( context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + + status = 0; + /* dump out each binary into its own separate file. */ + for( i = 0; i < numDevices; i++ ) + { + char fileName[256] = {0}; + char cl_name[128] = {0}; + if( devices[i] != 0 ) + { + char deviceName[1024]; + status = clGetDeviceInfo( devices[i], + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL ); + str = (char*)strstr( cl_file_name, (char*)".cl" ); + memcpy( cl_name, cl_file_name, str - cl_file_name ); + cl_name[str - cl_file_name] = '\0'; + sprintf( fileName, "./%s-%s.bin", cl_name, deviceName ); + fd = fopen( fileName, "rb" ); + status = (fd != NULL) ? 
1 : 0; + } + } + + if( devices != NULL ) + { + free( devices ); + devices = NULL; + } + + if( fd != NULL ) + *fhandle = fd; + + return status; +} + +int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes ) +{ + FILE *output = NULL; + output = fopen( fileName, "wb" ); + if( output == NULL ) + return 0; + + fwrite( birary, sizeof(char), numBytes, output ); + fclose( output ); + + return 1; +} + + +int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name ) +{ + int i = 0; + cl_int status; + size_t *binarySizes, numDevices; + cl_device_id *devices; + char **binaries; + char *str = NULL; + + status = clGetProgramInfo( program, + CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: Get program info failed\n" ); + return 0; + } + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: No device found\n" ); + return 0; + } + /* grab the handles to all of the devices in the program. */ + status = clGetProgramInfo( program, + CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: Get program info failed\n" ); + return 0; + } + /* figure out the sizes of each of the binaries. */ + binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices ); + + status = clGetProgramInfo( program, + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, + binarySizes, NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: Get program info failed\n" ); + return 0; + } + /* copy over all of the generated binaries. 
*/ + binaries = (char**)malloc( sizeof(char *) * numDevices ); + if( binaries == NULL ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: malloc for binaries failed\n" ); + return 0; + } + + for( i = 0; i < numDevices; i++ ) + { + if( binarySizes[i] != 0 ) + { + binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] ); + if( binaries[i] == NULL ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: malloc for binary[%d] failed\n", i ); + return 0; + } + } + else + { + binaries[i] = NULL; + } + } + + status = clGetProgramInfo( program, + CL_PROGRAM_BINARIES, + sizeof(char *) * numDevices, + binaries, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: Get program info failed\n" ); + return 0; + } + /* dump out each binary into its own separate file. */ + for( i = 0; i < numDevices; i++ ) + { + char fileName[256] = {0}; + char cl_name[128] = {0}; + if( binarySizes[i] != 0 ) + { + char deviceName[1024]; + status = clGetDeviceInfo( devices[i], + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL ); + + str = (char*)strstr( cl_file_name, (char*)".cl" ); + memcpy( cl_name, cl_file_name, str - cl_file_name ); + cl_name[str - cl_file_name] = '\0'; + sprintf( fileName, "./%s-%s.bin", cl_name, deviceName ); + + if( !hb_write_binary_to_file( fileName, binaries[i], binarySizes[i] )) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: write binary[%s] failed\n", fileName ); + //printf( "opencl-wrapper: write binary[%s] failds\n", fileName); + return 0; + } //else + //printf( "opencl-wrapper: write binary[%s] succesfully\n", fileName); + } + } + + // Release all resouces and memory + for( i = 0; i < numDevices; i++ ) + { + if( binaries[i] != NULL ) + { + free( binaries[i] ); + binaries[i] = NULL; + } + } + + if( binaries != NULL ) + { + free( binaries ); + binaries = NULL; + } + + if( binarySizes != NULL ) + { + free( binarySizes ); + binarySizes = NULL; + } + + if( devices != NULL ) + { + free( devices ); + 
devices = NULL; + } + return 1; +} + + +int hb_init_opencl_attr( OpenCLEnv * env ) +{ + if( gpu_env.isUserCreated ) + return 1; + + gpu_env.context = env->context; + gpu_env.platform = env->platform; + gpu_env.dev = env->devices; + gpu_env.command_queue = env->command_queue; + + gpu_env.isUserCreated = 1; + + return 0; +} + + +int hb_create_kernel( char * kernelname, KernelEnv * env ) +{ + int status; + env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status ); + env->context = gpu_env.context; + env->command_queue = gpu_env.command_queue; + return status != CL_SUCCESS ? 1 : 0; +} + +int hb_release_kernel( KernelEnv * env ) +{ + int status = clReleaseKernel( env->kernel ); + return status != CL_SUCCESS ? 1 : 0; +} + + + +int hb_init_opencl_env( GPUEnv *gpu_info ) +{ + size_t length; + cl_int status; + cl_uint numPlatforms, numDevices; + cl_platform_id *platforms; + cl_context_properties cps[3]; + char platformName[100]; + unsigned int i; + void *handle = INVALID_HANDLE_VALUE; + + /* + * Have a look at the available platforms. + */ + if( !gpu_info->isUserCreated ) + { + status = clGetPlatformIDs( 0, NULL, &numPlatforms ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: OpenCL device platform not found.\n" ); + return(1); + } + gpu_info->platform = NULL; + if( 0 < numPlatforms ) + { + platforms = (cl_platform_id*)malloc( + numPlatforms * sizeof(cl_platform_id)); + if( platforms == (cl_platform_id*)NULL ) + { + return(1); + } + status = clGetPlatformIDs( numPlatforms, platforms, NULL ); + + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: Specific opencl platform not found.\n" ); + return(1); + } + + for( i = 0; i < numPlatforms; i++ ) + { + status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, + sizeof(platformName), platformName, + NULL ); + + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: No more platform vendor info.\n" ); + return(1); + } + gpu_info->platform = platforms[i]; + + if( !strcmp( platformName, "Advanced Micro Devices, Inc." 
)) + gpu_info->vendor = AMD; + else + gpu_info->vendor = others; + + gpu_info->platform = platforms[i]; + + status = clGetDeviceIDs( gpu_info->platform /* platform */, + CL_DEVICE_TYPE_GPU /* device_type */, + 0 /* num_entries */, + NULL /* devices */, + &numDevices ); + + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: No available GPU device.\n" ); + return(1); + } + + if( numDevices ) + break; + + } + free( platforms ); + } + if( NULL == gpu_info->platform ) + { + return(1); + } + + /* + * Use available platform. + */ + cps[0] = CL_CONTEXT_PLATFORM; + cps[1] = (cl_context_properties)gpu_info->platform; + cps[2] = 0; + /* Check for GPU. */ + gpu_info->dType = CL_DEVICE_TYPE_GPU; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + if((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) + { + gpu_info->dType = CL_DEVICE_TYPE_CPU; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + } + if((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) + { + gpu_info->dType = CL_DEVICE_TYPE_DEFAULT; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + } + if((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) + { + hb_log( "ERROR: Create opencl context error.\n" ); + return(1); + } + /* Detect OpenCL devices. 
*/ + /* First, get the size of device list data */ + status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, + 0, NULL, &length ); + if((status != CL_SUCCESS) || (length == 0)) + { + hb_log( "ERROR: Get the list of devices in context error.\n" ); + return(1); + } + /* Now allocate memory for device list based on the size we got earlier */ + gpu_info->devices = (cl_device_id*)malloc( length ); + if( gpu_info->devices == (cl_device_id*)NULL ) + { + return(1); + } + /* Now, get the device list data */ + status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length, + gpu_info->devices, NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: Get the device list data in context error.\n" ); + return(1); + } + + /* Create OpenCL command queue. */ + gpu_info->command_queue = clCreateCommandQueue( gpu_info->context, + gpu_info->devices[0], + 0, &status ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: Create opencl command queue error.\n" ); + return(1); + } + } + + /* Create OpenCL command queue. 
*/ + /*if(!gpu_info->isUserCreated) + gpu_info->command_queue = clCreateCommandQueue(gpu_info->context, + gpu_info->devices[0], + 0, &status); + else + gpu_info->command_queue = clCreateCommandQueue(gpu_info->context, + gpu_info->dev, + 0, &status); + + if ((gpu_info->command_queue == (cl_command_queue) NULL)) + return(1); + */ + + + if( clGetCommandQueueInfo( gpu_info->command_queue, + CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle), + &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE ) + { + SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL ); + } + + return 0; +} + + +int hb_release_opencl_env( GPUEnv *gpu_info ) +{ + if( !isInited ) + return 1; + int i; + + for( i = 0; iisUserCreated = 0; + return 1; +} + + +int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ) +{ + int i; + for( i = 0; i < gpu_env.kernel_count; i++ ) + { + if( strcasecmp( kernel_name, gpu_env.kernel_names[i] )==0 ) + { + gpu_env.kernel_functions[i] = function; + return(1); + } + } + return(0); +} + +int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name ) +{ + int i; + for( i = 0; i < gpu_env->file_count; i++ ) + { + if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name )==0 ) + { + if( gpu_env->programs[i] != NULL ) + return(1); + } + } + + return(0); +} + +int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, + int indx, const char *build_option ) +{ + cl_int status; + size_t length; + char *source_str; + const char *source; + size_t source_size[1]; + char *buildLog = NULL; + int b_error, binary_status, binaryExisted; + char * binary; + size_t numDevices; + cl_device_id *devices; + FILE * fd; + FILE * fd1; + int idx; + + if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 ) + return (1); + + idx = gpu_info->file_count; + +#ifdef USE_EXTERNAL_KERNEL + status = hb_convert_to_string( filename, &source_str, gpu_info, idx ); +#else + int kernel_src_size = strlen( kernel_src_hscale )+strlen( 
kernel_src_vscale )+strlen( kernel_src_nvtoyuv ); + source_str = (char*)malloc( kernel_src_size+2 ); + strcpy( source_str, kernel_src_hscale ); + strcat( source_str, kernel_src_vscale ); + strcat( source_str, kernel_src_nvtoyuv ); +#endif + + if( status == 0 ) + return(0); + + source = source_str; + source_size[0] = strlen( source ); + + binaryExisted = 0; + if((binaryExisted = hb_binary_generated( gpu_info->context, filename, &fd )) == 1 ) + { + status = clGetContextInfo( gpu_info->context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ){ + hb_log( "ERROR: Get the number of devices in context error.\n" ); + return 0; + } + + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + return 0; + + b_error = 0; + length = 0; + b_error |= fseek( fd, 0, SEEK_END ) < 0; + b_error |= ( length = ftell( fd ) ) <= 0; + b_error |= fseek( fd, 0, SEEK_SET ) < 0; + if( b_error ) + return 0; + + binary = (char*)malloc( length+2 ); + if( !binary ) + return 0; + + memset( binary, 0, length+2 ); + b_error |= fread( binary, 1, length, fd ) != length; + if( binary[length-1] != '\n' ) + binary[length++] = '\n'; + + fclose( fd ); + fd = NULL; + /* grab the handles to all of the devices in the context. 
*/ + status = clGetContextInfo( gpu_info->context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + + gpu_info->programs[idx] = clCreateProgramWithBinary( gpu_info->context, + numDevices, + devices, + &length, + (const unsigned char**)&binary, + &binary_status, + &status ); + + free( devices ); + devices = NULL; + } + else + { + /* create a CL program using the kernel source */ + gpu_info->programs[idx] = clCreateProgramWithSource( + gpu_info->context, 1, &source, source_size, &status ); + } + + if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){ + hb_log( "ERROR: Get list of devices in context error.\n" ); + return(0); + } + + /* create a cl program executable for all the devices specified */ + if( !gpu_info->isUserCreated ) + status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices, + build_option, NULL, NULL ); + else + status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev), + build_option, NULL, NULL ); + + if( status != CL_SUCCESS ) + { + if( !gpu_info->isUserCreated ) + status = clGetProgramBuildInfo( gpu_info->programs[idx], + gpu_info->devices[0], + CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + else + status = clGetProgramBuildInfo( gpu_info->programs[idx], + gpu_info->dev, + CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: Get GPU build information error.\n" ); + return(0); + } + buildLog = (char*)malloc( length ); + if( buildLog == (char*)NULL ) + { + return(0); + } + if( !gpu_info->isUserCreated ) + status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0], + CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); + else + status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev, + CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); + + fd1 = fopen( "kernel-build.log", "w+" ); + if( fd1 != NULL ) { + fwrite( buildLog, sizeof(char), length, fd1 ); + fclose( fd1 ); + } + + free( buildLog ); + 
return(0); + } + + strcpy( gpu_env.kernelSrcFile[idx], filename ); + + if( binaryExisted == 0 ) + hb_generat_bin_from_kernel_source( gpu_env.programs[idx], filename ); + + gpu_info->file_count += 1; + + return(1); +} + + +int hb_get_kernel_env_and_func( const char *kernel_name, + KernelEnv *env, + cl_kernel_function *function ) +{ + int i; //,program_idx ; + for( i = 0; i < gpu_env.kernel_count; i++ ) + { + if( strcasecmp( kernel_name, gpu_env.kernel_names[i] )==0 ) + { + //program_idx = 0; + //GetProgramIndex(i, &gpu_env, &program_idx); + env->context = gpu_env.context; + env->command_queue = gpu_env.command_queue; + env->program = gpu_env.programs[0]; + env->kernel = gpu_env.kernels[i]; + env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0; + *function = gpu_env.kernel_functions[i]; + return(1); + } + } + return(0); +} + + +int hb_run_kernel( const char *kernel_name, void **userdata ) +{ + KernelEnv env; + cl_kernel_function function; + int status; + memset( &env, 0, sizeof(KernelEnv)); + status = hb_get_kernel_env_and_func( kernel_name, &env, &function ); + strcpy( env.kernel_name, kernel_name ); + if( status == 1 ) + return(function( userdata, &env )); + return(0); +} + + +int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ) +{ + int status = 0; + if( MAX_CLKERNEL_NUM <= 0 ) + return 1; + if((argc > MAX_CLFILE_NUM) || (argc<0)) + return 1; + + if( !isInited ) + { + hb_regist_opencl_kernel(); + + /*initialize devices, context, comand_queue*/ + status = hb_init_opencl_env( &gpu_env ); + if( status ) + return(1); + + /*initialize program, kernel_name, kernel_count*/ + //file_name = argv[i]; + status = hb_compile_kernel_file( "hb-kernels.cl", &gpu_env, 0, build_option ); + + if( status == 0 || gpu_env.kernel_count == 0 ) + { + return(1); + + } + + isInited = 1; + } + + return(0); +} + + +int hb_release_opencl_run_env() +{ + return hb_release_opencl_env( &gpu_env ); +} + + +int hb_opencl_stats() +{ + return isInited; +} + +int hb_get_opencl_env() 
+{ + int i = 0; + cl_int status; + size_t numDevices; + cl_device_id *devices; + /*initialize devices, context, comand_queue*/ + status = hb_init_opencl_env( &gpu_env ); + if( status ) + return(1); + status = clGetContextInfo( gpu_env.context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + return 0; + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + return 0; + /* grab the handles to all of the devices in the context. */ + status = clGetContextInfo( gpu_env.context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + status = 0; + /* dump out each binary into its own separate file. */ + for( i = 0; i < numDevices; i++ ) + { + if( devices[i] != 0 ) + { + char deviceName[1024]; + status = clGetDeviceInfo( devices[i], + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL ); + hb_log( "GPU Device Name: %s", deviceName ); + char driverVersion[1024]; + status = clGetDeviceInfo( devices[i], + CL_DRIVER_VERSION, + sizeof(deviceName), + driverVersion, + NULL ); + hb_log( "GPU Driver Version: %s", driverVersion ); + } + } + if( devices != NULL ) + { + free( devices ); + devices = NULL; + } + return status; +} +#endif diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h new file mode 100644 index 000000000..933e7a3b3 --- /dev/null +++ b/libhb/openclwrapper.h @@ -0,0 +1,79 @@ +/* openclwrapper.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + + */ +#ifndef __OPENCL_WRAPPER_H +#define __OPENCL_WRAPPER_H +#include "common.h" + +//support AMD opencl +#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +typedef struct _KernelEnv +{ +#ifdef USE_OPENCL + cl_context context; + cl_command_queue command_queue; + cl_program program; + cl_kernel kernel; +#endif + char kernel_name[150]; + int isAMD; +}KernelEnv; + +typedef struct _OpenCLEnv +{ +#ifdef USE_OPENCL + cl_platform_id platform; + cl_context context; + cl_device_id devices; + cl_command_queue command_queue; +#endif +}OpenCLEnv; + + +//user defined, this is function wrapper which is used to set the input parameters , +//luanch kernel and copy data from GPU to CPU or CPU to GPU. +typedef int (*cl_kernel_function)( void **userdata, KernelEnv *kenv ); + +// registe a wapper for running the kernel specified by the kernel name +int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ); + +// run kernel , user call this function to luanch kernel. +// kernel_name: this kernel name is used to find the kernel in opencl runtime environment +// userdata: this userdata is the all parameters for running the kernel specified by kernel name +int hb_run_kernel( const char *kernel_name, void **userdata ); + +// init the run time environment , this function must be called befor calling any function related to opencl +// the argc must be set zero , argv must be set NULL, build_option is the options for build the kernel. 
+int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ); + + +//relase all resource about the opencl , this function must be called after calling any functions related to opencl +int hb_release_opencl_run_env(); + + +// get the opencl status , 0: not init ; 1, inited; this function is used the check whether or not the opencl run time has been created +int hb_opencl_stats(); + +// update opencl run time environments , such as commandqueue , platforme, context. program +int hb_init_opencl_attr( OpenCLEnv * env ); + +// create kernel object by a kernel name on the specified opencl run time indicated by env parameter +int hb_create_kernel( char * kernelname, KernelEnv * env ); + +// release kernel object which is generated by calling the hb_create_kernel api +int hb_release_kernel( KernelEnv * env ); + +int hb_get_opencl_env(); +#endif diff --git a/libhb/stream.c b/libhb/stream.c index 02a49e20c..bf3f726a6 100644 --- a/libhb/stream.c +++ b/libhb/stream.c @@ -16,6 +16,7 @@ #include "lang.h" #include "a52dec/a52.h" #include "libbluray/bluray.h" +#include "vadxva2.h" #define min(a, b) a < b ? 
a : b #define HB_MAX_PROBE_SIZE (1*1024*1024) @@ -609,6 +610,8 @@ static int hb_stream_get_type(hb_stream_t *stream) if ( fread(buf, 1, sizeof(buf), stream->file_handle) == sizeof(buf) ) { + if ( hb_get_gui_info(&hb_gui, 1) || (hb_get_gui_info(&hb_gui, 3) == 0) ) + return 0; int psize; if ( ( psize = hb_stream_check_for_ts(buf) ) != 0 ) { @@ -1096,7 +1099,24 @@ hb_title_t * hb_stream_title_scan(hb_stream_t *stream, hb_title_t * title) { hb_log( "transport stream missing PCRs - using video DTS instead" ); } - + if ( hb_get_gui_info(&hb_gui, 3) == 0 ) + { + hb_va_dxva2_t * dxva2 = NULL; + dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param ); + if (dxva2) + { + title->uvd_support = 1; + hb_va_close(dxva2); + dxva2 = NULL; + } + else + title->uvd_support = 0; +#ifdef USE_OPENCL + title->opencl_support = TestGPU(); +#else + title->opencl_support = 1; +#endif + } // Height, width, rate and aspect ratio information is filled in // when the previews are built return title; @@ -5624,6 +5644,24 @@ static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream, hb_title_t *title ) hb_list_add( title->list_chapter, chapter ); } + if ( hb_get_gui_info(&hb_gui, 3) == 0 ) + { + hb_va_dxva2_t * dxva2 = NULL; + dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param ); + if (dxva2) + { + title->uvd_support = 1; + hb_va_close(dxva2); + dxva2 = NULL; + } + else + title->uvd_support = 0; +#ifdef USE_OPENCL + title->opencl_support = TestGPU(); +#else + title->opencl_support = 1; +#endif + } return title; } diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c new file mode 100644 index 000000000..e31105f7f --- /dev/null +++ b/libhb/vadxva2.c @@ -0,0 +1,827 @@ +/* vadxva2.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + +#include "vadxva2.h" +#include "CL/cl.h" +#include "oclnv12toyuv.h" + +static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ); +static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ); +static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 ); +static void hb_d3d_destroy_device( hb_va_dxva2_t *dxvva2 ); +static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 ); +static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 ); +static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 ); +static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 ); +static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output ); +static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt ); +static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 ); +static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format ); +static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid ); +static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 ); + +#ifdef USE_OPENCL +int TestGPU() +{ + int status = 1; + unsigned int i; + cl_uint numPlatforms = 0; + status = clGetPlatformIDs(0,NULL,&numPlatforms); + if(status != 0) + { + goto end; + } + if(numPlatforms > 0) + { + cl_platform_id* platforms = (cl_platform_id* )malloc (numPlatforms* sizeof(cl_platform_id)); + status = clGetPlatformIDs (numPlatforms, platforms,NULL); + if(status != 0) + { + goto end; + } + for (i=0; i < numPlatforms; i++) + { + char pbuff[100]; + cl_uint numDevices; + status = clGetPlatformInfo( + platforms[i], + CL_PLATFORM_VENDOR, + sizeof (pbuff), + pbuff, + NULL); + if (status) + continue; + status = clGetDeviceIDs(platforms[i], + CL_DEVICE_TYPE_GPU , + 0 , + NULL , + &numDevices); + if (status != CL_SUCCESS) + continue; + if(numDevices) + break; + } + 
free(platforms); + } + end: + return status; +} +#endif +/** + * It destroys a Direct3D device manager + */ +static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 ) +{ + if( dxva2->devmng ) + IDirect3DDeviceManager9_Release( dxva2->devmng ); +} +/** + * It releases a Direct3D device and its resources. + */ +static void hb_d3d_destroy_device( hb_va_dxva2_t *dxva2 ) +{ + if( dxva2->d3ddev ) + IDirect3DDevice9_Release( dxva2->d3ddev ); + if( dxva2->d3dobj ) + IDirect3D9_Release( dxva2->d3dobj ); +} +/** + * It destroys a DirectX video service + */ +static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 ) +{ + if( dxva2->device ) + IDirect3DDeviceManager9_CloseDeviceHandle( dxva2->devmng, dxva2->device ); + + if( dxva2->vs ) + IDirectXVideoDecoderService_Release( dxva2->vs ); +} + +static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format ) +{ + unsigned i; + for( i = 0; d3d_formats[i].name; i++ ) + { + if( d3d_formats[i].format == format ) + return &d3d_formats[i]; + } + return NULL; +} + +static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 ) +{ + switch( dxva2->render ) + { + case MAKEFOURCC( 'N', 'V', '1', '2' ): + dxva2->output = MAKEFOURCC( 'Y', 'V', '1', '2' ); + break; + default: + dxva2->output = dxva2->render; + break; + } +} + +void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame ) +{ + LPDIRECT3DSURFACE9 d3d = (LPDIRECT3DSURFACE9)(uintptr_t)frame->data[3]; + unsigned i; + for( i = 0; i < dxva2->surface_count; i++ ) + { + hb_va_surface_t *surface = &dxva2->surface[i]; + if( surface->d3d == d3d ) + surface->refcount--; + } +} + + +void hb_va_close( hb_va_dxva2_t *dxva2 ) +{ + hb_dx_destroy_video_decoder( dxva2 ); + hb_dx_destroy_video_service( dxva2 ); + hb_d3d_destroy_device_manager( dxva2 ); + hb_d3d_destroy_device( dxva2 ); + + if( dxva2->hdxva2_dll ) + FreeLibrary( dxva2->hdxva2_dll ); + if( dxva2->hd3d9_dll ) + FreeLibrary( dxva2->hd3d9_dll ); + +#ifdef USE_OPENCL + if ( dxva2->nv12toyuv_tmp_in ) + free( 
dxva2->nv12toyuv_tmp_in ); + if ( dxva2->nv12toyuv_tmp_out ) + free( dxva2->nv12toyuv_tmp_out ); +#endif + free( dxva2->description ); + free( dxva2 ); +} + +/** + * It creates a DXVA2 decoder using the given video format + */ +static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt ) +{ + dxva2->width = fmt->width; + dxva2->height = fmt->height; + dxva2->surface_width = (fmt->width + 15) & ~15; + dxva2->surface_height = (fmt->height + 15) & ~15; + switch( codec_id ) + { + case CODEC_ID_H264: + dxva2->surface_count = 16 + 1; + break; + default: + dxva2->surface_count = 2 + 1; + break; + } + LPDIRECT3DSURFACE9 surface_list[VA_DXVA2_MAX_SURFACE_COUNT]; + if( FAILED( IDirectXVideoDecoderService_CreateSurface( dxva2->vs, + dxva2->surface_width, + dxva2->surface_height, + dxva2->surface_count - 1, + dxva2->render, + D3DPOOL_DEFAULT, + 0, + DXVA2_VideoDecoderRenderTarget, + surface_list, NULL ))) + { + hb_log( "dxva2:IDirectXVideoAccelerationService_CreateSurface failed\n" ); + dxva2->surface_count = 0; + return HB_WORK_ERROR; + } + + unsigned i; + for( i = 0; isurface_count; i++ ) + { + hb_va_surface_t *surface = &dxva2->surface[i]; + surface->d3d = surface_list[i]; + surface->refcount = 0; + surface->order = 0; + } + hb_log( "dxva2:CreateSurface succeed with %d, fmt (%dx%d) surfaces (%dx%d)\n", dxva2->surface_count, + fmt->width, + fmt->height, + dxva2->surface_width, + dxva2->surface_height ); + DXVA2_VideoDesc dsc; + memset( &dsc, 0, sizeof(dsc)); + dsc.SampleWidth = fmt->width; + dsc.SampleHeight = fmt->height; + dsc.Format = dxva2->render; + + if( fmt->rate> 0 && fmt->rate_base> 0 ) + { + dsc.InputSampleFreq.Numerator = fmt->rate; + dsc.InputSampleFreq.Denominator = fmt->rate_base; + } + else + { + dsc.InputSampleFreq.Numerator = 0; + dsc.InputSampleFreq.Denominator = 0; + } + + dsc.OutputFrameFreq = dsc.InputSampleFreq; + dsc.UABProtectionLevel = FALSE; + dsc.Reserved = 0; + + /* FIXME I am unsure we can let unknown 
everywhere */ + DXVA2_ExtendedFormat *ext = &dsc.SampleFormat; + ext->SampleFormat = 0; //DXVA2_SampleUnknown; + ext->VideoChromaSubsampling = 0; //DXVA2_VideoChromaSubsampling_Unknown; + ext->NominalRange = 0; //DXVA2_NominalRange_Unknown; + ext->VideoTransferMatrix = 0; //DXVA2_VideoTransferMatrix_Unknown; + ext->VideoLighting = 0; //DXVA2_VideoLighting_Unknown; + ext->VideoPrimaries = 0; //DXVA2_VideoPrimaries_Unknown; + ext->VideoTransferFunction = 0; //DXVA2_VideoTransFunc_Unknown; + + /* List all configurations available for the decoder */ + UINT cfg_count = 0; + DXVA2_ConfigPictureDecode *cfg_list = NULL; + if( FAILED( IDirectXVideoDecoderService_GetDecoderConfigurations( dxva2->vs, &dxva2->input, &dsc, NULL, &cfg_count, &cfg_list ))) + { + hb_log( "dxva2:IDirectXVideoDecoderService_GetDecoderConfigurations failed\n" ); + return HB_WORK_ERROR; + } + hb_log( "dxva2:we got %d decoder configurations\n", cfg_count ); + + /* Select the best decoder configuration */ + int cfg_score = 0; + for( i = 0; i < cfg_count; i++ ) + { + const DXVA2_ConfigPictureDecode *cfg = &cfg_list[i]; + hb_log( "dxva2:configuration[%d] ConfigBitstreamRaw %d\n", i, cfg->ConfigBitstreamRaw ); + int score; + if( cfg->ConfigBitstreamRaw == 1 ) + score = 1; + else if( codec_id == CODEC_ID_H264 && cfg->ConfigBitstreamRaw == 2 ) + score = 2; + else + continue; + if( IsEqualGUID( &cfg->guidConfigBitstreamEncryption, &DXVA_NoEncrypt )) + score += 16; + if( cfg_score < score ) + { + dxva2->cfg = *cfg; + cfg_score = score; + } + } + //my_release(cfg_list); + if( cfg_score <= 0 ) + { + hb_log( "dxva2:Failed to find a supported decoder configuration\n" ); + return HB_WORK_ERROR; + } + + /* Create the decoder */ + IDirectXVideoDecoder *decoder; + if( FAILED( IDirectXVideoDecoderService_CreateVideoDecoder( dxva2->vs, &dxva2->input, &dsc, &dxva2->cfg, surface_list, dxva2->surface_count, &decoder ))) + { + hb_log( "dxva2:IDirectXVideoDecoderService_CreateVideoDecoder failed\n" ); + return HB_WORK_ERROR; 
+ } + dxva2->decoder = decoder; + hb_log( "dxva2:IDirectXVideoDecoderService_CreateVideoDecoder succeed\n" ); + return HB_WORK_OK; +} + +typedef HWND (WINAPI *PROCGETSHELLWND)(); +/** + * It creates a DirectX video service + */ +static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 ) +{ + LPDIRECT3D9 (WINAPI *Create9)( UINT SDKVersion ); + Create9 = (void*)GetProcAddress( dxva2->hd3d9_dll, TEXT( "Direct3DCreate9" )); + if( !Create9 ) + { + hb_log( "dxva2:Cannot locate reference to Direct3DCreate9 ABI in DLL\n" ); + return HB_WORK_ERROR; + } + LPDIRECT3D9 d3dobj; + d3dobj = Create9( D3D_SDK_VERSION ); + if( !d3dobj ) + { + hb_log( "dxva2:Direct3DCreate9 failed\n" ); + return HB_WORK_ERROR; + } + dxva2->d3dobj = d3dobj; + D3DADAPTER_IDENTIFIER9 *d3dai = &dxva2->d3dai; + if( FAILED( IDirect3D9_GetAdapterIdentifier( dxva2->d3dobj, D3DADAPTER_DEFAULT, 0, d3dai ))) + { + hb_log( "dxva2:IDirect3D9_GetAdapterIdentifier failed\n" ); + memset( d3dai, 0, sizeof(*d3dai)); + } + + PROCGETSHELLWND GetShellWindow; + HMODULE hUser32 = GetModuleHandle( "user32" ); + GetShellWindow = (PROCGETSHELLWND) + GetProcAddress( hUser32, "GetShellWindow" ); + + D3DPRESENT_PARAMETERS *d3dpp = &dxva2->d3dpp; + memset( d3dpp, 0, sizeof(*d3dpp)); + d3dpp->Flags = D3DPRESENTFLAG_VIDEO; + d3dpp->Windowed = TRUE; + d3dpp->hDeviceWindow = NULL; + d3dpp->SwapEffect = D3DSWAPEFFECT_DISCARD; + d3dpp->MultiSampleType = D3DMULTISAMPLE_NONE; + d3dpp->PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT; + d3dpp->BackBufferCount = 0; /* FIXME what to put here */ + d3dpp->BackBufferFormat = D3DFMT_X8R8G8B8; /* FIXME what to put here */ + d3dpp->BackBufferWidth = 0; + d3dpp->BackBufferHeight = 0; + d3dpp->EnableAutoDepthStencil = FALSE; + + LPDIRECT3DDEVICE9 d3ddev; + //if (FAILED(IDirect3D9_CreateDevice(d3dobj, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, GetShellWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING|D3DCREATE_MULTITHREADED, d3dpp, &d3ddev))) + if( FAILED( IDirect3D9_CreateDevice( d3dobj, + D3DADAPTER_DEFAULT, 
+ D3DDEVTYPE_HAL, + GetShellWindow(), + D3DCREATE_HARDWARE_VERTEXPROCESSING|D3DCREATE_MULTITHREADED, + d3dpp, + &d3ddev ))) + { + hb_log( "dxva2:IDirect3D9_CreateDevice failed\n" ); + return HB_WORK_ERROR; + } + dxva2->d3ddev = d3ddev; + + + return HB_WORK_OK; +} +/** + * It creates a Direct3D device manager + */ +static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 ) +{ + HRESULT(WINAPI *CreateDeviceManager9)( UINT *pResetToken, IDirect3DDeviceManager9 ** ); + CreateDeviceManager9 = (void*)GetProcAddress( dxva2->hdxva2_dll, TEXT( "DXVA2CreateDirect3DDeviceManager9" )); + + if( !CreateDeviceManager9 ) + { + hb_log( "dxva2:cannot load function\n" ); + return HB_WORK_ERROR; + } + hb_log( "dxva2:OurDirect3DCreateDeviceManager9 Success!\n" ); + + UINT token; + IDirect3DDeviceManager9 *devmng; + if( FAILED( CreateDeviceManager9( &token, &devmng ))) + { + hb_log( "dxva2:OurDirect3DCreateDeviceManager9 failed\n" ); + return HB_WORK_ERROR; + } + dxva2->token = token; + dxva2->devmng = devmng; + hb_log( "dxva2:obtained IDirect3DDeviceManager9\n" ); + + long hr = IDirect3DDeviceManager9_ResetDevice( devmng, dxva2->d3ddev, token ); + if( FAILED( hr )) + { + hb_log( "dxva2:IDirect3DDeviceManager9_ResetDevice failed: %08x\n", (unsigned)hr ); + return HB_WORK_ERROR; + } + return HB_WORK_OK; +} +/** + * It creates a DirectX video service + */ +static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 ) +{ + HRESULT (WINAPI *CreateVideoService)( IDirect3DDevice9 *, REFIID riid, void **ppService ); + CreateVideoService = (void*)GetProcAddress( dxva2->hdxva2_dll, TEXT( "DXVA2CreateVideoService" )); + + if( !CreateVideoService ) + { + hb_log( "dxva2:cannot load function\n" ); + return HB_WORK_ERROR; + } + hb_log( "dxva2:DXVA2CreateVideoService Success!\n" ); + + HRESULT hr; + + HANDLE device; + hr = IDirect3DDeviceManager9_OpenDeviceHandle( dxva2->devmng, &device ); + if( FAILED( hr )) + { + hb_log( "dxva2:OpenDeviceHandle failed\n" ); + return HB_WORK_ERROR; + } + 
dxva2->device = device; + + IDirectXVideoDecoderService *vs; + hr = IDirect3DDeviceManager9_GetVideoService( dxva2->devmng, device, &IID_IDirectXVideoDecoderService, (void*)&vs ); + if( FAILED( hr )) + { + hb_log( "dxva2:GetVideoService failed\n" ); + return HB_WORK_ERROR; + } + dxva2->vs = vs; + + return HB_WORK_OK; +} +/** + * Find the best suited decoder mode GUID and render format. + */ +static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output ) +{ + unsigned int input_count = 0; + GUID *input_list = NULL; + if( FAILED( IDirectXVideoDecoderService_GetDecoderDeviceGuids( dxva2->vs, &input_count, &input_list ))) + { + hb_log( "dxva2:IDirectXVideoDecoderService_GetDecoderDeviceGuids failed\n" ); + return HB_WORK_ERROR; + } + unsigned i, j; + for( i = 0; i < input_count; i++ ) + { + const GUID *g = &input_list[i]; + const hb_dx_mode_t *mode = hb_dx_find_mode( g ); + if( mode ) + { + hb_log( "dxva2:'%s' is supported by hardware\n", mode->name ); + } + else + { + //hb_log( "- Unknown GUID = %08X-%04x-%04x-XXXX\n", (unsigned)g->Data1, g->Data2, g->Data3); + } + } + + for( i = 0; dxva2_modes[i].name; i++ ) + { + const hb_dx_mode_t *mode = &dxva2_modes[i]; + if( !mode->codec || mode->codec != dxva2->codec_id ) + continue; + + int is_suported = 0; + const GUID *g; + for( g = &input_list[0]; !is_suported && g < &input_list[input_count]; g++ ) + { + is_suported = IsEqualGUID( mode->guid, g ); + } + if( !is_suported ) + continue; + + hb_log( "dxva2: Trying to use '%s' as input\n", mode->name ); + unsigned int output_count = 0; + D3DFORMAT *output_list = NULL; + if( FAILED( IDirectXVideoDecoderService_GetDecoderRenderTargets( dxva2->vs, mode->guid, &output_count, &output_list ))) + { + hb_log( "dxva2:IDirectXVideoDecoderService_GetDecoderRenderTargets failed" ); + continue; + } + for( j = 0; j < output_count; j++ ) + { + const D3DFORMAT f = output_list[j]; + const hb_d3d_format_t *format = hb_d3d_find_format( f ); + if( format ) + 
{ + hb_log( "dxva2:%s is supported for output\n", format->name ); + } + else + { + hb_log( "dxvar2:%d is supported for output (%4.4s)\n", f, (const char*)&f ); + } + } + + for( j = 0; d3d_formats[j].name; j++ ) + { + const hb_d3d_format_t *format = &d3d_formats[j]; + int is_suported = 0; + unsigned k; + for( k = 0; !is_suported && k < output_count; k++ ) + { + is_suported = format->format == output_list[k]; + } + if( !is_suported ) + continue; + hb_log( "dxva2:Using '%s' to decode to '%s'\n", mode->name, format->name ); + *input = *mode->guid; + *output = format->format; + return HB_WORK_OK; + } + } + return HB_WORK_ERROR; +} +static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid ) +{ + unsigned i; + for( i = 0; dxva2_modes[i].name; i++ ) + { + if( IsEqualGUID( dxva2_modes[i].guid, guid )) + return &dxva2_modes[i]; + } + return NULL; +} + + +static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 ) +{ + if( dxva2->decoder ) + IDirectXVideoDecoder_Release( dxva2->decoder ); + dxva2->decoder = NULL; + + unsigned i; + for( i = 0; isurface_count; i++ ) + IDirect3DSurface9_Release( dxva2->surface[i].d3d ); + dxva2->surface_count = 0; +} +/** + * setup dxva2 +*/ +static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ) +{ + if( dxva2->width == width && dxva2->height == height && dxva2->decoder ) + goto ok; + + hb_dx_destroy_video_decoder( dxva2 ); + *hw = NULL; + dxva2->i_chroma = 0; + + if( width <= 0 || height <= 0 ) return HB_WORK_ERROR; + + hb_title_t fmt; + memset( &fmt, 0, sizeof(fmt)); + fmt.width = width; + fmt.height = height; + + if( hb_dx_create_video_decoder( dxva2, dxva2->codec_id, &fmt )==HB_WORK_ERROR ) + return HB_WORK_ERROR; + dxva2->hw.decoder = dxva2->decoder; + dxva2->hw.cfg = &dxva2->cfg; + dxva2->hw.surface_count = dxva2->surface_count; + dxva2->hw.surface = dxva2->hw_surface; + + unsigned i; + for( i = 0; i < dxva2->surface_count; i++ ) + dxva2->hw.surface[i] = dxva2->surface[i].d3d; + + 
hb_dx_create_video_conversion( dxva2 ); + +ok: + *hw = &dxva2->hw; + const hb_d3d_format_t *output = hb_d3d_find_format( dxva2->output ); + dxva2->i_chroma = output->codec; + return HB_WORK_OK; + +} + +static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ) +{ + /*HRESULT hr = IDirect3DDeviceManager9_TestDevice(dxva2->devmng, dxva2->device); + if (hr == DXVA2_E_NEW_VIDEO_DEVICE) + { + return HB_WORK_ERROR; + } + else if (FAILED(hr)) + { + hb_log( "dxva2:IDirect3DDeviceManager9_TestDevice %u\n", (unsigned)hr); + return HB_WORK_ERROR; + }*/ + unsigned i, old; + for( i = 0, old = 0; i < dxva2->surface_count; i++ ) + { + hb_va_surface_t *surface = &dxva2->surface[i]; + if( !surface->refcount ) + break; + if( surface->order < dxva2->surface[old].order ) + old = i; + } + if( i >= dxva2->surface_count ) + i = old; + + hb_va_surface_t *surface = &dxva2->surface[i]; + + surface->refcount = 1; + surface->order = dxva2->surface_order++; + + for( i = 0; i < 4; i++ ) + { + frame->data[i] = NULL; + frame->linesize[i] = 0; + if( i == 0 || i == 3 ) + frame->data[i] = (void*)surface->d3d; + } + return HB_WORK_OK; +} +/** + * nv12 to yuv of c reference + */ +static void hb_copy_from_nv12( uint8_t *dst, uint8_t *src[2], size_t src_pitch[2], unsigned width, unsigned height ) +{ + unsigned int i, j; + uint8_t *dstU, *dstV; + dstU = dst + width*height; + dstV = dstU + width*height/4; + unsigned int heithtUV, widthUV; + heithtUV = height/2; + widthUV = width/2; + + for( i = 0; idata[3]; + D3DLOCKED_RECT lock; + if( FAILED( IDirect3DSurface9_LockRect( d3d, &lock, NULL, D3DLOCK_READONLY ))) + { + hb_log( "dxva2:Failed to lock surface\n" ); + return HB_WORK_ERROR; + } + + if( dxva2->render == MAKEFOURCC( 'N', 'V', '1', '2' )) + { + uint8_t *plane[2] = + { + lock.pBits, + (uint8_t*)lock.pBits + lock.Pitch * dxva2->surface_height + }; + size_t pitch[2] = + { + lock.Pitch, + lock.Pitch, + }; +#ifdef USE_OPENCL + if( ( dxva2->width > job_w || dxva2->height > job_h ) && (TestGPU() == 0) ) + 
{ +/* int i; + uint8_t *tmp = (uint8_t*)malloc( dxva2->width*dxva2->height*3/2 ); + for( i = 0; i < dxva2->height; i++ ) + { + memcpy( tmp+i*dxva2->width, plane[0]+i*lock.Pitch, dxva2->width ); + if( iheight>>1 ) + memcpy( tmp+(dxva2->width*dxva2->height)+i*dxva2->width, plane[1]+i*lock.Pitch, dxva2->width ); + } +*/ + hb_ocl_nv12toyuv( plane, lock.Pitch, dxva2->width, dxva2->height, crop, dxva2 ); + //hb_ocl_nv12toyuv( tmp, dxva2->width, dxva2->height, crop, dxva2 ); + hb_ocl_scale( dxva2->cl_mem_yuv, NULL, dst, dxva2->width - ( crop[2] + crop[3] ), dxva2->height - ( crop[0] + crop[1] ), job_w, job_h, os ); + //free( tmp ); + } + else +#endif + { + hb_copy_from_nv12( dst, plane, pitch, dxva2->width, dxva2->height ); + } + } + IDirect3DSurface9_UnlockRect( d3d ); + + return HB_WORK_OK; +} + +/** + * create dxva2 service + * load library D3D9.dll + */ +hb_va_dxva2_t * hb_va_create_dxva2( hb_va_dxva2_t *dxva2, int codec_id ) +{ + if( dxva2 ) + { + hb_va_close( dxva2 ); + dxva2 = NULL; + } + + hb_va_dxva2_t *dxva = calloc( 1, sizeof(*dxva)); + if( !dxva ) return NULL; + dxva->codec_id = codec_id; + + dxva->hd3d9_dll = LoadLibrary( TEXT( "D3D9.DLL" )); + if( !dxva->hd3d9_dll ) + { + hb_log( "dxva2:cannot load d3d9.dll" ); + goto error; + } + dxva->hdxva2_dll = LoadLibrary( TEXT( "DXVA2.DLL" )); + if( !dxva->hdxva2_dll ) + { + hb_log( "dxva2:cannot load DXVA2.dll" ); + goto error; + } + + if( hb_d3d_create_device( dxva ) == HB_WORK_ERROR ) + { + hb_log( "dxva2:Failed to create Direct3D device" ); + goto error; + } + + hb_log( "dxva2:hb_d3d_create_device succeed" ); + if( hb_d3d_create_device_manager( dxva )== HB_WORK_ERROR ) + { + hb_log( "dxva2:D3dCreateDeviceManager failed" ); + goto error; + } + + + if( hb_dx_create_video_service( dxva )== HB_WORK_ERROR ) + { + hb_log( "dxva2:DxCreateVideoService failed" ); + goto error; + } + + if( hb_dx_find_video_service_conversion( dxva, &dxva->input, &dxva->render )== HB_WORK_ERROR ) + { + hb_log( 
"dxva2:DxFindVideoServiceConversion failed" ); + goto error; + } + + dxva->do_job = HB_WORK_OK; + dxva->description = "DXVA2"; + + return dxva; + +error: + hb_va_close( dxva ); + return NULL; +} + +void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context ) +{ + if( p_context->width > 0 && p_context->height > 0 ) + { + if( hb_va_setup( dxva2, &p_context->hwaccel_context, p_context->width, p_context->height )==HB_WORK_ERROR ) + { + hb_log( "dxva2:hb_va_Setup failed" ); + hb_va_close( dxva2 ); + dxva2 = NULL; + } + } + if( dxva2 ) + { + dxva2->input_pts[0] = 0; + dxva2->input_pts[1] = 0; + if( dxva2->description ) + hb_log( "dxva2:Using %s for hardware decoding\n", dxva2->description ); + p_context->draw_horiz_band = NULL; + } + +} +enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *p_context, const enum PixelFormat *pi_fmt ) +{ + int i; + static const char *ppsz_name[PIX_FMT_NB] = + { + [PIX_FMT_VDPAU_H264] = "PIX_FMT_VDPAU_H264", + [PIX_FMT_VAAPI_IDCT] = "PIX_FMT_VAAPI_IDCT", + [PIX_FMT_VAAPI_VLD] = "PIX_FMT_VAAPI_VLD", + [PIX_FMT_VAAPI_MOCO] = "PIX_FMT_VAAPI_MOCO", + [PIX_FMT_DXVA2_VLD] = "PIX_FMT_DXVA2_VLD", + [PIX_FMT_YUYV422] = "PIX_FMT_YUYV422", + [PIX_FMT_YUV420P] = "PIX_FMT_YUV420P", + }; + for( i = 0; pi_fmt[i] != PIX_FMT_NONE; i++ ) + { + hb_log( "dxva2:Available decoder output format %d (%s)", pi_fmt[i], ppsz_name[pi_fmt[i]] ? 
: "Unknown" ); + if( pi_fmt[i] == PIX_FMT_DXVA2_VLD ) + { + return pi_fmt[i]; + } + } + return avcodec_default_get_format( p_context, pi_fmt ); +} + +int hb_va_get_frame_buf( hb_va_dxva2_t *dxva2, AVCodecContext *p_context, AVFrame *frame ) +{ + /*if( va_setup(dxva2, &p_context->hwaccel_context, p_context->width, p_context->height )==HB_WORK_ERROR) + { + hb_log("dxva2:hb_va_Setup failed"); + va_close(dxva2); + dxva2 = NULL; + return HB_WORK_ERROR; + }*/ + frame->type = FF_BUFFER_TYPE_USER; + //frame->age = 256*256*256*64; + if( hb_va_get( dxva2, frame ) == HB_WORK_ERROR ) + { + hb_log( "VaGrabSurface failed\n" ); + return HB_WORK_ERROR; + } + return HB_WORK_OK; + +} diff --git a/libhb/vadxva2.h b/libhb/vadxva2.h new file mode 100644 index 000000000..9f6bf55a7 --- /dev/null +++ b/libhb/vadxva2.h @@ -0,0 +1,194 @@ +/* vadxva2.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + + + +#ifndef VA_DXVA2_H +#define VA_DXVA2_H + +#include "hbffmpeg.h" +#include "d3d9.h" +#include "libavcodec/dxva2.h" +#include "dxva2api.h" +#include "common.h" +#include "openclwrapper.h" + +#define HB_FOURCC( a, b, c, d ) ( ((uint32_t)a) | ( ((uint32_t)b) << 8 ) | ( ((uint32_t)c) << 16 ) | ( ((uint32_t)d) << 24 ) ) +#define MAKEFOURCC( a, b, c, d ) ((DWORD)(BYTE)(a) | ((DWORD)(BYTE)(b) << 8) | ((DWORD)(BYTE)(c) << 16) | ((DWORD)(BYTE)(d) << 24 )) +#define HB_CODEC_YV12 HB_FOURCC( 'Y', 'V', '1', '2' ) +#define HB_CODEC_NV12 HB_FOURCC( 'N', 'V', '1', '2' ) +#define DXVA2_E_NOT_INITIALIZED MAKE_HRESULT( 1, 4, 4096 ) +#define DXVA2_E_NEW_VIDEO_DEVICE MAKE_HRESULT( 1, 4, 4097 ) +#define DXVA2_E_VIDEO_DEVICE_LOCKED MAKE_HRESULT( 1, 4, 4098 ) +#define DXVA2_E_NOT_AVAILABLE MAKE_HRESULT( 1, 4, 4099 ) +#define VA_DXVA2_MAX_SURFACE_COUNT (64) + +static const GUID DXVA_NoEncrypt = { 0x1b81bed0, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID IID_IDirectXVideoDecoderService = {0xfc51a551, 0xd5e7, 0x11d9, {0xaf, 0x55, 0x00, 0x05, 0x4e, 0x43, 0xff, 0x02}}; +static const GUID DXVA2_ModeMPEG2_MoComp = { 0xe6a9f44b, 0x61b0, 0x4563, {0x9e, 0xa4, 0x63, 0xd2, 0xa3, 0xc6, 0xfe, 0x66} }; +static const GUID DXVA2_ModeMPEG2_IDCT = { 0xbf22ad00, 0x03ea, 0x4690, {0x80, 0x77, 0x47, 0x33, 0x46, 0x20, 0x9b, 0x7e} }; +static const GUID DXVA2_ModeMPEG2_VLD = { 0xee27417f, 0x5e28, 0x4e65, {0xbe, 0xea, 0x1d, 0x26, 0xb5, 0x08, 0xad, 0xc9} }; +static const GUID DXVA2_ModeH264_A = { 0x1b81be64, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeH264_B = { 0x1b81be65, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeH264_C = { 0x1b81be66, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID 
DXVA2_ModeH264_D = { 0x1b81be67, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeH264_E = { 0x1b81be68, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeH264_F = { 0x1b81be69, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVADDI_Intel_ModeH264_A = { 0x604F8E64, 0x4951, 0x4c54, {0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6} }; +static const GUID DXVADDI_Intel_ModeH264_C = { 0x604F8E66, 0x4951, 0x4c54, {0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6} }; +static const GUID DXVADDI_Intel_ModeH264_E = { 0x604F8E68, 0x4951, 0x4c54, {0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6} }; +static const GUID DXVA2_ModeWMV8_A = { 0x1b81be80, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeWMV8_B = { 0x1b81be81, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeWMV9_A = { 0x1b81be90, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeWMV9_B = { 0x1b81be91, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeWMV9_C = { 0x1b81be94, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeVC1_A = { 0x1b81beA0, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeVC1_B = { 0x1b81beA1, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeVC1_C = { 0x1b81beA2, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeVC1_D = { 0x1b81beA3, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; + +typedef struct +{ + int width; + int height; + int rate; + int rate_base; + +}hb_dx_format; + +typedef struct +{ + LPDIRECT3DSURFACE9 d3d; + int refcount; + unsigned int order; + +} 
hb_va_surface_t; + +typedef struct +{ + uint8_t *base; + uint8_t *buffer; + size_t size; + +} hb_copy_cache_t; + +typedef struct +{ + const char *name; + D3DFORMAT format; + uint32_t codec; + +} hb_d3d_format_t; + +typedef struct +{ + const char *name; + const GUID *guid; + int codec; +} hb_dx_mode_t; + +typedef struct +{ + char *description; + int codec_id; + uint32_t i_chroma; + int width; + int height; + HINSTANCE hd3d9_dll; + HINSTANCE hdxva2_dll; + D3DPRESENT_PARAMETERS d3dpp; + LPDIRECT3D9 d3dobj; + D3DADAPTER_IDENTIFIER9 d3dai; + LPDIRECT3DDEVICE9 d3ddev; + UINT token; + IDirect3DDeviceManager9 *devmng; + HANDLE device; + IDirectXVideoDecoderService *vs; + GUID input; + D3DFORMAT render; + DXVA2_ConfigPictureDecode cfg; + IDirectXVideoDecoder *decoder; + D3DFORMAT output; + struct dxva_context hw; + unsigned surface_count; + unsigned surface_order; + int surface_width; + int surface_height; + uint32_t surface_chroma; + hb_va_surface_t surface[VA_DXVA2_MAX_SURFACE_COUNT]; + LPDIRECT3DSURFACE9 hw_surface[VA_DXVA2_MAX_SURFACE_COUNT]; + IDirectXVideoProcessorService *ps; + IDirectXVideoProcessor *vp; + int64_t input_pts[2]; + int64_t input_dts; + int do_job; + + // running nv12toyuv kernel. 
+#ifdef USE_OPENCL + cl_kernel nv12toyuv; + cl_mem cl_mem_nv12; + cl_mem cl_mem_yuv; + uint8_t * nv12toyuv_tmp_in; + uint8_t * nv12toyuv_tmp_out; +#endif +} hb_va_dxva2_t; + + +static const hb_d3d_format_t d3d_formats[] = +{ + { "YV12", MAKEFOURCC( 'Y', 'V', '1', '2' ), HB_CODEC_YV12 }, + { "NV12", MAKEFOURCC( 'N', 'V', '1', '2' ), HB_CODEC_NV12 }, + { NULL, 0, 0 } +}; + +static const hb_dx_mode_t dxva2_modes[] = +{ + { "DXVA2_ModeMPEG2_VLD", &DXVA2_ModeMPEG2_VLD, CODEC_ID_MPEG2VIDEO }, + { "DXVA2_ModeMPEG2_MoComp", &DXVA2_ModeMPEG2_MoComp, 0 }, + { "DXVA2_ModeMPEG2_IDCT", &DXVA2_ModeMPEG2_IDCT, 0 }, + + { "H.264 variable-length decoder (VLD), FGT", &DXVA2_ModeH264_F, CODEC_ID_H264 }, + { "H.264 VLD, no FGT", &DXVA2_ModeH264_E, CODEC_ID_H264 }, + { "H.264 VLD, no FGT (Intel)", &DXVADDI_Intel_ModeH264_E, CODEC_ID_H264 }, + { "H.264 IDCT, FGT", &DXVA2_ModeH264_D, 0 }, + { "H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVA2_ModeH264_C, 0 }, + { "H.264 inverse discrete cosine transform (IDCT), no FGT (Intel)", &DXVADDI_Intel_ModeH264_C, 0 }, + { "H.264 MoComp, FGT", &DXVA2_ModeH264_B, 0 }, + { "H.264 motion compensation (MoComp), no FGT", &DXVA2_ModeH264_A, 0 }, + { "H.264 motion compensation (MoComp), no FGT (Intel)", &DXVADDI_Intel_ModeH264_A, 0 }, + + { "Windows Media Video 8 MoComp", &DXVA2_ModeWMV8_B, 0 }, + { "Windows Media Video 8 post processing", &DXVA2_ModeWMV8_A, 0 }, + + { "Windows Media Video 9 IDCT", &DXVA2_ModeWMV9_C, 0 }, + { "Windows Media Video 9 MoComp", &DXVA2_ModeWMV9_B, 0 }, + { "Windows Media Video 9 post processing", &DXVA2_ModeWMV9_A, 0 }, + + { "VC-1 VLD", &DXVA2_ModeVC1_D, CODEC_ID_VC1 }, + { "VC-1 VLD", &DXVA2_ModeVC1_D, CODEC_ID_WMV3 }, + { "VC-1 IDCT", &DXVA2_ModeVC1_C, 0 }, + { "VC-1 MoComp", &DXVA2_ModeVC1_B, 0 }, + { "VC-1 post processing", &DXVA2_ModeVC1_A, 0 }, + + { NULL, NULL, 0 } +}; + +int hb_va_get_frame_buf( hb_va_dxva2_t *dxva2, AVCodecContext *p_context, AVFrame *frame ); +int hb_va_extract( hb_va_dxva2_t *dxva2, 
uint8_t *dst, AVFrame *frame, int job_w, int job_h, int *crop, hb_oclscale_t *os ); +enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *, const enum PixelFormat * ); +hb_va_dxva2_t *hb_va_create_dxva2( hb_va_dxva2_t *dxva2, int codec_id ); +void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context ); +void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame ); +void hb_va_close( hb_va_dxva2_t *dxva2 ); + +#endif diff --git a/libhb/work.c b/libhb/work.c index 030920c59..d727f1366 100644 --- a/libhb/work.c +++ b/libhb/work.c @@ -10,6 +10,7 @@ #include "hb.h" #include "a52dec/a52.h" #include "libavformat/avformat.h" +#include "openclwrapper.h" typedef struct { @@ -552,7 +553,18 @@ static void do_job( hb_job_t * job ) job->list_work = hb_list_init(); hb_log( "starting job" ); - +#ifdef USE_OPENCL + if ( job->use_opencl ) + { + /* init opencl environment */ + hb_log( "Using GPU : Yes.\n" ); + job->use_opencl =! hb_init_opencl_run_env(0, NULL, "-I."); + } + else + hb_log( "Using GPU : NO.\n" ); +#else + hb_log( "Using GPU : NO.\n" ); +#endif /* Look for the scanned subtitle in the existing subtitle list * select_subtitle implies that we did a scan. 
*/ if( !job->indepth_scan && interjob->select_subtitle ) @@ -699,6 +711,16 @@ static void do_job( hb_job_t * job ) init.pix_fmt = PIX_FMT_YUV420P; init.width = title->width; init.height = title->height; +#ifdef USE_OPENCL + init.title_width = title->width; + init.title_height = title->height; + init.use_dxva = hb_use_dxva( title ); + if ( init.use_dxva && ( title->width > job->width || title->height > job->height ) ) + { + init.width = job->width; + init.height = job->height; + } +#endif init.par_width = job->anamorphic.par_width; init.par_height = job->anamorphic.par_height; memcpy(init.crop, job->crop, sizeof(int[4])); @@ -997,6 +1019,12 @@ static void do_job( hb_job_t * job ) vcodec = WORK_DECAVCODECV; title->video_codec_param = CODEC_ID_MPEG2VIDEO; } +#endif +#ifdef USE_OPENCL + if ( job->use_opencl && hb_use_dxva( title ) && (TestGPU() == 0) && job->use_uvd ) + { + vcodec = WORK_DECAVCODECVACCL; + } #endif hb_list_add( job->list_work, ( w = hb_get_work( vcodec ) ) ); w->codec_param = title->video_codec_param; diff --git a/make/configure.py b/make/configure.py index 3c73d8949..598ef754b 100644 --- a/make/configure.py +++ b/make/configure.py @@ -1085,7 +1085,9 @@ def createCLI(): grp.add_option( '--disable-gst', default=False, action='store_true', help=h ) h = IfHost( 'enable use of ffmpeg mpeg2 decoding', '*-*-*', none=optparse.SUPPRESS_HELP ).value grp.add_option( '--enable-ff-mpeg2', default=False, action='store_true', help=h ) - + h = IfHost( 'enable OpenCL features', '*-*-*', none=optparse.SUPPRESS_HELP ).value + grp.add_option( '--enable-opencl', default=False, action='store_true', help=h ) + cli.add_option_group( grp ) ## add launch options @@ -1518,6 +1520,7 @@ int main () doc.add( 'FEATURE.gtk.mingw', int( options.enable_gtk_mingw )) doc.add( 'FEATURE.gst', int( not options.disable_gst )) doc.add( 'FEATURE.ff.mpeg2', int( options.enable_ff_mpeg2 )) + doc.add( 'FEATURE.opencl', int( options.enable_opencl )) doc.add( 'FEATURE.xcode', int( not 
(Tools.xcodebuild.fail or options.disable_xcode or options.cross) )) if not Tools.xcodebuild.fail and not options.disable_xcode: diff --git a/test/module.defs b/test/module.defs index f057db3cc..246b62a94 100644 --- a/test/module.defs +++ b/test/module.defs @@ -43,6 +43,10 @@ else ifeq ($(BUILD.system),solaris) else ifeq (1-mingw,$(BUILD.cross)-$(BUILD.system)) ifeq ($(HAS.dlfcn),1) TEST.GCC.l += dl +endif +ifeq (1,$(FEATURE.opencl)) + TEST.GCC.l += OpenCL + TEST.GCC.D += USE_OPENCL endif TEST.GCC.l += pthreadGC2 iconv ws2_32 TEST.GCC.D += PTW32_STATIC_LIB diff --git a/test/test case/handbrake_test.txt b/test/test case/handbrake_test.txt new file mode 100644 index 000000000..47fb0dd8e --- /dev/null +++ b/test/test case/handbrake_test.txt @@ -0,0 +1,80 @@ +#- automated python test script +#- +#- replace the input filename with "infile", replace the output filename with "outfile.m4v" +#- the python script will replace these with the proper testing input and output file names +#- +#- --start-at frame:0 --stop-at frame:stopframe will be inserted by script, so omit it from the command line here +#- the names are used as test names and file names + +# universal +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1 + +# iPod +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -I -w 320 -l 240 -e x264 -b 700 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x level=30:bframes=0:weightp=0:cabac=0:ref=1:vbv-maxrate=768:vbv-bufsize=2000:analyse=all:me=umh:no-fast-pskip=1:subq=6:8x8dct=0:trellis=0 --verbose=1 + + +# iPhone_iPod_Touch + -i "infile" 
-t 1 -o "outfile.m4v" -f mp4 -w 480 -l 320 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1 + + +# iPhone_4 + -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1 + + +# iPad + +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1024 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1 + + +# Apple_TV + + -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:b-pyramid=none:b-adapt=2:weightb=0:trellis=0:weightp=0:vbv-maxrate=9500:vbv-bufsize=9500 --verbose=1 + +# Apple_TV_2 +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1280 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1 + + +# Android_Mid +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 270 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1 + + +# Android_High + -i 
"infile" -t 1 -o "outfile.mp4" -f mp4 -w 720 --loose-anamorphic -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x weightp=0:cabac=0 --verbose=1 + + +# Normal_1080p_to_1080p_fixed_qp +-i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1 + + +# Normal_1080p_to_720p_fixed_qp +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1 + + + +# Normal_1080p_to_1080p_13_mbps +-i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -b 13000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1 + + +# Normal_1080p_to_720p_6_mbps + -i "infile" -t 1 -c 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1 + +# high_1080p_to_1080p_fixed_qp +-i "infile" -t 1 -o "high_1080p.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 
-6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1 + + +# high_1080p_to_720p_fixed qp + -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1 + + +# high_1080p_to_1080p_13_mbps + -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -b 13000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1 + + +# high_1080p_to_720p_6mbps +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1 + +# strange_resolution +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 1078 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1 + + +#-end of script diff --git a/test/test case/readme.txt b/test/test case/readme.txt new file mode 100644 index 000000000..8c182e26c --- /dev/null +++ 
b/test/test case/readme.txt @@ -0,0 +1,43 @@ +Handbrake automation scripts + +this script is written in Python (2.7). It should work with any open source python distribution. +I prefer to use enthought.com's free version of Python. + +To use this script: +1) copy handbrake_test.txt to the handbrakecli.exe directory +2) copy run_handbrake.py to the handbrakecli.exe directory +3) copy the input video files to the handbrakecli.exe directory +4) in a command window, go to the handbrakecli.exe directory +5) type:run_handbrake.py and hit return + +you should see the command lines and % complete in the command window. Output files are located in the ./output directory. +there will be a separate output video file, log file (the handbrake output), and a testresults.csv file + +When the test is complete, import the testresults.csv into an Excel spreadsheet. + +Common Problems: + +you will see zero fps and psnr if handbrakecli.exe exists with a failure. Common problems are missing video files from the current directory. + + + +Notes: + +The run_handbrake.py is a text file python script. The top of the file contains configuration parameters such as +the input video files to use, etc. change these at will. + +The handbrake_test.txt is a "script" file that contains the individual tests to run. The format is: + +#- is a comment + +# name of test (also used as output log file name, so don't use strange characters here + +-i this is the handbrakecli.exe options line + + +blank lines are ignored. + +To enable UVD decoding, add -P -U to each execution line in handbrake_test.txt. 
+Example: +# universal +-i "infile" -t 1 -o "outfile.m4v" -P -U -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1 diff --git a/test/test case/run_handbrake.py b/test/test case/run_handbrake.py new file mode 100644 index 000000000..5efb0ac63 --- /dev/null +++ b/test/test case/run_handbrake.py @@ -0,0 +1,126 @@ +#!/usr/bin/python + +import sys, string, os, subprocess, time +from multiprocessing import Process + + +##### This script uses as input a 'script file' +##### each line of the script file is a handbrakecli.exe options line (without the handbrakecli.exe) +##### this is read in, each line is executed and the output results are parsed, pulling out fps, and Global PSNR + +inputscript = 'handbrake_test.txt' + + +#### input image files + +###inputimagefiles = ['x264.avi'] +inputimagefiles = ['blueangels_20m.m2ps','blueangels.m2ts','BigBuckBunny.VOB'] +#### output directory +outdir = 'output/' + +#### output CSV file +outcsv = 'testresults.csv' + +#### limit to the number of frames processed +endframe = 2000 + + +def run_handbrake(execline, testname): + #os.chdir(handbrakedir) + #process = subprocess.Popen(execline, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + proc = subprocess.Popen(execline, bufsize=-1, stderr=subprocess.PIPE, shell=True) + + [sout, serr] = proc.communicate() + + # stdout has the percentage complete for the given task + # stderr has all of the output for data collection + + lines = serr.splitlines(); + + psnr = 0.0 + for line in lines: + if line.find("PSNR Mean") > 0: + if line.find("kb/s") > 0: + x = line.find("Global:") + psnr = float(line[x+7:x+7+6]) + + + + # find fps + fps = 0.0 + for line in lines: + if line.find("work: average") > 0: + if 
line.find("fps") > 0: + x = line.find("job is") + y = line.find("fps") + fps = float(line[x+6:y]) + + + print "fps = " + str(fps) + ", PSNR = " + str(psnr) + + logFILE = open(outdir + testname + '.log', "w") + logFILE.write(serr) + logFILE.close() + + #if process.returncode == None: process.wait() + + return [psnr, fps] + + + +########## main entry point ####### +def mytask(): + + if os.path.exists(outdir) == False: + print 'Creating output directory: ' + outdir + os.makedirs(outdir) + else: + print 'Output directory exists' + + scriptFILE = open(inputscript, "r") + + script = scriptFILE.read() + + outFILE = open(outdir + outcsv, "w") + + script_dict = [s for s in script.splitlines() if s] + + # get the order of the tests + testname = list() + for line in script_dict: + if line.startswith('# '): + testname.append(line[1:]) + + # test code + #for item in testname: + # print item + + outFILE.write("input file, handbrake preset, fps, psnr\n") + + for inimg in inputimagefiles: + i = iter(testname) + [filename, part, fileext] = inimg.partition('.') + for line in script_dict: + if line.startswith('#') == 0: + test = i.next() + ex = line.replace("infile", inimg) + ex = ex.replace("outfile", outdir+fileext + '_' + test.lstrip()) + ex = "handbrakecli.exe " + ex + print "-------------" + test + "----------------" + print ex + [psnr, fps] = run_handbrake(ex, fileext+ '_' + test.lstrip()) + outstr = inimg + "," + test + "," + str(fps) + "," + str(psnr) + "\n" + outFILE.write(outstr) + outFILE.flush() + os.fsync(outFILE.fileno()) + time.sleep(30) + print outstr + + outFILE.close() + print "done" + +if __name__ == '__main__': + p = Process(target = mytask) + p.start() + p.join() + diff --git a/test/test case/sample_testresults.csv b/test/test case/sample_testresults.csv new file mode 100644 index 000000000..f2d7acd47 --- /dev/null +++ b/test/test case/sample_testresults.csv @@ -0,0 +1,52 @@ +input file, handbrake preset, fps, psnr +blueangels_20m.m2ps, universal,51.249393,43.329 
+blueangels_20m.m2ps, iPod,104.499466,43.39 +blueangels_20m.m2ps, iPhone_iPod_Touch,82.94838,42.932 +blueangels_20m.m2ps, iPhone_4,27.005239,43.781 +blueangels_20m.m2ps, iPad,24.081524,43.829 +blueangels_20m.m2ps, Apple_TV,26.035112,43.496 +blueangels_20m.m2ps, Apple_TV_2,15.095184,44.122 +blueangels_20m.m2ps, Android_Mid,85.799118,41.523 +blueangels_20m.m2ps, Android_High,51.151642,42.586 +blueangels_20m.m2ps, Normal_1080p_to_1080p_fixed_qp,24.162228,44.721 +blueangels_20m.m2ps, Normal_1080p_to_720p_fixed_qp,43.044079,43.631 +blueangels_20m.m2ps, Normal_1080p_to_1080p_13_mbps,23.51717,45.031 +blueangels_20m.m2ps, Normal_1080p_to_720p_6_mbps,39.531132,44.304 +blueangels_20m.m2ps, high_1080p_to_1080p_fixed_qp,5.44732,45.031 +blueangels_20m.m2ps, high_1080p_to_720p_fixed qp,11.749803,43.892 +blueangels_20m.m2ps, high_1080p_to_1080p_13_mbps,5.319024,45.168 +blueangels_20m.m2ps, high_1080p_to_720p_6mbps,11.092562,44.167 +blueangels_20m.m2ps, strange_resolution,35.530605,44.236 +blueangels.m2ts, universal,38.152428,43.238 +blueangels.m2ts, iPod,58.110367,43.381 +blueangels.m2ts, iPhone_iPod_Touch,51.766876,42.914 +blueangels.m2ts, iPhone_4,21.514906,43.562 +blueangels.m2ts, iPad,19.358091,43.586 +blueangels.m2ts, Apple_TV,21.262102,43.3 +blueangels.m2ts, Apple_TV_2,12.826241,43.734 +blueangels.m2ts, Android_Mid,51.420086,41.512 +blueangels.m2ts, Android_High,36.581734,42.514 +blueangels.m2ts, Normal_1080p_to_1080p_fixed_qp,19.23077,43.986 +blueangels.m2ts, Normal_1080p_to_720p_fixed_qp,31.648577,43.284 +blueangels.m2ts, Normal_1080p_to_1080p_13_mbps,18.489985,44.288 +blueangels.m2ts, Normal_1080p_to_720p_6_mbps,30.822269,43.864 +blueangels.m2ts, high_1080p_to_1080p_fixed_qp,5.114225,44.334 +blueangels.m2ts, high_1080p_to_720p_fixed qp,10.028431,43.695 +blueangels.m2ts, high_1080p_to_1080p_13_mbps,5.020767,44.382 +blueangels.m2ts, high_1080p_to_720p_6mbps,9.411227,44.107 +blueangels.m2ts, strange_resolution,27.356098,44.088 +BigBuckBunny.VOB, universal,74.327034,43.579 
+BigBuckBunny.VOB, iPod,212.377899,43.668 +BigBuckBunny.VOB, iPhone_iPod_Touch,133.710114,43.028 +BigBuckBunny.VOB, iPhone_4,33.673248,44.976 +BigBuckBunny.VOB, iPad,29.014545,45.052 +BigBuckBunny.VOB, Apple_TV,35.109959,44.672 +BigBuckBunny.VOB, Apple_TV_2,18.789587,45.661 +BigBuckBunny.VOB, Android_Mid,167.233826,41.377 +BigBuckBunny.VOB, Android_High,71.774277,42.81 +BigBuckBunny.VOB, Normal_1080p_to_1080p_fixed_qp,173.294464,42.192 +BigBuckBunny.VOB, Normal_1080p_to_720p_fixed_qp,67.47831,43.995 +BigBuckBunny.VOB, Normal_1080p_to_1080p_13_mbps,91.628754,53.538 +BigBuckBunny.VOB, Normal_1080p_to_720p_6_mbps,50.870552,46.778 +BigBuckBunny.VOB, high_1080p_to_1080p_fixed_qp,7.289626,47.018 +BigBuckBunny.VOB, high_1080p_to_720p_fixed qp,18.983526,45.495 diff --git a/test/test.c b/test/test.c index 472fabd54..a4ced522d 100644 --- a/test/test.c +++ b/test/test.c @@ -22,6 +22,7 @@ #include "hb.h" #include "lang.h" #include "parsecsv.h" +#include "openclwrapper.h" #if defined( __APPLE_CC__ ) #import @@ -139,6 +140,8 @@ static int start_at_frame = 0; static int64_t stop_at_pts = 0; static int stop_at_frame = 0; static uint64_t min_title_duration = 10; +static int use_opencl = 0; +static int use_uvd = 0; /* Exit cleanly on Ctrl-C */ static volatile int die = 0; @@ -209,7 +212,9 @@ int main( int argc, char ** argv ) /* Init libhb */ h = hb_init( debug, update ); hb_dvd_set_dvdnav( dvdnav ); - +#ifdef USE_OPENCL + hb_get_opencl_env(); +#endif /* Show version */ fprintf( stderr, "%s - %s - %s\n", HB_PROJECT_TITLE, HB_PROJECT_BUILD_TITLE, HB_PROJECT_URL_WEBSITE ); @@ -250,6 +255,7 @@ int main( int argc, char ** argv ) titleindex = 0; } + hb_set_gui_info(&hb_gui, use_uvd, use_opencl, titleindex); hb_scan( h, input, titleindex, preview_count, store_previews, min_title_duration * 90000LL ); /* Wait... 
*/ @@ -422,6 +428,10 @@ static void PrintTitleInfo( hb_title_t * title, int feature ) (float) title->rate / title->rate_base ); fprintf( stderr, " + autocrop: %d/%d/%d/%d\n", title->crop[0], title->crop[1], title->crop[2], title->crop[3] ); + + fprintf( stderr, " + support opencl: %d \n", title->opencl_support); + fprintf( stderr, " + support uvd: %d\n", title->uvd_support); + fprintf( stderr, " + chapters:\n" ); for( i = 0; i < hb_list_count( title->list_chapter ); i++ ) { @@ -1388,6 +1398,10 @@ static int HandleEvents( hb_handle_t * h ) job->maxWidth = maxWidth; if (maxHeight) job->maxHeight = maxHeight; + if (use_uvd) + { + job->use_uvd = use_uvd; + } switch( anamorphic_mode ) { @@ -1560,7 +1574,13 @@ static int HandleEvents( hb_handle_t * h ) filter_str = hb_strdup_printf("%d:%d:%d:%d:%d:%d", job->width, job->height, job->crop[0], job->crop[1], job->crop[2], job->crop[3] ); - filter = hb_filter_init( HB_FILTER_CROP_SCALE ); + +#ifdef USE_OPENCL + if ( use_opencl ) + filter = hb_filter_init( HB_FILTER_CROP_SCALE_ACCL ); + else +#endif + filter = hb_filter_init( HB_FILTER_CROP_SCALE ); hb_add_filter( job, filter, filter_str ); free( filter_str ); @@ -2485,7 +2505,11 @@ static int HandleEvents( hb_handle_t * h ) job->frame_to_start = start_at_frame; subtitle_scan = 0; } - +#ifdef USE_OPENCL + job->use_opencl = use_opencl; +#else + job->use_opencl = 0; +#endif if( subtitle_scan ) { /* @@ -2662,6 +2686,7 @@ static void ShowHelp() " -z, --preset-list See a list of available built-in presets\n" " --no-dvdnav Do not use dvdnav for reading DVDs\n" " (experimental, enabled by default for testing)\n" + " --no-opencl Disable use of OpenCL\n" "\n" "### Source Options-----------------------------------------------------------\n\n" @@ -2695,6 +2720,8 @@ static void ShowHelp() " 4 GB. 
Note: Breaks iPod, PS3 compatibility.\n""" " -O, --optimize Optimize mp4 files for HTTP streaming\n" " -I, --ipod-atom Mark mp4 files so 5.5G iPods will accept them\n" + " -P, --opencl-support Use OpenCL\n" + " -U, --UVD-support Use UVD hardware\n" "\n" @@ -3188,7 +3215,8 @@ static int ParseOptions( int argc, char ** argv ) #define X264_PRESET 284 #define X264_TUNE 285 #define H264_LEVEL 286 - #define NORMALIZE_MIX 287 + #define NO_OPENCL 287 + #define NORMALIZE_MIX 288 for( ;; ) { @@ -3197,14 +3225,17 @@ static int ParseOptions( int argc, char ** argv ) { "help", no_argument, NULL, 'h' }, { "update", no_argument, NULL, 'u' }, { "verbose", optional_argument, NULL, 'v' }, - { "no-dvdnav", no_argument, NULL, DVDNAV }, - + { "no-dvdnav", no_argument, NULL, DVDNAV }, + { "no-opencl", no_argument, NULL, NO_OPENCL }, + { "format", required_argument, NULL, 'f' }, { "input", required_argument, NULL, 'i' }, { "output", required_argument, NULL, 'o' }, { "large-file", no_argument, NULL, '4' }, { "optimize", no_argument, NULL, 'O' }, { "ipod-atom", no_argument, NULL, 'I' }, + { "use-opencl", no_argument, NULL, 'P' }, + { "use-uvd", no_argument, NULL, 'U' }, { "title", required_argument, NULL, 't' }, { "min-duration",required_argument, NULL, MIN_DURATION }, @@ -3293,7 +3324,7 @@ static int ParseOptions( int argc, char ** argv ) cur_optind = optind; c = getopt_long( argc, argv, - "hv::uC:f:4i:Io:t:c:m::M:a:A:6:s:UF::N:e:E:Q:C:" + "hv::uC:f:4i:Io:PUt:c:m::M:a:A:6:s:F::N:e:E:Q:C:" "2dD:7895gOw:l:n:b:q:S:B:r:R:x:TY:X:Z:z", long_options, &option_index ); if( c < 0 ) @@ -3364,6 +3395,12 @@ static int ParseOptions( int argc, char ** argv ) case 'I': ipod_atom = 1; break; + case 'P': + use_opencl = 1; + break; + case 'U': + use_uvd = 1; + break; case 't': titleindex = atoi( optarg ); @@ -3395,6 +3432,9 @@ static int ParseOptions( int argc, char ** argv ) } break; } + case NO_OPENCL: + use_opencl = 0; + break; case ANGLE: angle = atoi( optarg ); break; diff --git 
a/win/CS/HandBrake.ApplicationServices/HandBrake.ApplicationServices.csproj b/win/CS/HandBrake.ApplicationServices/HandBrake.ApplicationServices.csproj index 6730e2d66..56df4d218 100644 --- a/win/CS/HandBrake.ApplicationServices/HandBrake.ApplicationServices.csproj +++ b/win/CS/HandBrake.ApplicationServices/HandBrake.ApplicationServices.csproj @@ -66,6 +66,7 @@ 3.5 + diff --git a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs index e7869459b..4f3c1ef11 100644 --- a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs +++ b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs @@ -88,6 +88,8 @@ namespace HandBrake.ApplicationServices.Model this.Height = task.Height; this.IncludeChapterMarkers = task.IncludeChapterMarkers; this.IPod5GSupport = task.IPod5GSupport; + this.OpenCLSupport = task.OpenCLSupport; + this.UVDSupport = task.UVDSupport; this.KeepDisplayAspect = task.KeepDisplayAspect; this.LargeFile = task.LargeFile; this.MaxHeight = task.MaxHeight; @@ -186,6 +188,16 @@ namespace HandBrake.ApplicationServices.Model /// Gets or sets a value indicating whether IPod5GSupport. /// public bool IPod5GSupport { get; set; } + + /// + /// Gets or sets a value indicating whether OpenCLSupport. + /// + public bool OpenCLSupport { get; set; } + + /// + /// Gets or sets a value indicating whether UVDSupport. 
+ /// + public bool UVDSupport { get; set; } #endregion #region Picture diff --git a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs index 04d00c1f2..d6e6e57aa 100644 --- a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs +++ b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs @@ -115,6 +115,15 @@ namespace HandBrake.ApplicationServices.Parsing /// public string SourceName { get; set; } + /// + /// Gets or sets the OpenCL + /// + public int OpenCLSupport { get; set; } + + /// + /// Gets or sets the UVD + /// + public int UVDSupport { get; set; } #endregion /// @@ -214,6 +223,12 @@ namespace HandBrake.ApplicationServices.Parsing }; } + m = Regex.Match(output.ReadLine(), @"^ \+ support opencl: ([0-9]*)"); + if (m.Success) + thisTitle.OpenCLSupport = int.Parse(m.Groups[1].Value.Trim()); + m = Regex.Match(output.ReadLine(), @" \+ support uvd: ([0-9]*)"); + if (m.Success) + thisTitle.UVDSupport = int.Parse(m.Groups[1].Value.Trim()); thisTitle.Chapters.AddRange(Chapter.ParseList(output)); thisTitle.AudioTracks.AddRange(AudioHelper.ParseList(output)); diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/GeneralUtilities.cs b/win/CS/HandBrake.ApplicationServices/Utilities/GeneralUtilities.cs index 378898c05..01c5625fb 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/GeneralUtilities.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/GeneralUtilities.cs @@ -102,6 +102,8 @@ namespace HandBrake.ApplicationServices.Utilities logHeader.AppendLine(String.Format("CPU: {0}", SystemInfo.GetCpuCount)); logHeader.Append(String.Format("Ram: {0} MB, ", SystemInfo.TotalPhysicalMemory)); logHeader.AppendLine(String.Format("Screen: {0}x{1}", SystemInfo.ScreenBounds.Bounds.Width, SystemInfo.ScreenBounds.Bounds.Height)); + logHeader.Append(String.Format("GPU: {0}\n", SystemInfo.GetGPUName)); + logHeader.Append(String.Format("GPU driver version: {0}\n", SystemInfo.GetGPUDriverVersion)); 
logHeader.AppendLine(String.Format("Temp Dir: {0}", Path.GetTempPath())); logHeader.AppendLine(String.Format("Install Dir: {0}", Application.StartupPath)); logHeader.AppendLine(String.Format("Data Dir: {0}\n", Application.UserAppDataPath)); diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs index 8f9260079..5cb0d4768 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs @@ -101,6 +101,8 @@ namespace HandBrake.ApplicationServices.Utilities profile.Grayscale = work.Grayscale; profile.Height = work.Height.HasValue ? work.Height.Value : 0; profile.IPod5GSupport = work.IPod5GSupport; + profile.OpenCLGSupport = work.OpenCLSupport; + profile.UVDSupport = work.UVDSupport; profile.IncludeChapterMarkers = work.IncludeChapterMarkers; profile.KeepDisplayAspect = work.KeepDisplayAspect; profile.LargeFile = work.LargeFile; diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs index bea765e27..f45ba7423 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs @@ -141,6 +141,8 @@ namespace HandBrake.ApplicationServices.Utilities AddEncodeElement(xmlWriter, "Mp4HttpOptimize", "integer", parsed.OptimizeMP4 ? "1" : "0"); AddEncodeElement(xmlWriter, "Mp4LargeFile", "integer", parsed.LargeFile ? "1" : "0"); AddEncodeElement(xmlWriter, "Mp4iPodCompatible", "integer", parsed.IPod5GSupport ? "1" : "0"); + AddEncodeElement(xmlWriter, "OpenCLSupport", "integer", parsed.OpenCLSupport ? "1" : "0"); + AddEncodeElement(xmlWriter, "UVDSupport", "integer", parsed.UVDSupport ? 
"1" : "0"); AddEncodeElement(xmlWriter, "PictureAutoCrop", "integer", "1"); AddEncodeElement(xmlWriter, "PictureBottomCrop", "integer", parsed.Cropping.Bottom.ToString()); diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs index 83e551b42..735470d49 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs @@ -245,6 +245,12 @@ namespace HandBrake.ApplicationServices.Utilities if (task.OptimizeMP4) query += " -O "; + if (task.OpenCLSupport) + query += " -P "; + if (task.UVDSupport && task.OpenCLSupport) + query += " -U "; + else if (task.UVDSupport && !task.OpenCLSupport) + query += " -P -U"; return query; } diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs index f0b5d65e3..ead60f906 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs @@ -56,6 +56,8 @@ namespace HandBrake.ApplicationServices.Utilities Match grayscale = Regex.Match(input, @" -g"); Match largerMp4 = Regex.Match(input, @" -4"); Match ipodAtom = Regex.Match(input, @" -I"); + Match openclSupport = Regex.Match(input, @" -P"); + Match uvdSupport = Regex.Match(input, @" -U"); // Picture Settings Tab Match width = Regex.Match(input, @"-w ([0-9]+)"); @@ -156,6 +158,8 @@ namespace HandBrake.ApplicationServices.Utilities parsed.LargeFile = largerMp4.Success; parsed.IPod5GSupport = ipodAtom.Success; parsed.OptimizeMP4 = optimizeMP4.Success; + parsed.OpenCLSupport = openclSupport.Success; + parsed.UVDSupport = uvdSupport.Success; #endregion diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/SystemInfo.cs b/win/CS/HandBrake.ApplicationServices/Utilities/SystemInfo.cs index 
32328ab47..9f988da9b 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/SystemInfo.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/SystemInfo.cs @@ -10,7 +10,7 @@ namespace HandBrake.ApplicationServices.Utilities { using System.Windows.Forms; - + using System.Management; using Microsoft.Win32; /// @@ -56,5 +56,39 @@ namespace HandBrake.ApplicationServices.Utilities { get { return Screen.PrimaryScreen; } } + public static object GetGPUDriverVersion + { + get + { + ManagementObjectSearcher searcher = new ManagementObjectSearcher( + "select * from " + "Win32_VideoController"); + foreach (ManagementObject share in searcher.Get()) + { + foreach (PropertyData PC in share.Properties) + { + if (PC.Name.Equals("DriverVersion")) + return PC.Value; + } + } + return null; + } + } + public static object GetGPUName + { + get + { + ManagementObjectSearcher searcher = new ManagementObjectSearcher( + "select * from " + "Win32_VideoController"); + foreach (ManagementObject share in searcher.Get()) + { + foreach (PropertyData PC in share.Properties) + { + if (PC.Name.Equals("Name")) + return PC.Value; + } + } + return null; + } + } } } \ No newline at end of file diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs index a2bf8cef9..74f1683a9 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs @@ -1447,6 +1447,8 @@ namespace HandBrake.Interop nativeJob.largeFileSize = profile.LargeFile ? 1 : 0; nativeJob.mp4_optimize = profile.Optimize ? 1 : 0; nativeJob.ipod_atom = profile.IPod5GSupport ? 1 : 0; + nativeJob.opencl_support = profile.OpenCLGSupport ? 1 : 0; + nativeJob.uvd_support = profile.UVDSupport ? 
1 : 0; if (title.AngleCount > 1) { diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs index d4c95d904..a761780fa 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs @@ -145,6 +145,12 @@ namespace HandBrake.Interop.HbLib /// int public int ipod_atom; + /// int + public int opencl_support; + + /// int + public int uvd_support; + /// int public int indepth_scan; diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs index 6d2aafec3..9667d5e6e 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs @@ -27,6 +27,8 @@ namespace HandBrake.Interop.Model.Encoding public bool LargeFile { get; set; } public bool Optimize { get; set; } public bool IPod5GSupport { get; set; } + public bool OpenCLGSupport { get; set; } + public bool UVDSupport { get; set; } public int Width { get; set; } public int Height { get; set; } @@ -82,6 +84,8 @@ namespace HandBrake.Interop.Model.Encoding LargeFile = this.LargeFile, Optimize = this.Optimize, IPod5GSupport = this.IPod5GSupport, + OpenCLGSupport = this.OpenCLGSupport, + UVDSupport = this.UVDSupport, Width = this.Width, Height = this.Height, diff --git a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs index b0ae80f93..23a80d3b7 100644 --- a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs +++ b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs @@ -115,6 +115,14 @@ namespace HandBrakeWPF.ViewModels private bool isMkv; /// + /// Support Hardware Decoding + /// + private bool supportHardwareDecoding; + + /// + /// Support OpenCL + /// + private bool supportOpenCL; /// The Toolbar Status Label /// private 
string statusLabel; @@ -368,6 +376,8 @@ namespace HandBrakeWPF.ViewModels this.CurrentTask.LargeFile = selectedPreset.Task.LargeFile; this.CurrentTask.OptimizeMP4 = selectedPreset.Task.OptimizeMP4; this.CurrentTask.IPod5GSupport = selectedPreset.Task.IPod5GSupport; + this.CurrentTask.OpenCLSupport = selectedPreset.Task.OpenCLSupport; + this.CurrentTask.UVDSupport = selectedPreset.Task.UVDSupport; this.SelectedOutputFormat = selectedPreset.Task.OutputFormat; // Tab Settings @@ -590,8 +600,38 @@ namespace HandBrakeWPF.ViewModels this.NotifyOfPropertyChange("IsMkv"); } } + + /// + /// Gets or sets a value indicating whether SupportHardwareDecoding. + /// + public bool SupportHardwareDecoding + { + get + { + return this.supportHardwareDecoding; + } + set + { + this.supportHardwareDecoding = value; + this.NotifyOfPropertyChange("SupportHardwareDecoding"); + } + } /// + /// Gets or sets a value indicating whether SupportHardwareDecoding. + /// + public bool SupportOpenCL + { + get + { + return this.supportOpenCL; + } + set + { + this.supportOpenCL = value; + this.NotifyOfPropertyChange("SupportOpenCL"); + } + } /// Gets RangeMode. /// public IEnumerable OutputFormats @@ -791,6 +831,8 @@ namespace HandBrakeWPF.ViewModels this.NotifyOfPropertyChange(() => SelectedOutputFormat); this.NotifyOfPropertyChange(() => this.CurrentTask.OutputFormat); this.NotifyOfPropertyChange(() => IsMkv); + this.NotifyOfPropertyChange(() => SupportHardwareDecoding); + this.NotifyOfPropertyChange(() => SupportOpenCL); this.SetExtension(string.Format(".{0}", this.selectedOutputFormat.ToString().ToLower())); // TODO, tidy up this.VideoViewModel.RefreshTask(); @@ -1646,6 +1688,25 @@ namespace HandBrakeWPF.ViewModels ?? 
this.ScannedSource.Titles.FirstOrDefault(); this.SetupTabs(); } + if (e.Successful && this.selectedTitle != null) + { + if (this.selectedTitle.OpenCLSupport == 0) + { + this.SupportOpenCL = false; + } + else + { + this.SupportOpenCL = true; + } + if (this.selectedTitle.UVDSupport == 0) + { + this.SupportHardwareDecoding = true; + } + else + { + this.SupportHardwareDecoding = false; + } + } this.ShowStatusWindow = false; if (e.Successful) diff --git a/win/CS/HandBrakeWPF/Views/MainView.xaml b/win/CS/HandBrakeWPF/Views/MainView.xaml index ebca0805c..e360267b8 100644 --- a/win/CS/HandBrakeWPF/Views/MainView.xaml +++ b/win/CS/HandBrakeWPF/Views/MainView.xaml @@ -473,6 +473,24 @@ Converter={StaticResource boolToVisConverter}, ConverterParameter=true}" /> + + -- 2.40.0