From ab39134445f79f1684a3d8ab814085e04ade27d7 Mon Sep 17 00:00:00 2001 From: handbrake Date: Tue, 18 Dec 2012 08:40:09 +0000 Subject: [PATCH] add hardware decoder support for H264(vob/m2ts), mpeg2(vob/m2ts), VC1(WMV3/m2ts) decoding modify the GUI for adding some user options about enable/disable OpenCL or GPU add some test cases for testing our modifications. git-svn-id: svn://svn.handbrake.fr/HandBrake/branches/opencl@5100 b64f7644-9d1e-0410-96f1-a4d463321fa5 --- contrib/ffmpeg/hb-libav-avformat.patch | 214 +++ contrib/ffmpeg/module.defs | 4 +- libhb/common.c | 31 +- libhb/common.h | 83 +- libhb/cropscale.c | 13 + libhb/cropscaleaccl.c | 262 ++++ libhb/decavcodecaccl.c | 1314 +++++++++++++++++ libhb/dxva2api.c | 36 + libhb/dxva2api.h | 822 +++++++++++ libhb/hb.c | 3 +- libhb/hbffmpeg.h | 1 + libhb/internal.h | 4 +- libhb/module.defs | 7 + libhb/oclnv12toyuv.c | 222 +++ libhb/oclnv12toyuv.h | 29 + libhb/oclscale.c | 301 ++++ libhb/openclkernels.h | 122 ++ libhb/openclwrapper.c | 934 ++++++++++++ libhb/openclwrapper.h | 79 + libhb/stream.c | 40 +- libhb/vadxva2.c | 827 +++++++++++ libhb/vadxva2.h | 194 +++ libhb/work.c | 30 +- make/configure.py | 5 +- test/module.defs | 4 + test/test case/handbrake_test.txt | 80 + test/test case/readme.txt | 43 + test/test case/run_handbrake.py | 126 ++ test/test case/sample_testresults.csv | 52 + test/test.c | 54 +- .../HandBrake.ApplicationServices.csproj | 1 + .../Model/EncodeTask.cs | 12 + .../Parsing/Title.cs | 15 + .../Utilities/GeneralUtilities.cs | 2 + .../Utilities/InteropModelCreator.cs | 2 + .../Utilities/PlistUtility.cs | 2 + .../Utilities/QueryGeneratorUtility.cs | 6 + .../Utilities/QueryParserUtility.cs | 4 + .../Utilities/SystemInfo.cs | 36 +- .../HandBrakeInterop/HandBrakeInstance.cs | 2 + .../HandBrakeInterop/HbLib/hb_job_s.cs | 6 + .../Model/Encoding/EncodingProfile.cs | 4 + .../HandBrakeWPF/ViewModels/MainViewModel.cs | 61 + win/CS/HandBrakeWPF/Views/MainView.xaml | 18 + 44 files changed, 6090 insertions(+), 17 
deletions(-) create mode 100644 contrib/ffmpeg/hb-libav-avformat.patch create mode 100644 libhb/cropscaleaccl.c create mode 100644 libhb/decavcodecaccl.c create mode 100644 libhb/dxva2api.c create mode 100644 libhb/dxva2api.h create mode 100644 libhb/oclnv12toyuv.c create mode 100644 libhb/oclnv12toyuv.h create mode 100644 libhb/oclscale.c create mode 100644 libhb/openclkernels.h create mode 100644 libhb/openclwrapper.c create mode 100644 libhb/openclwrapper.h create mode 100644 libhb/vadxva2.c create mode 100644 libhb/vadxva2.h create mode 100644 test/test case/handbrake_test.txt create mode 100644 test/test case/readme.txt create mode 100644 test/test case/run_handbrake.py create mode 100644 test/test case/sample_testresults.csv diff --git a/contrib/ffmpeg/hb-libav-avformat.patch b/contrib/ffmpeg/hb-libav-avformat.patch new file mode 100644 index 000000000..d9f978f73 --- /dev/null +++ b/contrib/ffmpeg/hb-libav-avformat.patch @@ -0,0 +1,214 @@ +diff -Ncr hb-libav-orig/libavformat/avformat.h hb-libav-opt/libavformat/avformat.h +*** hb-libav-orig/libavformat/avformat.h Sat Jul 28 23:29:46 2012 +--- hb-libav-opt/libavformat/avformat.h Fri Nov 23 09:54:19 2012 +*************** +*** 721,726 **** +--- 721,730 ---- + int64_t codec_info_duration; + int nb_decoded_frames; + int found_decoder; ++ int64_t fps_first_dts; ++ int fps_first_dts_idx; ++ int64_t fps_last_dts; ++ int fps_last_dts_idx; + } *info; + + int pts_wrap_bits; /**< number of bits in pts (used for wrapping control) */ +diff -Ncr hb-libav-orig/libavformat/utils.c hb-libav-opt/libavformat/utils.c +*** hb-libav-orig/libavformat/utils.c Sat Jul 28 23:29:46 2012 +--- hb-libav-opt/libavformat/utils.c Fri Nov 23 09:58:45 2012 +*************** +*** 2252,2258 **** + int orig_nb_streams = ic->nb_streams; // new streams might appear, no options for those + + for(i=0;inb_streams;i++) { +! 
AVCodec *codec; + AVDictionary *thread_opt = NULL; + st = ic->streams[i]; + +--- 2252,2258 ---- + int orig_nb_streams = ic->nb_streams; // new streams might appear, no options for those + + for(i=0;inb_streams;i++) { +! const AVCodec *codec; + AVDictionary *thread_opt = NULL; + st = ic->streams[i]; + +*************** +*** 2287,2293 **** +--- 2287,2297 ---- + } + + for (i=0; inb_streams; i++) { ++ #if FF_API_R_FRAME_RATE + ic->streams[i]->info->last_dts = AV_NOPTS_VALUE; ++ #endif ++ ic->streams[i]->info->fps_first_dts = AV_NOPTS_VALUE; ++ ic->streams[i]->info->fps_last_dts = AV_NOPTS_VALUE; + } + + count = 0; +*************** +*** 2314,2321 **** + if (ic->fps_probe_size >= 0) + fps_analyze_framecount = ic->fps_probe_size; + /* variable fps and no guess at the real fps */ +! if( tb_unreliable(st->codec) && !(st->r_frame_rate.num && st->avg_frame_rate.num) +! && st->info->duration_count < fps_analyze_framecount + && st->codec->codec_type == AVMEDIA_TYPE_VIDEO) + break; + if(st->parser && st->parser->parser->split && !st->codec->extradata) +--- 2318,2325 ---- + if (ic->fps_probe_size >= 0) + fps_analyze_framecount = ic->fps_probe_size; + /* variable fps and no guess at the real fps */ +! if( tb_unreliable(st->codec) && !st->avg_frame_rate.num +! && st->codec_info_nb_frames < fps_analyze_framecount + && st->codec->codec_type == AVMEDIA_TYPE_VIDEO) + break; + if(st->parser && st->parser->parser->split && !st->codec->extradata) +*************** +*** 2383,2402 **** + break; + } + +! pkt= add_to_pktbuf(&ic->packet_buffer, &pkt1, &ic->packet_buffer_end); + if ((ret = av_dup_packet(pkt)) < 0) + goto find_stream_info_err; + + read_size += pkt->size; + + st = ic->streams[pkt->stream_index]; +! if (st->codec_info_nb_frames>1) { +! 
if (av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q) >= ic->max_analyze_duration) { + av_log(ic, AV_LOG_WARNING, "max_analyze_duration reached\n"); + break; + } +- st->info->codec_info_duration += pkt->duration; + } + { + int64_t last = st->info->last_dts; + +--- 2387,2444 ---- + break; + } + +! if (ic->flags & 0x0040) { +! pkt = &pkt1; +! } else { +! pkt = add_to_pktbuf(&ic->packet_buffer, &pkt1, +! &ic->packet_buffer_end); + if ((ret = av_dup_packet(pkt)) < 0) + goto find_stream_info_err; ++ } + + read_size += pkt->size; + + st = ic->streams[pkt->stream_index]; +! if (pkt->dts != AV_NOPTS_VALUE && st->codec_info_nb_frames > 1) { +! /* check for non-increasing dts */ +! if (st->info->fps_last_dts != AV_NOPTS_VALUE && +! st->info->fps_last_dts >= pkt->dts) { +! av_log(ic, AV_LOG_WARNING, "Non-increasing DTS in stream %d: " +! "packet %d with DTS %"PRId64", packet %d with DTS " +! "%"PRId64"\n", st->index, st->info->fps_last_dts_idx, +! st->info->fps_last_dts, st->codec_info_nb_frames, pkt->dts); +! st->info->fps_first_dts = st->info->fps_last_dts = AV_NOPTS_VALUE; +! } +! /* check for a discontinuity in dts - if the difference in dts +! * is more than 1000 times the average packet duration in the sequence, +! * we treat it as a discontinuity */ +! if (st->info->fps_last_dts != AV_NOPTS_VALUE && +! st->info->fps_last_dts_idx > st->info->fps_first_dts_idx && +! (pkt->dts - st->info->fps_last_dts) / 1000 > +! (st->info->fps_last_dts - st->info->fps_first_dts) / (st->info->fps_last_dts_idx - st->info->fps_first_dts_idx)) { +! av_log(ic, AV_LOG_WARNING, "DTS discontinuity in stream %d: " +! "packet %d with DTS %"PRId64", packet %d with DTS " +! "%"PRId64"\n", st->index, st->info->fps_last_dts_idx, +! st->info->fps_last_dts, st->codec_info_nb_frames, pkt->dts); +! st->info->fps_first_dts = st->info->fps_last_dts = AV_NOPTS_VALUE; +! } +! +! /* update stored dts values */ +! if (st->info->fps_first_dts == AV_NOPTS_VALUE) { +! 
st->info->fps_first_dts = pkt->dts; +! st->info->fps_first_dts_idx = st->codec_info_nb_frames; +! } +! st->info->fps_last_dts = pkt->dts; +! st->info->fps_last_dts_idx = st->codec_info_nb_frames; +! +! /* check max_analyze_duration */ +! if (av_rescale_q(pkt->dts - st->info->fps_first_dts, st->time_base, +! AV_TIME_BASE_Q) >= ic->max_analyze_duration) { + av_log(ic, AV_LOG_WARNING, "max_analyze_duration reached\n"); + break; + } + } ++ #if FF_API_R_FRAME_RATE + { + int64_t last = st->info->last_dts; + +*************** +*** 2420,2425 **** +--- 2462,2468 ---- + if (last == AV_NOPTS_VALUE || st->info->duration_count <= 1) + st->info->last_dts = pkt->dts; + } ++ #endif + if(st->parser && st->parser->parser->split && !st->codec->extradata){ + int i= st->parser->parser->split(st->codec, pkt->data, pkt->size); + if (i > 0 && i < FF_MAX_EXTRADATA_SIZE) { +*************** +*** 2455,2464 **** + for(i=0;inb_streams;i++) { + st = ic->streams[i]; + if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) { +! if (st->codec_info_nb_frames>2 && !st->avg_frame_rate.num && st->info->codec_info_duration) + av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, +! (st->codec_info_nb_frames-2)*(int64_t)st->time_base.den, +! st->info->codec_info_duration*(int64_t)st->time_base.num, 60000); + // the check for tb_unreliable() is not completely correct, since this is not about handling + // a unreliable/inexact time base, but a time base that is finer than necessary, as e.g. + // ipmovie.c produces. +--- 2498,2528 ---- + for(i=0;inb_streams;i++) { + st = ic->streams[i]; + if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) { +! if (!st->avg_frame_rate.num && st->info->fps_last_dts != st->info->fps_first_dts) { +! int64_t delta_dts = st->info->fps_last_dts - st->info->fps_first_dts; +! int delta_packets = st->info->fps_last_dts_idx - st->info->fps_first_dts_idx; +! int best_fps = 0; +! double best_error = 0.01; + av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, +! 
delta_packets*(int64_t)st->time_base.den, +! delta_dts*(int64_t)st->time_base.num, 60000); +! +! /* round guessed framerate to a "standard" framerate if it's +! * within 1% of the original estimate*/ +! for (j = 1; j < MAX_STD_TIMEBASES; j++) { +! AVRational std_fps = { get_std_framerate(j), 12*1001 }; +! double error = fabs(av_q2d(st->avg_frame_rate) / av_q2d(std_fps) - 1); +! if (error < best_error) { +! best_error = error; +! best_fps = std_fps.num; +! } +! } +! if (best_fps) { +! av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, +! best_fps, 12*1001, INT_MAX); +! } +! } +! #if FF_API_R_FRAME_RATE + // the check for tb_unreliable() is not completely correct, since this is not about handling + // a unreliable/inexact time base, but a time base that is finer than necessary, as e.g. + // ipmovie.c produces. +*************** +*** 2481,2486 **** +--- 2545,2551 ---- + if (num && (!st->r_frame_rate.num || (double)num/(12*1001) < 1.01 * av_q2d(st->r_frame_rate))) + av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, num, 12*1001, INT_MAX); + } ++ #endif + }else if(st->codec->codec_type == AVMEDIA_TYPE_AUDIO) { + if(!st->codec->bits_per_coded_sample) + st->codec->bits_per_coded_sample= av_get_bits_per_sample(st->codec->codec_id); diff --git a/contrib/ffmpeg/module.defs b/contrib/ffmpeg/module.defs index 54ac675f7..26365ecba 100644 --- a/contrib/ffmpeg/module.defs +++ b/contrib/ffmpeg/module.defs @@ -17,8 +17,8 @@ FFMPEG.CONFIGURE.extra = \ --disable-avfilter \ --disable-muxers \ --disable-network \ - --disable-hwaccels \ - --disable-encoders \ + --disable-vaapi \ + --enable-dxva2 \ --enable-encoder=flac \ --enable-encoder=ac3 \ --enable-encoder=aac \ diff --git a/libhb/common.c b/libhb/common.c index 34c97da54..8ff5605b2 100644 --- a/libhb/common.c +++ b/libhb/common.c @@ -1939,7 +1939,11 @@ hb_filter_object_t * hb_filter_init( int filter_id ) case HB_FILTER_CROP_SCALE: filter = &hb_filter_crop_scale; break; - +#ifdef USE_OPENCL + case 
HB_FILTER_CROP_SCALE_ACCL: + filter = &hb_filter_crop_scale_accl; + break; +#endif case HB_FILTER_ROTATE: filter = &hb_filter_rotate; break; @@ -2903,3 +2907,28 @@ void hb_hexdump( hb_debug_level_t level, const char * label, const uint8_t * dat hb_deep_log( level, " %-50s%20s", line, ascii ); } } + +int hb_use_dxva( hb_title_t * title ) +{ + return ( (title->video_codec_param == CODEC_ID_MPEG2VIDEO + || title->video_codec_param == CODEC_ID_H264 + || title->video_codec_param == CODEC_ID_VC1 + || title->video_codec_param == CODEC_ID_WMV3 + || title->video_codec_param == CODEC_ID_MPEG4 ) + && title->opaque_priv ); +} +int hb_get_gui_info(hb_gui_t * gui, int option) +{ + if ( option == 1 ) + return gui->use_uvd; + else if ( option == 2 ) + return gui->use_opencl; + else + return gui->title_scan; +} +void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan) +{ + gui->use_uvd = uvd; + gui->use_opencl = opencl; + gui->title_scan = titlescan; +} diff --git a/libhb/common.h b/libhb/common.h index 796c3df38..f4c6d283a 100644 --- a/libhb/common.h +++ b/libhb/common.h @@ -20,7 +20,9 @@ #include #include #include - +#ifdef USE_OPENCL +#include +#endif /* * It seems WinXP doesn't align the stack of new threads to 16 bytes. 
* To prevent crashes in SSE functions, we need to force stack alignement @@ -92,6 +94,7 @@ typedef struct hb_filter_object_s hb_filter_object_t; typedef struct hb_buffer_s hb_buffer_t; typedef struct hb_fifo_s hb_fifo_t; typedef struct hb_lock_s hb_lock_t; +typedef struct hb_gui_s hb_gui_t; #include "ports.h" #ifdef __LIBHB__ @@ -140,6 +143,16 @@ int hb_subtitle_can_burn( int source ); int hb_subtitle_can_pass( int source, int mux ); hb_attachment_t *hb_attachment_copy(const hb_attachment_t *src); +int hb_get_gui_info(hb_gui_t *gui, int option); +void hb_set_gui_info(hb_gui_t *gui, int uvd, int opencl, int titlescan); +struct hb_gui_s +{ + int use_uvd; + int use_opencl; + int title_scan; +}; +hb_gui_t hb_gui; + hb_list_t *hb_attachment_list_copy(const hb_list_t *src); void hb_attachment_close(hb_attachment_t **attachment); @@ -418,6 +431,8 @@ struct hb_job_s uint32_t frames_to_skip; // decode but discard this many frames // initially (for frame accurate positioning // to non-I frames). + int use_opencl;/* 0 is disable use of opencl. 1 is enable use of opencl */ + int use_uvd; #ifdef __LIBHB__ /* Internal data */ @@ -760,6 +775,8 @@ struct hb_title_s uint32_t flags; // set if video stream doesn't have IDR frames + int opencl_support; + int uvd_support; #define HBTF_NO_IDR (1 << 0) #define HBTF_SCAN_COMPLETE (1 << 0) }; @@ -923,6 +940,7 @@ extern hb_work_object_t hb_encca_aac; extern hb_work_object_t hb_encca_haac; extern hb_work_object_t hb_encavcodeca; extern hb_work_object_t hb_reader; +extern hb_work_object_t hb_decavcodecv_accl; #define HB_FILTER_OK 0 #define HB_FILTER_DELAY 1 @@ -930,6 +948,60 @@ extern hb_work_object_t hb_reader; #define HB_FILTER_DROP 3 #define HB_FILTER_DONE 4 +typedef struct hb_oclscale_s +{ +#ifdef USE_OPENCL + // input buffer for running horizontal kernel. output buffer of running horizontal kernel. 
outpuf buffer of running vertiacla kernel + cl_mem h_in_buf; + cl_mem h_out_buf; + cl_mem v_out_buf; + // horizontal coefficent buffer for Y U and V plane, hroizontal source index for Y,U and V plane + cl_mem h_coeff_y; + cl_mem h_coeff_uv; + cl_mem h_index_y; + cl_mem h_index_uv; + // vertical coefficent buffer for Y U and V plane, vertical source index for Y,U and V plane + cl_mem v_coeff_y; + cl_mem v_coeff_uv; + cl_mem v_index_y; + cl_mem v_index_uv; + // horizontal scaling and vertical scaling kernel handle + cl_kernel h_kernel; + cl_kernel v_kernel; + int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem +#endif +} hb_oclscale_t; + +#ifdef USE_OPENCL +int hb_ocl_scale( cl_mem in_buf, uint8_t *in_data, uint8_t *out_data, int in_w, int in_h, int out_w, int out_h, hb_oclscale_t *os ); +int TestGPU(); +#endif + +#ifdef USE_OPENCL +int hb_use_dxva( hb_title_t * title ); +// create opencl buffer +#define CREATEBUF( out, flags, size )\ + {\ + out = clCreateBuffer( kenv->context, (flags), (size), NULL, &status );\ + if( CL_SUCCESS != status ) return -1;\ + } + +#define OCLCHECK( method, ... 
)\ + status = method( __VA_ARGS__ ); if( status != CL_SUCCESS ) {\ + printf( # method " error '%d'\n", status ); return status; } + +#define CL_FREE( buf )\ +{\ + if( buf )\ + {\ + { clReleaseMemObject( buf ); }\ + buf = NULL;\ + }\ +} + + +#endif + typedef struct hb_filter_init_s { hb_job_t * job; @@ -944,6 +1016,12 @@ typedef struct hb_filter_init_s int pfr_vrate_base; int pfr_vrate; int cfr; +#ifdef USE_OPENCL + int use_dxva; + int title_width; + int title_height; +#endif + } hb_filter_init_t; typedef struct hb_filter_info_s @@ -998,6 +1076,9 @@ enum HB_FILTER_DENOISE, HB_FILTER_RENDER_SUB, HB_FILTER_CROP_SCALE, +#ifdef USE_OPENCL + HB_FILTER_CROP_SCALE_ACCL, +#endif // Finally filters that don't care what order they are in, // except that they must be after the above filters HB_FILTER_ROTATE, diff --git a/libhb/cropscale.c b/libhb/cropscale.c index aa88f3a20..61aa85101 100644 --- a/libhb/cropscale.c +++ b/libhb/cropscale.c @@ -19,6 +19,7 @@ struct hb_filter_private_s int width_out; int height_out; int crop[4]; + int use_dxva; struct SwsContext * context; }; @@ -71,6 +72,9 @@ static int hb_crop_scale_init( hb_filter_object_t * filter, init->width = pv->width_out; init->height = pv->height_out; memcpy( init->crop, pv->crop, sizeof( int[4] ) ); +#ifdef USE_OPENCL + pv->use_dxva = init->use_dxva; +#endif return 0; } @@ -206,6 +210,15 @@ static int hb_crop_scale_work( hb_filter_object_t * filter, *buf_in = NULL; return HB_FILTER_OK; } +#ifdef USE_OPENCL + if ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_OK; + } +#endif + *buf_out = crop_scale( pv, in ); return HB_FILTER_OK; diff --git a/libhb/cropscaleaccl.c b/libhb/cropscaleaccl.c new file mode 100644 index 000000000..775e71b5c --- /dev/null +++ b/libhb/cropscaleaccl.c @@ -0,0 +1,262 @@ +/* cropscaleaccl.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . 
+ It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + */ +#ifdef USE_OPENCL +#include "hb.h" +#include "common.h" + +struct hb_filter_private_s +{ + int width_in; + int height_in; + int pix_fmt; + int pix_fmt_out; + int width_out; + int height_out; + int crop[4]; + int use_dxva; + + int title_width; + int title_height; + hb_oclscale_t * os; //ocl scaler handler + struct SwsContext * context; +}; + +static int hb_crop_scale_init( hb_filter_object_t * filter, + hb_filter_init_t * init ); + +static int hb_crop_scale_work( hb_filter_object_t * filter, + hb_buffer_t ** buf_in, + hb_buffer_t ** buf_out ); + +static int hb_crop_scale_info( hb_filter_object_t * filter, + hb_filter_info_t * info ); + +static void hb_crop_scale_close( hb_filter_object_t * filter ); + +hb_filter_object_t hb_filter_crop_scale_accl = +{ + .id = HB_FILTER_CROP_SCALE_ACCL, + .enforce_order = 1, + .name = "Hardware Acceleration Crop and Scale", + .settings = NULL, + .init = hb_crop_scale_init, + .work = hb_crop_scale_work, + .close = hb_crop_scale_close, + .info = hb_crop_scale_info, +}; + +static int hb_crop_scale_init( hb_filter_object_t * filter, + hb_filter_init_t * init ) +{ + filter->private_data = calloc( 1, sizeof(struct hb_filter_private_s) ); + hb_filter_private_t * pv = filter->private_data; + + // TODO: add pix format option to settings + pv->pix_fmt_out = init->pix_fmt; + pv->width_in = init->width; + pv->height_in = init->height; + pv->width_out = init->width; + pv->height_out = init->height; + pv->use_dxva = init->use_dxva; + pv->title_width = init->title_width; + pv->title_height = init->title_height; + memcpy( pv->crop, init->crop, sizeof( int[4] ) ); + if( filter->settings ) + { + sscanf( filter->settings, "%d:%d:%d:%d:%d:%d", + &pv->width_out, &pv->height_out, + &pv->crop[0], &pv->crop[1], &pv->crop[2], &pv->crop[3] ); + } + // Set init 
values so the next stage in the pipline + // knows what it will be getting + init->pix_fmt = pv->pix_fmt; + init->width = pv->width_out; + init->height = pv->height_out; + memcpy( init->crop, pv->crop, sizeof( int[4] ) ); + pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); + memset( pv->os, 0, sizeof( hb_oclscale_t ) ); + return 0; +} + +static int hb_crop_scale_info( hb_filter_object_t * filter, + hb_filter_info_t * info ) +{ + hb_filter_private_t * pv = filter->private_data; + + if( !pv ) + return 0; + + // Set init values so the next stage in the pipline + // knows what it will be getting + memset( info, 0, sizeof( hb_filter_info_t ) ); + info->out.pix_fmt = pv->pix_fmt; + info->out.width = pv->width_out; + info->out.height = pv->height_out; + memcpy( info->out.crop, pv->crop, sizeof( int[4] ) ); + + int cropped_width = pv->title_width - ( pv->crop[2] + pv->crop[3] ); + int cropped_height = pv->title_height - ( pv->crop[0] + pv->crop[1] ); + + sprintf( info->human_readable_desc, + "source: %d * %d, crop (%d/%d/%d/%d): %d * %d, scale: %d * %d", + pv->title_width, pv->title_height, + pv->crop[0], pv->crop[1], pv->crop[2], pv->crop[3], + cropped_width, cropped_height, pv->width_out, pv->height_out ); + + return 0; +} + +static void hb_crop_scale_close( hb_filter_object_t * filter ) +{ + hb_filter_private_t * pv = filter->private_data; + + if( !pv ) + { + return; + } + if ( pv->os ) + { + CL_FREE( pv->os->h_in_buf ); + CL_FREE( pv->os->h_out_buf ); + CL_FREE( pv->os->v_out_buf ); + CL_FREE( pv->os->h_coeff_y ); + CL_FREE( pv->os->h_coeff_uv ); + CL_FREE( pv->os->h_index_y ); + CL_FREE( pv->os->h_index_uv ); + CL_FREE( pv->os->v_coeff_y ); + CL_FREE( pv->os->v_coeff_uv ); + CL_FREE( pv->os->v_index_y ); + CL_FREE( pv->os->v_index_uv ); + free( pv->os ); + } + free( pv ); + filter->private_data = NULL; +} + +static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, int h ) +{ + if( dstride == sstride ) + { + memcpy( dst, src, 
dstride * h ); + return dst + dstride * h; + } + int lbytes = dstride <= sstride ? dstride : sstride; + while( --h >= 0 ) + { + memcpy( dst, src, lbytes ); + src += sstride; + dst += dstride; + } + return dst; +} + +static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) +{ + AVPicture pic_in; + AVPicture pic_out; + AVPicture pic_crop; + hb_buffer_t * out; + out = hb_video_buffer_init( pv->width_out, pv->height_out ); + + hb_avpicture_fill( &pic_in, in ); + hb_avpicture_fill( &pic_out, out ); + + // Crop; this alters the pointer to the data to point to the + // correct place for cropped frame + av_picture_crop( &pic_crop, &pic_in, in->f.fmt, + pv->crop[0], pv->crop[2] ); + + + int w = in->f.width - ( pv->crop[2] + pv->crop[3] ); + int h = in->f.height - ( pv->crop[0] + pv->crop[1] ); + uint8_t *tmp_in = malloc( w * h * 3 / 2 ); + uint8_t *tmp_out = malloc( pv->width_out * pv->height_out * 3 / 2 ); + if( pic_crop.data[0] || pic_crop.data[1] || pic_crop.data[2] || pic_crop.data[3] ) + { + int i; + for( i = 0; i< h>>1; i++ ) + { + memcpy( tmp_in + ( ( i<<1 ) + 0 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], w ); + memcpy( tmp_in + ( ( i<<1 ) + 1 ) * w, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], w ); + memcpy( tmp_in + ( w * h ) + i * ( w>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], w >> 1 ); + memcpy( tmp_in + ( w * h ) + ( ( w * h )>>2 ) + i * ( w>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], w >> 1 ); + } + } + else + { + memcpy( tmp_in, pic_crop.data[0], w * h ); + memcpy( tmp_in + w * h, pic_crop.data[1], (w*h)>>2 ); + memcpy( tmp_in + w * h + ((w*h)>>2), pic_crop.data[2], (w*h)>>2 ); + } + hb_ocl_scale( NULL, tmp_in, tmp_out, w, h, out->f.width, out->f.height, pv->os ); + w = out->plane[0].stride; + h = out->plane[0].height; + uint8_t *dst = out->plane[0].data; + copy_plane( dst, tmp_out, w, pv->width_out, h ); + w = out->plane[1].stride; + h = out->plane[1].height; + dst = out->plane[1].data; + 
copy_plane( dst, tmp_out + pv->width_out * pv->height_out, w, pv->width_out>>1, h ); + w = out->plane[2].stride; + h = out->plane[2].height; + dst = out->plane[2].data; + copy_plane( dst, tmp_out + pv->width_out * pv->height_out +( ( pv->width_out * pv->height_out )>>2 ), w, pv->width_out>>1, h ); + free( tmp_out ); + free( tmp_in ); + out->s = in->s; + hb_buffer_move_subs( out, in ); + return out; +} + +static int hb_crop_scale_work( hb_filter_object_t * filter, + hb_buffer_t ** buf_in, + hb_buffer_t ** buf_out ) +{ + hb_filter_private_t * pv = filter->private_data; + hb_buffer_t * in = *buf_in; + + if( in->size <= 0 ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_DONE; + } + + if( !pv ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_OK; + } + + // If width or height were not set, set them now based on the + // input width & height + if( pv->width_out <= 0 || pv->height_out <= 0 ) + { + pv->width_out = in->f.width - (pv->crop[2] + pv->crop[3]); + pv->height_out = in->f.height - (pv->crop[0] + pv->crop[1]); + } + if( ( in->f.fmt == pv->pix_fmt_out && + !pv->crop[0] && !pv->crop[1] && !pv->crop[2] && !pv->crop[3] && + in->f.width == pv->width_out && in->f.height == pv->height_out ) || + ( pv->use_dxva && in->f.width == pv->width_out && in->f.height == pv->height_out ) ) + { + *buf_out = in; + *buf_in = NULL; + return HB_FILTER_OK; + } + *buf_out = crop_scale( pv, in ); + + + return HB_FILTER_OK; +} +#endif diff --git a/libhb/decavcodecaccl.c b/libhb/decavcodecaccl.c new file mode 100644 index 000000000..5b19ed688 --- /dev/null +++ b/libhb/decavcodecaccl.c @@ -0,0 +1,1314 @@ +/* decavcodecaccl.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + +/* This module is Handbrake's interface to the ffmpeg decoder library + (libavcodec & small parts of libavformat). It contains four Handbrake + "work objects": + + decavcodeca connects HB to an ffmpeg audio decoder + decavcodecvaccl connects HB to an ffmpeg video decoder + + (Two different routines are needed because the ffmpeg library + has different decoder calling conventions for audio & video. + These work objects are self-contained & follow all + of HB's conventions for a decoder module. They can be used like + any other HB decoder (deca52, decmpeg2, etc.). + + These decoders handle 2 kinds of input. Streams that are demuxed + by HandBrake and streams that are demuxed by libavformat. In the + case of streams that are demuxed by HandBrake, there is an extra + parse step required that happens in decodeVideo and decavcodecaWork. + In the case of streams that are demuxed by libavformat, there is context + information that we need from the libavformat. This information is + propagated from hb_stream_open to these decoders through title->opaque_priv. + + A consequence of the above is that the streams that are demuxed by HandBrake + *can't* use information from the AVStream because there isn't one - they + get their data from either the dvd reader or the mpeg reader, not the ffmpeg + stream reader. That means that they have to make up for deficiencies in the + AVCodecContext info by using stuff kept in the HB "title" struct. It + also means that ffmpeg codecs that randomly scatter state needed by + the decoder across both the AVCodecContext & the AVStream (e.g., the + VC1 decoder) can't easily be used by the HB mpeg stream reader. 
+ */ +#define HAVE_DXVA2 +#ifdef HAVE_DXVA2 +#include "hb.h" +#include "hbffmpeg.h" +#include "vadxva2.h" +#include "audio_remap.h" +#include "audio_resample.h" + +static void compute_frame_duration( hb_work_private_t *pv ); +static void flushDelayQueue( hb_work_private_t *pv ); + +#define HEAP_SIZE 8 +typedef struct { + // there are nheap items on the heap indexed 1..nheap (i.e., top of + // heap is 1). The 0th slot is unused - a marker is put there to check + // for overwrite errs. + int64_t h[HEAP_SIZE+1]; + int nheap; +} pts_heap_t; + +struct hb_work_private_s +{ + hb_job_t *job; + hb_title_t *title; + AVCodecContext *context; + AVCodecParserContext *parser; + int threads; + int video_codec_opened; + hb_list_t *list; + double duration; // frame duration (for video) + double field_duration; // field duration (for video) + int frame_duration_set; // Indicates valid timing was found in stream + double pts_next; // next pts we expect to generate + int64_t chap_time; // time of next chap mark (if new_chap != 0) + int new_chap; // output chapter mark pending + uint32_t nframes; + uint32_t ndrops; + uint32_t decode_errors; + int brokenByMicrosoft; // video stream may contain packed b-frames + hb_buffer_t* delayq[HEAP_SIZE]; + int queue_primed; + pts_heap_t pts_heap; + void* buffer; + struct SwsContext *sws_context; // if we have to rescale or convert color space + int sws_width; + int sws_height; + int sws_pix_fmt; + int cadence[12]; + int wait_for_keyframe; + hb_va_dxva2_t * dxva2; + uint8_t *dst_frame; + hb_oclscale_t *os; + hb_audio_resample_t *resample; +}; + +static hb_buffer_t *link_buf_list( hb_work_private_t *pv ); + + +static int64_t heap_pop( pts_heap_t *heap ) +{ + int64_t result; + + if( heap->nheap <= 0 ) + { + return -1; + } + + // return the top of the heap then put the bottom element on top, + // decrease the heap size by one & rebalence the heap. 
+ result = heap->h[1]; + + int64_t v = heap->h[heap->nheap--]; + int parent = 1; + int child = parent << 1; + while( child <= heap->nheap ) + { + // find the smallest of the two children of parent + if( child < heap->nheap && heap->h[child] > heap->h[child+1] ) + ++child; + + if( v <= heap->h[child] ) + // new item is smaller than either child so it's the new parent. + break; + + // smallest child is smaller than new item so move it up then + // check its children. + int64_t hp = heap->h[child]; + heap->h[parent] = hp; + parent = child; + child = parent << 1; + } + heap->h[parent] = v; + return result; +} + +static void heap_push( pts_heap_t *heap, int64_t v ) +{ + if( heap->nheap < HEAP_SIZE ) + { + ++heap->nheap; + } + + // stick the new value on the bottom of the heap then bubble it + // up to its correct spot. + int child = heap->nheap; + while( child > 1 ) { + int parent = child >> 1; + if( heap->h[parent] <= v ) + break; + // move parent down + int64_t hp = heap->h[parent]; + heap->h[child] = hp; + child = parent; + } + heap->h[child] = v; +} + +/*********************************************************************** + * Close + *********************************************************************** + * + **********************************************************************/ +static void closePrivData( hb_work_private_t ** ppv ) +{ + hb_work_private_t * pv = *ppv; + + if( pv ) + { + flushDelayQueue( pv ); + + if( pv->job && pv->context && pv->context->codec ) + { + hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops", + pv->context->codec->name, pv->nframes, pv->decode_errors, + pv->ndrops ); + } + if( pv->sws_context ) + { + sws_freeContext( pv->sws_context ); + } + if( pv->parser ) + { + av_parser_close( pv->parser ); + } + if( pv->context && pv->context->codec ) + { + hb_avcodec_close( pv->context ); + } + if( pv->context ) + { + av_free( pv->context ); + } + if( pv->list ) + { + hb_list_empty( &pv->list ); + } + + hb_audio_resample_free( 
pv->resample ); + if ( pv->os ) + { +#ifdef USE_OPENCL + CL_FREE( pv->os->h_in_buf ); + CL_FREE( pv->os->h_out_buf ); + CL_FREE( pv->os->v_out_buf ); + CL_FREE( pv->os->h_coeff_y ); + CL_FREE( pv->os->h_coeff_uv ); + CL_FREE( pv->os->h_index_y ); + CL_FREE( pv->os->h_index_uv ); + CL_FREE( pv->os->v_coeff_y ); + CL_FREE( pv->os->v_coeff_uv ); + CL_FREE( pv->os->v_index_y ); + CL_FREE( pv->os->v_index_uv ); +#endif + free( pv->os ); + } + if ( pv->dxva2 ) + { + +#ifdef USE_OPENCL + CL_FREE( pv->dxva2->cl_mem_nv12 ); +#endif + hb_va_close( pv->dxva2 ); + } + free( pv ); + } + *ppv = NULL; +} + +/* ------------------------------------------------------------- + * General purpose video decoder using libavcodec + */ + +static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride, + int h ) +{ + if( dstride == sstride ) + { + memcpy( dst, src, dstride * h ); + return dst + dstride * h; + } + int lbytes = dstride <= sstride ? dstride : sstride; + while( --h >= 0 ) + { + memcpy( dst, src, lbytes ); + src += sstride; + dst += dstride; + } + return dst; +} + +// copy one video frame into an HB buf. If the frame isn't in our color space +// or at least one of its dimensions is odd, use sws_scale to convert/rescale it. +// Otherwise just copy the bits. +static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame ) +{ + AVCodecContext *context = pv->context; + + int w, h; + if( !pv->job ) + { + // HandBrake's video pipeline uses yuv420 color. This means all + // dimensions must be even. So we must adjust the dimensions + // of incoming video if not even. 
+ w = context->width & ~1; + h = context->height & ~1; + } + else + { + w = pv->job->title->width; + h = pv->job->title->height; + } + if( pv->dxva2 && pv->job ) + { + hb_buffer_t *buf; + int ww, hh; + if( (w > pv->job->width || h > pv->job->height) ) + { + buf = hb_video_buffer_init( pv->job->width, pv->job->height ); + ww = pv->job->width; + hh = pv->job->height; + } + else + { + buf = hb_video_buffer_init( w, h ); + ww = w; + hh = h; + } + if( !pv->dst_frame ) + { + pv->dst_frame = malloc( ww * hh * 3 / 2 ); + } + if( hb_va_extract( pv->dxva2, pv->dst_frame, frame, pv->job->width, pv->job->height, pv->job->title->crop, pv->os ) == HB_WORK_ERROR ) + { + hb_log( "hb_va_Extract failed!!!!!!" ); + } + + w = buf->plane[0].stride; + h = buf->plane[0].height; + uint8_t *dst = buf->plane[0].data; + copy_plane( dst, pv->dst_frame, w, ww, h ); + w = buf->plane[1].stride; + h = buf->plane[1].height; + dst = buf->plane[1].data; + copy_plane( dst, pv->dst_frame + ww * hh, w, ww>>1, h ); + w = buf->plane[2].stride; + h = buf->plane[2].height; + dst = buf->plane[2].data; + copy_plane( dst, pv->dst_frame + ww * hh +( ( ww * hh )>>2 ), w, ww>>1, h ); + return buf; + } + else + { + hb_buffer_t *buf = hb_video_buffer_init( w, h ); + uint8_t *dst = buf->data; + if( context->pix_fmt != PIX_FMT_YUV420P || w != context->width || + h != context->height ) + { + // have to convert to our internal color space and/or rescale + AVPicture dstpic; + hb_avpicture_fill( &dstpic, buf ); + if( !pv->sws_context || + pv->sws_width != context->width || + pv->sws_height != context->height || + pv->sws_pix_fmt != context->pix_fmt ) + { + if( pv->sws_context ) + sws_freeContext( pv->sws_context ); + pv->sws_context = hb_sws_get_context( + context->width, context->height, context->pix_fmt, + w, h, PIX_FMT_YUV420P, + SWS_LANCZOS|SWS_ACCURATE_RND ); + pv->sws_width = context->width; + pv->sws_height = context->height; + pv->sws_pix_fmt = context->pix_fmt; + } + sws_scale( pv->sws_context, (const 
uint8_t*const*)frame->data, + frame->linesize, 0, context->height, + dstpic.data, dstpic.linesize ); + } + else + { + w = buf->plane[0].stride; + h = buf->plane[0].height; + dst = buf->plane[0].data; + copy_plane( dst, frame->data[0], w, frame->linesize[0], h ); + w = buf->plane[1].stride; + h = buf->plane[1].height; + dst = buf->plane[1].data; + copy_plane( dst, frame->data[1], w, frame->linesize[1], h ); + w = buf->plane[2].stride; + h = buf->plane[2].height; + dst = buf->plane[2].data; + copy_plane( dst, frame->data[2], w, frame->linesize[2], h ); + } + return buf; + } + +} + + +static int get_frame_buf( AVCodecContext *context, AVFrame *frame ) +{ + int result = HB_WORK_ERROR; + hb_work_private_t *pv = (hb_work_private_t*)context->opaque; + if( pv->dxva2 ) + { + result = hb_va_get_frame_buf( pv->dxva2, context, frame ); + } + if( result==HB_WORK_ERROR ) + return avcodec_default_get_buffer( context, frame ); + return 0; +} + +static int reget_frame_buf( AVCodecContext *context, AVFrame *frame ) +{ + return avcodec_default_reget_buffer( context, frame ); +} + +static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts ) +{ + hb_chapter_t *c; + + if( !pv->job ) + return; + + c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 ); + if( c && c->title ) + { + hb_log( "%s: \"%s\" (%d) at frame %u time %" PRId64, + pv->context->codec->name, c->title, chap_num, pv->nframes, pts ); + } + else + { + hb_log( "%s: Chapter %d at frame %u time %" PRId64, + pv->context->codec->name, chap_num, pv->nframes, pts ); + } +} + +static void flushDelayQueue( hb_work_private_t *pv ) +{ + hb_buffer_t *buf; + int slot = pv->queue_primed ? 
pv->nframes & (HEAP_SIZE-1) : 0; + + // flush all the video packets left on our timestamp-reordering delay q + while( ( buf = pv->delayq[slot] ) != NULL ) + { + buf->s.start = heap_pop( &pv->pts_heap ); + hb_list_add( pv->list, buf ); + pv->delayq[slot] = NULL; + slot = ( slot + 1 ) & (HEAP_SIZE-1); + } +} + +#define TOP_FIRST PIC_FLAG_TOP_FIELD_FIRST +#define PROGRESSIVE PIC_FLAG_PROGRESSIVE_FRAME +#define REPEAT_FIRST PIC_FLAG_REPEAT_FIRST_FIELD +#define TB 8 +#define BT 16 +#define BT_PROG 32 +#define BTB_PROG 64 +#define TB_PROG 128 +#define TBT_PROG 256 + +static void checkCadence( int * cadence, uint16_t flags, int64_t start ) +{ + /* Rotate the cadence tracking. */ + int i = 0; + for( i = 11; i > 0; i-- ) + { + cadence[i] = cadence[i-1]; + } + + if( !(flags & PROGRESSIVE) && !(flags & TOP_FIRST) ) + { + /* Not progressive, not top first... + That means it's probably bottom + first, 2 fields displayed. + */ + //hb_log("MPEG2 Flag: Bottom field first, 2 fields displayed."); + cadence[0] = BT; + } + else if( !(flags & PROGRESSIVE) && (flags & TOP_FIRST) ) + { + /* Not progressive, top is first, + Two fields displayed. + */ + //hb_log("MPEG2 Flag: Top field first, 2 fields displayed."); + cadence[0] = TB; + } + else if( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) ) + { + /* Progressive, but noting else. + That means Bottom first, + 2 fields displayed. + */ + //hb_log("MPEG2 Flag: Progressive. Bottom field first, 2 fields displayed."); + cadence[0] = BT_PROG; + } + else if( (flags & PROGRESSIVE) && !(flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) ) + { + /* Progressive, and repeat. . + That means Bottom first, + 3 fields displayed. + */ + //hb_log("MPEG2 Flag: Progressive repeat. Bottom field first, 3 fields displayed."); + cadence[0] = BTB_PROG; + } + else if( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && !( flags & REPEAT_FIRST ) ) + { + /* Progressive, top first. + That means top first, + 2 fields displayed. 
+ */ + //hb_log("MPEG2 Flag: Progressive. Top field first, 2 fields displayed."); + cadence[0] = TB_PROG; + } + else if( (flags & PROGRESSIVE) && (flags & TOP_FIRST) && ( flags & REPEAT_FIRST ) ) + { + /* Progressive, top, repeat. + That means top first, + 3 fields displayed. + */ + //hb_log("MPEG2 Flag: Progressive repeat. Top field first, 3 fields displayed."); + cadence[0] = TBT_PROG; + } + + if( (cadence[2] <= TB) && (cadence[1] <= TB) && (cadence[0] > TB) && (cadence[11]) ) + hb_log( "%fs: Video -> Film", (float)start / 90000 ); + if( (cadence[2] > TB) && (cadence[1] <= TB) && (cadence[0] <= TB) && (cadence[11]) ) + hb_log( "%fs: Film -> Video", (float)start / 90000 ); +} + +/* + * Decodes a video frame from the specified raw packet data + * ('data', 'size', 'sequence'). + * The output of this function is stored in 'pv->list', which contains a list + * of zero or more decoded packets. + * + * The returned packets are guaranteed to have their timestamps in the correct + * order, even if the original packets decoded by libavcodec have misordered + * timestamps, due to the use of 'packed B-frames'. + * + * Internally the set of decoded packets may be buffered in 'pv->delayq' + * until enough packets have been decoded so that the timestamps can be + * correctly rewritten, if this is necessary. + */ +static int decodeFrame( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype ) +{ + + hb_work_private_t *pv = w->private_data; + int got_picture, oldlevel = 0; + AVFrame frame; + AVPacket avp; + if( global_verbosity_level <= 1 ) + { + oldlevel = av_log_get_level(); + av_log_set_level( AV_LOG_QUIET ); + } + + av_init_packet( &avp ); + + avp.data = data; + avp.size = size; + avp.pts = pts; + avp.dts = dts; + + /* + * libav avcodec_decode_video2() needs AVPacket flagged with AV_PKT_FLAG_KEY + * for some codecs. For example, sequence of PNG in a mov container. 
+ */ + if ( frametype & HB_FRAME_KEY ) + { + avp.flags |= AV_PKT_FLAG_KEY; + } + if( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 ) + { + ++pv->decode_errors; + } + if( global_verbosity_level <= 1 ) + { + av_log_set_level( oldlevel ); + } + + if( got_picture && pv->wait_for_keyframe > 0 ) + { + // Libav is inconsistant about how it flags keyframes. For many + // codecs it simply sets frame.key_frame. But for others, it only + // sets frame.pict_type. And for yet others neither gets set at all + // (qtrle). + int key = frame.key_frame || + ( w->codec_param != CODEC_ID_H264 && + ( frame.pict_type == AV_PICTURE_TYPE_I || + frame.pict_type == 0 ) ); + if( !key ) + { + pv->wait_for_keyframe--; + return 0; + } + pv->wait_for_keyframe = 0; + } + + if( got_picture ) + { + + uint16_t flags = 0; + + // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES + // packet had a pts we handed it to av_parser_parse (if the packet had + // no pts we set it to AV_NOPTS_VALUE, but before the parse we can't + // distinguish between the start of a video frame with no pts & an + // intermediate packet of some frame which never has a pts). we hope + // that when parse returns the frame to us the pts we originally + // handed it will be in parser->pts. we put this pts into avp.pts so + // that when avcodec_decode_video finally gets around to allocating an + // AVFrame to hold the decoded frame, avcodec_default_get_buffer can + // stuff that pts into the it. if all of these relays worked at this + // point frame.pts should hold the frame's pts from the original data + // stream or AV_NOPTS_VALUE if it didn't have one. in the latter case + // we generate the next pts in sequence for it. 
+ if( !pv->frame_duration_set ) + compute_frame_duration( pv ); + + double frame_dur = pv->duration; + if( frame.repeat_pict ) + { + frame_dur += frame.repeat_pict * pv->field_duration; + } + + + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + if( avp.pts>0 ) + { + if( pv->dxva2->input_pts[0]!=0 && pv->dxva2->input_pts[1]==0 ) + frame.pkt_pts = pv->dxva2->input_pts[0]; + else + frame.pkt_pts = pv->dxva2->input_pts[0]dxva2->input_pts[1] ? pv->dxva2->input_pts[0] : pv->dxva2->input_pts[1]; + } + } + // If there was no pts for this frame, assume constant frame rate + // video & estimate the next frame time from the last & duration. + double pts; + if( frame.pkt_pts == AV_NOPTS_VALUE ) + { + pts = pv->pts_next; + } + else + { + pts = frame.pkt_pts; + } + pv->pts_next = pts + frame_dur; + + if( frame.top_field_first ) + { + flags |= PIC_FLAG_TOP_FIELD_FIRST; + } + if( !frame.interlaced_frame ) + { + flags |= PIC_FLAG_PROGRESSIVE_FRAME; + } + if( frame.repeat_pict == 1 ) + { + flags |= PIC_FLAG_REPEAT_FIRST_FIELD; + } + if( frame.repeat_pict == 2 ) + { + flags |= PIC_FLAG_REPEAT_FRAME; + } + + + + hb_buffer_t *buf; + + // if we're doing a scan or this content couldn't have been broken + // by Microsoft we don't worry about timestamp reordering + if( !pv->job || !pv->brokenByMicrosoft ) + { + buf = copy_frame( pv, &frame ); + buf->s.start = pts; + buf->sequence = sequence; + + buf->s.flags = flags; + + if( pv->new_chap && buf->s.start >= pv->chap_time ) + { + buf->s.new_chap = pv->new_chap; + log_chapter( pv, pv->new_chap, buf->s.start ); + pv->new_chap = 0; + pv->chap_time = 0; + } + else if( pv->nframes == 0 && pv->job ) + { + log_chapter( pv, pv->job->chapter_start, buf->s.start ); + } + checkCadence( pv->cadence, flags, buf->s.start ); + hb_list_add( pv->list, buf ); + ++pv->nframes; + return got_picture; + } + + // XXX This following probably addresses a libavcodec bug but I don't + // see an easy fix so we workaround it here. 
+ // + // The M$ 'packed B-frames' atrocity results in decoded frames with + // the wrong timestamp. E.g., if there are 2 b-frames the timestamps + // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6". + // The frames are actually delivered in the right order but with + // the wrong timestamp. To get the correct timestamp attached to + // each frame we have a delay queue (longer than the max number of + // b-frames) & a sorting heap for the timestamps. As each frame + // comes out of the decoder the oldest frame in the queue is removed + // and associated with the smallest timestamp. Then the new frame is + // added to the queue & its timestamp is pushed on the heap. + // This does nothing if the timestamps are correct (i.e., the video + // uses a codec that Micro$oft hasn't broken yet) but the frames + // get timestamped correctly even when M$ has munged them. + + // remove the oldest picture from the frame queue (if any) & + // give it the smallest timestamp from our heap. The queue size + // is a power of two so we get the slot of the oldest by masking + // the frame count & this will become the slot of the newest + // once we've removed & processed the oldest. 
+ int slot = pv->nframes & (HEAP_SIZE-1); + if( ( buf = pv->delayq[slot] ) != NULL ) + { + pv->queue_primed = 1; + buf->s.start = heap_pop( &pv->pts_heap ); + + if( pv->new_chap && buf->s.start >= pv->chap_time ) + { + buf->s.new_chap = pv->new_chap; + log_chapter( pv, pv->new_chap, buf->s.start ); + pv->new_chap = 0; + pv->chap_time = 0; + } + else if( pv->nframes == 0 && pv->job ) + { + log_chapter( pv, pv->job->chapter_start, buf->s.start ); + } + checkCadence( pv->cadence, buf->s.flags, buf->s.start ); + hb_list_add( pv->list, buf ); + } + + // add the new frame to the delayq & push its timestamp on the heap + buf = copy_frame( pv, &frame ); + buf->sequence = sequence; + /* Store picture flags for later use by filters */ + buf->s.flags = flags; + pv->delayq[slot] = buf; + heap_push( &pv->pts_heap, pts ); + + ++pv->nframes; + } + + return got_picture; +} +static void decodeVideo( hb_work_object_t *w, uint8_t *data, int size, int sequence, int64_t pts, int64_t dts, uint8_t frametype ) +{ + hb_work_private_t *pv = w->private_data; + + /* + * The following loop is a do..while because we need to handle both + * data & the flush at the end (signaled by size=0). At the end there's + * generally a frame in the parser & one or more frames in the decoder + * (depending on the bframes setting). 
+ */ + int pos = 0; + do { + uint8_t *pout; + int pout_len, len; + int64_t parser_pts, parser_dts; + if( pv->parser ) + { + len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len, + data + pos, size - pos, pts, dts, 0 ); + parser_pts = pv->parser->pts; + parser_dts = pv->parser->dts; + } + else + { + pout = data; + len = pout_len = size; + parser_pts = pts; + parser_dts = dts; + } + pos += len; + + if( pout_len > 0 ) + { + decodeFrame( w, pout, pout_len, sequence, parser_pts, parser_dts, frametype ); + } + } while( pos < size ); + + /* the stuff above flushed the parser, now flush the decoder */ + if( size <= 0 ) + { + while( decodeFrame( w, NULL, 0, sequence, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0 ) ) + { + } + flushDelayQueue( pv ); + } + return; +} + +/* + * Removes all packets from 'pv->list', links them together into + * a linked-list, and returns the first packet in the list. + */ +static hb_buffer_t *link_buf_list( hb_work_private_t *pv ) +{ + hb_buffer_t *head = hb_list_item( pv->list, 0 ); + + if( head ) + { + hb_list_rem( pv->list, head ); + + hb_buffer_t *last = head, *buf; + + while( ( buf = hb_list_item( pv->list, 0 ) ) != NULL ) + { + hb_list_rem( pv->list, buf ); + last->next = buf; + last = buf; + } + } + return head; +} +static void hb_ffmpeg_release_frame_buf( struct AVCodecContext *p_context, AVFrame *frame ) +{ + hb_work_private_t *p_dec = (hb_work_private_t*)p_context->opaque; + int i; + if( p_dec->dxva2 ) + { + hb_va_release( p_dec->dxva2, frame ); + } + else if( !frame->opaque ) + { + if( frame->type == FF_BUFFER_TYPE_INTERNAL ) + avcodec_default_release_buffer( p_context, frame ); + } + for( i = 0; i < 4; i++ ) + frame->data[i] = NULL; +} + +static void init_video_avcodec_context( hb_work_private_t *pv ) +{ + /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) 
*/ + pv->context->opaque = pv; + pv->context->get_buffer = get_frame_buf; + pv->context->reget_buffer = reget_frame_buf; + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + pv->context->release_buffer = hb_ffmpeg_release_frame_buf; +} + +static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job ) +{ + + hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) ); + + w->private_data = pv; + pv->wait_for_keyframe = 60; + pv->job = job; + if( job ) + pv->title = job->title; + else + pv->title = w->title; + pv->list = hb_list_init(); + + if( pv->job && pv->job->title ) + { + if( !pv->job->title->has_resolution_change && w->codec_param != CODEC_ID_PRORES ) + { + pv->threads = HB_FFMPEG_THREADS_AUTO; + } + } + + if( pv->title->opaque_priv ) + { + + AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv; + AVCodec *codec = avcodec_find_decoder( w->codec_param ); + if( codec == NULL ) + { + hb_log( "decavcodecvInit: failed to find codec for id (%d)", w->codec_param ); + return 1; + } + pv->context = avcodec_alloc_context3( codec ); + avcodec_copy_context( pv->context, ic->streams[pv->title->video_id]->codec ); + pv->context->workaround_bugs = FF_BUG_AUTODETECT; + // Depricated but still used by Libav (twits!) 
+ pv->context->err_recognition = AV_EF_CRCCHECK; + pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; + if( ((w->codec_param==CODEC_ID_H264) + || (w->codec_param==CODEC_ID_MPEG2VIDEO) + || (w->codec_param==CODEC_ID_VC1) + || (w->codec_param==CODEC_ID_WMV3) + || (w->codec_param==CODEC_ID_MPEG4)) + && pv->job ) + { + pv->dxva2 = hb_va_create_dxva2( pv->dxva2, w->codec_param ); + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + hb_va_new_dxva2( pv->dxva2, pv->context ); + init_video_avcodec_context( pv ); + pv->context->get_format = hb_ffmpeg_get_format; + pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); + memset( pv->os, 0, sizeof( hb_oclscale_t ) ); + pv->threads = 1; + + } + } + if( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) + { + hb_log( "decavcodecvInit: avcodec_open failed" ); + return 1; + } + pv->video_codec_opened = 1; + // avi, mkv and possibly mp4 containers can contain the M$ VFW packed + // b-frames abortion that messes up frame ordering and timestamps. + // XXX ffmpeg knows which streams are broken but doesn't expose the + // info externally. We should patch ffmpeg to add a flag to the + // codec context for this but until then we mark all ffmpeg streams + // as suspicious. + pv->brokenByMicrosoft = 1; + } + else + { + AVCodec *codec = avcodec_find_decoder( w->codec_param ); + pv->parser = av_parser_init( w->codec_param ); + pv->context = avcodec_alloc_context3( codec ); + pv->context->workaround_bugs = FF_BUG_AUTODETECT; + // Depricated but still used by Libav (twits!) 
+ pv->context->err_recognition = AV_EF_CRCCHECK; + pv->context->error_concealment = FF_EC_GUESS_MVS|FF_EC_DEBLOCK; + init_video_avcodec_context( pv ); + } + return 0; +} + + +static int next_hdr( hb_buffer_t *in, int offset ) +{ + uint8_t *dat = in->data; + uint16_t last2 = 0xffff; + for( ; in->size - offset > 1; ++offset ) + { + if( last2 == 0 && dat[offset] == 0x01 ) + // found an mpeg start code + return offset - 2; + + last2 = ( last2 << 8 ) | dat[offset]; + } + + return -1; +} + +static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type ) +{ + if( in->size - offset < 4 ) + // not enough room for an mpeg start code + return -1; + + for( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset ) + { + if( in->data[offset+3] == hdr_type ) + // found it + break; + } + return offset; +} + +static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in ) +{ + hb_work_private_t *pv = w->private_data; + + // we can't call the avstream funcs but the read_header func in the + // AVInputFormat may set up some state in the AVContext. In particular + // vc1t_read_header allocates 'extradata' to deal with header issues + // related to Microsoft's bizarre engineering notions. We alloc a chunk + // of space to make vc1 work then associate the codec with the context. + if( w->codec_param != CODEC_ID_VC1 ) + { + // we haven't been inflicted with M$ - allocate a little space as + // a marker and return success. + pv->context->extradata_size = 0; + // av_malloc uses posix_memalign which is allowed to + // return NULL when allocating 0 bytes. We use extradata == NULL + // to trigger initialization of extradata and the decoder, so + // we can not set it to NULL here. So allocate a small + // buffer instead. 
+ pv->context->extradata = av_malloc( 1 ); + return 0; + } + + // find the start and and of the sequence header + int shdr, shdr_end; + if( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 ) + { + // didn't find start of seq hdr + return 1; + } + if( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 ) + { + shdr_end = in->size; + } + shdr_end -= shdr; + + // find the start and and of the entry point header + int ehdr, ehdr_end; + if( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 ) + { + // didn't find start of entry point hdr + return 1; + } + if( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 ) + { + ehdr_end = in->size; + } + ehdr_end -= ehdr; + + // found both headers - allocate an extradata big enough to hold both + // then copy them into it. + pv->context->extradata_size = shdr_end + ehdr_end; + pv->context->extradata = av_malloc( pv->context->extradata_size + 8 ); + memcpy( pv->context->extradata, in->data + shdr, shdr_end ); + memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end ); + memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8 ); + return 0; +} + +static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in, + hb_buffer_t ** buf_out ) +{ + hb_work_private_t *pv = w->private_data; + hb_buffer_t *in = *buf_in; + int64_t pts = AV_NOPTS_VALUE; + int64_t dts = pts; + *buf_in = NULL; + *buf_out = NULL; + + /* if we got an empty buffer signaling end-of-stream send it downstream */ + if( in->size == 0 ) + { + if( pv->context->codec != NULL ) + { + decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); + } + hb_list_add( pv->list, in ); + *buf_out = link_buf_list( pv ); + return HB_WORK_DONE; + } + + // if this is the first frame open the codec (we have to wait for the + // first frame because of M$ VC1 braindamage). 
+ if( !pv->video_codec_opened ) + { + AVCodec *codec = avcodec_find_decoder( w->codec_param ); + if( codec == NULL ) + { + hb_log( "decavcodecvWork: failed to find codec for id (%d)", w->codec_param ); + *buf_out = hb_buffer_init( 0 );; + return HB_WORK_DONE; + } + avcodec_get_context_defaults3( pv->context, codec ); + init_video_avcodec_context( pv ); + if( setup_extradata( w, in ) ) + { + // we didn't find the headers needed to set up extradata. + // the codec will abort if we open it so just free the buf + // and hope we eventually get the info we need. + hb_buffer_close( &in ); + return HB_WORK_OK; + } + // disable threaded decoding for scan, can cause crashes + if( hb_avcodec_open( pv->context, codec, NULL, pv->threads ) ) + { + hb_log( "decavcodecvWork: avcodec_open failed" ); + *buf_out = hb_buffer_init( 0 );; + return HB_WORK_DONE; + } + pv->video_codec_opened = 1; + } + + if( in->s.start >= 0 ) + { + pts = in->s.start; + dts = in->s.renderOffset; + } + if( in->s.new_chap ) + { + pv->new_chap = in->s.new_chap; + pv->chap_time = pts >= 0 ? pts : pv->pts_next; + } + if( pv->dxva2 && pv->dxva2->do_job==HB_WORK_OK ) + { + if( pv->dxva2->input_pts[0]<=pv->dxva2->input_pts[1] ) + pv->dxva2->input_pts[0] = pts; + else if( pv->dxva2->input_pts[0]>pv->dxva2->input_pts[1] ) + pv->dxva2->input_pts[1] = pts; + pv->dxva2->input_dts = dts; + } + decodeVideo( w, in->data, in->size, in->sequence, pts, dts, in->s.frametype ); + hb_buffer_close( &in ); + *buf_out = link_buf_list( pv ); + return HB_WORK_OK; +} +static void compute_frame_duration( hb_work_private_t *pv ) +{ + double duration = 0.; + int64_t max_fps = 64L; + + // context->time_base may be in fields, so set the max *fields* per second + if( pv->context->ticks_per_frame > 1 ) + max_fps *= pv->context->ticks_per_frame; + + if( pv->title->opaque_priv ) + { + // If ffmpeg is demuxing for us, it collects some additional + // information about framerates that is often more accurate + // than context->time_base. 
+ AVFormatContext *ic = (AVFormatContext*)pv->title->opaque_priv; + AVStream *st = ic->streams[pv->title->video_id]; + if( st->nb_frames && st->duration ) + { + // compute the average frame duration from the total number + // of frames & the total duration. + duration = ( (double)st->duration * (double)st->time_base.num ) / + ( (double)st->nb_frames * (double)st->time_base.den ); + } + else + { + // XXX We don't have a frame count or duration so try to use the + // far less reliable time base info in the stream. + // Because the time bases are so screwed up, we only take values + // in the range 8fps - 64fps. + AVRational *tb = NULL; + if( st->avg_frame_rate.den * 64L > st->avg_frame_rate.num && + st->avg_frame_rate.num > st->avg_frame_rate.den * 8L ) + { + tb = &(st->avg_frame_rate); + duration = (double)tb->den / (double)tb->num; + } + else if( st->time_base.num * 64L > st->time_base.den && + st->time_base.den > st->time_base.num * 8L ) + { + tb = &(st->time_base); + duration = (double)tb->num / (double)tb->den; + } + else if( st->r_frame_rate.den * 64L > st->r_frame_rate.num && + st->r_frame_rate.num > st->r_frame_rate.den * 8L ) + { + tb = &(st->r_frame_rate); + duration = (double)tb->den / (double)tb->num; + } + } + if( !duration && + pv->context->time_base.num * max_fps > pv->context->time_base.den && + pv->context->time_base.den > pv->context->time_base.num * 8L ) + { + duration = (double)pv->context->time_base.num / + (double)pv->context->time_base.den; + if( pv->context->ticks_per_frame > 1 ) + { + // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is + // field rate rather than frame rate so convert back to frames. 
+ duration *= pv->context->ticks_per_frame; + } + } + } + else + { + if( pv->context->time_base.num * max_fps > pv->context->time_base.den && + pv->context->time_base.den > pv->context->time_base.num * 8L ) + { + duration = (double)pv->context->time_base.num / + (double)pv->context->time_base.den; + if( pv->context->ticks_per_frame > 1 ) + { + // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is + // field rate rather than frame rate so convert back to frames. + duration *= pv->context->ticks_per_frame; + } + } + } + if( duration == 0 ) + { + // No valid timing info found in the stream, so pick some value + duration = 1001. / 24000.; + } + else + { + pv->frame_duration_set = 1; + } + pv->duration = duration * 90000.; + pv->field_duration = pv->duration; + if( pv->context->ticks_per_frame > 1 ) + { + pv->field_duration /= pv->context->ticks_per_frame; + } +} + +static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info ) +{ + hb_work_private_t *pv = w->private_data; + + memset( info, 0, sizeof(*info) ); + + info->bitrate = pv->context->bit_rate; + // HandBrake's video pipeline uses yuv420 color. This means all + // dimensions must be even. So we must adjust the dimensions + // of incoming video if not even. 
+ info->width = pv->context->width & ~1; + info->height = pv->context->height & ~1; + + info->pixel_aspect_width = pv->context->sample_aspect_ratio.num; + info->pixel_aspect_height = pv->context->sample_aspect_ratio.den; + + compute_frame_duration( pv ); + info->rate = 27000000; + info->rate_base = pv->duration * 300.; + + info->profile = pv->context->profile; + info->level = pv->context->level; + info->name = pv->context->codec->name; + + switch( pv->context->color_primaries ) + { + case AVCOL_PRI_BT709: + info->color_prim = HB_COLR_PRI_BT709; + break; + case AVCOL_PRI_BT470BG: + info->color_prim = HB_COLR_PRI_EBUTECH; + break; + case AVCOL_PRI_BT470M: + case AVCOL_PRI_SMPTE170M: + case AVCOL_PRI_SMPTE240M: + info->color_prim = HB_COLR_PRI_SMPTEC; + break; + default: + { + if( ( info->width >= 1280 || info->height >= 720 ) || + ( info->width > 720 && info->height > 576 ) ) + // ITU BT.709 HD content + info->color_prim = HB_COLR_PRI_BT709; + else if( info->rate_base == 1080000 ) + // ITU BT.601 DVD or SD TV content (PAL) + info->color_prim = HB_COLR_PRI_EBUTECH; + else + // ITU BT.601 DVD or SD TV content (NTSC) + info->color_prim = HB_COLR_PRI_SMPTEC; + break; + } + } + + /* AVCOL_TRC_BT709 -> HB_COLR_TRA_BT709 + * AVCOL_TRC_GAMMA22 (bt470m) -> HB_COLR_TRA_BT709 + * AVCOL_TRC_GAMMA28 (bt470bg) -> HB_COLR_TRA_BT709 + * AVCOL_TRC_UNSPECIFIED, AVCOL_TRC_NB: + * -> ITU BT.709 -> HB_COLR_TRA_BT709 + * -> ITU BT.601 -> HB_COLR_TRA_BT709 + * TODO: AVCOL_TRC_SMPTE240M -> HB_COLR_TRA_SMPTE240M but it's not yet in Libav */ + info->color_transfer = HB_COLR_TRA_BT709; + + switch( pv->context->colorspace ) + { + case AVCOL_SPC_BT709: + info->color_matrix = HB_COLR_MAT_BT709; + break; + case AVCOL_SPC_FCC: + case AVCOL_SPC_BT470BG: + case AVCOL_SPC_SMPTE170M: + case AVCOL_SPC_RGB: // libswscale rgb2yuv + info->color_matrix = HB_COLR_MAT_SMPTE170M; + break; + case AVCOL_SPC_SMPTE240M: + info->color_matrix = HB_COLR_MAT_SMPTE240M; + break; + default: + { + if( ( info->width >= 
1280 || info->height >= 720 ) || + ( info->width > 720 && info->height > 576 ) ) + // ITU BT.709 HD content + info->color_matrix = HB_COLR_MAT_BT709; + else + // ITU BT.601 DVD or SD TV content (PAL) + // ITU BT.601 DVD or SD TV content (NTSC) + info->color_matrix = HB_COLR_MAT_SMPTE170M; + break; + } + } + + return 1; +} + +static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf, + hb_work_info_t *info ) +{ + return 0; +} + +static void decavcodecvFlush( hb_work_object_t *w ) +{ + hb_work_private_t *pv = w->private_data; + + if( pv->context->codec ) + { + flushDelayQueue( pv ); + hb_buffer_t *buf = link_buf_list( pv ); + hb_buffer_close( &buf ); + if( pv->title->opaque_priv == NULL ) + { + pv->video_codec_opened = 0; + hb_avcodec_close( pv->context ); + if( pv->parser ) + { + av_parser_close( pv->parser ); + } + pv->parser = av_parser_init( w->codec_param ); + } + else + { + avcodec_flush_buffers( pv->context ); + } + } + pv->wait_for_keyframe = 60; +} + +static void decavcodecClose( hb_work_object_t * w ) +{ + hb_work_private_t * pv = w->private_data; + if( pv->dst_frame ) free( pv->dst_frame ); + if( pv ) + { + closePrivData( &pv ); + w->private_data = NULL; + } +} + +hb_work_object_t hb_decavcodecv_accl = +{ + .id = WORK_DECAVCODECVACCL, + .name = "Video hardware decoder (libavcodec)", + .init = decavcodecvInit, + .work = decavcodecvWork, + .close = decavcodecClose, + .flush = decavcodecvFlush, + .info = decavcodecvInfo, + .bsinfo = decavcodecvBSInfo +}; + +#endif diff --git a/libhb/dxva2api.c b/libhb/dxva2api.c new file mode 100644 index 000000000..04011c0c5 --- /dev/null +++ b/libhb/dxva2api.c @@ -0,0 +1,36 @@ +/* dxva2api.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ +#include "dxva2api.h" + +__inline float hb_dx_fixedtofloat( const DXVA2_Fixed32 _fixed_ ) +{ + return (FLOAT)_fixed_.Value + (FLOAT)_fixed_.Fraction / 0x10000; +} + +__inline const DXVA2_Fixed32 hb_dx_fixed32_opaque_alpha() +{ + DXVA2_Fixed32 _fixed_; + _fixed_.Fraction = 0; + _fixed_.Value = 0; + _fixed_.ll = 1; + return _fixed_; +} + + +__inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ ) +{ + DXVA2_Fixed32 _fixed_; + _fixed_.Fraction = LOWORD( _float_ * 0x10000 ); + _fixed_.Value = HIWORD( _float_ * 0x10000 ); + return _fixed_; +} diff --git a/libhb/dxva2api.h b/libhb/dxva2api.h new file mode 100644 index 000000000..dc5909477 --- /dev/null +++ b/libhb/dxva2api.h @@ -0,0 +1,822 @@ +/* dxva2api.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + + +#ifndef _DXVA2API_H +#define _DXVA2API_H + +#define MINGW_DXVA2API_H_VERSION (2) + +#if __GNUC__ >=3 +#pragma GCC system_header +#endif + +#include +#include + +/* Define it to allow using nameless struct/union (non C99 compliant) to match + * the official documentation. 
*/ +//#define DXVA2API_USE_BITFIELDS + +/****************STRUCTURES******************/ +#pragma pack(push, 1) + +#define DXVA2API_USE_BITFIELDS + +typedef struct _DXVA2_ExtendedFormat { +#ifdef DXVA2API_USE_BITFIELDS + union { + struct { + UINT SampleFormat : 8; + UINT VideoChromaSubsampling : 4; + UINT NominalRange : 3; + UINT VideoTransferMatrix : 3; + UINT VideoLighting : 4; + UINT VideoPrimaries : 5; + UINT VideoTransferFunction : 5; + }; + UINT value; + }; +#else + UINT value; +#endif +} DXVA2_ExtendedFormat; + +typedef struct _DXVA2_Frequency { + UINT Numerator; + UINT Denominator; +} DXVA2_Frequency; + +typedef struct _DXVA2_VideoDesc { + UINT SampleWidth; + UINT SampleHeight; + DXVA2_ExtendedFormat SampleFormat; + D3DFORMAT Format; + DXVA2_Frequency InputSampleFreq; + DXVA2_Frequency OutputFrameFreq; + UINT UABProtectionLevel; + UINT Reserved; +} DXVA2_VideoDesc; + +typedef struct _DXVA2_ConfigPictureDecode { + GUID guidConfigBitstreamEncryption; + GUID guidConfigMBcontrolEncryption; + GUID guidConfigResidDiffEncryption; + UINT ConfigBitstreamRaw; + UINT ConfigMBcontrolRasterOrder; + UINT ConfigResidDiffHost; + UINT ConfigSpatialResid8; + UINT ConfigResid8Subtraction; + UINT ConfigSpatialHost8or9Clipping; + UINT ConfigSpatialResidInterleaved; + UINT ConfigIntraResidUnsigned; + UINT ConfigResidDiffAccelerator; + UINT ConfigHostInverseScan; + UINT ConfigSpecificIDCT; + UINT Config4GroupedCoefs; + USHORT ConfigMinRenderTargetBuffCount; + USHORT ConfigDecoderSpecific; +} DXVA2_ConfigPictureDecode; + +typedef struct _DXVA2_DecodeBufferDesc { + DWORD CompressedBufferType; + UINT BufferIndex; + UINT DataOffset; + UINT DataSize; + UINT FirstMBaddress; + UINT NumMBsInBuffer; + UINT Width; + UINT Height; + UINT Stride; + UINT ReservedBits; + PVOID pvPVPState; +} DXVA2_DecodeBufferDesc; + +typedef struct _DXVA2_DecodeExtensionData { + UINT Function; + PVOID pPrivateInputData; + UINT PrivateInputDataSize; + PVOID pPrivateOutputData; + UINT PrivateOutputDataSize; +} 
DXVA2_DecodeExtensionData; + +typedef struct _DXVA2_DecodeExecuteParams { + UINT NumCompBuffers; + DXVA2_DecodeBufferDesc *pCompressedBuffers; + DXVA2_DecodeExtensionData *pExtensionData; +} DXVA2_DecodeExecuteParams; + +enum { + DXVA2_VideoDecoderRenderTarget = 0, + DXVA2_VideoProcessorRenderTarget = 1, + DXVA2_VideoSoftwareRenderTarget = 2 +}; + +enum { + DXVA2_PictureParametersBufferType = 0, + DXVA2_MacroBlockControlBufferType = 1, + DXVA2_ResidualDifferenceBufferType = 2, + DXVA2_DeblockingControlBufferType = 3, + DXVA2_InverseQuantizationMatrixBufferType = 4, + DXVA2_SliceControlBufferType = 5, + DXVA2_BitStreamDateBufferType = 6, + DXVA2_MotionVectorBuffer = 7, + DXVA2_FilmGrainBuffer = 8 +}; + +/* DXVA MPEG-I/II and VC-1 */ +typedef struct _DXVA_PictureParameters { + USHORT wDecodedPictureIndex; + USHORT wDeblockedPictureIndex; + USHORT wForwardRefPictureIndex; + USHORT wBackwardRefPictureIndex; + USHORT wPicWidthInMBminus1; + USHORT wPicHeightInMBminus1; + UCHAR bMacroblockWidthMinus1; + UCHAR bMacroblockHeightMinus1; + UCHAR bBlockWidthMinus1; + UCHAR bBlockHeightMinus1; + UCHAR bBPPminus1; + UCHAR bPicStructure; + UCHAR bSecondField; + UCHAR bPicIntra; + UCHAR bPicBackwardPrediction; + UCHAR bBidirectionalAveragingMode; + UCHAR bMVprecisionAndChromaRelation; + UCHAR bChromaFormat; + UCHAR bPicScanFixed; + UCHAR bPicScanMethod; + UCHAR bPicReadbackRequests; + UCHAR bRcontrol; + UCHAR bPicSpatialResid8; + UCHAR bPicOverflowBlocks; + UCHAR bPicExtrapolation; + UCHAR bPicDeblocked; + UCHAR bPicDeblockConfined; + UCHAR bPic4MVallowed; + UCHAR bPicOBMC; + UCHAR bPicBinPB; + UCHAR bMV_RPS; + UCHAR bReservedBits; + USHORT wBitstreamFcodes; + USHORT wBitstreamPCEelements; + UCHAR bBitstreamConcealmentNeed; + UCHAR bBitstreamConcealmentMethod; +} DXVA_PictureParameters, *LPDXVA_PictureParameters; + +typedef struct _DXVA_QmatrixData { + BYTE bNewQmatrix[4]; + WORD Qmatrix[4][8 * 8]; +} DXVA_QmatrixData, *LPDXVA_QmatrixData; + +typedef struct _DXVA_SliceInfo { + 
USHORT wHorizontalPosition; + USHORT wVerticalPosition; + UINT dwSliceBitsInBuffer; + UINT dwSliceDataLocation; + UCHAR bStartCodeBitOffset; + UCHAR bReservedBits; + USHORT wMBbitOffset; + USHORT wNumberMBsInSlice; + USHORT wQuantizerScaleCode; + USHORT wBadSliceChopping; +} DXVA_SliceInfo, *LPDXVA_SliceInfo; + +/* DXVA H264 */ +typedef struct { +#ifdef DXVA2API_USE_BITFIELDS + union { + struct { + UCHAR Index7Bits : 7; + UCHAR AssociatedFlag : 1; + }; + UCHAR bPicEntry; + }; +#else + UCHAR bPicEntry; +#endif +} DXVA_PicEntry_H264; + + +typedef struct { + USHORT wFrameWidthInMbsMinus1; + USHORT wFrameHeightInMbsMinus1; + DXVA_PicEntry_H264 CurrPic; + UCHAR num_ref_frames; +#ifdef DXVA2API_USE_BITFIELDS + union { + struct { + USHORT field_pic_flag : 1; + USHORT MbaffFrameFlag : 1; + USHORT residual_colour_transform_flag : 1; + USHORT sp_for_switch_flag : 1; + USHORT chroma_format_idc : 2; + USHORT RefPicFlag : 1; + USHORT constrained_intra_pred_flag : 1; + USHORT weighted_pred_flag : 1; + USHORT weighted_bipred_idc : 2; + USHORT MbsConsecutiveFlag : 1; + USHORT frame_mbs_only_flag : 1; + USHORT transform_8x8_mode_flag : 1; + USHORT MinLumaBipredSize8x8Flag : 1; + USHORT IntraPicFlag : 1; + }; + USHORT wBitFields; + }; +#else + USHORT wBitFields; +#endif + UCHAR bit_depth_luma_minus8; + UCHAR bit_depth_chroma_minus8; + USHORT Reserved16Bits; + UINT StatusReportFeedbackNumber; + DXVA_PicEntry_H264 RefFrameList[16]; + INT CurrFieldOrderCnt[2]; + INT FieldOrderCntList[16][2]; + CHAR pic_init_qs_minus26; + CHAR chroma_qp_index_offset; + CHAR second_chroma_qp_index_offset; + UCHAR ContinuationFlag; + CHAR pic_init_qp_minus26; + UCHAR num_ref_idx_l0_active_minus1; + UCHAR num_ref_idx_l1_active_minus1; + UCHAR Reserved8BitsA; + USHORT FrameNumList[16]; + + UINT UsedForReferenceFlags; + USHORT NonExistingFrameFlags; + USHORT frame_num; + UCHAR log2_max_frame_num_minus4; + UCHAR pic_order_cnt_type; + UCHAR log2_max_pic_order_cnt_lsb_minus4; + UCHAR 
delta_pic_order_always_zero_flag; + UCHAR direct_8x8_inference_flag; + UCHAR entropy_coding_mode_flag; + UCHAR pic_order_present_flag; + UCHAR num_slice_groups_minus1; + UCHAR slice_group_map_type; + UCHAR deblocking_filter_control_present_flag; + UCHAR redundant_pic_cnt_present_flag; + UCHAR Reserved8BitsB; + USHORT slice_group_change_rate_minus1; + UCHAR SliceGroupMap[810]; +} DXVA_PicParams_H264; + +typedef struct { + UCHAR bScalingLists4x4[6][16]; + UCHAR bScalingLists8x8[2][64]; +} DXVA_Qmatrix_H264; + + +typedef struct { + UINT BSNALunitDataLocation; + UINT SliceBytesInBuffer; + USHORT wBadSliceChopping; + USHORT first_mb_in_slice; + USHORT NumMbsForSlice; + USHORT BitOffsetToSliceData; + UCHAR slice_type; + UCHAR luma_log2_weight_denom; + UCHAR chroma_log2_weight_denom; + + UCHAR num_ref_idx_l0_active_minus1; + UCHAR num_ref_idx_l1_active_minus1; + CHAR slice_alpha_c0_offset_div2; + CHAR slice_beta_offset_div2; + UCHAR Reserved8Bits; + DXVA_PicEntry_H264 RefPicList[2][32]; + SHORT Weights[2][32][3][2]; + CHAR slice_qs_delta; + CHAR slice_qp_delta; + UCHAR redundant_pic_cnt; + UCHAR direct_spatial_mv_pred_flag; + UCHAR cabac_init_idc; + UCHAR disable_deblocking_filter_idc; + USHORT slice_id; +} DXVA_Slice_H264_Long; + +typedef struct { + UINT BSNALunitDataLocation; + UINT SliceBytesInBuffer; + USHORT wBadSliceChopping; +} DXVA_Slice_H264_Short; + +typedef struct { + USHORT wFrameWidthInMbsMinus1; + USHORT wFrameHeightInMbsMinus1; + DXVA_PicEntry_H264 InPic; + DXVA_PicEntry_H264 OutPic; + USHORT PicOrderCnt_offset; + INT CurrPicOrderCnt; + UINT StatusReportFeedbackNumber; + UCHAR model_id; + UCHAR separate_colour_description_present_flag; + UCHAR film_grain_bit_depth_luma_minus8; + UCHAR film_grain_bit_depth_chroma_minus8; + UCHAR film_grain_full_range_flag; + UCHAR film_grain_colour_primaries; + UCHAR film_grain_transfer_characteristics; + UCHAR film_grain_matrix_coefficients; + UCHAR blending_mode_id; + UCHAR log2_scale_factor; + UCHAR 
comp_model_present_flag[4]; + UCHAR num_intensity_intervals_minus1[4]; + UCHAR num_model_values_minus1[4]; + UCHAR intensity_interval_lower_bound[3][16]; + UCHAR intensity_interval_upper_bound[3][16]; + SHORT comp_model_value[3][16][8]; +} DXVA_FilmGrainChar_H264; + +typedef struct { + union { + struct { + USHORT Fraction; + SHORT Value; + }; + LONG ll; + }; +}DXVA2_Fixed32; + +typedef struct { + UCHAR Cr; + UCHAR Cb; + UCHAR Y; + UCHAR Alpha; +}DXVA2_AYUVSample8; + +typedef struct { + USHORT Cr; + USHORT Cb; + USHORT Y; + USHORT Alpha; +}DXVA2_AYUVSample16; + +typedef struct { + DXVA2_Fixed32 MinValue; + DXVA2_Fixed32 MaxValue; + DXVA2_Fixed32 DefaultValue; + DXVA2_Fixed32 StepSize; +}DXVA2_ValueRange; + +typedef struct { + DXVA2_Fixed32 Brightness; + DXVA2_Fixed32 Contrast; + DXVA2_Fixed32 Hue; + DXVA2_Fixed32 Saturation; +}DXVA2_ProcAmpValues; + +typedef struct { + DXVA2_Fixed32 Level; + DXVA2_Fixed32 Threshold; + DXVA2_Fixed32 Radius; +}DXVA2_FilterValues; + +typedef struct { + UINT DeviceCaps; + D3DPOOL InputPool; + UINT NumForwardRefSamples; + UINT NumBackwardRefSamples; + UINT Reserved; + UINT DeinterlaceTechnology; + UINT ProcAmpControlCaps; + UINT VideoProcessorOperations; + UINT NoiseFilterTechnology; + UINT DetailFilterTechnology; +}DXVA2_VideoProcessorCaps; + +#ifndef _REFERENCE_TIME_ +#define _REFERENCE_TIME_ +typedef long long int64_t; +typedef int64_t REFERENCE_TIME; +#endif + +typedef struct { + REFERENCE_TIME Start; + REFERENCE_TIME End; + DXVA2_ExtendedFormat SampleFormat; + IDirect3DSurface9 *SrcSurface; + RECT SrcRect; + RECT DstRect; + DXVA2_AYUVSample8 Pal[16]; + DXVA2_Fixed32 PlanarAlpha; + DWORD SampleData; +}DXVA2_VideoSample; + + +typedef struct { + REFERENCE_TIME TargetFrame; + RECT TargetRect; + SIZE ConstrictionSize; + UINT StreamingFlags; + DXVA2_AYUVSample16 BackgroundColor; + DXVA2_ExtendedFormat DestFormat; + DXVA2_ProcAmpValues ProcAmpValues; + DXVA2_Fixed32 Alpha; + DXVA2_FilterValues NoiseFilterLuma; + DXVA2_FilterValues 
NoiseFilterChroma; + DXVA2_FilterValues DetailFilterLuma; + DXVA2_FilterValues DetailFilterChroma; + DWORD DestData; +} DXVA2_VideoProcessBltParams; + +#pragma pack(pop) + +/*************INTERFACES************/ +#ifdef __cplusplus +extern "C" { +#endif +#define _COM_interface struct +typedef _COM_interface IDirectXVideoDecoderService IDirectXVideoDecoderService; +typedef _COM_interface IDirectXVideoDecoder IDirectXVideoDecoder; + +#undef INTERFACE +#define INTERFACE IDirectXVideoDecoder +DECLARE_INTERFACE_( IDirectXVideoDecoder, IUnknown ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( GetVideoDecoderService ) ( THIS_ IDirectXVideoDecoderService** ) PURE; + STDMETHOD( GetCreationParameters ) ( THIS_ GUID*, DXVA2_VideoDesc*, DXVA2_ConfigPictureDecode*, IDirect3DSurface9***, UINT* ) PURE; + STDMETHOD( GetBuffer ) ( THIS_ UINT, void**, UINT* ) PURE; + STDMETHOD( ReleaseBuffer ) ( THIS_ UINT ) PURE; + STDMETHOD( BeginFrame ) ( THIS_ IDirect3DSurface9 *, void* ) PURE; + STDMETHOD( EndFrame ) ( THIS_ HANDLE * ) PURE; + STDMETHOD( Execute ) ( THIS_ const DXVA2_DecodeExecuteParams* ) PURE; + + +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoDecoder_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoDecoder_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoDecoder_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoDecoder_BeginFrame( p, a, b ) (p)->lpVtbl->BeginFrame( p, a, b ) +#define IDirectXVideoDecoder_EndFrame( p, a ) (p)->lpVtbl->EndFrame( p, a ) +#define IDirectXVideoDecoder_Execute( p, a ) (p)->lpVtbl->Execute( p, a ) +#define IDirectXVideoDecoder_GetBuffer( p, a, b, c ) (p)->lpVtbl->GetBuffer( p, a, b, c ) +#define IDirectXVideoDecoder_GetCreationParameters( p, a, b, c, d, e ) (p)->lpVtbl->GetCreationParameters( p, a, b, c, d, e ) +#define 
IDirectXVideoDecoder_GetVideoDecoderService( p, a ) (p)->lpVtbl->GetVideoDecoderService( p, a ) +#define IDirectXVideoDecoder_ReleaseBuffer( p, a ) (p)->lpVtbl->ReleaseBuffer( p, a ) +#else +#define IDirectXVideoDecoder_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirectXVideoDecoder_AddRef( p ) (p)->AddRef() +#define IDirectXVideoDecoder_Release( p ) (p)->Release() +#define IDirectXVideoDecoder_BeginFrame( p, a, b ) (p)->BeginFrame( a, b ) +#define IDirectXVideoDecoder_EndFrame( p, a ) (p)->EndFrame( a ) +#define IDirectXVideoDecoder_Execute( p, a ) (p)->Execute( a ) +#define IDirectXVideoDecoder_GetBuffer( p, a, b, c ) (p)->GetBuffer( a, b, c ) +#define IDirectXVideoDecoder_GetCreationParameters( p, a, b, c, d, e ) (p)->GetCreationParameters( a, b, c, d, e ) +#define IDirectXVideoDecoder_GetVideoDecoderService( p, a ) (p)->GetVideoDecoderService( a ) +#define IDirectXVideoDecoder_ReleaseBuffer( p, a ) (p)->ReleaseBuffer( a ) +#endif + +#undef INTERFACE +#define INTERFACE IDirectXVideoAccelerationService +DECLARE_INTERFACE_( IDirectXVideoAccelerationService, IUnknown ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( CreateSurface ) ( THIS_ UINT, UINT, UINT, D3DFORMAT, D3DPOOL, DWORD, DWORD, IDirect3DSurface9**, HANDLE* ) PURE; + +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoAccelerationService_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoAccelerationService_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoAccelerationService_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoAccelerationService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->lpVtbl->CreateSurface( p, a, b, c, d, e, f, g, h, i ) +#else +#define IDirectXVideoAccelerationService_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define 
IDirectXVideoAccelerationService_AddRef( p ) (p)->AddRef() +#define IDirectXVideoAccelerationService_Release( p ) (p)->Release() +#define IDirectXVideoAccelerationService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->CreateSurface( a, b, c, d, e, f, g, h, i ) +#endif + +#undef INTERFACE +#define INTERFACE IDirectXVideoDecoderService +DECLARE_INTERFACE_( IDirectXVideoDecoderService, IDirectXVideoAccelerationService ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( CreateSurface ) ( THIS_ UINT, UINT, UINT, D3DFORMAT, D3DPOOL, DWORD, DWORD, IDirect3DSurface9**, HANDLE* ) PURE; + STDMETHOD( GetDecoderDeviceGuids ) ( THIS_ UINT*, GUID ** ) PURE; + STDMETHOD( GetDecoderRenderTargets ) ( THIS_ REFGUID, UINT*, D3DFORMAT** ) PURE; + STDMETHOD( GetDecoderConfigurations ) ( THIS_ REFGUID, const DXVA2_VideoDesc*, IUnknown*, UINT*, DXVA2_ConfigPictureDecode** ) PURE; + STDMETHOD( CreateVideoDecoder ) ( THIS_ REFGUID, const DXVA2_VideoDesc*, DXVA2_ConfigPictureDecode*, IDirect3DSurface9**, UINT, IDirectXVideoDecoder** ) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoDecoderService_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoDecoderService_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoDecoderService_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoDecoderService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->lpVtbl->CreateSurface( p, a, b, c, d, e, f, g, h, i ) +#define IDirectXVideoDecoderService_CreateVideoDecoder( p, a, b, c, d, e, f ) (p)->lpVtbl->CreateVideoDecoder( p, a, b, c, d, e, f ) +#define IDirectXVideoDecoderService_GetDecoderConfigurations( p, a, b, c, d, e ) (p)->lpVtbl->GetDecoderConfigurations( p, a, b, c, d, e ) +#define IDirectXVideoDecoderService_GetDecoderDeviceGuids( p, a, b ) (p)->lpVtbl->GetDecoderDeviceGuids( p, a, b ) 
+#define IDirectXVideoDecoderService_GetDecoderRenderTargets( p, a, b, c ) (p)->lpVtbl->GetDecoderRenderTargets( p, a, b, c ) +#else +#define IDirectXVideoDecoderService_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirectXVideoDecoderService_AddRef( p ) (p)->AddRef() +#define IDirectXVideoDecoderService_Release( p ) (p)->Release() +#define IDirectXVideoDecoderService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->CreateSurface( a, b, c, d, e, f, g, h, i ) +#define IDirectXVideoDecoderService_CreateVideoDecoder( p, a, b, c, d, e, f ) (p)->CreateVideoDecoder( a, b, c, d, e, f ) +#define IDirectXVideoDecoderService_GetDecoderConfigurations( p, a, b, c, d, e ) (p)->GetDecoderConfigurations( a, b, c, d, e ) +#define IDirectXVideoDecoderService_GetDecoderDeviceGuids( p, a, b ) (p)->GetDecoderDeviceGuids( a, b ) +#define IDirectXVideoDecoderService_GetDecoderRenderTargets( p, a, b, c ) (p)->GetDecoderRenderTargets( a, b, c ) +#endif + +#undef INTERFACE +#define INTERFACE IDirect3DDeviceManager9 +DECLARE_INTERFACE_( IDirect3DDeviceManager9, IUnknown ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( ResetDevice ) ( THIS_ IDirect3DDevice9*, UINT ) PURE; + STDMETHOD( OpenDeviceHandle ) ( THIS_ HANDLE* ) PURE; + STDMETHOD( CloseDeviceHandle ) ( THIS_ HANDLE ) PURE; + STDMETHOD( TestDevice ) ( THIS_ HANDLE ) PURE; + STDMETHOD( LockDevice ) ( THIS_ HANDLE, IDirect3DDevice9**, BOOL ) PURE; + STDMETHOD( UnlockDevice ) ( THIS_ HANDLE, BOOL ) PURE; + STDMETHOD( GetVideoService ) ( THIS_ HANDLE, REFIID, void** ) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirect3DDeviceManager9_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirect3DDeviceManager9_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirect3DDeviceManager9_Release( p ) (p)->lpVtbl->Release( p ) +#define 
IDirect3DDeviceManager9_ResetDevice( p, a, b ) (p)->lpVtbl->ResetDevice( p, a, b ) +#define IDirect3DDeviceManager9_OpenDeviceHandle( p, a ) (p)->lpVtbl->OpenDeviceHandle( p, a ) +#define IDirect3DDeviceManager9_CloseDeviceHandle( p, a ) (p)->lpVtbl->CloseDeviceHandle( p, a ) +#define IDirect3DDeviceManager9_TestDevice( p, a ) (p)->lpVtbl->TestDevice( p, a ) +#define IDirect3DDeviceManager9_LockDevice( p, a, b, c ) (p)->lpVtbl->LockDevice( p, a, b, c ) +#define IDirect3DDeviceManager9_UnlockDevice( p, a, b ) (p)->lpVtbl->UnlockDevice( p, a, b ) +#define IDirect3DDeviceManager9_GetVideoService( p, a, b, c ) (p)->lpVtbl->GetVideoService( p, a, b, c ) +#else +#define IDirect3DDeviceManager9_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirect3DDeviceManager9_AddRef( p ) (p)->AddRef() +#define IDirect3DDeviceManager9_Release( p ) (p)->Release() +#define IDirect3DDeviceManager9_ResetDevice( p, a, b ) (p)->ResetDevice( a, b ) +#define IDirect3DDeviceManager9_OpenDeviceHandle( p, a ) (p)->OpenDeviceHandle( a ) +#define IDirect3DDeviceManager9_CloseDeviceHandle( p, a ) (p)->CloseDeviceHandle( a ) +#define IDirect3DDeviceManager9_TestDevice( p, a ) (p)->TestDevice( a ) +#define IDirect3DDeviceManager9_LockDevice( p, a, b, c ) (p)->LockDevice( a, b, c ) +#define IDirect3DDeviceManager9_UnlockDevice( p, a, b ) (p)->UnlockDevice( a, b ) +#define IDirect3DDeviceManager9_GetVideoService( p, a, b, c ) (p)->GetVideoService( a, b, c ) +#endif + +typedef _COM_interface IDirectXVideoProcessorService IDirectXVideoProcessorService; +typedef _COM_interface IDirectXVideoProcessor IDirectXVideoProcessor; + +#undef INTERFACE +#define INTERFACE IDirectXVideoProcessor +DECLARE_INTERFACE_( IDirectXVideoProcessor, IUnknown ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( GetVideoProcessorService ) ( THIS_ IDirectXVideoProcessorService** ) PURE; + STDMETHOD( 
GetCreationParameters ) ( THIS_ GUID*, DXVA2_VideoDesc*, D3DFORMAT*, UINT* ) PURE; + STDMETHOD( GetVideoProcessorCaps ) ( THIS_ DXVA2_VideoProcessorCaps* ) PURE; + STDMETHOD( GetProcAmpRange ) ( THIS_ UINT, DXVA2_ValueRange* ) PURE; + STDMETHOD( GetFilterPropertyRange ) ( THIS_ UINT, DXVA2_ValueRange* ) PURE; + STDMETHOD( VideoProcessBlt ) ( THIS_ IDirect3DSurface9*, DXVA2_VideoProcessBltParams*, DXVA2_VideoSample*, UINT, HANDLE* ) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoProcessor_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoProcessor_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoProcessor_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoProcessor_GetVideoProcessorService( p, a ) (p)->lpVtbl->GetVideoProcessorService( p, a ) +#define IDirectXVideoProcessor_GetCreationParameters( p, a, b, c, d ) (p)->lpVtbl->GetCreationParameters( p, a, b, c, d ) +#define IDirectXVideoProcessor_GetVideoProcessorCaps( p, a ) (p)->lpVtbl->GetVideoProcessorCaps( p, a ) +#define IDirectXVideoProcessor_GetProcAmpRange( p, a, b ) (p)->lpVtbl->GetProcAmpRange( p, a, b ) +#define IDirectXVideoProcessor_GetFilterPropertyRange( p, a, b ) (p)->lpVtbl->GetFilterPropertyRange( p, a, b ) +#define IDirectXVideoProcessor_VideoProcessBlt( p, a, b, c, d, e ) (p)->lpVtbl->VideoProcessBlt( p, a, b, c, d, e ) +#else +#define IDirectXVideoProcessor_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirectXVideoProcessor_AddRef( p ) (p)->AddRef() +#define IDirectXVideoProcessor_Release( p ) (p)->Release() +#define IDirectXVideoProcessor_GetVideoProcessorService( p, a ) (p)->GetVideoProcessorService( a ) +#define IDirectXVideoProcessor_GetCreationParameters( p, a, b, c, d ) (p)->GetCreationParameters( a, b, c, d ) +#define IDirectXVideoProcessor_GetVideoProcessorCaps( p, a ) (p)->GetVideoProcessorCaps( a ) +#define IDirectXVideoProcessor_GetProcAmpRange( p, a, b ) 
(p)->GetProcAmpRange( a, b ) +#define IDirectXVideoProcessor_GetFilterPropertyRange( p, a, b ) (p)->GetFilterPropertyRange( a, b ) +#define IDirectXVideoProcessor_VideoProcessBlt( p, a, b, c, d, e ) (p)->VideoProcessBlt( a, b, c, d, e ) +#endif + + +#undef INTERFACE +#define INTERFACE IDirectXVideoProcessorService +DECLARE_INTERFACE_( IDirectXVideoProcessorService, IDirectXVideoAccelerationService ) +{ + STDMETHOD( QueryInterface ) ( THIS_ REFIID, PVOID* ) PURE; + STDMETHOD_( ULONG, AddRef ) ( THIS ) PURE; + STDMETHOD_( ULONG, Release ) ( THIS ) PURE; + STDMETHOD( CreateSurface ) ( THIS_ UINT, UINT, UINT, D3DFORMAT, D3DPOOL, DWORD, DWORD, IDirect3DSurface9**, HANDLE* ) PURE; + STDMETHOD( RegisterVideoProcessorSoftwareDevice ) ( THIS_ void* ) PURE; + STDMETHOD( GetVideoProcessorDeviceGuids ) ( THIS_ DXVA2_VideoDesc*, UINT, GUID** ) PURE; + STDMETHOD( GetVideoProcessorRenderTargets ) ( THIS_ REFGUID, DXVA2_VideoDesc*, UINT*, D3DFORMAT** ) PURE; + STDMETHOD( GetVideoProcessorSubStreamFormats ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, UINT*, D3DFORMAT** ) PURE; + STDMETHOD( GetVideoProcessorCaps ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, DXVA2_VideoProcessorCaps* ) PURE; + STDMETHOD( GetProcAmpRange ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, UINT, DXVA2_ValueRange* ) PURE; + STDMETHOD( GetFilterPropertyRange ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, UINT, DXVA2_ValueRange* ) PURE; + STDMETHOD( CreateVideoProcessor ) ( THIS_ REFGUID, DXVA2_VideoDesc*, D3DFORMAT, UINT, IDirectXVideoProcessor** ) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoProcessorService_QueryInterface( p, a, b ) (p)->lpVtbl->QueryInterface( p, a, b ) +#define IDirectXVideoProcessorService_AddRef( p ) (p)->lpVtbl->AddRef( p ) +#define IDirectXVideoProcessorService_Release( p ) (p)->lpVtbl->Release( p ) +#define IDirectXVideoProcessorService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->lpVtbl->CreateSurface( p, a, b, c, d, e, f, g, h, i ) 
+#define IDirectXVideoProcessorService_RegisterVideoProcessorSoftwareDevice( p, a ) (p)->lpVtbl->RegisterVideoProcessorSoftwareDevice( p, a ) +#define IDirectXVideoProcessorService_GetVideoProcessorDeviceGuids( p, a, b, c ) (p)->lpVtbl->GetVideoProcessorDeviceGuids( p, a, b, c ) +#define IDirectXVideoProcessorService_GetVideoProcessorRenderTargets( p, a, b, c, d ) (p)->lpVtbl->GetVideoProcessorRenderTargets( p, a, b, c, d ) +#define IDirectXVideoProcessorService_GetVideoProcessorSubStreamFormats( p, a, b, c, d, e ) (p)->lpVtbl->GetVideoProcessorSubStreamFormats( p, a, b, c, d, e ) +#define IDirectXVideoProcessorService_GetVideoProcessorCaps( p, a, b, c, d ) (p)->lpVtbl->GetVideoProcessorCaps( p, a, b, c, d ) +#define IDirectXVideoProcessorService_GetProcAmpRange( p, a, b, c, d, e ) (p)->lpVtbl->GetProcAmpRange( p, a, b, c, d, e ) +#define IDirectXVideoProcessorService_GetFilterPropertyRange( p, a, b, c, d, e ) (p)->lpVtbl->GetFilterPropertyRange( p, a, b, c, d, e ) +#define IDirectXVideoProcessorService_CreateVideoProcessor( p, a, b, c, d, e ) (p)->lpVtbl->CreateVideoProcessor( p, a, b, c, d, e ) +#else +#define IDirectXVideoProcessorService_QueryInterface( p, a, b ) (p)->QueryInterface( a, b ) +#define IDirectXVideoProcessorService_AddRef( p ) (p)->AddRef() +#define IDirectXVideoProcessorService_Release( p ) (p)->Release() +#define IDirectXVideoProcessorService_CreateSurface( p, a, b, c, d, e, f, g, h, i ) (p)->CreateSurface( a, b, c, d, e, f, g, h, i ) +#define IDirectXVideoProcessorService_RegisterVideoProcessorSoftwareDevice( p, a ) (p)->RegisterVideoProcessorSoftwareDevice( a ) +#define IDirectXVideoProcessorService_GetVideoProcessorDeviceGuids( p, a, b, c ) (p)->GetVideoProcessorDeviceGuids( a, b, c ) +#define IDirectXVideoProcessorService_GetVideoProcessorRenderTargets( p, a, b, c, d ) (p)->GetVideoProcessorRenderTargets( a, b, c, d ) +#define IDirectXVideoProcessorService_GetVideoProcessorSubStreamFormats( p, a, b, c, d, e ) 
(p)->GetVideoProcessorSubStreamFormats( a, b, c, d, e ) +#define IDirectXVideoProcessorService_GetVideoProcessorCaps( p, a, b, c, d ) (p)->GetVideoProcessorCaps( a, b, c, d ) +#define IDirectXVideoProcessorService_GetProcAmpRange( p, a, b, c, d, e ) (p)->GetProcAmpRange( a, b, c, d, e ) +#define IDirectXVideoProcessorService_GetFilterPropertyRange( p, a, b, c, d, e ) (p)->GetFilterPropertyRange( a, b, c, d, e ) +#define IDirectXVideoProcessorService_CreateVideoProcessor( p, a, b, c, d, e ) (p)->CreateVideoProcessor( a, b, c, d, e ) +#endif + + +/***************************************************************************************************** +************************DXVA Video Processor******************************************************** +*******************************************************************************************************/ + + + +/*#undef INTERFACE +#define INTERFACE IDirectXVideoService +DECLARE_INTERFACE_(IDirectXVideoService,IUnknown) +{ + STDMETHOD(DXVA2CreateVideoService)(IDirect3DDevice9*, REFIID, void**) PURE; +}; + +#if !defined(__cplusplus) || defined(CINTERFACE) +#define IDirectXVideoService_DXVA2CreateVideoService(a,b,c) DXVA2CreateVideoService(a,b,c) +#else +#define IDirectXVideoService_DXVA2CreateVideoService(a,b,c) DXVA2CreateVideoService(a,b,c) +#endif*/ + + +#ifdef __cplusplus +}; +#endif + +#ifdef __cplusplus +extern "C" HRESULT WINAPI DXVA2CreateVideoService( IDirect3DDevice9 *, + REFIID riid, + void **ppService ); +#else +extern HRESULT WINAPI DXVA2CreateVideoService( IDirect3DDevice9 *, + REFIID riid, + void **ppService ); +#endif + +typedef +enum _DXVA2_VideoChromaSubSampling +{ DXVA2_VideoChromaSubsamplingMask = 0xf, + DXVA2_VideoChromaSubsampling_Unknown = 0, + DXVA2_VideoChromaSubsampling_ProgressiveChroma = 0x8, + DXVA2_VideoChromaSubsampling_Horizontally_Cosited = 0x4, + DXVA2_VideoChromaSubsampling_Vertically_Cosited = 0x2, + DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes = 0x1, + 
DXVA2_VideoChromaSubsampling_MPEG2 = ( DXVA2_VideoChromaSubsampling_Horizontally_Cosited | DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes ), + DXVA2_VideoChromaSubsampling_MPEG1 = DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes, + DXVA2_VideoChromaSubsampling_DV_PAL = ( DXVA2_VideoChromaSubsampling_Horizontally_Cosited | DXVA2_VideoChromaSubsampling_Vertically_Cosited ), + DXVA2_VideoChromaSubsampling_Cosited = ( ( DXVA2_VideoChromaSubsampling_Horizontally_Cosited | DXVA2_VideoChromaSubsampling_Vertically_Cosited ) | DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes )} DXVA2_VideoChromaSubSampling; + +typedef +enum _DXVA2_NominalRange +{ DXVA2_NominalRangeMask = 0x7, + DXVA2_NominalRange_Unknown = 0, + DXVA2_NominalRange_Normal = 1, + DXVA2_NominalRange_Wide = 2, + DXVA2_NominalRange_0_255 = 1, + DXVA2_NominalRange_16_235 = 2, + DXVA2_NominalRange_48_208 = 3} DXVA2_NominalRange; + +typedef +enum _DXVA2_VideoLighting +{ DXVA2_VideoLightingMask = 0xf, + DXVA2_VideoLighting_Unknown = 0, + DXVA2_VideoLighting_bright = 1, + DXVA2_VideoLighting_office = 2, + DXVA2_VideoLighting_dim = 3, + DXVA2_VideoLighting_dark = 4} DXVA2_VideoLighting; + +typedef +enum _DXVA2_VideoPrimaries +{ DXVA2_VideoPrimariesMask = 0x1f, + DXVA2_VideoPrimaries_Unknown = 0, + DXVA2_VideoPrimaries_reserved = 1, + DXVA2_VideoPrimaries_BT709 = 2, + DXVA2_VideoPrimaries_BT470_2_SysM = 3, + DXVA2_VideoPrimaries_BT470_2_SysBG = 4, + DXVA2_VideoPrimaries_SMPTE170M = 5, + DXVA2_VideoPrimaries_SMPTE240M = 6, + DXVA2_VideoPrimaries_EBU3213 = 7, + DXVA2_VideoPrimaries_SMPTE_C = 8} DXVA2_VideoPrimaries; + +typedef +enum _DXVA2_VideoTransferFunction +{ DXVA2_VideoTransFuncMask = 0x1f, + DXVA2_VideoTransFunc_Unknown = 0, + DXVA2_VideoTransFunc_10 = 1, + DXVA2_VideoTransFunc_18 = 2, + DXVA2_VideoTransFunc_20 = 3, + DXVA2_VideoTransFunc_22 = 4, + DXVA2_VideoTransFunc_709 = 5, + DXVA2_VideoTransFunc_240M = 6, + DXVA2_VideoTransFunc_sRGB = 7, + DXVA2_VideoTransFunc_28 = 8} 
DXVA2_VideoTransferFunction; + +typedef +enum _DXVA2_SampleFormat +{ DXVA2_SampleFormatMask = 0xff, + DXVA2_SampleUnknown = 0, + DXVA2_SampleProgressiveFrame = 2, + DXVA2_SampleFieldInterleavedEvenFirst = 3, + DXVA2_SampleFieldInterleavedOddFirst = 4, + DXVA2_SampleFieldSingleEven = 5, + DXVA2_SampleFieldSingleOdd = 6, + DXVA2_SampleSubStream = 7} DXVA2_SampleFormat; + +typedef +enum _DXVA2_VideoTransferMatrix +{ DXVA2_VideoTransferMatrixMask = 0x7, + DXVA2_VideoTransferMatrix_Unknown = 0, + DXVA2_VideoTransferMatrix_BT709 = 1, + DXVA2_VideoTransferMatrix_BT601 = 2, + DXVA2_VideoTransferMatrix_SMPTE240M = 3} DXVA2_VideoTransferMatrix; + +enum __MIDL___MIDL_itf_dxva2api_0000_0000_0004 +{ DXVA2_NoiseFilterLumaLevel = 1, + DXVA2_NoiseFilterLumaThreshold = 2, + DXVA2_NoiseFilterLumaRadius = 3, + DXVA2_NoiseFilterChromaLevel = 4, + DXVA2_NoiseFilterChromaThreshold = 5, + DXVA2_NoiseFilterChromaRadius = 6, + DXVA2_DetailFilterLumaLevel = 7, + DXVA2_DetailFilterLumaThreshold = 8, + DXVA2_DetailFilterLumaRadius = 9, + DXVA2_DetailFilterChromaLevel = 10, + DXVA2_DetailFilterChromaThreshold = 11, + DXVA2_DetailFilterChromaRadius = 12}; + +enum __MIDL___MIDL_itf_dxva2api_0000_0000_0008 +{ DXVA2_VideoProcess_None = 0, + DXVA2_VideoProcess_YUV2RGB = 0x1, + DXVA2_VideoProcess_StretchX = 0x2, + DXVA2_VideoProcess_StretchY = 0x4, + DXVA2_VideoProcess_AlphaBlend = 0x8, + DXVA2_VideoProcess_SubRects = 0x10, + DXVA2_VideoProcess_SubStreams = 0x20, + DXVA2_VideoProcess_SubStreamsExtended = 0x40, + DXVA2_VideoProcess_YUV2RGBExtended = 0x80, + DXVA2_VideoProcess_AlphaBlendExtended = 0x100, + DXVA2_VideoProcess_Constriction = 0x200, + DXVA2_VideoProcess_NoiseFilter = 0x400, + DXVA2_VideoProcess_DetailFilter = 0x800, + DXVA2_VideoProcess_PlanarAlpha = 0x1000, + DXVA2_VideoProcess_LinearScaling = 0x2000, + DXVA2_VideoProcess_GammaCompensated = 0x4000, + DXVA2_VideoProcess_MaintainsOriginalFieldData = 0x8000, + DXVA2_VideoProcess_Mask = 0xffff}; + + + +__inline float hb_dx_fixedtofloat( 
const DXVA2_Fixed32 _fixed_ ); + +__inline const DXVA2_Fixed32 hb_dx_fixed32_opaque_alpha(); + +__inline DXVA2_Fixed32 hb_dx_floattofixed( const float _float_ ); + +#endif //_DXVA2API_H diff --git a/libhb/hb.c b/libhb/hb.c index 8a1ec3b1e..438e92fb6 100644 --- a/libhb/hb.c +++ b/libhb/hb.c @@ -477,6 +477,7 @@ hb_handle_t * hb_init( int verbose, int update_check ) #endif hb_register( &hb_encavcodeca ); hb_register( &hb_reader ); + hb_register( &hb_decavcodecv_accl ); return h; } @@ -575,7 +576,7 @@ hb_handle_t * hb_init_dl( int verbose, int update_check ) #endif hb_register( &hb_encavcodeca ); hb_register( &hb_reader ); - + hb_register( &hb_decavcodecv_accl ); return h; } diff --git a/libhb/hbffmpeg.h b/libhb/hbffmpeg.h index 2de771505..a44fc8d02 100644 --- a/libhb/hbffmpeg.h +++ b/libhb/hbffmpeg.h @@ -15,6 +15,7 @@ #include "libavutil/opt.h" #include "libswscale/swscale.h" #include "libavresample/avresample.h" +#include "common.h" #define HB_FFMPEG_THREADS_AUTO (-1) // let hb_avcodec_open() decide thread_count diff --git a/libhb/internal.h b/libhb/internal.h index 1c02ffe85..76845a3cc 100644 --- a/libhb/internal.h +++ b/libhb/internal.h @@ -418,7 +418,8 @@ enum WORK_ENCAVCODEC_AUDIO, WORK_MUX, WORK_READER, - WORK_DECPGSSUB + WORK_DECPGSSUB, + WORK_DECAVCODECVACCL }; extern hb_filter_object_t hb_filter_detelecine; @@ -428,6 +429,7 @@ extern hb_filter_object_t hb_filter_denoise; extern hb_filter_object_t hb_filter_decomb; extern hb_filter_object_t hb_filter_rotate; extern hb_filter_object_t hb_filter_crop_scale; +extern hb_filter_object_t hb_filter_crop_scale_accl; extern hb_filter_object_t hb_filter_render_sub; extern hb_filter_object_t hb_filter_vfr; diff --git a/libhb/module.defs b/libhb/module.defs index e5c5650fa..d3d409513 100644 --- a/libhb/module.defs +++ b/libhb/module.defs @@ -37,8 +37,12 @@ LIBHB.out += $(LIBHB.a) ifeq (1,$(FEATURE.ff.mpeg2)) LIBHB.GCC.D += USE_FF_MPEG2 endif +ifeq (1,$(FEATURE.opencl)) +LIBHB.GCC.D += USE_OPENCL +endif LIBHB.GCC.D += 
__LIBHB__ USE_PTHREAD LIBHB.GCC.I += $(LIBHB.build/) $(CONTRIB.build/)include +LIBHB.GCC.I += $(AMDAPPSDKROOT)/include ifeq ($(BUILD.system),cygwin) LIBHB.GCC.D += SYS_CYGWIN @@ -131,6 +135,9 @@ LIBHB.GCC.l += ws2_32 ifeq ($(HAS.dlfcn),1) LIBHB.GCC.l += dl endif +ifeq (1,$(FEATURE.opencl)) + LIBHB.GCC.l += OpenCL +endif LIBHB.out += $(LIBHB.dll) $(LIBHB.lib) endif diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c new file mode 100644 index 000000000..8d49563bf --- /dev/null +++ b/libhb/oclnv12toyuv.c @@ -0,0 +1,222 @@ +/* oclnv12toyuv.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + */ + +#ifdef USE_OPENCL +#include "vadxva2.h" +#include "oclnv12toyuv.h" + +/** + * It creates are opencl bufs w is input frame width, h is input frame height +*/ +static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); + +/** + * It creates are opencl kernel. kernel name is nv12toyuv +*/ +static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 ); + +/** + * It set opencl arg, input data,output data, input width, output height +*/ +static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); + +/** + * It initialize nv12 to yuv kernel. +*/ +static int hb_init_nv12toyuv_ocl( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ); + +/** + * Run nv12 to yuv kernel. + */ +static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ); + +/** + * register nv12 to yuv kernel. 
+ */ +static int hb_nv12toyuv_reg_kernel( void ); + + +/** + * It creates are opencl bufs w is input frame width, h is input frame height + */ +static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) +{ + cl_int status = CL_SUCCESS; + int in_bytes = w*h*3/2; + CREATEBUF( dxva2->cl_mem_nv12, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, in_bytes ); + CREATEBUF( dxva2->cl_mem_yuv, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, in_bytes ); + return 0; +} +/** + * It creates are opencl kernel. kernel name is nv12toyuv + */ +static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 ) +{ + int ret; + dxva2->nv12toyuv = clCreateKernel( kenv->program, "nv12toyuv", &ret ); + return ret; +} +/** + * It set opencl arg, input data,output data, input width, output height + */ +static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) +{ + int arg = 0, status; + kenv->kernel = dxva2->nv12toyuv; + OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_nv12 ); + OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_yuv ); + OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &w ); + OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h ); + return 0; +} +/** + * It initialize nv12 to yuv kernel. + */ +static int hb_init_nv12toyuv_ocl( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 ) +{ + if( !dxva2->nv12toyuv ) + { + if( hb_nv12toyuv_create_cl_buf( kenv, w, h, dxva2 ) ) + { + hb_log( "nv12toyuv_create_cl_buf fial" ); + return -1; + } + if (!dxva2->nv12toyuv_tmp_in) + dxva2->nv12toyuv_tmp_in = malloc (w*h*3/2); + if (!dxva2->nv12toyuv_tmp_out) + dxva2->nv12toyuv_tmp_out = malloc (w*h*3/2); + hb_nv12toyuv_create_cl_kernel( kenv, dxva2 ); + } + return 0; +} + +/** + * Run nv12 to yuv kernel. 
+ */ +static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ) +{ + int status; + int w = (int)userdata[0]; + int h = (int)userdata[1]; + uint8_t *bufi1 = userdata[2]; + int *crop = userdata[3]; + hb_va_dxva2_t *dxva2 = userdata[4]; + + uint8_t *bufi2 = userdata[5]; + int p = (int)userdata[6]; + int i; + if( hb_init_nv12toyuv_ocl( kenv, w, h, dxva2 ) ) + return -1; + + if( hb_nv12toyuv_setkernelarg( kenv, w, h, dxva2 ) ) + return -1; + + int in_bytes = w*h*3/2; + if( kenv->isAMD ) + { + void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_nv12, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL ); + //memcpy( data, bufi, in_bytes ); + for ( i = 0; i < dxva2->height; i++ ) + { + memcpy( data+i*dxva2->width, bufi1+i*p, dxva2->width ); + if ( iheight>>1 ) + memcpy( data+(dxva2->width*dxva2->height)+i*dxva2->width, bufi2+i*p, dxva2->width ); + } + clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_nv12, data, 0, NULL, NULL ); + } + else + { + uint8_t *tmp = (uint8_t*)malloc( dxva2->width*dxva2->height*3/2 ); + for( i = 0; i < dxva2->height; i++ ) + { + memcpy( tmp+i*dxva2->width, bufi1+i*p, dxva2->width ); + if( iheight>>1 ) + memcpy( tmp+(dxva2->width*dxva2->height)+i*dxva2->width, bufi2+i*p, dxva2->width ); + } + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL ); + free( tmp ); + } + + size_t gdim[2] = {w>>1, h>>1}; + OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, gdim, NULL, 0, NULL, NULL ); + + if( crop[0] || crop[1] || crop[2] || crop[3] ) + { + AVPicture pic_in; + AVPicture pic_crop; + clEnqueueReadBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_out, 0, NULL, NULL ); + hb_buffer_t *in = hb_video_buffer_init( w, h ); + memcpy( in->plane[0].data, dxva2->nv12toyuv_tmp_out, w * h ); + memcpy( in->plane[1].data, dxva2->nv12toyuv_tmp_out + w * h, ( w * h )>>2 ); + memcpy( 
in->plane[2].data, dxva2->nv12toyuv_tmp_out + w * h + ( ( w * h )>>2 ), ( w * h )>>2 ); + hb_avpicture_fill( &pic_in, in ); + av_picture_crop( &pic_crop, &pic_in, in->f.fmt, crop[0], crop[2] ); + int i, ww = w - ( crop[2] + crop[3] ), hh = h - ( crop[0] + crop[1] ); + for( i = 0; i< hh >> 1; i++ ) + { + memcpy( dxva2->nv12toyuv_tmp_in + ( ( i<<1 ) + 0 ) * ww, pic_crop.data[0]+ ( ( i<<1 ) + 0 ) * pic_crop.linesize[0], ww ); + memcpy( dxva2->nv12toyuv_tmp_in + ( ( i<<1 ) + 1 ) * ww, pic_crop.data[0]+ ( ( i<<1 ) + 1 ) * pic_crop.linesize[0], ww ); + memcpy( dxva2->nv12toyuv_tmp_in + ( ww * hh ) + i * ( ww>>1 ), pic_crop.data[1] + i * pic_crop.linesize[1], ww >> 1 ); + memcpy( dxva2->nv12toyuv_tmp_in + ( ww * hh ) + ( ( ww * hh )>>2 ) + i * ( ww>>1 ), pic_crop.data[2] + i * pic_crop.linesize[2], ww >> 1 ); + } + if( kenv->isAMD ) + { + void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, ww * hh * 3 / 2, 0, NULL, NULL, NULL ); + memcpy( data, dxva2->nv12toyuv_tmp_in, ww * hh * 3 / 2 ); + clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_yuv, data, 0, NULL, NULL ); + } + else + { + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_in, 0, NULL, NULL ); + } + hb_buffer_close( &in ); + } + return 0; +} +/** + * register nv12 to yuv kernel. 
+ */ +static int hb_nv12toyuv_reg_kernel( void ) +{ + int st = hb_register_kernel_wrapper( "nv12toyuv", hb_nv12toyuv ); + if( !st ) + { + hb_log( "register kernel[%s] faild\n", "nv12toyuv" ); + return -1; + } + return 0; +} +/** + * nv12 to yuv interface + * bufi is input frame of nv12, w is input frame width, h is input frame height + */ +int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ) +//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ) +{ + void *userdata[7]; + userdata[0] = (void*)w; + userdata[1] = (void*)h; + userdata[2] = bufi[0]; + userdata[3] = crop; + userdata[4] = dxva2; + userdata[5] = bufi[1]; + userdata[6] = (void*)p; + if( hb_nv12toyuv_reg_kernel() ) + return -1; + if( hb_run_kernel( "nv12toyuv", userdata ) ) + { + printf( "run kernel[nv12toyuv] faild\n" ); + return -1; + } + return 0; +} +#endif diff --git a/libhb/oclnv12toyuv.h b/libhb/oclnv12toyuv.h new file mode 100644 index 000000000..3307b8efe --- /dev/null +++ b/libhb/oclnv12toyuv.h @@ -0,0 +1,29 @@ +/* oclnv12toyuv.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + +#ifdef USE_OPENCL +#ifndef RENDER_CL_H +#define RENDER_CL_H +#include "CL/cl.h" +#include "common.h" +#include "openclwrapper.h" + +/** + * nv12 to yuv interface + * bufi is input frame of nv12, w is input frame width, h is input frame height + */ +//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ); +int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ); + +#endif +#endif diff --git a/libhb/oclscale.c b/libhb/oclscale.c new file mode 100644 index 000000000..084e04b13 --- /dev/null +++ b/libhb/oclscale.c @@ -0,0 +1,301 @@ +/* oclscale.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + +#ifdef USE_OPENCL + +#include +#include "common.h" +#include "openclwrapper.h" +#define MaxFilterLength 16 +#define FILTER_LEN 4 + +inline double hb_fit_gauss_kernel( double x ) +{ + double powNum = -1 * M_PI; + + powNum *= x; + + powNum *= x; + + return exp( powNum ); +} +/** + * Using gaussian algorithm to calculate the scale filter + */ +static void hb_set_gauss_interpolation( float *pcoeff, int *pmappedindex, int targetdatalength, int srcdatalength, int filterLength, float bias ) +{ + int i, j; + + float gausskernel[MaxFilterLength]; + + int half = filterLength / 2; + + float scalerate = (float)(srcdatalength) / targetdatalength; + + for( i = 0; i < targetdatalength; ++i ) + { + float flindex = i * scalerate + bias; + + if( flindex > (srcdatalength - 1)) + { + flindex -= (int)(flindex - (srcdatalength - 1)); + } + + int srcindex = (int)(flindex); + + float t = flindex - srcindex; + + for( j = 0; j < (int)half; j++ ) + 
{ + gausskernel[j] = (float)hb_fit_gauss_kernel((half - j) - 1 + t ); + } + + for( j = 0; j < (int)half; j++ ) + { + gausskernel[half + j] = (float)hb_fit_gauss_kernel( j + 1 - t ); + } + + while( srcindex < (int)half - 1 ) + { + /* -1 0 1 2 + * M1 S P1 P2 + * + * if srcindex is 0, M1 and S will be the same sample. To keep the + * convolution kernel from having to check for edge conditions, move + * srcindex to 1, then slide down the coefficients + */ + srcindex += 1; + + gausskernel[0] += gausskernel[1]; + + for( j = 1; j < filterLength - 1; j++ ) + { + gausskernel[j] = gausskernel[j + 1]; + } + + gausskernel[filterLength - 1] = 0; + } + + while( srcindex >= srcdatalength - half ) + { + /* If srcindex is near the edge, shift down srcindex and slide up + * the coefficients + */ + srcindex -= 1; + + gausskernel[3] += gausskernel[2]; + + for( j = filterLength - 2; j > 0; j-- ) + { + gausskernel[j] = gausskernel[j - 1]; + } + + gausskernel[0] = 0; + } + + *pmappedindex++ = srcindex - half + 1; + + // Store normalized Gauss kernel + + float sumtemp = 0; + + for( j = 0; j < filterLength; ++j ) + { + sumtemp += gausskernel[j]; + } + + for( j = 0; j < filterLength; ++j ) + { + pcoeff[targetdatalength * j + i] = gausskernel[j] / sumtemp; + } + } +} +/** +* executive scale using opencl +* get filter args +* create output buffer +* create horizontal filter buffer +* create vertical filter buffer +* create kernels +*/ +int hb_ocl_scale_func( void **data, KernelEnv *kenv ) +{ + cl_int status; + + uint8_t *in_frame = data[0]; + uint8_t *out_frame = data[1]; + int in_frame_w = (int)data[2]; + int in_frame_h = (int)data[3]; + int out_frame_w = (int)data[4]; + int out_frame_h = (int)data[5]; + hb_oclscale_t *os = data[6]; + + if( os->use_ocl_mem ) + os->h_in_buf = data[0]; + int h_filter_len = FILTER_LEN; + int v_filter_len = FILTER_LEN; + //it will make the psnr lower when filter length is 4 in the condition that the video width is shorter than 960 and width is shorter than 
544,so we set the filter length to 2 + if( out_frame_w <= 960 && out_frame_h <= 544 ) + { + h_filter_len>>=1; + v_filter_len>>=1; + } + if( !os->h_out_buf ) + { + hb_log( "Scaling With OpenCL\n" ); + //malloc filter args + float *hf_y, *hf_uv, *vf_y, *vf_uv; + int *hi_y, *hi_uv, *vi_y, *vi_uv; + hf_y = (float*)malloc( sizeof(float)*out_frame_w * h_filter_len ); + hf_uv = (float*)malloc( sizeof(float)*(out_frame_w>>1) * h_filter_len ); + hi_y = (int*)malloc( sizeof(int)*out_frame_w ); + hi_uv = (int*)malloc( sizeof(int)*(out_frame_w>>1)); + vf_y = (float*)malloc( sizeof(float)*out_frame_h * v_filter_len ); + vf_uv = (float*)malloc( sizeof(float)*(out_frame_h>>1) * v_filter_len ); + vi_y = (int*)malloc( sizeof(int)*out_frame_h ); + vi_uv = (int*)malloc( sizeof(int)*(out_frame_h>>1) ); + //get filter args + hb_set_gauss_interpolation( hf_y, hi_y, out_frame_w, in_frame_w, h_filter_len, 0 ); + hb_set_gauss_interpolation( hf_uv, hi_uv, out_frame_w>>1, in_frame_w>>1, h_filter_len, 0 ); + hb_set_gauss_interpolation( vf_y, vi_y, out_frame_h, in_frame_h, v_filter_len, 0 ); + hb_set_gauss_interpolation( vf_uv, vi_uv, out_frame_h>>1, in_frame_h>>1, v_filter_len, 0 ); + //create output buffer + if( !os->use_ocl_mem ) + { + CREATEBUF( os->h_in_buf, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, sizeof(uint8_t)* in_frame_w * in_frame_h*3/2 ); + } + CREATEBUF( os->h_out_buf, CL_MEM_WRITE_ONLY, sizeof(uint8_t) * out_frame_w * in_frame_h*3/2 ); + CREATEBUF( os->v_out_buf, CL_MEM_WRITE_ONLY, sizeof(uint8_t) * out_frame_w * out_frame_h*3/2 ); + //create horizontal filter buffer + CREATEBUF( os->h_coeff_y, CL_MEM_READ_ONLY, sizeof(float) * out_frame_w * h_filter_len ); + CREATEBUF( os->h_coeff_uv, CL_MEM_READ_ONLY, sizeof(float) * (out_frame_w>>1) * h_filter_len ); + CREATEBUF( os->h_index_y, CL_MEM_READ_ONLY, sizeof(int) * out_frame_w ); + CREATEBUF( os->h_index_uv, CL_MEM_READ_ONLY, sizeof(int) * (out_frame_w>>1) ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_coeff_y, 
CL_TRUE, 0, sizeof(float) * out_frame_w * h_filter_len, hf_y, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_coeff_uv, CL_TRUE, 0, sizeof(float) * (out_frame_w>>1) * h_filter_len, hf_uv, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_index_y, CL_TRUE, 0, sizeof(int) * out_frame_w, hi_y, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_index_uv, CL_TRUE, 0, sizeof(int) * (out_frame_w>>1), hi_uv, 0, NULL, NULL ); + //create vertical filter buffer + CREATEBUF( os->v_coeff_y, CL_MEM_READ_ONLY, sizeof(float) * out_frame_h * v_filter_len ); + CREATEBUF( os->v_coeff_uv, CL_MEM_READ_ONLY, sizeof(float) * (out_frame_h>>1) * v_filter_len ); + CREATEBUF( os->v_index_y, CL_MEM_READ_ONLY, sizeof(int) * out_frame_h ); + CREATEBUF( os->v_index_uv, CL_MEM_READ_ONLY, sizeof(int) * (out_frame_h>>1) ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->v_coeff_y, CL_TRUE, 0, sizeof(float) * out_frame_h * v_filter_len, vf_y, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->v_coeff_uv, CL_TRUE, 0, sizeof(float) * (out_frame_h>>1) * v_filter_len, vf_uv, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->v_index_y, CL_TRUE, 0, sizeof(int) * out_frame_h, vi_y, 0, NULL, NULL ); + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->v_index_uv, CL_TRUE, 0, sizeof(int) * (out_frame_h>>1), vi_uv, 0, NULL, NULL ); + //create horizontal kernel + os->h_kernel = clCreateKernel( kenv->program, "frame_h_scale", NULL ); + OCLCHECK( clSetKernelArg, os->h_kernel, 1, sizeof(cl_mem), &os->h_coeff_y ); + OCLCHECK( clSetKernelArg, os->h_kernel, 2, sizeof(cl_mem), &os->h_coeff_uv ); + OCLCHECK( clSetKernelArg, os->h_kernel, 3, sizeof(cl_mem), &os->h_index_y ); + OCLCHECK( clSetKernelArg, os->h_kernel, 4, sizeof(cl_mem), &os->h_index_uv ); + OCLCHECK( clSetKernelArg, os->h_kernel, 6, sizeof(int), &in_frame_w ); + OCLCHECK( clSetKernelArg, os->h_kernel, 7, 
sizeof(int), &h_filter_len ); + //create vertical kernel + os->v_kernel = clCreateKernel( kenv->program, "frame_v_scale", NULL ); + OCLCHECK( clSetKernelArg, os->v_kernel, 1, sizeof(cl_mem), &os->v_coeff_y ); + OCLCHECK( clSetKernelArg, os->v_kernel, 2, sizeof(cl_mem), &os->v_coeff_uv ); + OCLCHECK( clSetKernelArg, os->v_kernel, 3, sizeof(cl_mem), &os->v_index_y ); + OCLCHECK( clSetKernelArg, os->v_kernel, 4, sizeof(cl_mem), &os->v_index_uv ); + OCLCHECK( clSetKernelArg, os->v_kernel, 6, sizeof(int), &in_frame_h ); + OCLCHECK( clSetKernelArg, os->v_kernel, 7, sizeof(int), &v_filter_len ); + free( hf_y ); + free( hf_uv ); + free( vf_y ); + free( vf_uv ); + free( hi_y ); + free( hi_uv ); + free( vi_y ); + free( vi_uv ); + } + //start horizontal scaling kernel + + if( !os->use_ocl_mem ) + { + if( kenv->isAMD ) + { + char *mapped = clEnqueueMapBuffer( kenv->command_queue, os->h_in_buf, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, sizeof(uint8_t) * in_frame_w * in_frame_h*3/2, 0, NULL, NULL, NULL ); + memcpy( mapped, in_frame, sizeof(uint8_t) * in_frame_w * in_frame_h*3/2 ); + clEnqueueUnmapMemObject( kenv->command_queue, os->h_in_buf, mapped, 0, NULL, NULL ); + } + else + { + OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, os->h_in_buf, CL_TRUE, 0, sizeof(uint8_t) * in_frame_w * in_frame_h * 3/2, in_frame, 0, NULL, NULL ); + } + } + + kenv->kernel = os->h_kernel; + size_t dims[2]; + dims[0] = out_frame_w; + dims[1] = in_frame_h; + OCLCHECK( clSetKernelArg, kenv->kernel, 0, sizeof(cl_mem), &os->h_in_buf ); + OCLCHECK( clSetKernelArg, kenv->kernel, 5, sizeof(cl_mem), &os->h_out_buf ); + OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, dims, NULL, 0, NULL, NULL ); + //start vertical scaling kernel + + kenv->kernel = os->v_kernel; + dims[0] = out_frame_w; + dims[1] = out_frame_h; + OCLCHECK( clSetKernelArg, kenv->kernel, 0, sizeof(cl_mem), &os->h_out_buf ); + OCLCHECK( clSetKernelArg, kenv->kernel, 5, sizeof(cl_mem), &os->v_out_buf ); + 
OCLCHECK( clEnqueueNDRangeKernel, kenv->command_queue, kenv->kernel, 2, NULL, dims, NULL, 0, NULL, NULL ); + OCLCHECK( clEnqueueReadBuffer, kenv->command_queue, os->v_out_buf, CL_TRUE, 0, sizeof(uint8_t) * out_frame_w * out_frame_h * 3/2, out_frame, 0, NULL, NULL ); + + return 1; +} +/** +* function describe: this function is used to scaling video frame. it uses the gausi scaling algorithm +* parameter: +* inputFrameBuffer: the source video frame opencl buffer +* outputdata: the destination video frame buffer +* inputWidth: the width of the source video frame +* inputHeight: the height of the source video frame +* outputWidth: the width of destination video frame +* outputHeight: the height of destination video frame +*/ +int hb_ocl_scale( cl_mem in_buf, uint8_t *in_data, uint8_t *out_data, int in_w, int in_h, int out_w, int out_h, hb_oclscale_t *os ) +{ + void *data[7]; + static int init_flag = 0; + if( init_flag==0 ) + { + int st = hb_register_kernel_wrapper( "frame_h_scale", hb_ocl_scale_func ); + if( !st ) + { + printf( "register kernel[%s] faild\n", "frame_h_scale" ); + return 0; + } + init_flag++; + } + if( in_data==NULL ) + { + data[0] = in_buf; + os->use_ocl_mem = 1; + } + else + { + data[0] = in_data; + os->use_ocl_mem = 0; + } + data[1] = out_data; + data[2] = (void*)in_w; + data[3] = (void*)in_h; + data[4] = (void*)out_w; + data[5] = (void*)out_h; + data[6] = os; + if( !hb_run_kernel( "frame_h_scale", data ) ) + printf( "run kernel[%s] faild\n", "frame_scale" ); + return 0; +} +#endif diff --git a/libhb/openclkernels.h b/libhb/openclkernels.h new file mode 100644 index 000000000..0ab3014ab --- /dev/null +++ b/libhb/openclkernels.h @@ -0,0 +1,122 @@ +#ifndef USE_EXTERNAL_KERNEL + +#define KERNEL( ... 
)# __VA_ARGS__ + +char *kernel_src_hscale = KERNEL( + + typedef unsigned char fixed8; + + kernel void frame_h_scale( + global fixed8 *src, + global float *hf_Y, + global float *hf_UV, + global int *hi_Y, + global int *hi_UV, + global fixed8 *dst, + int stride, //src_width + int filter_len + ) + { + int x = get_global_id( 0 ); + int y = get_global_id( 1 ); + int width = get_global_size( 0 ); + int height = get_global_size( 1 ); + float result_Y = 0, result_U = 0, result_V = 0; + int i = 0; + + global fixed8 *src_Y = src; + global fixed8 *src_U = src_Y+stride*height; + global fixed8 *src_V = src_U+(stride>>1)*(height>>1); + + global fixed8 *dst_Y = dst; + global fixed8 *dst_U = dst_Y+width*height; + global fixed8 *dst_V = dst_U+(width>>1)*(height>>1); + + int xy = y * width + x; + global fixed8 *rowdata_Y = src_Y+(y * stride); + for( int i = 0; i>1) && x<(width>>1) ) + { + int xy = y * (width>>1) + x; + global fixed8 *rowdata_U = src_U+(y * (stride>>1)); + global fixed8 *rowdata_V = src_V+(y * (stride>>1)); + for( i = 0; i>1)] * rowdata_U[hi_UV[x] + i]); + result_V += ( hf_UV[x+i*(width>>1)] * rowdata_V[hi_UV[x] + i]); + } + dst_U[xy] = result_U; + dst_V[xy] = result_V; + } + } + ); + +char *kernel_src_vscale = KERNEL( + + kernel void frame_v_scale( + global fixed8 *src, + global float *vf_Y, + global float *vf_UV, + global int *vi_Y, + global int *vi_UV, + global fixed8 *dst, + int src_height, + int filter_len + ) + { + int x = get_global_id( 0 ); + int y = get_global_id( 1 ); + int width = get_global_size( 0 ); + int height = get_global_size( 1 ); + float result_Y = 0, result_U = 0, result_V = 0; + int i = 0; + + global fixed8 *src_Y = src; + global fixed8 *src_U = src_Y+src_height*width; + global fixed8 *src_V = src_U+(src_height>>1)*(width>>1); + + global fixed8 *dst_Y = dst; + global fixed8 *dst_U = dst_Y+height*width; + global fixed8 *dst_V = dst_U+(height>>1)*(width>>1); + + int xy = y * width + x; + for( i = 0; i>1) && x<(width>>1) ) + { + int xy = y * 
(width>>1) + x; + for( i = 0; i>1)] * src_U[(vi_UV[y] + i) * (width>>1) + x]; + result_V += vf_UV[y+i*(height>>1)] * src_V[(vi_UV[y] + i) * (width>>1) + x]; + } + dst_U[xy] = result_U; + dst_V[xy] = result_V; + } + } + ); + +char *kernel_src_nvtoyuv = KERNEL( + + kernel void nv12toyuv( global char *input, global char* output, int w, int h ) + { + int x = get_global_id( 0 ); + int y = get_global_id( 1 ); + int idx = y * (w>>1) + x; + vstore4((vload4( 0, input+(idx<<2))), 0, output+(idx<<2)); //Y + char2 uv = vload2( 0, input+(idx<<1)+w*h ); + output[idx+w*h] = uv.s0; + output[idx+w*h+((w*h)>>2)] = uv.s1; + } + ); + +#endif diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c new file mode 100644 index 000000000..7e0195a63 --- /dev/null +++ b/libhb/openclwrapper.c @@ -0,0 +1,934 @@ + +/* openclwrapper.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + */ +#ifdef USE_OPENCL + +#include +#include +#include +#include +#include "openclwrapper.h" +#include "openclkernels.h" + +//#define USE_EXTERNAL_KERNEL + +#if defined(__APPLE__) +#include +#else +#include +#endif + +#if defined(_MSC_VER) +#define strcasecmp strcmpi +#endif + +#define MAX_KERNEL_STRING_LEN 64 +#define MAX_CLFILE_NUM 50 +#define MAX_CLKERNEL_NUM 200 +#define MAX_CLFILE_PATH 255 +#define MAX_KERNEL_NUM 50 +#define MAX_KERNEL_NAME_LEN 64 + +#ifndef INVALID_HANDLE_VALUE +#define INVALID_HANDLE_VALUE NULL +#endif + +//#define THREAD_PRIORITY_TIME_CRITICAL 15 + +enum VENDOR{ + AMD = 0, + Intel, + NVIDIA, + others +}; +typedef struct _GPUEnv +{ + //share vb in all modules in hb library + cl_platform_id platform; + cl_device_type dType; + cl_context context; + cl_device_id * devices; + cl_device_id dev; + cl_command_queue command_queue; + cl_kernel 
kernels[MAX_CLFILE_NUM]; + cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file + char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256 + int file_count; // only one kernel file + + char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1]; + cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM]; + int kernel_count; + int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper + enum VENDOR vendor; +}GPUEnv; + +typedef struct +{ + char kernelName[MAX_KERNEL_NAME_LEN+1]; + char * kernelStr; +}hb_kernel_node; + +static GPUEnv gpu_env; +static int isInited = 0; +static hb_kernel_node gKernels[MAX_KERNEL_NUM]; + +#define ADD_KERNEL_CFG( idx, s, p ){\ + strcpy( gKernels[idx].kernelName, s );\ + gKernels[idx].kernelStr = p;\ + strcpy( gpu_env.kernel_names[idx], s );\ + gpu_env.kernel_count++; } + + +int hb_regist_opencl_kernel() +{ + if( !gpu_env.isUserCreated ) + memset( &gpu_env, 0, sizeof(gpu_env)); + + gpu_env.file_count = 0; //argc; + gpu_env.kernel_count = 0UL; + + ADD_KERNEL_CFG( 0, "frame_h_scale", NULL ) + ADD_KERNEL_CFG( 1, "frame_v_scale", NULL ) + ADD_KERNEL_CFG( 2, "nv12toyuv", NULL ) + + return 0; +} + +int hb_convert_to_string( + const char *filename, + char **source, + GPUEnv *gpu_info, + int idx ) +{ + int file_size; + size_t result; + FILE * file = NULL; + file_size = 0; + result = 0; + file = fopen( filename, "rb+" ); + + + if( file!=NULL ) + { + fseek( file, 0, SEEK_END ); + + file_size = ftell( file ); + rewind( file ); + *source = (char*)malloc( sizeof(char) * file_size + 1 ); + if( *source == (char*)NULL ) + { + return(0); + } + result = fread( *source, 1, file_size, file ); + if( result != file_size ) + { + free( *source ); + return(0); + } + (*source)[file_size] = '\0'; + fclose( file ); + + return(1); + } + return(0); +} + + + +int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle ) +{ + int i = 0; + cl_int status; + size_t 
numDevices; + cl_device_id *devices; + char * str = NULL; + FILE * fd = NULL; + + status = clGetContextInfo( context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_binary_generated: Get context info failed\n" ); + return 0; + } + + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + { + hb_log( "hb_binary_generated: No device found\n" ); + return 0; + } + + /* grab the handles to all of the devices in the context. */ + status = clGetContextInfo( context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + + status = 0; + /* dump out each binary into its own separate file. */ + for( i = 0; i < numDevices; i++ ) + { + char fileName[256] = {0}; + char cl_name[128] = {0}; + if( devices[i] != 0 ) + { + char deviceName[1024]; + status = clGetDeviceInfo( devices[i], + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL ); + str = (char*)strstr( cl_file_name, (char*)".cl" ); + memcpy( cl_name, cl_file_name, str - cl_file_name ); + cl_name[str - cl_file_name] = '\0'; + sprintf( fileName, "./%s-%s.bin", cl_name, deviceName ); + fd = fopen( fileName, "rb" ); + status = (fd != NULL) ? 
1 : 0; + } + } + + if( devices != NULL ) + { + free( devices ); + devices = NULL; + } + + if( fd != NULL ) + *fhandle = fd; + + return status; +} + +int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes ) +{ + FILE *output = NULL; + output = fopen( fileName, "wb" ); + if( output == NULL ) + return 0; + + fwrite( birary, sizeof(char), numBytes, output ); + fclose( output ); + + return 1; +} + + +int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name ) +{ + int i = 0; + cl_int status; + size_t *binarySizes, numDevices; + cl_device_id *devices; + char **binaries; + char *str = NULL; + + status = clGetProgramInfo( program, + CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: Get program info failed\n" ); + return 0; + } + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: No device found\n" ); + return 0; + } + /* grab the handles to all of the devices in the program. */ + status = clGetProgramInfo( program, + CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: Get program info failed\n" ); + return 0; + } + /* figure out the sizes of each of the binaries. */ + binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices ); + + status = clGetProgramInfo( program, + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, + binarySizes, NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: Get program info failed\n" ); + return 0; + } + /* copy over all of the generated binaries. 
*/ + binaries = (char**)malloc( sizeof(char *) * numDevices ); + if( binaries == NULL ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: malloc for binaries failed\n" ); + return 0; + } + + for( i = 0; i < numDevices; i++ ) + { + if( binarySizes[i] != 0 ) + { + binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] ); + if( binaries[i] == NULL ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: malloc for binary[%d] failed\n", i ); + return 0; + } + } + else + { + binaries[i] = NULL; + } + } + + status = clGetProgramInfo( program, + CL_PROGRAM_BINARIES, + sizeof(char *) * numDevices, + binaries, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: Get program info failed\n" ); + return 0; + } + /* dump out each binary into its own separate file. */ + for( i = 0; i < numDevices; i++ ) + { + char fileName[256] = {0}; + char cl_name[128] = {0}; + if( binarySizes[i] != 0 ) + { + char deviceName[1024]; + status = clGetDeviceInfo( devices[i], + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL ); + + str = (char*)strstr( cl_file_name, (char*)".cl" ); + memcpy( cl_name, cl_file_name, str - cl_file_name ); + cl_name[str - cl_file_name] = '\0'; + sprintf( fileName, "./%s-%s.bin", cl_name, deviceName ); + + if( !hb_write_binary_to_file( fileName, binaries[i], binarySizes[i] )) + { + hb_log( "ERROR: hb_generat_bin_from_kernel_source: write binary[%s] failed\n", fileName ); + //printf( "opencl-wrapper: write binary[%s] failds\n", fileName); + return 0; + } //else + //printf( "opencl-wrapper: write binary[%s] succesfully\n", fileName); + } + } + + // Release all resouces and memory + for( i = 0; i < numDevices; i++ ) + { + if( binaries[i] != NULL ) + { + free( binaries[i] ); + binaries[i] = NULL; + } + } + + if( binaries != NULL ) + { + free( binaries ); + binaries = NULL; + } + + if( binarySizes != NULL ) + { + free( binarySizes ); + binarySizes = NULL; + } + + if( devices != NULL ) + { + free( devices ); + 
devices = NULL; + } + return 1; +} + + +int hb_init_opencl_attr( OpenCLEnv * env ) +{ + if( gpu_env.isUserCreated ) + return 1; + + gpu_env.context = env->context; + gpu_env.platform = env->platform; + gpu_env.dev = env->devices; + gpu_env.command_queue = env->command_queue; + + gpu_env.isUserCreated = 1; + + return 0; +} + + +int hb_create_kernel( char * kernelname, KernelEnv * env ) +{ + int status; + env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status ); + env->context = gpu_env.context; + env->command_queue = gpu_env.command_queue; + return status != CL_SUCCESS ? 1 : 0; +} + +int hb_release_kernel( KernelEnv * env ) +{ + int status = clReleaseKernel( env->kernel ); + return status != CL_SUCCESS ? 1 : 0; +} + + + +int hb_init_opencl_env( GPUEnv *gpu_info ) +{ + size_t length; + cl_int status; + cl_uint numPlatforms, numDevices; + cl_platform_id *platforms; + cl_context_properties cps[3]; + char platformName[100]; + unsigned int i; + void *handle = INVALID_HANDLE_VALUE; + + /* + * Have a look at the available platforms. + */ + if( !gpu_info->isUserCreated ) + { + status = clGetPlatformIDs( 0, NULL, &numPlatforms ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: OpenCL device platform not found.\n" ); + return(1); + } + gpu_info->platform = NULL; + if( 0 < numPlatforms ) + { + platforms = (cl_platform_id*)malloc( + numPlatforms * sizeof(cl_platform_id)); + if( platforms == (cl_platform_id*)NULL ) + { + return(1); + } + status = clGetPlatformIDs( numPlatforms, platforms, NULL ); + + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: Specific opencl platform not found.\n" ); + return(1); + } + + for( i = 0; i < numPlatforms; i++ ) + { + status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, + sizeof(platformName), platformName, + NULL ); + + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: No more platform vendor info.\n" ); + return(1); + } + gpu_info->platform = platforms[i]; + + if( !strcmp( platformName, "Advanced Micro Devices, Inc." 
)) + gpu_info->vendor = AMD; + else + gpu_info->vendor = others; + + gpu_info->platform = platforms[i]; + + status = clGetDeviceIDs( gpu_info->platform /* platform */, + CL_DEVICE_TYPE_GPU /* device_type */, + 0 /* num_entries */, + NULL /* devices */, + &numDevices ); + + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: No available GPU device.\n" ); + return(1); + } + + if( numDevices ) + break; + + } + free( platforms ); + } + if( NULL == gpu_info->platform ) + { + return(1); + } + + /* + * Use available platform. + */ + cps[0] = CL_CONTEXT_PLATFORM; + cps[1] = (cl_context_properties)gpu_info->platform; + cps[2] = 0; + /* Check for GPU. */ + gpu_info->dType = CL_DEVICE_TYPE_GPU; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + if((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) + { + gpu_info->dType = CL_DEVICE_TYPE_CPU; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + } + if((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) + { + gpu_info->dType = CL_DEVICE_TYPE_DEFAULT; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + } + if((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) + { + hb_log( "ERROR: Create opencl context error.\n" ); + return(1); + } + /* Detect OpenCL devices. 
*/ + /* First, get the size of device list data */ + status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, + 0, NULL, &length ); + if((status != CL_SUCCESS) || (length == 0)) + { + hb_log( "ERROR: Get the list of devices in context error.\n" ); + return(1); + } + /* Now allocate memory for device list based on the size we got earlier */ + gpu_info->devices = (cl_device_id*)malloc( length ); + if( gpu_info->devices == (cl_device_id*)NULL ) + { + return(1); + } + /* Now, get the device list data */ + status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length, + gpu_info->devices, NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: Get the device list data in context error.\n" ); + return(1); + } + + /* Create OpenCL command queue. */ + gpu_info->command_queue = clCreateCommandQueue( gpu_info->context, + gpu_info->devices[0], + 0, &status ); + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: Create opencl command queue error.\n" ); + return(1); + } + } + + /* Create OpenCL command queue. 
*/ + /*if(!gpu_info->isUserCreated) + gpu_info->command_queue = clCreateCommandQueue(gpu_info->context, + gpu_info->devices[0], + 0, &status); + else + gpu_info->command_queue = clCreateCommandQueue(gpu_info->context, + gpu_info->dev, + 0, &status); + + if ((gpu_info->command_queue == (cl_command_queue) NULL)) + return(1); + */ + + + if( clGetCommandQueueInfo( gpu_info->command_queue, + CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle), + &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE ) + { + SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL ); + } + + return 0; +} + + +int hb_release_opencl_env( GPUEnv *gpu_info ) +{ + if( !isInited ) + return 1; + int i; + + for( i = 0; iisUserCreated = 0; + return 1; +} + + +int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ) +{ + int i; + for( i = 0; i < gpu_env.kernel_count; i++ ) + { + if( strcasecmp( kernel_name, gpu_env.kernel_names[i] )==0 ) + { + gpu_env.kernel_functions[i] = function; + return(1); + } + } + return(0); +} + +int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name ) +{ + int i; + for( i = 0; i < gpu_env->file_count; i++ ) + { + if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name )==0 ) + { + if( gpu_env->programs[i] != NULL ) + return(1); + } + } + + return(0); +} + +int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, + int indx, const char *build_option ) +{ + cl_int status; + size_t length; + char *source_str; + const char *source; + size_t source_size[1]; + char *buildLog = NULL; + int b_error, binary_status, binaryExisted; + char * binary; + size_t numDevices; + cl_device_id *devices; + FILE * fd; + FILE * fd1; + int idx; + + if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 ) + return (1); + + idx = gpu_info->file_count; + +#ifdef USE_EXTERNAL_KERNEL + status = hb_convert_to_string( filename, &source_str, gpu_info, idx ); +#else + int kernel_src_size = strlen( kernel_src_hscale )+strlen( 
kernel_src_vscale )+strlen( kernel_src_nvtoyuv ); + source_str = (char*)malloc( kernel_src_size+2 ); + strcpy( source_str, kernel_src_hscale ); + strcat( source_str, kernel_src_vscale ); + strcat( source_str, kernel_src_nvtoyuv ); +#endif + + if( status == 0 ) + return(0); + + source = source_str; + source_size[0] = strlen( source ); + + binaryExisted = 0; + if((binaryExisted = hb_binary_generated( gpu_info->context, filename, &fd )) == 1 ) + { + status = clGetContextInfo( gpu_info->context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ){ + hb_log( "ERROR: Get the number of devices in context error.\n" ); + return 0; + } + + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + return 0; + + b_error = 0; + length = 0; + b_error |= fseek( fd, 0, SEEK_END ) < 0; + b_error |= ( length = ftell( fd ) ) <= 0; + b_error |= fseek( fd, 0, SEEK_SET ) < 0; + if( b_error ) + return 0; + + binary = (char*)malloc( length+2 ); + if( !binary ) + return 0; + + memset( binary, 0, length+2 ); + b_error |= fread( binary, 1, length, fd ) != length; + if( binary[length-1] != '\n' ) + binary[length++] = '\n'; + + fclose( fd ); + fd = NULL; + /* grab the handles to all of the devices in the context. 
*/ + status = clGetContextInfo( gpu_info->context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + + gpu_info->programs[idx] = clCreateProgramWithBinary( gpu_info->context, + numDevices, + devices, + &length, + (const unsigned char**)&binary, + &binary_status, + &status ); + + free( devices ); + devices = NULL; + } + else + { + /* create a CL program using the kernel source */ + gpu_info->programs[idx] = clCreateProgramWithSource( + gpu_info->context, 1, &source, source_size, &status ); + } + + if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){ + hb_log( "ERROR: Get list of devices in context error.\n" ); + return(0); + } + + /* create a cl program executable for all the devices specified */ + if( !gpu_info->isUserCreated ) + status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices, + build_option, NULL, NULL ); + else + status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev), + build_option, NULL, NULL ); + + if( status != CL_SUCCESS ) + { + if( !gpu_info->isUserCreated ) + status = clGetProgramBuildInfo( gpu_info->programs[idx], + gpu_info->devices[0], + CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + else + status = clGetProgramBuildInfo( gpu_info->programs[idx], + gpu_info->dev, + CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + + if( status != CL_SUCCESS ) + { + hb_log( "ERROR: Get GPU build information error.\n" ); + return(0); + } + buildLog = (char*)malloc( length ); + if( buildLog == (char*)NULL ) + { + return(0); + } + if( !gpu_info->isUserCreated ) + status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0], + CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); + else + status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev, + CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); + + fd1 = fopen( "kernel-build.log", "w+" ); + if( fd1 != NULL ) { + fwrite( buildLog, sizeof(char), length, fd1 ); + fclose( fd1 ); + } + + free( buildLog ); + 
return(0); + } + + strcpy( gpu_env.kernelSrcFile[idx], filename ); + + if( binaryExisted == 0 ) + hb_generat_bin_from_kernel_source( gpu_env.programs[idx], filename ); + + gpu_info->file_count += 1; + + return(1); +} + + +int hb_get_kernel_env_and_func( const char *kernel_name, + KernelEnv *env, + cl_kernel_function *function ) +{ + int i; //,program_idx ; + for( i = 0; i < gpu_env.kernel_count; i++ ) + { + if( strcasecmp( kernel_name, gpu_env.kernel_names[i] )==0 ) + { + //program_idx = 0; + //GetProgramIndex(i, &gpu_env, &program_idx); + env->context = gpu_env.context; + env->command_queue = gpu_env.command_queue; + env->program = gpu_env.programs[0]; + env->kernel = gpu_env.kernels[i]; + env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0; + *function = gpu_env.kernel_functions[i]; + return(1); + } + } + return(0); +} + + +int hb_run_kernel( const char *kernel_name, void **userdata ) +{ + KernelEnv env; + cl_kernel_function function; + int status; + memset( &env, 0, sizeof(KernelEnv)); + status = hb_get_kernel_env_and_func( kernel_name, &env, &function ); + strcpy( env.kernel_name, kernel_name ); + if( status == 1 ) + return(function( userdata, &env )); + return(0); +} + + +int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ) +{ + int status = 0; + if( MAX_CLKERNEL_NUM <= 0 ) + return 1; + if((argc > MAX_CLFILE_NUM) || (argc<0)) + return 1; + + if( !isInited ) + { + hb_regist_opencl_kernel(); + + /*initialize devices, context, comand_queue*/ + status = hb_init_opencl_env( &gpu_env ); + if( status ) + return(1); + + /*initialize program, kernel_name, kernel_count*/ + //file_name = argv[i]; + status = hb_compile_kernel_file( "hb-kernels.cl", &gpu_env, 0, build_option ); + + if( status == 0 || gpu_env.kernel_count == 0 ) + { + return(1); + + } + + isInited = 1; + } + + return(0); +} + + +int hb_release_opencl_run_env() +{ + return hb_release_opencl_env( &gpu_env ); +} + + +int hb_opencl_stats() +{ + return isInited; +} + +int hb_get_opencl_env() 
+{ + int i = 0; + cl_int status; + size_t numDevices; + cl_device_id *devices; + /*initialize devices, context, comand_queue*/ + status = hb_init_opencl_env( &gpu_env ); + if( status ) + return(1); + status = clGetContextInfo( gpu_env.context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + return 0; + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + return 0; + /* grab the handles to all of the devices in the context. */ + status = clGetContextInfo( gpu_env.context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + status = 0; + /* dump out each binary into its own separate file. */ + for( i = 0; i < numDevices; i++ ) + { + if( devices[i] != 0 ) + { + char deviceName[1024]; + status = clGetDeviceInfo( devices[i], + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL ); + hb_log( "GPU Device Name: %s", deviceName ); + char driverVersion[1024]; + status = clGetDeviceInfo( devices[i], + CL_DRIVER_VERSION, + sizeof(deviceName), + driverVersion, + NULL ); + hb_log( "GPU Driver Version: %s", driverVersion ); + } + } + if( devices != NULL ) + { + free( devices ); + devices = NULL; + } + return status; +} +#endif diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h new file mode 100644 index 000000000..933e7a3b3 --- /dev/null +++ b/libhb/openclwrapper.h @@ -0,0 +1,79 @@ +/* openclwrapper.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + + */ +#ifndef __OPENCL_WRAPPER_H +#define __OPENCL_WRAPPER_H +#include "common.h" + +//support AMD opencl +#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +typedef struct _KernelEnv +{ +#ifdef USE_OPENCL + cl_context context; + cl_command_queue command_queue; + cl_program program; + cl_kernel kernel; +#endif + char kernel_name[150]; + int isAMD; +}KernelEnv; + +typedef struct _OpenCLEnv +{ +#ifdef USE_OPENCL + cl_platform_id platform; + cl_context context; + cl_device_id devices; + cl_command_queue command_queue; +#endif +}OpenCLEnv; + + +//user defined, this is function wrapper which is used to set the input parameters , +//luanch kernel and copy data from GPU to CPU or CPU to GPU. +typedef int (*cl_kernel_function)( void **userdata, KernelEnv *kenv ); + +// registe a wapper for running the kernel specified by the kernel name +int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ); + +// run kernel , user call this function to luanch kernel. +// kernel_name: this kernel name is used to find the kernel in opencl runtime environment +// userdata: this userdata is the all parameters for running the kernel specified by kernel name +int hb_run_kernel( const char *kernel_name, void **userdata ); + +// init the run time environment , this function must be called befor calling any function related to opencl +// the argc must be set zero , argv must be set NULL, build_option is the options for build the kernel. 
+int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ); + + +//relase all resource about the opencl , this function must be called after calling any functions related to opencl +int hb_release_opencl_run_env(); + + +// get the opencl status , 0: not init ; 1, inited; this function is used the check whether or not the opencl run time has been created +int hb_opencl_stats(); + +// update opencl run time environments , such as commandqueue , platforme, context. program +int hb_init_opencl_attr( OpenCLEnv * env ); + +// create kernel object by a kernel name on the specified opencl run time indicated by env parameter +int hb_create_kernel( char * kernelname, KernelEnv * env ); + +// release kernel object which is generated by calling the hb_create_kernel api +int hb_release_kernel( KernelEnv * env ); + +int hb_get_opencl_env(); +#endif diff --git a/libhb/stream.c b/libhb/stream.c index 02a49e20c..bf3f726a6 100644 --- a/libhb/stream.c +++ b/libhb/stream.c @@ -16,6 +16,7 @@ #include "lang.h" #include "a52dec/a52.h" #include "libbluray/bluray.h" +#include "vadxva2.h" #define min(a, b) a < b ? 
a : b #define HB_MAX_PROBE_SIZE (1*1024*1024) @@ -609,6 +610,8 @@ static int hb_stream_get_type(hb_stream_t *stream) if ( fread(buf, 1, sizeof(buf), stream->file_handle) == sizeof(buf) ) { + if ( hb_get_gui_info(&hb_gui, 1) || (hb_get_gui_info(&hb_gui, 3) == 0) ) + return 0; int psize; if ( ( psize = hb_stream_check_for_ts(buf) ) != 0 ) { @@ -1096,7 +1099,24 @@ hb_title_t * hb_stream_title_scan(hb_stream_t *stream, hb_title_t * title) { hb_log( "transport stream missing PCRs - using video DTS instead" ); } - + if ( hb_get_gui_info(&hb_gui, 3) == 0 ) + { + hb_va_dxva2_t * dxva2 = NULL; + dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param ); + if (dxva2) + { + title->uvd_support = 1; + hb_va_close(dxva2); + dxva2 = NULL; + } + else + title->uvd_support = 0; +#ifdef USE_OPENCL + title->opencl_support = TestGPU(); +#else + title->opencl_support = 1; +#endif + } // Height, width, rate and aspect ratio information is filled in // when the previews are built return title; @@ -5624,6 +5644,24 @@ static hb_title_t *ffmpeg_title_scan( hb_stream_t *stream, hb_title_t *title ) hb_list_add( title->list_chapter, chapter ); } + if ( hb_get_gui_info(&hb_gui, 3) == 0 ) + { + hb_va_dxva2_t * dxva2 = NULL; + dxva2 = hb_va_create_dxva2( dxva2, title->video_codec_param ); + if (dxva2) + { + title->uvd_support = 1; + hb_va_close(dxva2); + dxva2 = NULL; + } + else + title->uvd_support = 0; +#ifdef USE_OPENCL + title->opencl_support = TestGPU(); +#else + title->opencl_support = 1; +#endif + } return title; } diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c new file mode 100644 index 000000000..e31105f7f --- /dev/null +++ b/libhb/vadxva2.c @@ -0,0 +1,827 @@ +/* vadxva2.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + +#include "vadxva2.h" +#include "CL/cl.h" +#include "oclnv12toyuv.h" + +static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ); +static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ); +static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 ); +static void hb_d3d_destroy_device( hb_va_dxva2_t *dxvva2 ); +static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 ); +static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 ); +static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 ); +static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 ); +static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output ); +static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt ); +static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 ); +static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format ); +static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid ); +static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 ); + +#ifdef USE_OPENCL +int TestGPU() +{ + int status = 1; + unsigned int i; + cl_uint numPlatforms = 0; + status = clGetPlatformIDs(0,NULL,&numPlatforms); + if(status != 0) + { + goto end; + } + if(numPlatforms > 0) + { + cl_platform_id* platforms = (cl_platform_id* )malloc (numPlatforms* sizeof(cl_platform_id)); + status = clGetPlatformIDs (numPlatforms, platforms,NULL); + if(status != 0) + { + goto end; + } + for (i=0; i < numPlatforms; i++) + { + char pbuff[100]; + cl_uint numDevices; + status = clGetPlatformInfo( + platforms[i], + CL_PLATFORM_VENDOR, + sizeof (pbuff), + pbuff, + NULL); + if (status) + continue; + status = clGetDeviceIDs(platforms[i], + CL_DEVICE_TYPE_GPU , + 0 , + NULL , + &numDevices); + if (status != CL_SUCCESS) + continue; + if(numDevices) + break; + } + 
free(platforms); + } + end: + return status; +} +#endif +/** + * It destroys a Direct3D device manager + */ +static void hb_d3d_destroy_device_manager( hb_va_dxva2_t *dxva2 ) +{ + if( dxva2->devmng ) + IDirect3DDeviceManager9_Release( dxva2->devmng ); +} +/** + * It releases a Direct3D device and its resources. + */ +static void hb_d3d_destroy_device( hb_va_dxva2_t *dxva2 ) +{ + if( dxva2->d3ddev ) + IDirect3DDevice9_Release( dxva2->d3ddev ); + if( dxva2->d3dobj ) + IDirect3D9_Release( dxva2->d3dobj ); +} +/** + * It destroys a DirectX video service + */ +static void hb_dx_destroy_video_service( hb_va_dxva2_t *dxva2 ) +{ + if( dxva2->device ) + IDirect3DDeviceManager9_CloseDeviceHandle( dxva2->devmng, dxva2->device ); + + if( dxva2->vs ) + IDirectXVideoDecoderService_Release( dxva2->vs ); +} + +static const hb_d3d_format_t *hb_d3d_find_format( D3DFORMAT format ) +{ + unsigned i; + for( i = 0; d3d_formats[i].name; i++ ) + { + if( d3d_formats[i].format == format ) + return &d3d_formats[i]; + } + return NULL; +} + +static void hb_dx_create_video_conversion( hb_va_dxva2_t *dxva2 ) +{ + switch( dxva2->render ) + { + case MAKEFOURCC( 'N', 'V', '1', '2' ): + dxva2->output = MAKEFOURCC( 'Y', 'V', '1', '2' ); + break; + default: + dxva2->output = dxva2->render; + break; + } +} + +void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame ) +{ + LPDIRECT3DSURFACE9 d3d = (LPDIRECT3DSURFACE9)(uintptr_t)frame->data[3]; + unsigned i; + for( i = 0; i < dxva2->surface_count; i++ ) + { + hb_va_surface_t *surface = &dxva2->surface[i]; + if( surface->d3d == d3d ) + surface->refcount--; + } +} + + +void hb_va_close( hb_va_dxva2_t *dxva2 ) +{ + hb_dx_destroy_video_decoder( dxva2 ); + hb_dx_destroy_video_service( dxva2 ); + hb_d3d_destroy_device_manager( dxva2 ); + hb_d3d_destroy_device( dxva2 ); + + if( dxva2->hdxva2_dll ) + FreeLibrary( dxva2->hdxva2_dll ); + if( dxva2->hd3d9_dll ) + FreeLibrary( dxva2->hd3d9_dll ); + +#ifdef USE_OPENCL + if ( dxva2->nv12toyuv_tmp_in ) + free( 
dxva2->nv12toyuv_tmp_in ); + if ( dxva2->nv12toyuv_tmp_out ) + free( dxva2->nv12toyuv_tmp_out ); +#endif + free( dxva2->description ); + free( dxva2 ); +} + +/** + * It creates a DXVA2 decoder using the given video format + */ +static int hb_dx_create_video_decoder( hb_va_dxva2_t *dxva2, int codec_id, const hb_title_t* fmt ) +{ + dxva2->width = fmt->width; + dxva2->height = fmt->height; + dxva2->surface_width = (fmt->width + 15) & ~15; + dxva2->surface_height = (fmt->height + 15) & ~15; + switch( codec_id ) + { + case CODEC_ID_H264: + dxva2->surface_count = 16 + 1; + break; + default: + dxva2->surface_count = 2 + 1; + break; + } + LPDIRECT3DSURFACE9 surface_list[VA_DXVA2_MAX_SURFACE_COUNT]; + if( FAILED( IDirectXVideoDecoderService_CreateSurface( dxva2->vs, + dxva2->surface_width, + dxva2->surface_height, + dxva2->surface_count - 1, + dxva2->render, + D3DPOOL_DEFAULT, + 0, + DXVA2_VideoDecoderRenderTarget, + surface_list, NULL ))) + { + hb_log( "dxva2:IDirectXVideoAccelerationService_CreateSurface failed\n" ); + dxva2->surface_count = 0; + return HB_WORK_ERROR; + } + + unsigned i; + for( i = 0; isurface_count; i++ ) + { + hb_va_surface_t *surface = &dxva2->surface[i]; + surface->d3d = surface_list[i]; + surface->refcount = 0; + surface->order = 0; + } + hb_log( "dxva2:CreateSurface succeed with %d, fmt (%dx%d) surfaces (%dx%d)\n", dxva2->surface_count, + fmt->width, + fmt->height, + dxva2->surface_width, + dxva2->surface_height ); + DXVA2_VideoDesc dsc; + memset( &dsc, 0, sizeof(dsc)); + dsc.SampleWidth = fmt->width; + dsc.SampleHeight = fmt->height; + dsc.Format = dxva2->render; + + if( fmt->rate> 0 && fmt->rate_base> 0 ) + { + dsc.InputSampleFreq.Numerator = fmt->rate; + dsc.InputSampleFreq.Denominator = fmt->rate_base; + } + else + { + dsc.InputSampleFreq.Numerator = 0; + dsc.InputSampleFreq.Denominator = 0; + } + + dsc.OutputFrameFreq = dsc.InputSampleFreq; + dsc.UABProtectionLevel = FALSE; + dsc.Reserved = 0; + + /* FIXME I am unsure we can let unknown 
everywhere */ + DXVA2_ExtendedFormat *ext = &dsc.SampleFormat; + ext->SampleFormat = 0; //DXVA2_SampleUnknown; + ext->VideoChromaSubsampling = 0; //DXVA2_VideoChromaSubsampling_Unknown; + ext->NominalRange = 0; //DXVA2_NominalRange_Unknown; + ext->VideoTransferMatrix = 0; //DXVA2_VideoTransferMatrix_Unknown; + ext->VideoLighting = 0; //DXVA2_VideoLighting_Unknown; + ext->VideoPrimaries = 0; //DXVA2_VideoPrimaries_Unknown; + ext->VideoTransferFunction = 0; //DXVA2_VideoTransFunc_Unknown; + + /* List all configurations available for the decoder */ + UINT cfg_count = 0; + DXVA2_ConfigPictureDecode *cfg_list = NULL; + if( FAILED( IDirectXVideoDecoderService_GetDecoderConfigurations( dxva2->vs, &dxva2->input, &dsc, NULL, &cfg_count, &cfg_list ))) + { + hb_log( "dxva2:IDirectXVideoDecoderService_GetDecoderConfigurations failed\n" ); + return HB_WORK_ERROR; + } + hb_log( "dxva2:we got %d decoder configurations\n", cfg_count ); + + /* Select the best decoder configuration */ + int cfg_score = 0; + for( i = 0; i < cfg_count; i++ ) + { + const DXVA2_ConfigPictureDecode *cfg = &cfg_list[i]; + hb_log( "dxva2:configuration[%d] ConfigBitstreamRaw %d\n", i, cfg->ConfigBitstreamRaw ); + int score; + if( cfg->ConfigBitstreamRaw == 1 ) + score = 1; + else if( codec_id == CODEC_ID_H264 && cfg->ConfigBitstreamRaw == 2 ) + score = 2; + else + continue; + if( IsEqualGUID( &cfg->guidConfigBitstreamEncryption, &DXVA_NoEncrypt )) + score += 16; + if( cfg_score < score ) + { + dxva2->cfg = *cfg; + cfg_score = score; + } + } + //my_release(cfg_list); + if( cfg_score <= 0 ) + { + hb_log( "dxva2:Failed to find a supported decoder configuration\n" ); + return HB_WORK_ERROR; + } + + /* Create the decoder */ + IDirectXVideoDecoder *decoder; + if( FAILED( IDirectXVideoDecoderService_CreateVideoDecoder( dxva2->vs, &dxva2->input, &dsc, &dxva2->cfg, surface_list, dxva2->surface_count, &decoder ))) + { + hb_log( "dxva2:IDirectXVideoDecoderService_CreateVideoDecoder failed\n" ); + return HB_WORK_ERROR; 
+ } + dxva2->decoder = decoder; + hb_log( "dxva2:IDirectXVideoDecoderService_CreateVideoDecoder succeed\n" ); + return HB_WORK_OK; +} + +typedef HWND (WINAPI *PROCGETSHELLWND)(); +/** + * It creates a DirectX video service + */ +static int hb_d3d_create_device( hb_va_dxva2_t *dxva2 ) +{ + LPDIRECT3D9 (WINAPI *Create9)( UINT SDKVersion ); + Create9 = (void*)GetProcAddress( dxva2->hd3d9_dll, TEXT( "Direct3DCreate9" )); + if( !Create9 ) + { + hb_log( "dxva2:Cannot locate reference to Direct3DCreate9 ABI in DLL\n" ); + return HB_WORK_ERROR; + } + LPDIRECT3D9 d3dobj; + d3dobj = Create9( D3D_SDK_VERSION ); + if( !d3dobj ) + { + hb_log( "dxva2:Direct3DCreate9 failed\n" ); + return HB_WORK_ERROR; + } + dxva2->d3dobj = d3dobj; + D3DADAPTER_IDENTIFIER9 *d3dai = &dxva2->d3dai; + if( FAILED( IDirect3D9_GetAdapterIdentifier( dxva2->d3dobj, D3DADAPTER_DEFAULT, 0, d3dai ))) + { + hb_log( "dxva2:IDirect3D9_GetAdapterIdentifier failed\n" ); + memset( d3dai, 0, sizeof(*d3dai)); + } + + PROCGETSHELLWND GetShellWindow; + HMODULE hUser32 = GetModuleHandle( "user32" ); + GetShellWindow = (PROCGETSHELLWND) + GetProcAddress( hUser32, "GetShellWindow" ); + + D3DPRESENT_PARAMETERS *d3dpp = &dxva2->d3dpp; + memset( d3dpp, 0, sizeof(*d3dpp)); + d3dpp->Flags = D3DPRESENTFLAG_VIDEO; + d3dpp->Windowed = TRUE; + d3dpp->hDeviceWindow = NULL; + d3dpp->SwapEffect = D3DSWAPEFFECT_DISCARD; + d3dpp->MultiSampleType = D3DMULTISAMPLE_NONE; + d3dpp->PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT; + d3dpp->BackBufferCount = 0; /* FIXME what to put here */ + d3dpp->BackBufferFormat = D3DFMT_X8R8G8B8; /* FIXME what to put here */ + d3dpp->BackBufferWidth = 0; + d3dpp->BackBufferHeight = 0; + d3dpp->EnableAutoDepthStencil = FALSE; + + LPDIRECT3DDEVICE9 d3ddev; + //if (FAILED(IDirect3D9_CreateDevice(d3dobj, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, GetShellWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING|D3DCREATE_MULTITHREADED, d3dpp, &d3ddev))) + if( FAILED( IDirect3D9_CreateDevice( d3dobj, + D3DADAPTER_DEFAULT, 
+ D3DDEVTYPE_HAL, + GetShellWindow(), + D3DCREATE_HARDWARE_VERTEXPROCESSING|D3DCREATE_MULTITHREADED, + d3dpp, + &d3ddev ))) + { + hb_log( "dxva2:IDirect3D9_CreateDevice failed\n" ); + return HB_WORK_ERROR; + } + dxva2->d3ddev = d3ddev; + + + return HB_WORK_OK; +} +/** + * It creates a Direct3D device manager + */ +static int hb_d3d_create_device_manager( hb_va_dxva2_t *dxva2 ) +{ + HRESULT(WINAPI *CreateDeviceManager9)( UINT *pResetToken, IDirect3DDeviceManager9 ** ); + CreateDeviceManager9 = (void*)GetProcAddress( dxva2->hdxva2_dll, TEXT( "DXVA2CreateDirect3DDeviceManager9" )); + + if( !CreateDeviceManager9 ) + { + hb_log( "dxva2:cannot load function\n" ); + return HB_WORK_ERROR; + } + hb_log( "dxva2:OurDirect3DCreateDeviceManager9 Success!\n" ); + + UINT token; + IDirect3DDeviceManager9 *devmng; + if( FAILED( CreateDeviceManager9( &token, &devmng ))) + { + hb_log( "dxva2:OurDirect3DCreateDeviceManager9 failed\n" ); + return HB_WORK_ERROR; + } + dxva2->token = token; + dxva2->devmng = devmng; + hb_log( "dxva2:obtained IDirect3DDeviceManager9\n" ); + + long hr = IDirect3DDeviceManager9_ResetDevice( devmng, dxva2->d3ddev, token ); + if( FAILED( hr )) + { + hb_log( "dxva2:IDirect3DDeviceManager9_ResetDevice failed: %08x\n", (unsigned)hr ); + return HB_WORK_ERROR; + } + return HB_WORK_OK; +} +/** + * It creates a DirectX video service + */ +static int hb_dx_create_video_service( hb_va_dxva2_t *dxva2 ) +{ + HRESULT (WINAPI *CreateVideoService)( IDirect3DDevice9 *, REFIID riid, void **ppService ); + CreateVideoService = (void*)GetProcAddress( dxva2->hdxva2_dll, TEXT( "DXVA2CreateVideoService" )); + + if( !CreateVideoService ) + { + hb_log( "dxva2:cannot load function\n" ); + return HB_WORK_ERROR; + } + hb_log( "dxva2:DXVA2CreateVideoService Success!\n" ); + + HRESULT hr; + + HANDLE device; + hr = IDirect3DDeviceManager9_OpenDeviceHandle( dxva2->devmng, &device ); + if( FAILED( hr )) + { + hb_log( "dxva2:OpenDeviceHandle failed\n" ); + return HB_WORK_ERROR; + } + 
dxva2->device = device; + + IDirectXVideoDecoderService *vs; + hr = IDirect3DDeviceManager9_GetVideoService( dxva2->devmng, device, &IID_IDirectXVideoDecoderService, (void*)&vs ); + if( FAILED( hr )) + { + hb_log( "dxva2:GetVideoService failed\n" ); + return HB_WORK_ERROR; + } + dxva2->vs = vs; + + return HB_WORK_OK; +} +/** + * Find the best suited decoder mode GUID and render format. + */ +static int hb_dx_find_video_service_conversion( hb_va_dxva2_t *dxva2, GUID *input, D3DFORMAT *output ) +{ + unsigned int input_count = 0; + GUID *input_list = NULL; + if( FAILED( IDirectXVideoDecoderService_GetDecoderDeviceGuids( dxva2->vs, &input_count, &input_list ))) + { + hb_log( "dxva2:IDirectXVideoDecoderService_GetDecoderDeviceGuids failed\n" ); + return HB_WORK_ERROR; + } + unsigned i, j; + for( i = 0; i < input_count; i++ ) + { + const GUID *g = &input_list[i]; + const hb_dx_mode_t *mode = hb_dx_find_mode( g ); + if( mode ) + { + hb_log( "dxva2:'%s' is supported by hardware\n", mode->name ); + } + else + { + //hb_log( "- Unknown GUID = %08X-%04x-%04x-XXXX\n", (unsigned)g->Data1, g->Data2, g->Data3); + } + } + + for( i = 0; dxva2_modes[i].name; i++ ) + { + const hb_dx_mode_t *mode = &dxva2_modes[i]; + if( !mode->codec || mode->codec != dxva2->codec_id ) + continue; + + int is_suported = 0; + const GUID *g; + for( g = &input_list[0]; !is_suported && g < &input_list[input_count]; g++ ) + { + is_suported = IsEqualGUID( mode->guid, g ); + } + if( !is_suported ) + continue; + + hb_log( "dxva2: Trying to use '%s' as input\n", mode->name ); + unsigned int output_count = 0; + D3DFORMAT *output_list = NULL; + if( FAILED( IDirectXVideoDecoderService_GetDecoderRenderTargets( dxva2->vs, mode->guid, &output_count, &output_list ))) + { + hb_log( "dxva2:IDirectXVideoDecoderService_GetDecoderRenderTargets failed" ); + continue; + } + for( j = 0; j < output_count; j++ ) + { + const D3DFORMAT f = output_list[j]; + const hb_d3d_format_t *format = hb_d3d_find_format( f ); + if( format ) + 
{ + hb_log( "dxva2:%s is supported for output\n", format->name ); + } + else + { + hb_log( "dxvar2:%d is supported for output (%4.4s)\n", f, (const char*)&f ); + } + } + + for( j = 0; d3d_formats[j].name; j++ ) + { + const hb_d3d_format_t *format = &d3d_formats[j]; + int is_suported = 0; + unsigned k; + for( k = 0; !is_suported && k < output_count; k++ ) + { + is_suported = format->format == output_list[k]; + } + if( !is_suported ) + continue; + hb_log( "dxva2:Using '%s' to decode to '%s'\n", mode->name, format->name ); + *input = *mode->guid; + *output = format->format; + return HB_WORK_OK; + } + } + return HB_WORK_ERROR; +} +static const hb_dx_mode_t *hb_dx_find_mode( const GUID *guid ) +{ + unsigned i; + for( i = 0; dxva2_modes[i].name; i++ ) + { + if( IsEqualGUID( dxva2_modes[i].guid, guid )) + return &dxva2_modes[i]; + } + return NULL; +} + + +static void hb_dx_destroy_video_decoder( hb_va_dxva2_t *dxva2 ) +{ + if( dxva2->decoder ) + IDirectXVideoDecoder_Release( dxva2->decoder ); + dxva2->decoder = NULL; + + unsigned i; + for( i = 0; isurface_count; i++ ) + IDirect3DSurface9_Release( dxva2->surface[i].d3d ); + dxva2->surface_count = 0; +} +/** + * setup dxva2 +*/ +static int hb_va_setup( hb_va_dxva2_t *dxva2, void **hw, int width, int height ) +{ + if( dxva2->width == width && dxva2->height == height && dxva2->decoder ) + goto ok; + + hb_dx_destroy_video_decoder( dxva2 ); + *hw = NULL; + dxva2->i_chroma = 0; + + if( width <= 0 || height <= 0 ) return HB_WORK_ERROR; + + hb_title_t fmt; + memset( &fmt, 0, sizeof(fmt)); + fmt.width = width; + fmt.height = height; + + if( hb_dx_create_video_decoder( dxva2, dxva2->codec_id, &fmt )==HB_WORK_ERROR ) + return HB_WORK_ERROR; + dxva2->hw.decoder = dxva2->decoder; + dxva2->hw.cfg = &dxva2->cfg; + dxva2->hw.surface_count = dxva2->surface_count; + dxva2->hw.surface = dxva2->hw_surface; + + unsigned i; + for( i = 0; i < dxva2->surface_count; i++ ) + dxva2->hw.surface[i] = dxva2->surface[i].d3d; + + 
hb_dx_create_video_conversion( dxva2 ); + +ok: + *hw = &dxva2->hw; + const hb_d3d_format_t *output = hb_d3d_find_format( dxva2->output ); + dxva2->i_chroma = output->codec; + return HB_WORK_OK; + +} + +static int hb_va_get( hb_va_dxva2_t *dxva2, AVFrame *frame ) +{ + /*HRESULT hr = IDirect3DDeviceManager9_TestDevice(dxva2->devmng, dxva2->device); + if (hr == DXVA2_E_NEW_VIDEO_DEVICE) + { + return HB_WORK_ERROR; + } + else if (FAILED(hr)) + { + hb_log( "dxva2:IDirect3DDeviceManager9_TestDevice %u\n", (unsigned)hr); + return HB_WORK_ERROR; + }*/ + unsigned i, old; + for( i = 0, old = 0; i < dxva2->surface_count; i++ ) + { + hb_va_surface_t *surface = &dxva2->surface[i]; + if( !surface->refcount ) + break; + if( surface->order < dxva2->surface[old].order ) + old = i; + } + if( i >= dxva2->surface_count ) + i = old; + + hb_va_surface_t *surface = &dxva2->surface[i]; + + surface->refcount = 1; + surface->order = dxva2->surface_order++; + + for( i = 0; i < 4; i++ ) + { + frame->data[i] = NULL; + frame->linesize[i] = 0; + if( i == 0 || i == 3 ) + frame->data[i] = (void*)surface->d3d; + } + return HB_WORK_OK; +} +/** + * nv12 to yuv of c reference + */ +static void hb_copy_from_nv12( uint8_t *dst, uint8_t *src[2], size_t src_pitch[2], unsigned width, unsigned height ) +{ + unsigned int i, j; + uint8_t *dstU, *dstV; + dstU = dst + width*height; + dstV = dstU + width*height/4; + unsigned int heithtUV, widthUV; + heithtUV = height/2; + widthUV = width/2; + + for( i = 0; idata[3]; + D3DLOCKED_RECT lock; + if( FAILED( IDirect3DSurface9_LockRect( d3d, &lock, NULL, D3DLOCK_READONLY ))) + { + hb_log( "dxva2:Failed to lock surface\n" ); + return HB_WORK_ERROR; + } + + if( dxva2->render == MAKEFOURCC( 'N', 'V', '1', '2' )) + { + uint8_t *plane[2] = + { + lock.pBits, + (uint8_t*)lock.pBits + lock.Pitch * dxva2->surface_height + }; + size_t pitch[2] = + { + lock.Pitch, + lock.Pitch, + }; +#ifdef USE_OPENCL + if( ( dxva2->width > job_w || dxva2->height > job_h ) && (TestGPU() == 0) ) + 
{ +/* int i; + uint8_t *tmp = (uint8_t*)malloc( dxva2->width*dxva2->height*3/2 ); + for( i = 0; i < dxva2->height; i++ ) + { + memcpy( tmp+i*dxva2->width, plane[0]+i*lock.Pitch, dxva2->width ); + if( iheight>>1 ) + memcpy( tmp+(dxva2->width*dxva2->height)+i*dxva2->width, plane[1]+i*lock.Pitch, dxva2->width ); + } +*/ + hb_ocl_nv12toyuv( plane, lock.Pitch, dxva2->width, dxva2->height, crop, dxva2 ); + //hb_ocl_nv12toyuv( tmp, dxva2->width, dxva2->height, crop, dxva2 ); + hb_ocl_scale( dxva2->cl_mem_yuv, NULL, dst, dxva2->width - ( crop[2] + crop[3] ), dxva2->height - ( crop[0] + crop[1] ), job_w, job_h, os ); + //free( tmp ); + } + else +#endif + { + hb_copy_from_nv12( dst, plane, pitch, dxva2->width, dxva2->height ); + } + } + IDirect3DSurface9_UnlockRect( d3d ); + + return HB_WORK_OK; +} + +/** + * create dxva2 service + * load library D3D9.dll + */ +hb_va_dxva2_t * hb_va_create_dxva2( hb_va_dxva2_t *dxva2, int codec_id ) +{ + if( dxva2 ) + { + hb_va_close( dxva2 ); + dxva2 = NULL; + } + + hb_va_dxva2_t *dxva = calloc( 1, sizeof(*dxva)); + if( !dxva ) return NULL; + dxva->codec_id = codec_id; + + dxva->hd3d9_dll = LoadLibrary( TEXT( "D3D9.DLL" )); + if( !dxva->hd3d9_dll ) + { + hb_log( "dxva2:cannot load d3d9.dll" ); + goto error; + } + dxva->hdxva2_dll = LoadLibrary( TEXT( "DXVA2.DLL" )); + if( !dxva->hdxva2_dll ) + { + hb_log( "dxva2:cannot load DXVA2.dll" ); + goto error; + } + + if( hb_d3d_create_device( dxva ) == HB_WORK_ERROR ) + { + hb_log( "dxva2:Failed to create Direct3D device" ); + goto error; + } + + hb_log( "dxva2:hb_d3d_create_device succeed" ); + if( hb_d3d_create_device_manager( dxva )== HB_WORK_ERROR ) + { + hb_log( "dxva2:D3dCreateDeviceManager failed" ); + goto error; + } + + + if( hb_dx_create_video_service( dxva )== HB_WORK_ERROR ) + { + hb_log( "dxva2:DxCreateVideoService failed" ); + goto error; + } + + if( hb_dx_find_video_service_conversion( dxva, &dxva->input, &dxva->render )== HB_WORK_ERROR ) + { + hb_log( 
"dxva2:DxFindVideoServiceConversion failed" ); + goto error; + } + + dxva->do_job = HB_WORK_OK; + dxva->description = "DXVA2"; + + return dxva; + +error: + hb_va_close( dxva ); + return NULL; +} + +void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context ) +{ + if( p_context->width > 0 && p_context->height > 0 ) + { + if( hb_va_setup( dxva2, &p_context->hwaccel_context, p_context->width, p_context->height )==HB_WORK_ERROR ) + { + hb_log( "dxva2:hb_va_Setup failed" ); + hb_va_close( dxva2 ); + dxva2 = NULL; + } + } + if( dxva2 ) + { + dxva2->input_pts[0] = 0; + dxva2->input_pts[1] = 0; + if( dxva2->description ) + hb_log( "dxva2:Using %s for hardware decoding\n", dxva2->description ); + p_context->draw_horiz_band = NULL; + } + +} +enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *p_context, const enum PixelFormat *pi_fmt ) +{ + int i; + static const char *ppsz_name[PIX_FMT_NB] = + { + [PIX_FMT_VDPAU_H264] = "PIX_FMT_VDPAU_H264", + [PIX_FMT_VAAPI_IDCT] = "PIX_FMT_VAAPI_IDCT", + [PIX_FMT_VAAPI_VLD] = "PIX_FMT_VAAPI_VLD", + [PIX_FMT_VAAPI_MOCO] = "PIX_FMT_VAAPI_MOCO", + [PIX_FMT_DXVA2_VLD] = "PIX_FMT_DXVA2_VLD", + [PIX_FMT_YUYV422] = "PIX_FMT_YUYV422", + [PIX_FMT_YUV420P] = "PIX_FMT_YUV420P", + }; + for( i = 0; pi_fmt[i] != PIX_FMT_NONE; i++ ) + { + hb_log( "dxva2:Available decoder output format %d (%s)", pi_fmt[i], ppsz_name[pi_fmt[i]] ? 
: "Unknown" ); + if( pi_fmt[i] == PIX_FMT_DXVA2_VLD ) + { + return pi_fmt[i]; + } + } + return avcodec_default_get_format( p_context, pi_fmt ); +} + +int hb_va_get_frame_buf( hb_va_dxva2_t *dxva2, AVCodecContext *p_context, AVFrame *frame ) +{ + /*if( va_setup(dxva2, &p_context->hwaccel_context, p_context->width, p_context->height )==HB_WORK_ERROR) + { + hb_log("dxva2:hb_va_Setup failed"); + va_close(dxva2); + dxva2 = NULL; + return HB_WORK_ERROR; + }*/ + frame->type = FF_BUFFER_TYPE_USER; + //frame->age = 256*256*256*64; + if( hb_va_get( dxva2, frame ) == HB_WORK_ERROR ) + { + hb_log( "VaGrabSurface failed\n" ); + return HB_WORK_ERROR; + } + return HB_WORK_OK; + +} diff --git a/libhb/vadxva2.h b/libhb/vadxva2.h new file mode 100644 index 000000000..9f6bf55a7 --- /dev/null +++ b/libhb/vadxva2.h @@ -0,0 +1,194 @@ +/* vadxva2.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. 
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + */ + + + +#ifndef VA_DXVA2_H +#define VA_DXVA2_H + +#include "hbffmpeg.h" +#include "d3d9.h" +#include "libavcodec/dxva2.h" +#include "dxva2api.h" +#include "common.h" +#include "openclwrapper.h" + +#define HB_FOURCC( a, b, c, d ) ( ((uint32_t)a) | ( ((uint32_t)b) << 8 ) | ( ((uint32_t)c) << 16 ) | ( ((uint32_t)d) << 24 ) ) +#define MAKEFOURCC( a, b, c, d ) ((DWORD)(BYTE)(a) | ((DWORD)(BYTE)(b) << 8) | ((DWORD)(BYTE)(c) << 16) | ((DWORD)(BYTE)(d) << 24 )) +#define HB_CODEC_YV12 HB_FOURCC( 'Y', 'V', '1', '2' ) +#define HB_CODEC_NV12 HB_FOURCC( 'N', 'V', '1', '2' ) +#define DXVA2_E_NOT_INITIALIZED MAKE_HRESULT( 1, 4, 4096 ) +#define DXVA2_E_NEW_VIDEO_DEVICE MAKE_HRESULT( 1, 4, 4097 ) +#define DXVA2_E_VIDEO_DEVICE_LOCKED MAKE_HRESULT( 1, 4, 4098 ) +#define DXVA2_E_NOT_AVAILABLE MAKE_HRESULT( 1, 4, 4099 ) +#define VA_DXVA2_MAX_SURFACE_COUNT (64) + +static const GUID DXVA_NoEncrypt = { 0x1b81bed0, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID IID_IDirectXVideoDecoderService = {0xfc51a551, 0xd5e7, 0x11d9, {0xaf, 0x55, 0x00, 0x05, 0x4e, 0x43, 0xff, 0x02}}; +static const GUID DXVA2_ModeMPEG2_MoComp = { 0xe6a9f44b, 0x61b0, 0x4563, {0x9e, 0xa4, 0x63, 0xd2, 0xa3, 0xc6, 0xfe, 0x66} }; +static const GUID DXVA2_ModeMPEG2_IDCT = { 0xbf22ad00, 0x03ea, 0x4690, {0x80, 0x77, 0x47, 0x33, 0x46, 0x20, 0x9b, 0x7e} }; +static const GUID DXVA2_ModeMPEG2_VLD = { 0xee27417f, 0x5e28, 0x4e65, {0xbe, 0xea, 0x1d, 0x26, 0xb5, 0x08, 0xad, 0xc9} }; +static const GUID DXVA2_ModeH264_A = { 0x1b81be64, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeH264_B = { 0x1b81be65, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeH264_C = { 0x1b81be66, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID 
DXVA2_ModeH264_D = { 0x1b81be67, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeH264_E = { 0x1b81be68, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeH264_F = { 0x1b81be69, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVADDI_Intel_ModeH264_A = { 0x604F8E64, 0x4951, 0x4c54, {0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6} }; +static const GUID DXVADDI_Intel_ModeH264_C = { 0x604F8E66, 0x4951, 0x4c54, {0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6} }; +static const GUID DXVADDI_Intel_ModeH264_E = { 0x604F8E68, 0x4951, 0x4c54, {0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6} }; +static const GUID DXVA2_ModeWMV8_A = { 0x1b81be80, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeWMV8_B = { 0x1b81be81, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeWMV9_A = { 0x1b81be90, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeWMV9_B = { 0x1b81be91, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeWMV9_C = { 0x1b81be94, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeVC1_A = { 0x1b81beA0, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeVC1_B = { 0x1b81beA1, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeVC1_C = { 0x1b81beA2, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; +static const GUID DXVA2_ModeVC1_D = { 0x1b81beA3, 0xa0c7, 0x11d3, {0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5} }; + +typedef struct +{ + int width; + int height; + int rate; + int rate_base; + +}hb_dx_format; + +typedef struct +{ + LPDIRECT3DSURFACE9 d3d; + int refcount; + unsigned int order; + +} 
hb_va_surface_t; + +typedef struct +{ + uint8_t *base; + uint8_t *buffer; + size_t size; + +} hb_copy_cache_t; + +typedef struct +{ + const char *name; + D3DFORMAT format; + uint32_t codec; + +} hb_d3d_format_t; + +typedef struct +{ + const char *name; + const GUID *guid; + int codec; +} hb_dx_mode_t; + +typedef struct +{ + char *description; + int codec_id; + uint32_t i_chroma; + int width; + int height; + HINSTANCE hd3d9_dll; + HINSTANCE hdxva2_dll; + D3DPRESENT_PARAMETERS d3dpp; + LPDIRECT3D9 d3dobj; + D3DADAPTER_IDENTIFIER9 d3dai; + LPDIRECT3DDEVICE9 d3ddev; + UINT token; + IDirect3DDeviceManager9 *devmng; + HANDLE device; + IDirectXVideoDecoderService *vs; + GUID input; + D3DFORMAT render; + DXVA2_ConfigPictureDecode cfg; + IDirectXVideoDecoder *decoder; + D3DFORMAT output; + struct dxva_context hw; + unsigned surface_count; + unsigned surface_order; + int surface_width; + int surface_height; + uint32_t surface_chroma; + hb_va_surface_t surface[VA_DXVA2_MAX_SURFACE_COUNT]; + LPDIRECT3DSURFACE9 hw_surface[VA_DXVA2_MAX_SURFACE_COUNT]; + IDirectXVideoProcessorService *ps; + IDirectXVideoProcessor *vp; + int64_t input_pts[2]; + int64_t input_dts; + int do_job; + + // running nv12toyuv kernel. 
+#ifdef USE_OPENCL + cl_kernel nv12toyuv; + cl_mem cl_mem_nv12; + cl_mem cl_mem_yuv; + uint8_t * nv12toyuv_tmp_in; + uint8_t * nv12toyuv_tmp_out; +#endif +} hb_va_dxva2_t; + + +static const hb_d3d_format_t d3d_formats[] = +{ + { "YV12", MAKEFOURCC( 'Y', 'V', '1', '2' ), HB_CODEC_YV12 }, + { "NV12", MAKEFOURCC( 'N', 'V', '1', '2' ), HB_CODEC_NV12 }, + { NULL, 0, 0 } +}; + +static const hb_dx_mode_t dxva2_modes[] = +{ + { "DXVA2_ModeMPEG2_VLD", &DXVA2_ModeMPEG2_VLD, CODEC_ID_MPEG2VIDEO }, + { "DXVA2_ModeMPEG2_MoComp", &DXVA2_ModeMPEG2_MoComp, 0 }, + { "DXVA2_ModeMPEG2_IDCT", &DXVA2_ModeMPEG2_IDCT, 0 }, + + { "H.264 variable-length decoder (VLD), FGT", &DXVA2_ModeH264_F, CODEC_ID_H264 }, + { "H.264 VLD, no FGT", &DXVA2_ModeH264_E, CODEC_ID_H264 }, + { "H.264 VLD, no FGT (Intel)", &DXVADDI_Intel_ModeH264_E, CODEC_ID_H264 }, + { "H.264 IDCT, FGT", &DXVA2_ModeH264_D, 0 }, + { "H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVA2_ModeH264_C, 0 }, + { "H.264 inverse discrete cosine transform (IDCT), no FGT (Intel)", &DXVADDI_Intel_ModeH264_C, 0 }, + { "H.264 MoComp, FGT", &DXVA2_ModeH264_B, 0 }, + { "H.264 motion compensation (MoComp), no FGT", &DXVA2_ModeH264_A, 0 }, + { "H.264 motion compensation (MoComp), no FGT (Intel)", &DXVADDI_Intel_ModeH264_A, 0 }, + + { "Windows Media Video 8 MoComp", &DXVA2_ModeWMV8_B, 0 }, + { "Windows Media Video 8 post processing", &DXVA2_ModeWMV8_A, 0 }, + + { "Windows Media Video 9 IDCT", &DXVA2_ModeWMV9_C, 0 }, + { "Windows Media Video 9 MoComp", &DXVA2_ModeWMV9_B, 0 }, + { "Windows Media Video 9 post processing", &DXVA2_ModeWMV9_A, 0 }, + + { "VC-1 VLD", &DXVA2_ModeVC1_D, CODEC_ID_VC1 }, + { "VC-1 VLD", &DXVA2_ModeVC1_D, CODEC_ID_WMV3 }, + { "VC-1 IDCT", &DXVA2_ModeVC1_C, 0 }, + { "VC-1 MoComp", &DXVA2_ModeVC1_B, 0 }, + { "VC-1 post processing", &DXVA2_ModeVC1_A, 0 }, + + { NULL, NULL, 0 } +}; + +int hb_va_get_frame_buf( hb_va_dxva2_t *dxva2, AVCodecContext *p_context, AVFrame *frame ); +int hb_va_extract( hb_va_dxva2_t *dxva2, 
uint8_t *dst, AVFrame *frame, int job_w, int job_h, int *crop, hb_oclscale_t *os ); +enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *, const enum PixelFormat * ); +hb_va_dxva2_t *hb_va_create_dxva2( hb_va_dxva2_t *dxva2, int codec_id ); +void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context ); +void hb_va_release( hb_va_dxva2_t *dxva2, AVFrame *frame ); +void hb_va_close( hb_va_dxva2_t *dxva2 ); + +#endif diff --git a/libhb/work.c b/libhb/work.c index 030920c59..d727f1366 100644 --- a/libhb/work.c +++ b/libhb/work.c @@ -10,6 +10,7 @@ #include "hb.h" #include "a52dec/a52.h" #include "libavformat/avformat.h" +#include "openclwrapper.h" typedef struct { @@ -552,7 +553,18 @@ static void do_job( hb_job_t * job ) job->list_work = hb_list_init(); hb_log( "starting job" ); - +#ifdef USE_OPENCL + if ( job->use_opencl ) + { + /* init opencl environment */ + hb_log( "Using GPU : Yes.\n" ); + job->use_opencl =! hb_init_opencl_run_env(0, NULL, "-I."); + } + else + hb_log( "Using GPU : NO.\n" ); +#else + hb_log( "Using GPU : NO.\n" ); +#endif /* Look for the scanned subtitle in the existing subtitle list * select_subtitle implies that we did a scan. 
*/ if( !job->indepth_scan && interjob->select_subtitle ) @@ -699,6 +711,16 @@ static void do_job( hb_job_t * job ) init.pix_fmt = PIX_FMT_YUV420P; init.width = title->width; init.height = title->height; +#ifdef USE_OPENCL + init.title_width = title->width; + init.title_height = title->height; + init.use_dxva = hb_use_dxva( title ); + if ( init.use_dxva && ( title->width > job->width || title->height > job->height ) ) + { + init.width = job->width; + init.height = job->height; + } +#endif init.par_width = job->anamorphic.par_width; init.par_height = job->anamorphic.par_height; memcpy(init.crop, job->crop, sizeof(int[4])); @@ -997,6 +1019,12 @@ static void do_job( hb_job_t * job ) vcodec = WORK_DECAVCODECV; title->video_codec_param = CODEC_ID_MPEG2VIDEO; } +#endif +#ifdef USE_OPENCL + if ( job->use_opencl && hb_use_dxva( title ) && (TestGPU() == 0) && job->use_uvd ) + { + vcodec = WORK_DECAVCODECVACCL; + } #endif hb_list_add( job->list_work, ( w = hb_get_work( vcodec ) ) ); w->codec_param = title->video_codec_param; diff --git a/make/configure.py b/make/configure.py index 3c73d8949..598ef754b 100644 --- a/make/configure.py +++ b/make/configure.py @@ -1085,7 +1085,9 @@ def createCLI(): grp.add_option( '--disable-gst', default=False, action='store_true', help=h ) h = IfHost( 'enable use of ffmpeg mpeg2 decoding', '*-*-*', none=optparse.SUPPRESS_HELP ).value grp.add_option( '--enable-ff-mpeg2', default=False, action='store_true', help=h ) - + h = IfHost( 'enable OpenCL features', '*-*-*', none=optparse.SUPPRESS_HELP ).value + grp.add_option( '--enable-opencl', default=False, action='store_true', help=h ) + cli.add_option_group( grp ) ## add launch options @@ -1518,6 +1520,7 @@ int main () doc.add( 'FEATURE.gtk.mingw', int( options.enable_gtk_mingw )) doc.add( 'FEATURE.gst', int( not options.disable_gst )) doc.add( 'FEATURE.ff.mpeg2', int( options.enable_ff_mpeg2 )) + doc.add( 'FEATURE.opencl', int( options.enable_opencl )) doc.add( 'FEATURE.xcode', int( not 
(Tools.xcodebuild.fail or options.disable_xcode or options.cross) )) if not Tools.xcodebuild.fail and not options.disable_xcode: diff --git a/test/module.defs b/test/module.defs index f057db3cc..246b62a94 100644 --- a/test/module.defs +++ b/test/module.defs @@ -43,6 +43,10 @@ else ifeq ($(BUILD.system),solaris) else ifeq (1-mingw,$(BUILD.cross)-$(BUILD.system)) ifeq ($(HAS.dlfcn),1) TEST.GCC.l += dl +endif +ifeq (1,$(FEATURE.opencl)) + TEST.GCC.l += OpenCL + TEST.GCC.D += USE_OPENCL endif TEST.GCC.l += pthreadGC2 iconv ws2_32 TEST.GCC.D += PTW32_STATIC_LIB diff --git a/test/test case/handbrake_test.txt b/test/test case/handbrake_test.txt new file mode 100644 index 000000000..47fb0dd8e --- /dev/null +++ b/test/test case/handbrake_test.txt @@ -0,0 +1,80 @@ +#- automated python test script +#- +#- replace the input filename with "infile", replace the output filename with "outfile.m4v" +#- the python script will replace these with the proper testing input and output file names +#- +#- --start-at frame:0 --stop-at frame:stopframe will be inserted by script, so omit it from the command line here +#- the names are used as test names and file names + +# universal +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1 + +# iPod +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -I -w 320 -l 240 -e x264 -b 700 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x level=30:bframes=0:weightp=0:cabac=0:ref=1:vbv-maxrate=768:vbv-bufsize=2000:analyse=all:me=umh:no-fast-pskip=1:subq=6:8x8dct=0:trellis=0 --verbose=1 + + +# iPhone_iPod_Touch + -i "infile" 
-t 1 -o "outfile.m4v" -f mp4 -w 480 -l 320 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1 + + +# iPhone_4 + -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1 + + +# iPad + +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1024 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1 + + +# Apple_TV + + -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 960 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:b-pyramid=none:b-adapt=2:weightb=0:trellis=0:weightp=0:vbv-maxrate=9500:vbv-bufsize=9500 --verbose=1 + +# Apple_TV_2 +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 -w 1280 --loose-anamorphic -e x264 -q 20 -r 29.97 --pfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" --verbose=1 + + +# Android_Mid +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 270 -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1 + + +# Android_High + -i 
"infile" -t 1 -o "outfile.mp4" -f mp4 -w 720 --loose-anamorphic -e x264 -q 22 -r 29.97 --pfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 -x weightp=0:cabac=0 --verbose=1 + + +# Normal_1080p_to_1080p_fixed_qp +-i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1 + + +# Normal_1080p_to_720p_fixed_qp +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1 + + + +# Normal_1080p_to_1080p_13_mbps +-i "infile" -t 1 -o "outfile.m4v" -f mp4 --strict-anamorphic -e x264 -b 13000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1 + + +# Normal_1080p_to_720p_6_mbps + -i "infile" -t 1 -c 1 -o "outfile.m4v" -f mp4 -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1 -E faac -B 160 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x ref=1:weightp=1:subq=2:rc-lookahead=10:trellis=0:8x8dct=0 --verbose=1 + +# high_1080p_to_1080p_fixed_qp +-i "infile" -t 1 -o "high_1080p.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 
-6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1 + + +# high_1080p_to_720p_fixed qp + -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1 + + +# high_1080p_to_1080p_13_mbps + -i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1920 --loose-anamorphic -e x264 -b 13000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1 + + +# high_1080p_to_720p_6mbps +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -4 --detelecine --decomb -w 1280 -l 720 --custom-anamorphic --display-width 1282 --keep-display-aspect -e x264 -b 6000 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x b-adapt=2:rc-lookahead=50 --verbose=1 + +# strange_resolution +-i "infile" -t 1 -o "outfile.m4v" -f mp4 -w 480 -l 1078 -e x264 -q 20 --vfr -a 1 -E faac -B 128 -6 dpl2 -R Auto -D 0 --gain=0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:subq=6:8x8dct=0:trellis=0 --verbose=1 + + +#-end of script diff --git a/test/test case/readme.txt b/test/test case/readme.txt new file mode 100644 index 000000000..8c182e26c --- /dev/null +++ 
b/test/test case/readme.txt @@ -0,0 +1,43 @@ +Handbrake automation scripts + +this script is written in Python (2.7). It should work with any open source python distribution. +I prefer to use enthought.com's free version of Python. + +To use this script: +1) copy handbrake_test.txt to the handbrakecli.exe directory +2) copy run_handbrake.py to the handbrakecli.exe directory +3) copy the input video files to the handbrakecli.exe directory +4) in a command window, go to the handbrakecli.exe directory +5) type:run_handbrake.py and hit return + +you should see the command lines and % complete in the command window. Output files are located in the ./output directory. +there will be a separate output video file, log file (the handbrake output), and a testresults.csv file + +When the test is complete, import the testresults.csv into an Excel spreadsheet. + +Common Problems: + +you will see zero fps and psnr if handbrakecli.exe exists with a failure. Common problems are missing video files from the current directory. + + + +Notes: + +The run_handbrake.py is a text file python script. The top of the file contains configuration parameters such as +the input video files to use, etc. change these at will. + +The handbrake_test.txt is a "script" file that contains the individual tests to run. The format is: + +#- is a comment + +# name of test (also used as output log file name, so don't use strange characters here + +-i this is the handbrakecli.exe options line + + +blank lines are ignored. + +To enable UVD decoding, add -P -U to each execution line in handbrake_test.txt. 
+Example: +# universal +-i "infile" -t 1 -o "outfile.m4v" -P -U -f mp4 -w 720 --loose-anamorphic -e x264 -q 20 --vfr -a 1,1 -E faac,ffac3 -B 160,0 -6 dpl2,auto -R Auto,Auto -D 0,0 --gain=0,0 --audio-copy-mask none --audio-fallback ffac3 --markers="C:\Users\jgallmeier\AppData\Local\Temp\Blueangels-1-1-chapters.csv" -x cabac=0:ref=2:me=umh:bframes=0:weightp=0:8x8dct=0:trellis=0:subq=6 --verbose=1 diff --git a/test/test case/run_handbrake.py b/test/test case/run_handbrake.py new file mode 100644 index 000000000..5efb0ac63 --- /dev/null +++ b/test/test case/run_handbrake.py @@ -0,0 +1,126 @@ +#!/usr/bin/python + +import sys, string, os, subprocess, time +from multiprocessing import Process + + +##### This script uses as input a 'script file' +##### each line of the script file is a handbrakecli.exe options line (without the handbrakecli.exe) +##### this is read in, each line is executed and the output results are parsed, pulling out fps, and Global PSNR + +inputscript = 'handbrake_test.txt' + + +#### input image files + +###inputimagefiles = ['x264.avi'] +inputimagefiles = ['blueangels_20m.m2ps','blueangels.m2ts','BigBuckBunny.VOB'] +#### output directory +outdir = 'output/' + +#### output CSV file +outcsv = 'testresults.csv' + +#### limit to the number of frames processed +endframe = 2000 + + +def run_handbrake(execline, testname): + #os.chdir(handbrakedir) + #process = subprocess.Popen(execline, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + proc = subprocess.Popen(execline, bufsize=-1, stderr=subprocess.PIPE, shell=True) + + [sout, serr] = proc.communicate() + + # stdout has the percentage complete for the given task + # stderr has all of the output for data collection + + lines = serr.splitlines(); + + psnr = 0.0 + for line in lines: + if line.find("PSNR Mean") > 0: + if line.find("kb/s") > 0: + x = line.find("Global:") + psnr = float(line[x+7:x+7+6]) + + + + # find fps + fps = 0.0 + for line in lines: + if line.find("work: average") > 0: + if 
line.find("fps") > 0: + x = line.find("job is") + y = line.find("fps") + fps = float(line[x+6:y]) + + + print "fps = " + str(fps) + ", PSNR = " + str(psnr) + + logFILE = open(outdir + testname + '.log', "w") + logFILE.write(serr) + logFILE.close() + + #if process.returncode == None: process.wait() + + return [psnr, fps] + + + +########## main entry point ####### +def mytask(): + + if os.path.exists(outdir) == False: + print 'Creating output directory: ' + outdir + os.makedirs(outdir) + else: + print 'Output directory exists' + + scriptFILE = open(inputscript, "r") + + script = scriptFILE.read() + + outFILE = open(outdir + outcsv, "w") + + script_dict = [s for s in script.splitlines() if s] + + # get the order of the tests + testname = list() + for line in script_dict: + if line.startswith('# '): + testname.append(line[1:]) + + # test code + #for item in testname: + # print item + + outFILE.write("input file, handbrake preset, fps, psnr\n") + + for inimg in inputimagefiles: + i = iter(testname) + [filename, part, fileext] = inimg.partition('.') + for line in script_dict: + if line.startswith('#') == 0: + test = i.next() + ex = line.replace("infile", inimg) + ex = ex.replace("outfile", outdir+fileext + '_' + test.lstrip()) + ex = "handbrakecli.exe " + ex + print "-------------" + test + "----------------" + print ex + [psnr, fps] = run_handbrake(ex, fileext+ '_' + test.lstrip()) + outstr = inimg + "," + test + "," + str(fps) + "," + str(psnr) + "\n" + outFILE.write(outstr) + outFILE.flush() + os.fsync(outFILE.fileno()) + time.sleep(30) + print outstr + + outFILE.close() + print "done" + +if __name__ == '__main__': + p = Process(target = mytask) + p.start() + p.join() + diff --git a/test/test case/sample_testresults.csv b/test/test case/sample_testresults.csv new file mode 100644 index 000000000..f2d7acd47 --- /dev/null +++ b/test/test case/sample_testresults.csv @@ -0,0 +1,52 @@ +input file, handbrake preset, fps, psnr +blueangels_20m.m2ps, universal,51.249393,43.329 
+blueangels_20m.m2ps, iPod,104.499466,43.39 +blueangels_20m.m2ps, iPhone_iPod_Touch,82.94838,42.932 +blueangels_20m.m2ps, iPhone_4,27.005239,43.781 +blueangels_20m.m2ps, iPad,24.081524,43.829 +blueangels_20m.m2ps, Apple_TV,26.035112,43.496 +blueangels_20m.m2ps, Apple_TV_2,15.095184,44.122 +blueangels_20m.m2ps, Android_Mid,85.799118,41.523 +blueangels_20m.m2ps, Android_High,51.151642,42.586 +blueangels_20m.m2ps, Normal_1080p_to_1080p_fixed_qp,24.162228,44.721 +blueangels_20m.m2ps, Normal_1080p_to_720p_fixed_qp,43.044079,43.631 +blueangels_20m.m2ps, Normal_1080p_to_1080p_13_mbps,23.51717,45.031 +blueangels_20m.m2ps, Normal_1080p_to_720p_6_mbps,39.531132,44.304 +blueangels_20m.m2ps, high_1080p_to_1080p_fixed_qp,5.44732,45.031 +blueangels_20m.m2ps, high_1080p_to_720p_fixed qp,11.749803,43.892 +blueangels_20m.m2ps, high_1080p_to_1080p_13_mbps,5.319024,45.168 +blueangels_20m.m2ps, high_1080p_to_720p_6mbps,11.092562,44.167 +blueangels_20m.m2ps, strange_resolution,35.530605,44.236 +blueangels.m2ts, universal,38.152428,43.238 +blueangels.m2ts, iPod,58.110367,43.381 +blueangels.m2ts, iPhone_iPod_Touch,51.766876,42.914 +blueangels.m2ts, iPhone_4,21.514906,43.562 +blueangels.m2ts, iPad,19.358091,43.586 +blueangels.m2ts, Apple_TV,21.262102,43.3 +blueangels.m2ts, Apple_TV_2,12.826241,43.734 +blueangels.m2ts, Android_Mid,51.420086,41.512 +blueangels.m2ts, Android_High,36.581734,42.514 +blueangels.m2ts, Normal_1080p_to_1080p_fixed_qp,19.23077,43.986 +blueangels.m2ts, Normal_1080p_to_720p_fixed_qp,31.648577,43.284 +blueangels.m2ts, Normal_1080p_to_1080p_13_mbps,18.489985,44.288 +blueangels.m2ts, Normal_1080p_to_720p_6_mbps,30.822269,43.864 +blueangels.m2ts, high_1080p_to_1080p_fixed_qp,5.114225,44.334 +blueangels.m2ts, high_1080p_to_720p_fixed qp,10.028431,43.695 +blueangels.m2ts, high_1080p_to_1080p_13_mbps,5.020767,44.382 +blueangels.m2ts, high_1080p_to_720p_6mbps,9.411227,44.107 +blueangels.m2ts, strange_resolution,27.356098,44.088 +BigBuckBunny.VOB, universal,74.327034,43.579 
+BigBuckBunny.VOB, iPod,212.377899,43.668 +BigBuckBunny.VOB, iPhone_iPod_Touch,133.710114,43.028 +BigBuckBunny.VOB, iPhone_4,33.673248,44.976 +BigBuckBunny.VOB, iPad,29.014545,45.052 +BigBuckBunny.VOB, Apple_TV,35.109959,44.672 +BigBuckBunny.VOB, Apple_TV_2,18.789587,45.661 +BigBuckBunny.VOB, Android_Mid,167.233826,41.377 +BigBuckBunny.VOB, Android_High,71.774277,42.81 +BigBuckBunny.VOB, Normal_1080p_to_1080p_fixed_qp,173.294464,42.192 +BigBuckBunny.VOB, Normal_1080p_to_720p_fixed_qp,67.47831,43.995 +BigBuckBunny.VOB, Normal_1080p_to_1080p_13_mbps,91.628754,53.538 +BigBuckBunny.VOB, Normal_1080p_to_720p_6_mbps,50.870552,46.778 +BigBuckBunny.VOB, high_1080p_to_1080p_fixed_qp,7.289626,47.018 +BigBuckBunny.VOB, high_1080p_to_720p_fixed qp,18.983526,45.495 diff --git a/test/test.c b/test/test.c index 472fabd54..a4ced522d 100644 --- a/test/test.c +++ b/test/test.c @@ -22,6 +22,7 @@ #include "hb.h" #include "lang.h" #include "parsecsv.h" +#include "openclwrapper.h" #if defined( __APPLE_CC__ ) #import @@ -139,6 +140,8 @@ static int start_at_frame = 0; static int64_t stop_at_pts = 0; static int stop_at_frame = 0; static uint64_t min_title_duration = 10; +static int use_opencl = 0; +static int use_uvd = 0; /* Exit cleanly on Ctrl-C */ static volatile int die = 0; @@ -209,7 +212,9 @@ int main( int argc, char ** argv ) /* Init libhb */ h = hb_init( debug, update ); hb_dvd_set_dvdnav( dvdnav ); - +#ifdef USE_OPENCL + hb_get_opencl_env(); +#endif /* Show version */ fprintf( stderr, "%s - %s - %s\n", HB_PROJECT_TITLE, HB_PROJECT_BUILD_TITLE, HB_PROJECT_URL_WEBSITE ); @@ -250,6 +255,7 @@ int main( int argc, char ** argv ) titleindex = 0; } + hb_set_gui_info(&hb_gui, use_uvd, use_opencl, titleindex); hb_scan( h, input, titleindex, preview_count, store_previews, min_title_duration * 90000LL ); /* Wait... 
*/ @@ -422,6 +428,10 @@ static void PrintTitleInfo( hb_title_t * title, int feature ) (float) title->rate / title->rate_base ); fprintf( stderr, " + autocrop: %d/%d/%d/%d\n", title->crop[0], title->crop[1], title->crop[2], title->crop[3] ); + + fprintf( stderr, " + support opencl: %d \n", title->opencl_support); + fprintf( stderr, " + support uvd: %d\n", title->uvd_support); + fprintf( stderr, " + chapters:\n" ); for( i = 0; i < hb_list_count( title->list_chapter ); i++ ) { @@ -1388,6 +1398,10 @@ static int HandleEvents( hb_handle_t * h ) job->maxWidth = maxWidth; if (maxHeight) job->maxHeight = maxHeight; + if (use_uvd) + { + job->use_uvd = use_uvd; + } switch( anamorphic_mode ) { @@ -1560,7 +1574,13 @@ static int HandleEvents( hb_handle_t * h ) filter_str = hb_strdup_printf("%d:%d:%d:%d:%d:%d", job->width, job->height, job->crop[0], job->crop[1], job->crop[2], job->crop[3] ); - filter = hb_filter_init( HB_FILTER_CROP_SCALE ); + +#ifdef USE_OPENCL + if ( use_opencl ) + filter = hb_filter_init( HB_FILTER_CROP_SCALE_ACCL ); + else +#endif + filter = hb_filter_init( HB_FILTER_CROP_SCALE ); hb_add_filter( job, filter, filter_str ); free( filter_str ); @@ -2485,7 +2505,11 @@ static int HandleEvents( hb_handle_t * h ) job->frame_to_start = start_at_frame; subtitle_scan = 0; } - +#ifdef USE_OPENCL + job->use_opencl = use_opencl; +#else + job->use_opencl = 0; +#endif if( subtitle_scan ) { /* @@ -2662,6 +2686,7 @@ static void ShowHelp() " -z, --preset-list See a list of available built-in presets\n" " --no-dvdnav Do not use dvdnav for reading DVDs\n" " (experimental, enabled by default for testing)\n" + " --no-opencl Disable use of OpenCL\n" "\n" "### Source Options-----------------------------------------------------------\n\n" @@ -2695,6 +2720,8 @@ static void ShowHelp() " 4 GB. 
Note: Breaks iPod, PS3 compatibility.\n""" " -O, --optimize Optimize mp4 files for HTTP streaming\n" " -I, --ipod-atom Mark mp4 files so 5.5G iPods will accept them\n" + " -P, --opencl-support Use OpenCL\n" + " -U, --UVD-support Use UVD hardware\n" "\n" @@ -3188,7 +3215,8 @@ static int ParseOptions( int argc, char ** argv ) #define X264_PRESET 284 #define X264_TUNE 285 #define H264_LEVEL 286 - #define NORMALIZE_MIX 287 + #define NO_OPENCL 287 + #define NORMALIZE_MIX 288 for( ;; ) { @@ -3197,14 +3225,17 @@ static int ParseOptions( int argc, char ** argv ) { "help", no_argument, NULL, 'h' }, { "update", no_argument, NULL, 'u' }, { "verbose", optional_argument, NULL, 'v' }, - { "no-dvdnav", no_argument, NULL, DVDNAV }, - + { "no-dvdnav", no_argument, NULL, DVDNAV }, + { "no-opencl", no_argument, NULL, NO_OPENCL }, + { "format", required_argument, NULL, 'f' }, { "input", required_argument, NULL, 'i' }, { "output", required_argument, NULL, 'o' }, { "large-file", no_argument, NULL, '4' }, { "optimize", no_argument, NULL, 'O' }, { "ipod-atom", no_argument, NULL, 'I' }, + { "use-opencl", no_argument, NULL, 'P' }, + { "use-uvd", no_argument, NULL, 'U' }, { "title", required_argument, NULL, 't' }, { "min-duration",required_argument, NULL, MIN_DURATION }, @@ -3293,7 +3324,7 @@ static int ParseOptions( int argc, char ** argv ) cur_optind = optind; c = getopt_long( argc, argv, - "hv::uC:f:4i:Io:t:c:m::M:a:A:6:s:UF::N:e:E:Q:C:" + "hv::uC:f:4i:Io:PUt:c:m::M:a:A:6:s:F::N:e:E:Q:C:" "2dD:7895gOw:l:n:b:q:S:B:r:R:x:TY:X:Z:z", long_options, &option_index ); if( c < 0 ) @@ -3364,6 +3395,12 @@ static int ParseOptions( int argc, char ** argv ) case 'I': ipod_atom = 1; break; + case 'P': + use_opencl = 1; + break; + case 'U': + use_uvd = 1; + break; case 't': titleindex = atoi( optarg ); @@ -3395,6 +3432,9 @@ static int ParseOptions( int argc, char ** argv ) } break; } + case NO_OPENCL: + use_opencl = 0; + break; case ANGLE: angle = atoi( optarg ); break; diff --git 
a/win/CS/HandBrake.ApplicationServices/HandBrake.ApplicationServices.csproj b/win/CS/HandBrake.ApplicationServices/HandBrake.ApplicationServices.csproj index 6730e2d66..56df4d218 100644 --- a/win/CS/HandBrake.ApplicationServices/HandBrake.ApplicationServices.csproj +++ b/win/CS/HandBrake.ApplicationServices/HandBrake.ApplicationServices.csproj @@ -66,6 +66,7 @@ 3.5 + diff --git a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs index e7869459b..4f3c1ef11 100644 --- a/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs +++ b/win/CS/HandBrake.ApplicationServices/Model/EncodeTask.cs @@ -88,6 +88,8 @@ namespace HandBrake.ApplicationServices.Model this.Height = task.Height; this.IncludeChapterMarkers = task.IncludeChapterMarkers; this.IPod5GSupport = task.IPod5GSupport; + this.OpenCLSupport = task.OpenCLSupport; + this.UVDSupport = task.UVDSupport; this.KeepDisplayAspect = task.KeepDisplayAspect; this.LargeFile = task.LargeFile; this.MaxHeight = task.MaxHeight; @@ -186,6 +188,16 @@ namespace HandBrake.ApplicationServices.Model /// Gets or sets a value indicating whether IPod5GSupport. /// public bool IPod5GSupport { get; set; } + + /// + /// Gets or sets a value indicating whether OpenCLSupport. + /// + public bool OpenCLSupport { get; set; } + + /// + /// Gets or sets a value indicating whether UVDSupport. 
+ /// + public bool UVDSupport { get; set; } #endregion #region Picture diff --git a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs index 04d00c1f2..d6e6e57aa 100644 --- a/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs +++ b/win/CS/HandBrake.ApplicationServices/Parsing/Title.cs @@ -115,6 +115,15 @@ namespace HandBrake.ApplicationServices.Parsing /// public string SourceName { get; set; } + /// + /// Gets or sets the OpenCL + /// + public int OpenCLSupport { get; set; } + + /// + /// Gets or sets the UVD + /// + public int UVDSupport { get; set; } #endregion /// @@ -214,6 +223,12 @@ namespace HandBrake.ApplicationServices.Parsing }; } + m = Regex.Match(output.ReadLine(), @"^ \+ support opencl: ([0-9]*)"); + if (m.Success) + thisTitle.OpenCLSupport = int.Parse(m.Groups[1].Value.Trim()); + m = Regex.Match(output.ReadLine(), @" \+ support uvd: ([0-9]*)"); + if (m.Success) + thisTitle.UVDSupport = int.Parse(m.Groups[1].Value.Trim()); thisTitle.Chapters.AddRange(Chapter.ParseList(output)); thisTitle.AudioTracks.AddRange(AudioHelper.ParseList(output)); diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/GeneralUtilities.cs b/win/CS/HandBrake.ApplicationServices/Utilities/GeneralUtilities.cs index 378898c05..01c5625fb 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/GeneralUtilities.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/GeneralUtilities.cs @@ -102,6 +102,8 @@ namespace HandBrake.ApplicationServices.Utilities logHeader.AppendLine(String.Format("CPU: {0}", SystemInfo.GetCpuCount)); logHeader.Append(String.Format("Ram: {0} MB, ", SystemInfo.TotalPhysicalMemory)); logHeader.AppendLine(String.Format("Screen: {0}x{1}", SystemInfo.ScreenBounds.Bounds.Width, SystemInfo.ScreenBounds.Bounds.Height)); + logHeader.Append(String.Format("GPU: {0}\n", SystemInfo.GetGPUName)); + logHeader.Append(String.Format("GPU driver version: {0}\n", SystemInfo.GetGPUDriverVersion)); 
logHeader.AppendLine(String.Format("Temp Dir: {0}", Path.GetTempPath())); logHeader.AppendLine(String.Format("Install Dir: {0}", Application.StartupPath)); logHeader.AppendLine(String.Format("Data Dir: {0}\n", Application.UserAppDataPath)); diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs index 8f9260079..5cb0d4768 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/InteropModelCreator.cs @@ -101,6 +101,8 @@ namespace HandBrake.ApplicationServices.Utilities profile.Grayscale = work.Grayscale; profile.Height = work.Height.HasValue ? work.Height.Value : 0; profile.IPod5GSupport = work.IPod5GSupport; + profile.OpenCLGSupport = work.OpenCLSupport; + profile.UVDSupport = work.UVDSupport; profile.IncludeChapterMarkers = work.IncludeChapterMarkers; profile.KeepDisplayAspect = work.KeepDisplayAspect; profile.LargeFile = work.LargeFile; diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs index bea765e27..f45ba7423 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/PlistUtility.cs @@ -141,6 +141,8 @@ namespace HandBrake.ApplicationServices.Utilities AddEncodeElement(xmlWriter, "Mp4HttpOptimize", "integer", parsed.OptimizeMP4 ? "1" : "0"); AddEncodeElement(xmlWriter, "Mp4LargeFile", "integer", parsed.LargeFile ? "1" : "0"); AddEncodeElement(xmlWriter, "Mp4iPodCompatible", "integer", parsed.IPod5GSupport ? "1" : "0"); + AddEncodeElement(xmlWriter, "OpenCLSupport", "integer", parsed.OpenCLSupport ? "1" : "0"); + AddEncodeElement(xmlWriter, "UVDSupport", "integer", parsed.UVDSupport ? 
"1" : "0"); AddEncodeElement(xmlWriter, "PictureAutoCrop", "integer", "1"); AddEncodeElement(xmlWriter, "PictureBottomCrop", "integer", parsed.Cropping.Bottom.ToString()); diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs index 83e551b42..735470d49 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryGeneratorUtility.cs @@ -245,6 +245,12 @@ namespace HandBrake.ApplicationServices.Utilities if (task.OptimizeMP4) query += " -O "; + if (task.OpenCLSupport) + query += " -P "; + if (task.UVDSupport && task.OpenCLSupport) + query += " -U "; + else if (task.UVDSupport && !task.OpenCLSupport) + query += " -P -U"; return query; } diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs index f0b5d65e3..ead60f906 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/QueryParserUtility.cs @@ -56,6 +56,8 @@ namespace HandBrake.ApplicationServices.Utilities Match grayscale = Regex.Match(input, @" -g"); Match largerMp4 = Regex.Match(input, @" -4"); Match ipodAtom = Regex.Match(input, @" -I"); + Match openclSupport = Regex.Match(input, @" -P"); + Match uvdSupport = Regex.Match(input, @" -U"); // Picture Settings Tab Match width = Regex.Match(input, @"-w ([0-9]+)"); @@ -156,6 +158,8 @@ namespace HandBrake.ApplicationServices.Utilities parsed.LargeFile = largerMp4.Success; parsed.IPod5GSupport = ipodAtom.Success; parsed.OptimizeMP4 = optimizeMP4.Success; + parsed.OpenCLSupport = openclSupport.Success; + parsed.UVDSupport = uvdSupport.Success; #endregion diff --git a/win/CS/HandBrake.ApplicationServices/Utilities/SystemInfo.cs b/win/CS/HandBrake.ApplicationServices/Utilities/SystemInfo.cs index 
32328ab47..9f988da9b 100644 --- a/win/CS/HandBrake.ApplicationServices/Utilities/SystemInfo.cs +++ b/win/CS/HandBrake.ApplicationServices/Utilities/SystemInfo.cs @@ -10,7 +10,7 @@ namespace HandBrake.ApplicationServices.Utilities { using System.Windows.Forms; - + using System.Management; using Microsoft.Win32; /// @@ -56,5 +56,39 @@ namespace HandBrake.ApplicationServices.Utilities { get { return Screen.PrimaryScreen; } } + public static object GetGPUDriverVersion + { + get + { + ManagementObjectSearcher searcher = new ManagementObjectSearcher( + "select * from " + "Win32_VideoController"); + foreach (ManagementObject share in searcher.Get()) + { + foreach (PropertyData PC in share.Properties) + { + if (PC.Name.Equals("DriverVersion")) + return PC.Value; + } + } + return null; + } + } + public static object GetGPUName + { + get + { + ManagementObjectSearcher searcher = new ManagementObjectSearcher( + "select * from " + "Win32_VideoController"); + foreach (ManagementObject share in searcher.Get()) + { + foreach (PropertyData PC in share.Properties) + { + if (PC.Name.Equals("Name")) + return PC.Value; + } + } + return null; + } + } } } \ No newline at end of file diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs index a2bf8cef9..74f1683a9 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HandBrakeInstance.cs @@ -1447,6 +1447,8 @@ namespace HandBrake.Interop nativeJob.largeFileSize = profile.LargeFile ? 1 : 0; nativeJob.mp4_optimize = profile.Optimize ? 1 : 0; nativeJob.ipod_atom = profile.IPod5GSupport ? 1 : 0; + nativeJob.opencl_support = profile.OpenCLGSupport ? 1 : 0; + nativeJob.uvd_support = profile.UVDSupport ? 
1 : 0; if (title.AngleCount > 1) { diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs index d4c95d904..a761780fa 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_job_s.cs @@ -145,6 +145,12 @@ namespace HandBrake.Interop.HbLib /// int public int ipod_atom; + /// int + public int opencl_support; + + /// int + public int uvd_support; + /// int public int indepth_scan; diff --git a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs index 6d2aafec3..9667d5e6e 100644 --- a/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs +++ b/win/CS/HandBrake.Interop/HandBrakeInterop/Model/Encoding/EncodingProfile.cs @@ -27,6 +27,8 @@ namespace HandBrake.Interop.Model.Encoding public bool LargeFile { get; set; } public bool Optimize { get; set; } public bool IPod5GSupport { get; set; } + public bool OpenCLGSupport { get; set; } + public bool UVDSupport { get; set; } public int Width { get; set; } public int Height { get; set; } @@ -82,6 +84,8 @@ namespace HandBrake.Interop.Model.Encoding LargeFile = this.LargeFile, Optimize = this.Optimize, IPod5GSupport = this.IPod5GSupport, + OpenCLGSupport = this.OpenCLGSupport, + UVDSupport = this.UVDSupport, Width = this.Width, Height = this.Height, diff --git a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs index b0ae80f93..23a80d3b7 100644 --- a/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs +++ b/win/CS/HandBrakeWPF/ViewModels/MainViewModel.cs @@ -115,6 +115,14 @@ namespace HandBrakeWPF.ViewModels private bool isMkv; /// + /// Support Hardware Decoding + /// + private bool supportHardwareDecoding; + + /// + /// Support OpenCL + /// + private bool supportOpenCL; /// The Toolbar Status Label /// private 
string statusLabel; @@ -368,6 +376,8 @@ namespace HandBrakeWPF.ViewModels this.CurrentTask.LargeFile = selectedPreset.Task.LargeFile; this.CurrentTask.OptimizeMP4 = selectedPreset.Task.OptimizeMP4; this.CurrentTask.IPod5GSupport = selectedPreset.Task.IPod5GSupport; + this.CurrentTask.OpenCLSupport = selectedPreset.Task.OpenCLSupport; + this.CurrentTask.UVDSupport = selectedPreset.Task.UVDSupport; this.SelectedOutputFormat = selectedPreset.Task.OutputFormat; // Tab Settings @@ -590,8 +600,38 @@ namespace HandBrakeWPF.ViewModels this.NotifyOfPropertyChange("IsMkv"); } } + + /// + /// Gets or sets a value indicating whether SupportHardwareDecoding. + /// + public bool SupportHardwareDecoding + { + get + { + return this.supportHardwareDecoding; + } + set + { + this.supportHardwareDecoding = value; + this.NotifyOfPropertyChange("SupportHardwareDecoding"); + } + } /// + /// Gets or sets a value indicating whether SupportHardwareDecoding. + /// + public bool SupportOpenCL + { + get + { + return this.supportOpenCL; + } + set + { + this.supportOpenCL = value; + this.NotifyOfPropertyChange("SupportOpenCL"); + } + } /// Gets RangeMode. /// public IEnumerable OutputFormats @@ -791,6 +831,8 @@ namespace HandBrakeWPF.ViewModels this.NotifyOfPropertyChange(() => SelectedOutputFormat); this.NotifyOfPropertyChange(() => this.CurrentTask.OutputFormat); this.NotifyOfPropertyChange(() => IsMkv); + this.NotifyOfPropertyChange(() => SupportHardwareDecoding); + this.NotifyOfPropertyChange(() => SupportOpenCL); this.SetExtension(string.Format(".{0}", this.selectedOutputFormat.ToString().ToLower())); // TODO, tidy up this.VideoViewModel.RefreshTask(); @@ -1646,6 +1688,25 @@ namespace HandBrakeWPF.ViewModels ?? 
this.ScannedSource.Titles.FirstOrDefault(); this.SetupTabs(); } + if (e.Successful && this.selectedTitle != null) + { + if (this.selectedTitle.OpenCLSupport == 0) + { + this.SupportOpenCL = false; + } + else + { + this.SupportOpenCL = true; + } + if (this.selectedTitle.UVDSupport == 0) + { + this.SupportHardwareDecoding = true; + } + else + { + this.SupportHardwareDecoding = false; + } + } this.ShowStatusWindow = false; if (e.Successful) diff --git a/win/CS/HandBrakeWPF/Views/MainView.xaml b/win/CS/HandBrakeWPF/Views/MainView.xaml index ebca0805c..e360267b8 100644 --- a/win/CS/HandBrakeWPF/Views/MainView.xaml +++ b/win/CS/HandBrakeWPF/Views/MainView.xaml @@ -473,6 +473,24 @@ Converter={StaticResource boolToVisConverter}, ConverterParameter=true}" /> + + -- 2.40.0