Add decomb filter with OpenCl acceleration

author handbrake <no-reply@handbrake.fr>

Wed, 26 Jun 2013 02:40:56 +0000 (02:40 +0000)

committer handbrake <no-reply@handbrake.fr>

Wed, 26 Jun 2013 02:40:56 +0000 (02:40 +0000)
author handbrake <no-reply@handbrake.fr>
Wed, 26 Jun 2013 02:40:56 +0000 (02:40 +0000)
committer handbrake <no-reply@handbrake.fr>
Wed, 26 Jun 2013 02:40:56 +0000 (02:40 +0000)
diff --git a/libhb/decomb.c b/libhb/decomb.c

index a95e998d88695e20ccd112fbd8c9922f21953e8e..5682311639f6033bb2251ea75a98cadd99a83462 100644 (file)
--- a/libhb/decomb.c
+++ b/libhb/decomb.c
@@ -138,6 +138,7 @@ struct hb_filter_private_s
  {
      // Decomb parameters
      int              mode;
+    int              use_opencl;
      int              filter_mode;
      int              spatial_metric;
      int              motion_threshold;
@@ -206,6 +207,11 @@ struct hb_filter_private_s
      taskset_t        mask_dilate_taskset; // Threads for decomb mask dilate
  
      taskset_t        eedi2_taskset;       // Threads for eedi2 - one per plane
+
+    void* cl_mem_dst;
+    void* cl_mem_prev;
+    void* cl_mem_cur;
+    void* cl_mem_next;
  };
  
  static int hb_decomb_init( hb_filter_object_t * filter,
@@ -1986,6 +1992,8 @@ static int hb_decomb_init( hb_filter_object_t * filter,
  
      build_gamma_lut( pv );
  
+    pv->use_opencl = init->job->use_opencl;
+
      pv->deinterlaced_frames = 0;
      pv->blended_frames = 0;
      pv->unfiltered_frames = 0;
@@ -2059,6 +2067,32 @@ static int hb_decomb_init( hb_filter_object_t * filter,
      memset(pv->mask_filtered->data, 0, pv->mask_filtered->size);
      memset(pv->mask_temp->data, 0, pv->mask_temp->size);
  
+#ifdef USE_OPENCL
+    if(pv->use_opencl){
+        hb_log("decomb with OpenCL");
+        if( !hb_create_buffer( &(pv->cl_mem_dst), CL_MEM_READ_WRITE, 3133440 ) )
+        {
+            hb_log("hb_create_buffer cl_outbuf Error\n");
+            return -1;
+        }
+        if( !hb_create_buffer( &(pv->cl_mem_prev), CL_MEM_READ_WRITE, 3133440 ) )
+        {
+            hb_log("hb_create_buffer cl_outbuf Error\n");
+            return -1;
+        }
+        if( !hb_create_buffer( &(pv->cl_mem_cur), CL_MEM_READ_WRITE, 3133440 ) )
+        {
+            hb_log("hb_create_buffer cl_outbuf Error\n");
+            return -1;
+        }
+        if( !hb_create_buffer( &(pv->cl_mem_next), CL_MEM_READ_WRITE, 3133440 ) )
+        {
+            hb_log("hb_create_buffer cl_outbuf Error\n");
+            return -1;
+        }
+    }
+#endif
+
      int ii;
      if( pv->mode & MODE_EEDI2 )
      {
@@ -2499,6 +2533,11 @@ static void hb_decomb_close( hb_filter_object_t * filter )
      hb_buffer_close(&pv->mask_filtered);
      hb_buffer_close(&pv->mask_temp);
  
+    clReleaseMemObject( pv->cl_mem_dst );
+    clReleaseMemObject( pv->cl_mem_prev );
+    clReleaseMemObject( pv->cl_mem_cur );
+    clReleaseMemObject( pv->cl_mem_next );
+
      if( pv->mode & MODE_EEDI2 )
      {
          /* Cleanup eedi-half  buffers */
@@ -2580,16 +2619,7 @@ static int hb_decomb_work( hb_filter_object_t * filter,
  
      /* deinterlace both fields if mcdeint is enabled without bob */
      int frame, num_frames = 1;
-    if (pv->mode & (MODE_MCDEINT | MODE_BOB))
-    {
-        num_frames = 2;
-    }
      
-    // Will need up to 2 buffers simultaneously
-    int idx = 0;
-    hb_buffer_t * o_buf[2] = {NULL,};
-
-    /* Perform yadif filtering */        
      for( frame = 0; frame < num_frames; frame++ )
      {
          int parity = frame ^ tff ^ 1;
@@ -2603,9 +2633,8 @@ static int hb_decomb_work( hb_filter_object_t * filter,
          // tff for eedi2
          pv->tff = !parity;
  
-        if (o_buf[idx] == NULL)
-        {
-            o_buf[idx] = hb_video_buffer_init(in->f.width, in->f.height);
+        if(out == NULL){
+            out = hb_video_buffer_init(in->f.width, in->f.height);
          }
  
          if (frame)
@@ -2613,73 +2642,95 @@ static int hb_decomb_work( hb_filter_object_t * filter,
          else
              pv->skip_comb_check = 0;
  
-        yadif_filter(pv, o_buf[idx], parity, tff);
-
-        // Unfortunately, all frames must be fed to mcdeint combed or
-        // not since it maintains state that is updated by each frame.
-        if (pv->mcdeint_mode >= 0)
-        {
-            if (o_buf[idx^1] == NULL)
+#ifdef USE_OPENCL
+        if(pv->use_opencl){
+            int is_combed;
+            if (!pv->skip_comb_check)
              {
-                o_buf[idx^1] = hb_video_buffer_init(in->f.width, in->f.height);
+                is_combed = pv->spatial_metric >= 0 ? comb_segmenter( pv ) : 1;
+            }
+            else
+            {
+                is_combed = pv->is_combed;
              }
-            /* Perform mcdeint filtering */
-            mcdeint_filter(o_buf[idx^1], o_buf[idx], parity, &pv->mcdeint);
-
-            // If frame was combed, we will use results from mcdeint
-            // else we will use yadif result
-            if (pv->is_combed)
-                idx ^= 1;
-        }
  
-        // Add to list of output buffers (should be at most 2)
-        if ((pv->mode & MODE_BOB) ||
-            pv->is_combed == 0 ||
-            frame == num_frames - 1)
-        {
-            if ( out == NULL )
+            if( is_combed == 1 )
              {
-                last = out = o_buf[idx];
+                pv->deinterlaced_frames++;
+            }
+            else if( is_combed == 2 )
+            {
+                pv->blended_frames++;
              }
              else
              {
-                last->next = o_buf[idx];
-                last = last->next;
+                pv->unfiltered_frames++;
              }
-            last->next = NULL;
  
-            // Indicate that buffer was consumed
-            o_buf[idx] = NULL;
+            pv->is_combed = is_combed;
+            out->s = in->s;
+            out->f = in->f;
+
+            if(is_combed){
+                hb_write_opencl_frame_buffer(pv->cl_mem_prev, pv->ref[0]->plane[0].data, pv->ref[0]->plane[1].data, pv->ref[0]->plane[2].data, pv->ref[0]->plane[0].stride, pv->ref[0]->plane[1].stride, pv->ref[0]->plane[2].stride, in->f.height, 0);
  
-            /* Copy buffered settings to output buffer settings */
-            last->s = pv->ref[1]->s;
-            idx ^= 1;
+                hb_write_opencl_frame_buffer(pv->cl_mem_cur, pv->ref[1]->plane[0].data, pv->ref[1]->plane[1].data, pv->ref[1]->plane[2].data, pv->ref[1]->plane[0].stride, pv->ref[1]->plane[1].stride, pv->ref[1]->plane[2].stride, in->f.height, 0);
  
-            if ((pv->mode & MODE_MASK) && pv->spatial_metric >= 0 )
+                hb_write_opencl_frame_buffer(pv->cl_mem_next, pv->ref[2]->plane[0].data, pv->ref[2]->plane[1].data, pv->ref[2]->plane[2].data, pv->ref[2]->plane[0].stride, pv->ref[2]->plane[1].stride, pv->ref[2]->plane[2].stride, in->f.height, 0);
+
+                cl_yadif_filter(pv->cl_mem_dst, 
+                            pv->cl_mem_prev, 
+                            pv->cl_mem_cur, 
+                            pv->cl_mem_next, 
+                            parity, 
+                            tff, 
+                            pv->ref[1]->plane[0].stride, 
+                            pv->ref[1]->plane[1].stride, 
+                            pv->ref[1]->plane[0].stride, 
+                            pv->ref[1]->plane[1].stride, 
+                            pv->mode, 
+                            in->f.width, 
+                            in->f.height);
+
+                hb_read_opencl_frame_buffer(pv->cl_mem_dst, out->plane[0].data, out->plane[1].data, out->plane[2].data, pv->ref[1]->plane[0].stride, pv->ref[1]->plane[1].stride, pv->ref[1]->plane[2].stride, in->f.height);
+
+                hb_buffer_move_subs( out, pv->ref[1] );
+            }
+            else
              {
-                if (pv->mode == MODE_MASK ||
-                    ((pv->mode & MODE_MASK) && (pv->mode & MODE_FILTER)) ||
-                    ((pv->mode & MODE_MASK) && (pv->mode & MODE_GAMMA)) ||
-                    pv->is_combed)
-                {
-                    apply_mask(pv, last);
-                }
+                hb_buffer_copy(out, pv->ref[1]); 
              }
+        }else{
+            yadif_filter(pv, out, parity, tff);
          }
-    }
-    // Copy subs only to first output buffer
-    hb_buffer_move_subs( out, pv->ref[1] );
+#else
+        yadif_filter(pv, out, parity, tff);
+#endif
  
-    hb_buffer_close(&o_buf[0]);
-    hb_buffer_close(&o_buf[1]);
+        hb_buffer_t* mcdeint_out = NULL;
+        if (pv->mcdeint_mode >= 0)
+        {    
+            mcdeint_out = hb_video_buffer_init(in->f.width, in->f.height);
+            mcdeint_filter(mcdeint_out, out, parity, &pv->mcdeint);
+        }
  
-    /* if this frame was deinterlaced and bob mode is engaged, halve
-       the duration of the saved timestamps. */
-    if ((pv->mode & MODE_BOB) && pv->is_combed)
-    {
-        out->s.stop -= (out->s.stop - out->s.start) / 2LL;
-        last->s.start = out->s.stop;
-        last->s.new_chap = 0;
+        if(mcdeint_out != NULL){
+            last = mcdeint_out;
+        }else{
+            last = out;
+        }
+
+        last->s = pv->ref[1]->s;
+        if ((pv->mode & MODE_MASK) && pv->spatial_metric >= 0 )
+        {
+            if (pv->mode == MODE_MASK ||
+                ((pv->mode & MODE_MASK) && (pv->mode & MODE_FILTER)) ||
+                ((pv->mode & MODE_MASK) && (pv->mode & MODE_GAMMA)) ||
+                pv->is_combed)
+            {
+                apply_mask(pv, last);
+            }
+        }
      }
  
      *buf_out = out;
diff --git a/libhb/openclkernels.h b/libhb/openclkernels.h

index 7e165b4c864a44a502e3a74fad3ea207d0b933dc..e347486140753133ed2afb6c0f2c26daf23b1233 100644 (file)
--- a/libhb/openclkernels.h
+++ b/libhb/openclkernels.h
@@ -544,4 +544,140 @@ char *kernel_src_vscalefast = KERNEL (
      }\r
      );\r
  \r
+char *kernel_src_yadif_filter = KERNEL(\r
+    void filter_v6(\r
+        global unsigned char *dst,\r
+        global unsigned char *prev, \r
+        global unsigned char *cur, \r
+        global unsigned char *next,\r
+        int x,\r
+        int y,\r
+        int width,\r
+        int height,\r
+        int parity,\r
+        int inlinesize,\r
+        int outlinesize,\r
+        int inmode,\r
+        int uvflag\r
+    )\r
+    {\r
+\r
+        int flag = uvflag * (y >=height) * height;  \r
+        int prefs = select(-(inlinesize), inlinesize,((y+1) - flag) <height);\r
+        int mrefs = select(inlinesize, -(inlinesize),y - flag);\r
+        int mode  = select(inmode,2,(y - flag==1) || (y - flag + 2==height));\r
+        int score;\r
+    \r
+        global unsigned char *prev2 = parity ? prev : cur ;\r
+        global unsigned char *next2 = parity ? cur  : next;\r
+        int index = x + y * inlinesize;\r
+        int outindex = x + y * outlinesize;\r
+        int c = cur[index + mrefs]; \r
+        int d = (prev2[index] + next2[index])>>1; \r
+        int e = cur[index + prefs]; \r
+        int temporal_diff0 = abs((prev2[index]) - (next2[index])); \r
+        int temporal_diff1 =(abs(prev[index + mrefs] - c) + abs(prev[index + prefs] - e) )>>1; \r
+        int temporal_diff2 =(abs(next[index + mrefs] - c) + abs(next[index + prefs] - e) )>>1; \r
+        int diff = max(max(temporal_diff0>>1, temporal_diff1), temporal_diff2); \r
+        int spatial_pred = (c+e)>>1; \r
+        int spatial_score = abs(cur[index + mrefs-1] - cur[index + prefs-1]) + abs(c-e) + abs(cur[index + mrefs+1] - cur[index + prefs+1]) - 1; \r
+        //check -1\r
+        score = abs(cur[index + mrefs-2] - cur[index + prefs])\r
+            + abs(cur[index + mrefs-1] - cur[index + prefs+1])\r
+            + abs(cur[index + mrefs] - cur[index + prefs+2]);\r
+        if (score < spatial_score)\r
+        {\r
+            spatial_score= score;\r
+            spatial_pred= (cur[index + mrefs-1] + cur[index + prefs+1])>>1;\r
+        }\r
+        //check -2\r
+        score = abs(cur[index + mrefs-3] - cur[index + prefs+1])\r
+            + abs(cur[index + mrefs-2] - cur[index + prefs+2])\r
+            + abs(cur[index + mrefs-1] - cur[index + prefs+3]);\r
+        if (score < spatial_score)\r
+        {\r
+            spatial_score= score;\r
+            spatial_pred= (cur[index + mrefs-2] + cur[index + prefs+2])>>1;\r
+        }\r
+        //check 1\r
+        score = abs(cur[index + mrefs] - cur[index + prefs-2])\r
+            + abs(cur[index + mrefs+1] - cur[index + prefs-1])\r
+            + abs(cur[index + mrefs+2] - cur[index + prefs]);\r
+        if (score < spatial_score)\r
+        {\r
+            spatial_score= score;\r
+            spatial_pred= (cur[index + mrefs+1] + cur[index + prefs-1])>>1;\r
+        }\r
+        //check 2\r
+        score = abs(cur[index + mrefs+1] - cur[index + prefs-3])\r
+            + abs(cur[index + mrefs+2] - cur[index + prefs-2])\r
+            + abs(cur[index + mrefs+3] - cur[index + prefs-1]);\r
+        if (score < spatial_score)\r
+        {\r
+            spatial_score= score;\r
+            spatial_pred= (cur[index + mrefs+2] + cur[index + prefs-2])>>1;\r
+        }\r
+        if (mode < 2)\r
+        { \r
+            int b = (prev2[index + (mrefs<<1)] + next2[index + (mrefs<<1)])>>1; \r
+            int f = (prev2[index + (prefs<<1)] + next2[index + (prefs<<1)])>>1; \r
+            int diffmax = max(max(d-e, d-c), min(b-c, f-e)); \r
+            int diffmin = min(min(d-e, d-c), max(b-c, f-e)); \r
+\r
+            diff = max(max(diff, diffmin), -diffmax); \r
+        } \r
+        if (spatial_pred > d + diff) \r
+        {\r
+            spatial_pred = d + diff; \r
+        }\r
+        else if (spatial_pred < d - diff) \r
+        {\r
+            spatial_pred = d - diff; \r
+        }\r
+\r
+        dst[outindex] = spatial_pred; \r
+    }\r
+\r
+    kernel void yadif_filter(\r
+        global unsigned char *dst,\r
+        global unsigned char *prev,\r
+        global unsigned char *cur,\r
+        global unsigned char *next,\r
+        int parity,\r
+        int inlinesizeY,\r
+        int inlinesizeUV,\r
+        int outlinesizeY,\r
+        int outlinesizeUV,\r
+        int mode)\r
+    {\r
+        int x=get_global_id(0);\r
+        int y=(get_global_id(1)<<1) + (!parity);\r
+        int width=(get_global_size(0)<<1)/3;\r
+        int height=get_global_size(1)<<1;\r
+    \r
+\r
+        global unsigned char *dst_Y=dst;\r
+        global unsigned char *dst_U=dst_Y+height*outlinesizeY;\r
+\r
+        global unsigned char *prev_Y=prev;\r
+        global unsigned char *prev_U=prev_Y+height*inlinesizeY;\r
+\r
+        global unsigned char *cur_Y=cur;\r
+        global unsigned char *cur_U=cur_Y+height*inlinesizeY;\r
+\r
+        global unsigned char *next_Y=next;\r
+        global unsigned char *next_U=next_Y+height*inlinesizeY;\r
+\r
+        if(x < width)\r
+        {\r
+            filter_v6(dst_Y,prev_Y,cur_Y,next_Y,x,y,width,height,parity,inlinesizeY,outlinesizeY,mode,0);\r
+        }\r
+        else\r
+        {\r
+            x = x - width;\r
+            filter_v6(dst_U,prev_U,cur_U,next_U,x,y,width>>1,height>>1,parity,inlinesizeUV,outlinesizeUV,mode,1);\r
+        }\r
+    }\r
+    );\r
+\r
  #endif\r
diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c

index a673226cf5b12159b227f6d312db10ddb91b9015..01aef90d102c7b71537ee0ae12c699cf7192a828 100644 (file)
--- a/libhb/openclwrapper.c
+++ b/libhb/openclwrapper.c
@@ -175,6 +175,7 @@ int hb_regist_opencl_kernel()
      ADD_KERNEL_CFG( 1, "frame_v_scale", NULL )\r
      ADD_KERNEL_CFG( 2, "nv12toyuv", NULL )\r
      ADD_KERNEL_CFG( 3, "scale_opencl", NULL )\r
+    ADD_KERNEL_CFG( 4, "yadif_filter", NULL )\r
  \r
      return 0;\r
  }\r
@@ -423,7 +424,7 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_
                                       deviceName,\r
                                       NULL);\r
  \r
-            str = (char*)strstr(cl_file_name, ".cl");\r
+            str = (char*)strstr( cl_file_name, (char*)".cl" );\r
              memcpy(cl_name, cl_file_name, str - cl_file_name);\r
              cl_name[str - cl_file_name] = '\0';\r
              sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);\r
@@ -797,8 +798,9 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
  #else\r
      int kernel_src_size = strlen( kernel_src_hscale ) + strlen( kernel_src_vscale ) \r
                                    + strlen( kernel_src_nvtoyuv ) + strlen( kernel_src_hscaleall ) \r
-                                     + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) \r
-                                     + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast );\r
+                                  + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) \r
+                                  + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast )\r
+                                  + strlen( kernel_src_yadif_filter );\r
      source_str = (char*)malloc( kernel_src_size + 2 );\r
      strcpy( source_str, kernel_src_hscale );\r
      strcat( source_str, kernel_src_vscale );\r
@@ -808,6 +810,7 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
      strcat( source_str, kernel_src_vscalealldither );\r
      strcat( source_str, kernel_src_vscaleallnodither );\r
      strcat( source_str, kernel_src_vscalefast );\r
+    strcat( source_str, kernel_src_yadif_filter );\r
  #endif\r
  \r
      source = source_str;\r
@@ -885,30 +888,30 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
  \r
      /* create a cl program executable for all the devices specified */\r
      if( !gpu_info->isUserCreated ) \r
-       {\r
+    {\r
          status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices,\r
                                   build_option, NULL, NULL );\r
-       }\r
+    }\r
      else\r
-       {\r
+    {\r
          status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev),\r
                                   build_option, NULL, NULL );\r
-       }\r
+    }\r
  \r
      if( status != CL_SUCCESS )\r
      {\r
          if( !gpu_info->isUserCreated ) \r
-               {\r
+        {\r
              status = clGetProgramBuildInfo( gpu_info->programs[idx],\r
                                              gpu_info->devices[0],\r
                                              CL_PROGRAM_BUILD_LOG, 0, NULL, &length );\r
-               }\r
-               else\r
-               {\r
-                       status = clGetProgramBuildInfo( gpu_info->programs[idx],\r
+        }\r
+        else\r
+        {\r
+            status = clGetProgramBuildInfo( gpu_info->programs[idx],\r
                                              gpu_info->dev,\r
                                              CL_PROGRAM_BUILD_LOG, 0, NULL, &length );\r
-               }\r
+        }\r
  \r
          if( status != CL_SUCCESS )\r
          {\r
@@ -923,15 +926,15 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
          }\r
  \r
          if( !gpu_info->isUserCreated )\r
-               {\r
+        {\r
              status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0],\r
                                              CL_PROGRAM_BUILD_LOG, length, buildLog, &length );\r
-               }\r
+        }\r
          else\r
-               {\r
+        {\r
              status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev,\r
                                              CL_PROGRAM_BUILD_LOG, length, buildLog, &length );\r
-               }\r
+        }\r
  \r
          fd1 = fopen( "kernel-build.log", "w+" );\r
          if( fd1 != NULL ) {\r
@@ -997,9 +1000,9 @@ int hb_run_kernel( const char *kernel_name, void **userdata )
      status = hb_get_kernel_env_and_func( kernel_name, &env, &function );\r
      strcpy( env.kernel_name, kernel_name );\r
      if( status == 1 ) \r
-       {\r
+    {\r
          return(function( userdata, &env ));\r
-       }\r
+    }\r
  \r
      return(0);\r
  }\r
@@ -1014,14 +1017,14 @@ int hb_init_opencl_run_env( int argc, char **argv, const char *build_option )
  {\r
      int status = 0;\r
      if( MAX_CLKERNEL_NUM <= 0 )\r
-       {\r
+    {\r
          return 1;\r
-       }\r
+    }\r
  \r
      if((argc > MAX_CLFILE_NUM) || (argc<0))\r
-       {\r
+    {\r
          return 1;\r
-       }\r
+    }\r
  \r
      if( !isInited )\r
      {\r
@@ -1163,4 +1166,59 @@ int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )
  \r
      return 1;\r
  }\r
+\r
+int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb)\r
+{\r
+    int status = clEnqueueCopyBuffer(gpu_env.command_queue,\r
+        src_buffer,\r
+        dst_buffer,\r
+        src_offset, dst_offset, cb,\r
+        0, 0, 0);\r
+    if( status != CL_SUCCESS )\r
+    { \r
+        av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status ); \r
+        return 0; \r
+    }\r
+    return 1;\r
+}\r
+\r
+int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height)\r
+{\r
+\r
+    int chrH = -(-height >> 1);\r
+        unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2));\r
+    if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height))\r
+    {\r
+        memcpy(Ybuf,temp,linesize0 * height);\r
+        memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH);\r
+        memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH);\r
+        \r
+    }\r
+    av_free(temp);\r
+\r
+    return 1;\r
+}\r
+\r
+int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset)\r
+{\r
+    int status;\r
+    void *mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf,  CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * (linesize0  + linesize1)*height + offset, 0, NULL, NULL, NULL );\r
+    uint8_t *temp = (uint8_t *)mapped;\r
+    temp += offset;\r
+    memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height);\r
+    memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2);\r
+    memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2);\r
+    clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL );\r
+    return 1;\r
+}\r
+\r
+cl_command_queue hb_get_command_queue()\r
+{\r
+    return gpu_env.command_queue;\r
+}\r
+\r
+cl_context hb_get_context()\r
+{\r
+    return gpu_env.context;\r
+}\r
  #endif\r
author	handbrake <no-reply@handbrake.fr>
	Wed, 26 Jun 2013 02:40:56 +0000 (02:40 +0000)
committer	handbrake <no-reply@handbrake.fr>
	Wed, 26 Jun 2013 02:40:56 +0000 (02:40 +0000)
libhb/decomb.c		patch \| blob \| history
libhb/openclkernels.h		patch \| blob \| history
libhb/openclwrapper.c		patch \| blob \| history