*/
static int hb_nv12toyuv_reg_kernel( void );
-
/**
* Creates the OpenCL buffers; w is the input frame width and h is the input frame height.
*/
CREATEBUF( dxva2->cl_mem_yuv, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, in_bytes );
return 0;
}
+
/**
* Creates the OpenCL kernel; the kernel name is nv12toyuv.
*/
dxva2->nv12toyuv = clCreateKernel( kenv->program, "nv12toyuv", &ret );
return ret;
}
+
/**
* Sets the OpenCL kernel arguments: input data, output data, input width, output height.
*/
OCLCHECK( clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h );
return 0;
}
+
/**
* Initializes the nv12-to-yuv kernel.
*/
{
if( hb_nv12toyuv_create_cl_buf( kenv, w, h, dxva2 ) )
{
- hb_log( "nv12toyuv_create_cl_buf fail" );
+ hb_log( "OpenCL: nv12toyuv_create_cl_buf fail" );
return -1;
}
if (!dxva2->nv12toyuv_tmp_in)
+ {
dxva2->nv12toyuv_tmp_in = malloc (w*h*3/2);
+ }
+
if (!dxva2->nv12toyuv_tmp_out)
+ {
dxva2->nv12toyuv_tmp_out = malloc (w*h*3/2);
+ }
+
hb_nv12toyuv_create_cl_kernel( kenv, dxva2 );
}
return 0;
}
+/**
+ * copy_plane
+ * @param dst -
+ * @param src -
+ * @param dstride -
+ * @param sstride -
+ * @param h -
+ */
static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
int h )
{
memcpy( dst, src, dstride * h );
return dst + dstride * h;
}
+
int lbytes = dstride <= sstride? dstride : sstride;
while ( --h >= 0 )
{
src += sstride;
dst += dstride;
}
+
return dst;
}
int detelecine = (int)userdata[8];
int i;
if( hb_init_nv12toyuv_ocl( kenv, w, h, dxva2 ) )
+ {
return -1;
+ }
if( hb_nv12toyuv_setkernelarg( kenv, w, h, dxva2 ) )
+ {
return -1;
+ }
int in_bytes = w*h*3/2;
if( kenv->isAMD )
{
memcpy( data + i * dxva2->width, bufi1 + i * p, dxva2->width );
if ( i < dxva2->height >> 1 )
+ {
memcpy( data + ( dxva2->width * dxva2->height ) + i * dxva2->width, bufi2 + i * p, dxva2->width );
- }
+ }
+ }
clEnqueueUnmapMemObject( kenv->command_queue, dxva2->cl_mem_nv12, data, 0, NULL, NULL );
}
else
{
memcpy( tmp + i * dxva2->width, bufi1 + i * p, dxva2->width );
if( i < dxva2->height >> 1 )
+ {
memcpy( tmp + (dxva2->width * dxva2->height) + i * dxva2->width, bufi2 + i * p, dxva2->width );
+ }
}
OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL );
free( tmp );
memcpy( dxva2->nv12toyuv_tmp_in + ( ww * hh ) + i * ( ww >> 1 ), pic_crop.data[1] + i * pic_crop.linesize[1], ww >> 1 );
memcpy( dxva2->nv12toyuv_tmp_in + ( ww * hh ) + ( ( ww * hh )>>2 ) + i * ( ww >> 1 ), pic_crop.data[2] + i * pic_crop.linesize[2], ww >> 1 );
}
+
if( kenv->isAMD )
{
void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_yuv, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, ww * hh * 3 / 2, 0, NULL, NULL, NULL );
{
OCLCHECK( clEnqueueWriteBuffer, kenv->command_queue, dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_in, 0, NULL, NULL );
}
+
hb_buffer_close( &in );
}
return 0;
int st = hb_register_kernel_wrapper( "nv12toyuv", hb_nv12toyuv );
if( !st )
{
- hb_log( "register kernel[%s] failed", "nv12toyuv" );
+ hb_log( "OpenCL: register kernel[%s] failed", "nv12toyuv" );
return -1;
}
return 0;
userdata[6] = (void*)p;
userdata[7] = decomb;
userdata[8] = detelecine;
+
if( hb_nv12toyuv_reg_kernel() )
+ {
return -1;
+ }
+
if( hb_run_kernel( "nv12toyuv", userdata ) )
{
- hb_log( "run kernel[nv12toyuv] failed" );
+ hb_log( "OpenCL: run kernel[nv12toyuv] failed" );
return -1;
}
return 0;
gpu_env.kernel_count++; }\r
\r
\r
+/**\r
+ * hb_confirm_gpu_type\r
+ */\r
int hb_confirm_gpu_type()\r
{\r
int status = 1;\r
return -1;\r
}\r
\r
+/**\r
+ * hb_regist_opencl_kernel\r
+ */\r
int hb_regist_opencl_kernel()\r
{\r
if( !gpu_env.isUserCreated )\r
return 0;\r
}\r
\r
+/**\r
+ * hb_convert_to_string\r
+ * @param filename -\r
+ * @param source -\r
+ * @param gpu_info -\r
+ * @param idx -\r
+ */\r
int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx )\r
{\r
int file_size;\r
return(0);\r
}\r
\r
+/**\r
+ * hb_binary_generated\r
+ * @param context -\r
+ * @param cl_file_name -\r
+ * @param fhandle -\r
+ */\r
int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle )\r
{\r
int i = 0;\r
NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Get context info failed" );\r
+ hb_log( "OpenCL: Get context info failed" );\r
return 0;\r
}\r
\r
devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
if( devices == NULL )\r
{\r
- hb_log( "Notice: No device found" );\r
+ hb_log( "OpenCL: No device found" );\r
return 0;\r
}\r
\r
return status;\r
}\r
\r
+/**\r
+ * hb_write_binary_to_file\r
+ * @param fileName -\r
+ * @param birary -\r
+ * @param numBytes -\r
+ */\r
int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes )\r
{\r
FILE *output = NULL;\r
return 1;\r
}\r
\r
+/**\r
+ * hb_generat_bin_from_kernel_source\r
+ * @param program -\r
+ * @param cl_file_name -\r
+ */\r
int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name )\r
{\r
int i = 0;\r
NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
+ hb_log( "OpenCL: Get program info failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
+\r
devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
if( devices == NULL )\r
{\r
- hb_log( "Notice: No device found, when generate binary file from kernel source" );\r
+ hb_log( "OpenCL: No device found, when generate binary file from kernel source" );\r
return 0;\r
}\r
+\r
/* grab the handles to all of the devices in the program. */\r
status = clGetProgramInfo( program,\r
CL_PROGRAM_DEVICES,\r
NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
+ hb_log( "OpenCL: Get program info failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
+\r
/* figure out the sizes of each of the binaries. */\r
binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );\r
\r
binarySizes, NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
+ hb_log( "OpenCL: Get program info failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
+\r
/* copy over all of the generated binaries. */\r
binaries = (char**)malloc( sizeof(char *) * numDevices );\r
if( binaries == NULL )\r
{\r
- hb_log( "Notice: malloc for binaries failed, when generate binary file from kernel source" );\r
+ hb_log( "OpenCL: malloc for binaries failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
\r
binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] );\r
if( binaries[i] == NULL )\r
{\r
- hb_log( "Notice: malloc for binary[%d] failed, when generate binary file from kernel source", i );\r
+ hb_log( "OpenCL: malloc for binary[%d] failed, when generate binary file from kernel source", i );\r
return 0;\r
}\r
}\r
NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
+ hb_log( "OpenCL: Get program info failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
+\r
/* dump out each binary into its own separate file. */\r
for( i = 0; i < numDevices; i++ )\r
{\r
\r
if( !hb_write_binary_to_file( fileName, binaries[i], binarySizes[i] ))\r
{\r
- hb_log( "Notice: Unable to write opencl kernel, writing to temporary directory instead." );\r
+ hb_log( "OpenCL: Unable to write opencl kernel, writing to temporary directory instead." );\r
return 0;\r
}\r
}\r
return 1;\r
}\r
\r
+\r
+/**\r
+ * hb_init_opencl_attr\r
+ * @param env -\r
+ */\r
int hb_init_opencl_attr( OpenCLEnv * env )\r
{\r
if( gpu_env.isUserCreated )\r
return 0;\r
}\r
\r
+/**\r
+ * hb_create_kernel\r
+ * @param kernelname -\r
+ * @param env -\r
+ */\r
int hb_create_kernel( char * kernelname, KernelEnv * env )\r
{\r
int status;\r
return status != CL_SUCCESS ? 1 : 0;\r
}\r
\r
+/**\r
+ * hb_release_kernel\r
+ * Releases the OpenCL kernel object stored in env->kernel by calling\r
+ * clReleaseKernel().\r
+ * @param env - kernel environment whose kernel member is released\r
+ * @return 0 when clReleaseKernel() reports CL_SUCCESS, 1 otherwise\r
+ */\r
int hb_release_kernel( KernelEnv * env )\r
{\r
int status = clReleaseKernel( env->kernel );\r
return status != CL_SUCCESS ? 1 : 0;\r
}\r
\r
+/**\r
+ * hb_init_opencl_env\r
+ * @param gpu_info -\r
+ */\r
int hb_init_opencl_env( GPUEnv *gpu_info )\r
{\r
size_t length;\r
status = clGetPlatformIDs( 0, NULL, &numPlatforms );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: OpenCL device platform not found." );\r
+ hb_log( "OpenCL: OpenCL device platform not found." );\r
return(1);\r
}\r
+\r
gpu_info->platform = NULL;\r
if( 0 < numPlatforms )\r
{\r
\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Specific opencl platform not found." );\r
+ hb_log( "OpenCL: Specific opencl platform not found." );\r
return(1);\r
}\r
\r
}\r
free( platforms );\r
}\r
+\r
if( NULL == gpu_info->platform )\r
{\r
- hb_log( "Notice: No OpenCL-compatible GPU found." );\r
+ hb_log( "OpenCL: No OpenCL-compatible GPU found." );\r
return(1);\r
}\r
+\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: No OpenCL-compatible GPU found." );\r
+ hb_log( "OpenCL: No OpenCL-compatible GPU found." );\r
return(1);\r
}\r
\r
gpu_info->dType = CL_DEVICE_TYPE_GPU;\r
gpu_info->context = clCreateContextFromType(\r
cps, gpu_info->dType, NULL, NULL, &status );\r
+\r
if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )\r
{\r
gpu_info->dType = CL_DEVICE_TYPE_CPU;\r
gpu_info->context = clCreateContextFromType(\r
cps, gpu_info->dType, NULL, NULL, &status );\r
}\r
+\r
if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )\r
{\r
gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;\r
gpu_info->context = clCreateContextFromType(\r
cps, gpu_info->dType, NULL, NULL, &status );\r
}\r
+\r
if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )\r
{\r
- hb_log( "Notice: Unable to create opencl context." );\r
+ hb_log( "OpenCL: Unable to create opencl context." );\r
return(1);\r
}\r
+\r
/* Detect OpenCL devices. */\r
/* First, get the size of device list data */\r
status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES,\r
0, NULL, &length );\r
if((status != CL_SUCCESS) || (length == 0))\r
{\r
- hb_log( "Notice: Unable to get the list of devices in context." );\r
+ hb_log( "OpenCL: Unable to get the list of devices in context." );\r
return(1);\r
}\r
+\r
/* Now allocate memory for device list based on the size we got earlier */\r
gpu_info->devices = (cl_device_id*)malloc( length );\r
if( gpu_info->devices == (cl_device_id*)NULL )\r
{\r
return(1);\r
}\r
+\r
/* Now, get the device list data */\r
status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length,\r
gpu_info->devices, NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Unable to get the device list data in context." );\r
+ hb_log( "OpenCL: Unable to get the device list data in context." );\r
return(1);\r
}\r
\r
0, &status );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Unable to create opencl command queue." );\r
+ hb_log( "OpenCL: Unable to create opencl command queue." );\r
return(1);\r
}\r
}\r
}\r
\r
\r
+/**\r
+ * hb_release_opencl_env\r
+ * @param gpu_info -\r
+ */\r
int hb_release_opencl_env( GPUEnv *gpu_info )\r
{\r
if( !isInited )\r
gpu_env.programs[i] = NULL;\r
}\r
}\r
+\r
if( gpu_env.command_queue )\r
{\r
clReleaseCommandQueue( gpu_env.command_queue );\r
gpu_env.command_queue = NULL;\r
}\r
+\r
if( gpu_env.context )\r
{\r
clReleaseContext( gpu_env.context );\r
gpu_env.context = NULL;\r
}\r
+\r
isInited = 0;\r
gpu_info->isUserCreated = 0;\r
return 1;\r
}\r
\r
\r
+/**\r
+ * hb_register_kernel_wrapper\r
+ * @param kernel_name -\r
+ * @param function -\r
+ */\r
int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function )\r
{\r
int i;\r
return(0);\r
}\r
\r
+/**\r
+ * hb_cached_of_kerner_prg\r
+ * @param gpu_env -\r
+ * @param cl_file_name -\r
+ */\r
int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name )\r
{\r
int i;\r
return(0);\r
}\r
\r
+/**\r
+ * hb_compile_kernel_file\r
+ * @param filename -\r
+ * @param gpu_info -\r
+ * @param indx -\r
+ * @param build_option -\r
+ */\r
int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,\r
int indx, const char *build_option )\r
{\r
NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Unable to get the number of devices in context." );\r
+ hb_log( "OpenCL: Unable to get the number of devices in context." );\r
return 0;\r
}\r
\r
gpu_info->programs[idx] = clCreateProgramWithSource(\r
gpu_info->context, 1, &source, source_size, &status );\r
}\r
+\r
if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){\r
- hb_log( "Notice: Unable to get list of devices in context." );\r
+ hb_log( "OpenCL: Unable to get list of devices in context." );\r
return(0);\r
}\r
\r
/* create a cl program executable for all the devices specified */\r
- if( !gpu_info->isUserCreated )\r
+ if( !gpu_info->isUserCreated ) \r
+ {\r
status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices,\r
build_option, NULL, NULL );\r
+ }\r
else\r
+ {\r
status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev),\r
build_option, NULL, NULL );\r
+ }\r
\r
if( status != CL_SUCCESS )\r
{\r
- if( !gpu_info->isUserCreated )\r
+ if( !gpu_info->isUserCreated ) \r
+ {\r
status = clGetProgramBuildInfo( gpu_info->programs[idx],\r
gpu_info->devices[0],\r
CL_PROGRAM_BUILD_LOG, 0, NULL, &length );\r
- else\r
- status = clGetProgramBuildInfo( gpu_info->programs[idx],\r
+ }\r
+ else\r
+ {\r
+ status = clGetProgramBuildInfo( gpu_info->programs[idx],\r
gpu_info->dev,\r
CL_PROGRAM_BUILD_LOG, 0, NULL, &length );\r
+ }\r
\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Unable to get GPU build information." );\r
+ hb_log( "OpenCL: Unable to get GPU build information." );\r
return(0);\r
}\r
+\r
buildLog = (char*)malloc( length );\r
if( buildLog == (char*)NULL )\r
{\r
return(0);\r
}\r
+\r
if( !gpu_info->isUserCreated )\r
+ {\r
status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0],\r
CL_PROGRAM_BUILD_LOG, length, buildLog, &length );\r
+ }\r
else\r
+ {\r
status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev,\r
CL_PROGRAM_BUILD_LOG, length, buildLog, &length );\r
+ }\r
\r
fd1 = fopen( "kernel-build.log", "w+" );\r
if( fd1 != NULL ) {\r
\r
strcpy( gpu_env.kernelSrcFile[idx], filename );\r
\r
- if( binaryExisted == 0 )\r
+ if( binaryExisted == 0 ) \r
+ {\r
hb_generat_bin_from_kernel_source( gpu_env.programs[idx], filename );\r
+ }\r
\r
gpu_info->file_count += 1;\r
\r
}\r
\r
\r
+/**\r
+ * hb_get_kernel_env_and_func\r
+ * @param kernel_name -\r
+ * @param env -\r
+ * @param function -\r
+ */\r
int hb_get_kernel_env_and_func( const char *kernel_name,\r
KernelEnv *env,\r
cl_kernel_function *function )\r
return(0);\r
}\r
\r
-\r
+/**\r
+ * hb_run_kernel\r
+ * @param kernel_name -\r
+ * @param userdata -\r
+ */\r
int hb_run_kernel( const char *kernel_name, void **userdata )\r
{\r
KernelEnv env;\r
memset( &env, 0, sizeof(KernelEnv));\r
status = hb_get_kernel_env_and_func( kernel_name, &env, &function );\r
strcpy( env.kernel_name, kernel_name );\r
- if( status == 1 )\r
+ if( status == 1 ) \r
+ {\r
return(function( userdata, &env ));\r
+ }\r
+\r
return(0);\r
}\r
\r
-\r
+/**\r
+ * hb_init_opencl_run_env\r
+ * @param argc -\r
+ * @param argv -\r
+ * @param build_option -\r
+ */\r
int hb_init_opencl_run_env( int argc, char **argv, const char *build_option )\r
{\r
int status = 0;\r
if( MAX_CLKERNEL_NUM <= 0 )\r
+ {\r
return 1;\r
+ }\r
+\r
if((argc > MAX_CLFILE_NUM) || (argc<0))\r
+ {\r
return 1;\r
+ }\r
\r
if( !isInited )\r
{\r
return(0);\r
}\r
\r
-\r
+/**\r
+ * hb_release_opencl_run_env\r
+ * Releases the OpenCL run environment by passing the file-level gpu_env\r
+ * to hb_release_opencl_env().\r
+ * @return the value returned by hb_release_opencl_env()\r
+ */\r
int hb_release_opencl_run_env()\r
{\r
return hb_release_opencl_env( &gpu_env );\r
}\r
\r
-\r
+/**\r
+ * hb_opencl_stats\r
+ * Reports the OpenCL initialization state.\r
+ * @return the current value of the file-level isInited flag\r
+ */\r
int hb_opencl_stats()\r
{\r
return isInited;\r
}\r
\r
+/**\r
+ * hb_get_opencl_env\r
+ */\r
int hb_get_opencl_env()\r
{\r
int i = 0;\r
cl_int status;\r
size_t numDevices;\r
cl_device_id *devices;\r
+\r
/*initialize devices, context, comand_queue*/\r
status = hb_init_opencl_env( &gpu_env );\r
if( status )\r
NULL );\r
if( status != CL_SUCCESS )\r
return 0;\r
+\r
devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
if( devices == NULL )\r
return 0;\r
+\r
/* grab the handles to all of the devices in the context. */\r
status = clGetContextInfo( gpu_env.context,\r
CL_CONTEXT_DEVICES,\r
devices,\r
NULL );\r
status = 0;\r
+\r
/* dump out each binary into its own separate file. */\r
for( i = 0; i < numDevices; i++ )\r
{\r
hb_log( "GPU Driver Version: %s", driverVersion );\r
}\r
}\r
+\r
if( devices != NULL )\r
{\r
free( devices );\r
devices = NULL;\r
}\r
+\r
return status;\r
}\r
\r
-\r
+/**\r
+ * hb_create_buffer\r
+ * @param cl_Buf -\r
+ * @param flags -\r
+ * @param size -\r
+ */\r
int hb_create_buffer( cl_mem *cl_Buf, int flags, int size )\r
{\r
int status;\r
\r
if( status != CL_SUCCESS )\r
{ \r
- hb_log( "clCreateBuffer error '%d'", status );\r
+ hb_log( "OpenCL: clCreateBuffer error '%d'", status );\r
return 0; \r
}\r
+\r
return 1;\r
}\r
\r
+\r
+/**\r
+ * hb_read_opencl_buffer\r
+ * Reads size bytes, starting at offset 0, from the OpenCL buffer cl_inBuf\r
+ * into outbuf with a blocking (CL_TRUE) clEnqueueReadBuffer() call on\r
+ * gpu_env.command_queue.\r
+ * NOTE(review): the failure log text names "av_read_opencl_buffer", not\r
+ * this function - confirm whether that is intentional.\r
+ * @param cl_inBuf - OpenCL buffer to read from\r
+ * @param outbuf - destination for the data that is read\r
+ * @param size - number of bytes to read\r
+ * @return 1 on success, 0 when clEnqueueReadBuffer() does not return CL_SUCCESS\r
+ */\r
int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )\r
{\r
int status;\r
status = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0 );\r
if( status != CL_SUCCESS )\r
{ \r
- hb_log( "av_read_opencl_buffer error '%d'", status );\r
+ hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status );\r
return 0; \r
}\r
+\r
return 1;\r
}\r
#endif\r