-/* openclwrapper.c\r
-\r
- Copyright (c) 2003-2012 HandBrake Team\r
- This file is part of the HandBrake source code\r
- Homepage: <http://handbrake.fr/>.\r
- It may be used under the terms of the GNU General Public License v2.\r
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html\r
-\r
- Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>\r
- Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>\r
- */\r
- \r
-#ifdef USE_OPENCL\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <string.h>\r
-#include "openclwrapper.h"\r
-#include "openclkernels.h"\r
-\r
-//#define USE_EXTERNAL_KERNEL\r
-#ifdef SYS_MINGW\r
-#include <windows.h>\r
-#endif\r
-\r
-#if defined(__APPLE__)\r
-#include <OpenCL/cl.h>\r
-#else\r
-#include <CL/cl.h>\r
-#endif\r
-\r
-#if defined(_MSC_VER)\r
-#define strcasecmp strcmpi\r
-#endif\r
-\r
-#define MAX_KERNEL_STRING_LEN 64\r
-#define MAX_CLFILE_NUM 50\r
-#define MAX_CLKERNEL_NUM 200\r
-#define MAX_CLFILE_PATH 255\r
-#define MAX_KERNEL_NUM 50\r
-#define MAX_KERNEL_NAME_LEN 64\r
-\r
-#ifndef INVALID_HANDLE_VALUE\r
-#define INVALID_HANDLE_VALUE NULL\r
-#endif\r
-\r
-//#define THREAD_PRIORITY_TIME_CRITICAL 15\r
-\r
-enum VENDOR\r
-{\r
- AMD = 0,\r
- Intel,\r
- NVIDIA,\r
- others\r
-};\r
-typedef struct _GPUEnv\r
-{\r
- //share vb in all modules in hb library\r
- cl_platform_id platform;\r
- cl_device_type dType;\r
- cl_context context;\r
- cl_device_id * devices;\r
- cl_device_id dev;\r
- cl_command_queue command_queue;\r
- cl_kernel kernels[MAX_CLFILE_NUM];\r
- cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file\r
- char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256\r
- int file_count; // only one kernel file\r
-\r
- char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1];\r
- cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];\r
- int kernel_count;\r
- int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper\r
- enum VENDOR vendor;\r
-}GPUEnv;\r
-\r
-typedef struct\r
-{\r
- char kernelName[MAX_KERNEL_NAME_LEN+1];\r
- char * kernelStr;\r
-}hb_kernel_node;\r
+/* openclwrapper.c
+
+ Copyright (c) 2003-2012 HandBrake Team
+ This file is part of the HandBrake source code
+ Homepage: <http://handbrake.fr/>.
+ It may be used under the terms of the GNU General Public License v2.
+ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
+
+ Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
+ Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
+ */
+
+#ifdef USE_OPENCL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "openclwrapper.h"
+#include "openclkernels.h"
+
+//#define USE_EXTERNAL_KERNEL
+#ifdef SYS_MINGW
+#include <windows.h>
+#endif
+
+#if defined(__APPLE__)
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#if defined(_MSC_VER)
+#define strcasecmp strcmpi
+#endif
+
+#define MAX_KERNEL_STRING_LEN 64
+#define MAX_CLFILE_NUM 50
+#define MAX_CLKERNEL_NUM 200
+#define MAX_CLFILE_PATH 255
+#define MAX_KERNEL_NUM 50
+#define MAX_KERNEL_NAME_LEN 64
+
+#ifndef INVALID_HANDLE_VALUE
+#define INVALID_HANDLE_VALUE NULL
+#endif
+
+//#define THREAD_PRIORITY_TIME_CRITICAL 15
+
+enum VENDOR
+{
+ AMD = 0,
+ Intel,
+ NVIDIA,
+ others
+};
+typedef struct _GPUEnv
+{
+ //share vb in all modules in hb library
+ cl_platform_id platform;
+ cl_device_type dType;
+ cl_context context;
+ cl_device_id * devices;
+ cl_device_id dev;
+ cl_command_queue command_queue;
+ cl_kernel kernels[MAX_CLFILE_NUM];
+ cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file
+ char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256
+ int file_count; // only one kernel file
+
+ char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1];
+ cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
+ int kernel_count;
+ int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
+ enum VENDOR vendor;
+}GPUEnv;
+
+typedef struct
+{
+ char kernelName[MAX_KERNEL_NAME_LEN+1];
+ char * kernelStr;
+}hb_kernel_node;
static GPUEnv gpu_env;
static int isInited = 0;
static hb_kernel_node gKernels[MAX_KERNEL_NUM];
#define ADD_KERNEL_CFG( idx, s, p ){\
- strcpy( gKernels[idx].kernelName, s );\\r
- gKernels[idx].kernelStr = p;\\r
- strcpy( gpu_env.kernel_names[idx], s );\\r
- gpu_env.kernel_count++; }\r
-\r
-\r
-/**\r
- * hb_confirm_gpu_type\r
- */\r
-int hb_confirm_gpu_type()\r
-{\r
- int status = 1;\r
- unsigned int i, j;\r
- cl_uint numPlatforms = 0; \r
- status = clGetPlatformIDs(0,NULL,&numPlatforms); \r
- if(status != 0) \r
- { \r
- goto end; \r
- } \r
- if(numPlatforms > 0) \r
- { \r
- cl_platform_id* platforms = (cl_platform_id* )malloc (numPlatforms * sizeof(cl_platform_id)); \r
- status = clGetPlatformIDs (numPlatforms, platforms, NULL); \r
- if (status != 0) \r
- { \r
- goto end; \r
- } \r
- for (i=0; i < numPlatforms; i++)\r
- { \r
- char pbuff[100];\r
- cl_uint numDevices;\r
- status = clGetPlatformInfo( platforms[i], \r
- CL_PLATFORM_VENDOR, \r
- sizeof (pbuff), \r
- pbuff,\r
- NULL); \r
- if (status)\r
- continue;\r
- status = clGetDeviceIDs( platforms[i], \r
- CL_DEVICE_TYPE_GPU , \r
- 0 , \r
- NULL , \r
- &numDevices); \r
- \r
- cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id));\r
- status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);\r
- for (j = 0; j < numDevices; j++)\r
- {\r
- char dbuff[100];\r
- status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); \r
- if (!strcmp(dbuff, "Advanced Micro Devices, Inc.") ||\r
- !strcmp(dbuff, "Intel(R) Corporation") ||\r
-#ifdef __APPLE__\r
- !strcmp(dbuff, "AMD") ||\r
- /* MacBook Pro, AMD ATI Radeon HD 6750M, OS X 10.8.3 */\r
- !strcmp(dbuff, "NVIDIA") ||\r
- /* MacBook Pro, NVIDIA GeForce GT 330M, OS X 10.7.4 */\r
-#endif\r
- !strcmp(dbuff, "NVIDIA Corporation"))\r
- {\r
- return 0;\r
- }\r
- }\r
-\r
- if ( status != CL_SUCCESS )\r
- continue;\r
- if( numDevices ) \r
- break; \r
- } \r
- free( platforms ); \r
- } \r
- end:\r
- return -1;\r
-}\r
-\r
-/**\r
- * hb_regist_opencl_kernel\r
+ strcpy( gKernels[idx].kernelName, s );\
+ gKernels[idx].kernelStr = p;\
+ strcpy( gpu_env.kernel_names[idx], s );\
+ gpu_env.kernel_count++; }
+
+
+/**
+ * hb_confirm_gpu_type - returns 0 if the first platform exposing GPU devices has one from a known vendor, -1 otherwise
+ */
+int hb_confirm_gpu_type()
+{
+    int status = 1;
+    int found = -1; /* stays -1 until a device from a known vendor is seen */
+    unsigned int i, j;
+    cl_uint numPlatforms = 0;
+    cl_platform_id *platforms = NULL;
+    status = clGetPlatformIDs(0,NULL,&numPlatforms);
+    if(status != 0 || numPlatforms == 0)
+        goto end;
+    platforms = (cl_platform_id* )malloc (numPlatforms * sizeof(cl_platform_id));
+    if (platforms == NULL)
+        goto end;
+    status = clGetPlatformIDs (numPlatforms, platforms, NULL);
+    if (status != 0)
+        goto end;
+    for (i=0; i < numPlatforms; i++)
+    {
+        char pbuff[100];
+        cl_uint numDevices = 0;
+        cl_device_id *devices;
+        status = clGetPlatformInfo( platforms[i],
+                                    CL_PLATFORM_VENDOR,
+                                    sizeof (pbuff),
+                                    pbuff,
+                                    NULL);
+        if (status)
+            continue;
+        status = clGetDeviceIDs( platforms[i],
+                                 CL_DEVICE_TYPE_GPU ,
+                                 0 ,
+                                 NULL ,
+                                 &numDevices);
+        /* numDevices is only meaningful when the query succeeded */
+        if (status != CL_SUCCESS || numDevices == 0)
+            continue;
+        devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id));
+        if (devices == NULL)
+            continue;
+        status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
+        for (j = 0; status == CL_SUCCESS && found != 0 && j < numDevices; j++)
+        {
+            char dbuff[100];
+            /* dbuff holds a vendor string only if the query succeeded */
+            if (clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL) != CL_SUCCESS)
+                continue;
+            if (!strcmp(dbuff, "Advanced Micro Devices, Inc.") ||
+                !strcmp(dbuff, "Intel(R) Corporation") ||
+#ifdef __APPLE__
+                !strcmp(dbuff, "AMD") ||
+                /* MacBook Pro, AMD ATI Radeon HD 6750M, OS X 10.8.3 */
+                !strcmp(dbuff, "NVIDIA") ||
+                /* MacBook Pro, NVIDIA GeForce GT 330M, OS X 10.7.4 */
+#endif
+                !strcmp(dbuff, "NVIDIA Corporation"))
+                found = 0;
+        }
+        free( devices );
+        if (status == CL_SUCCESS)
+            break; /* only the first platform exposing GPU devices is examined */
+    }
+end:
+    free( platforms ); /* free(NULL) is a no-op, so every path may land here */
+    return found;
+}
+
+/**
+ * hb_regist_opencl_kernel
*/
int hb_regist_opencl_kernel()
{
return 0;
}
-\r
-/**\r
- * hb_regist_opencl_kernel\r
- * @param filename -\r
- * @param source -\r
- * @param gpu_info -\r
- * @param int idx -\r
- */\r
-int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx )\r
-{\r
- int file_size;\r
- size_t result;\r
- FILE * file = NULL;\r
- file_size = 0;\r
- result = 0;\r
- file = fopen( filename, "rb+" );\r
-\r
- if( file!=NULL )\r
- {\r
- fseek( file, 0, SEEK_END );\r
-\r
- file_size = ftell( file );\r
- rewind( file );\r
- *source = (char*)malloc( sizeof(char) * file_size + 1 );\r
- if( *source == (char*)NULL )\r
- {\r
- return(0);\r
- }\r
- result = fread( *source, 1, file_size, file );\r
- if( result != file_size )\r
- {\r
- free( *source );\r
- return(0);\r
- }\r
- (*source)[file_size] = '\0';\r
- fclose( file );\r
-\r
- return(1);\r
- }\r
- return(0);\r
-}\r
-\r
-/**\r
- * hb_binary_generated\r
- * @param context -\r
- * @param cl_file_name -\r
- * @param fhandle -\r
- */\r
-int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle )\r
-{\r
- int i = 0;\r
- cl_int status;\r
- cl_uint numDevices;\r
- cl_device_id *devices;\r
- char * str = NULL;\r
- FILE * fd = NULL;\r
-\r
- status = clGetContextInfo( context,\r
- CL_CONTEXT_NUM_DEVICES,\r
- sizeof(numDevices),\r
- &numDevices,\r
- NULL );\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log( "OpenCL: Get context info failed" );\r
- return 0;\r
- }\r
-\r
- devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
- if( devices == NULL )\r
- {\r
- hb_log( "OpenCL: No device found" );\r
- return 0;\r
- }\r
-\r
- /* grab the handles to all of the devices in the context. */\r
- status = clGetContextInfo( context,\r
- CL_CONTEXT_DEVICES,\r
- sizeof(cl_device_id) * numDevices,\r
- devices,\r
- NULL );\r
-\r
- status = 0;\r
- /* dump out each binary into its own separate file. */\r
- for (i = 0; i < numDevices; i++)\r
- {\r
- char fileName[256] = { 0 };\r
- char cl_name[128] = { 0 };\r
- if (devices[i])\r
- {\r
- char deviceName[1024];\r
- status = clGetDeviceInfo(devices[i],\r
- CL_DEVICE_NAME,\r
- sizeof(deviceName),\r
- deviceName,\r
- NULL);\r
-\r
- str = (char*)strstr(cl_file_name, ".cl");\r
- memcpy(cl_name, cl_file_name, str - cl_file_name);\r
- cl_name[str - cl_file_name] = '\0';\r
- sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);\r
- fd = fopen(fileName, "rb");\r
- status = fd != NULL;\r
- }\r
- }\r
-\r
- if( devices != NULL )\r
- {\r
- free( devices );\r
- devices = NULL;\r
- }\r
-\r
- if( fd != NULL )\r
- *fhandle = fd;\r
-\r
- return status;\r
-}\r
-\r
-/**\r
- * hb_write_binary_to_file\r
- * @param fileName -\r
- * @param birary -\r
- * @param numBytes -\r
- */\r
-int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes )\r
-{\r
- FILE *output = NULL;\r
- output = fopen( fileName, "wb" );\r
- if( output == NULL )\r
- return 0;\r
-\r
- fwrite( birary, sizeof(char), numBytes, output );\r
- fclose( output );\r
-\r
- return 1;\r
-}\r
-\r
-/**\r
- * hb_generat_bin_from_kernel_source\r
- * @param program -\r
- * @param cl_file_name -\r
- */\r
-int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name )\r
-{\r
- int i = 0;\r
- cl_int status;\r
- cl_uint numDevices;\r
- size_t *binarySizes;\r
- cl_device_id *devices;\r
- char **binaries;\r
- char *str = NULL;\r
-\r
- status = clGetProgramInfo( program,\r
- CL_PROGRAM_NUM_DEVICES,\r
- sizeof(numDevices),\r
- &numDevices,\r
- NULL );\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed");\r
- return 0;\r
- }\r
-\r
- devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
- if( devices == NULL )\r
- {\r
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found");\r
- return 0;\r
- }\r
-\r
- /* grab the handles to all of the devices in the program. */\r
- status = clGetProgramInfo( program,\r
- CL_PROGRAM_DEVICES,\r
- sizeof(cl_device_id) * numDevices,\r
- devices,\r
- NULL );\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed");\r
- return 0;\r
- }\r
-\r
- /* figure out the sizes of each of the binaries. */\r
- binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );\r
-\r
- status = clGetProgramInfo( program,\r
- CL_PROGRAM_BINARY_SIZES,\r
- sizeof(size_t) * numDevices,\r
- binarySizes, NULL );\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed");\r
- return 0;\r
- }\r
-\r
- /* copy over all of the generated binaries. */\r
- binaries = (char**)malloc( sizeof(char *) * numDevices );\r
- if( binaries == NULL )\r
- {\r
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed");\r
- return 0;\r
- }\r
-\r
- for( i = 0; i < numDevices; i++ )\r
- {\r
- if( binarySizes[i] != 0 )\r
- {\r
- binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] );\r
- if( binaries[i] == NULL )\r
- {\r
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i);\r
- return 0;\r
- }\r
- }\r
- else\r
- {\r
- binaries[i] = NULL;\r
- }\r
- }\r
-\r
- status = clGetProgramInfo( program,\r
- CL_PROGRAM_BINARIES,\r
- sizeof(char *) * numDevices,\r
- binaries,\r
- NULL );\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed");\r
- return 0;\r
- }\r
-\r
- /* dump out each binary into its own separate file. */\r
- for (i = 0; i < numDevices; i++)\r
- {\r
- char fileName[256] = {0};\r
- char cl_name[128] = {0};\r
- if (binarySizes[i])\r
- {\r
- char deviceName[1024];\r
- status = clGetDeviceInfo(devices[i],\r
- CL_DEVICE_NAME,\r
- sizeof(deviceName),\r
- deviceName,\r
- NULL);\r
-\r
- str = (char*)strstr( cl_file_name, (char*)".cl" );\r
- memcpy(cl_name, cl_file_name, str - cl_file_name);\r
- cl_name[str - cl_file_name] = '\0';\r
- sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);\r
-\r
- if (!hb_write_binary_to_file(fileName, binaries[i], binarySizes[i]))\r
- {\r
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead.");\r
- return 0;\r
- }\r
- }\r
- }\r
-\r
- // Release all resouces and memory\r
- for( i = 0; i < numDevices; i++ )\r
- {\r
- if( binaries[i] != NULL )\r
- {\r
- free( binaries[i] );\r
- binaries[i] = NULL;\r
- }\r
- }\r
-\r
- if( binaries != NULL )\r
- {\r
- free( binaries );\r
- binaries = NULL;\r
- }\r
-\r
- if( binarySizes != NULL )\r
- {\r
- free( binarySizes );\r
- binarySizes = NULL;\r
- }\r
-\r
- if( devices != NULL )\r
- {\r
- free( devices );\r
- devices = NULL;\r
- }\r
- return 1;\r
-}\r
-\r
-\r
-/**\r
- * hb_init_opencl_attr\r
- * @param env -\r
- */\r
-int hb_init_opencl_attr( OpenCLEnv * env )\r
-{\r
- if( gpu_env.isUserCreated )\r
- return 1;\r
-\r
- gpu_env.context = env->context;\r
- gpu_env.platform = env->platform;\r
- gpu_env.dev = env->devices;\r
- gpu_env.command_queue = env->command_queue;\r
-\r
- gpu_env.isUserCreated = 1;\r
-\r
- return 0;\r
-}\r
-\r
-/**\r
- * hb_create_kernel\r
- * @param kernelname -\r
- * @param env -\r
- */\r
-int hb_create_kernel( char * kernelname, KernelEnv * env )\r
-{\r
- int status;\r
- env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status );\r
- env->context = gpu_env.context;\r
- env->command_queue = gpu_env.command_queue;\r
- return status != CL_SUCCESS ? 1 : 0;\r
-}\r
-\r
-/**\r
- * hb_release_kernel\r
- * @param env -\r
- */\r
-int hb_release_kernel( KernelEnv * env )\r
-{\r
- int status = clReleaseKernel( env->kernel );\r
- return status != CL_SUCCESS ? 1 : 0;\r
-}\r
-\r
-/**\r
+
+/**
+ * hb_convert_to_string - read a file into a malloc'ed, NUL-terminated buffer; returns 1 on success, 0 on failure
+ * @param filename - path of the file to read
+ * @param source - receives the buffer on success (the caller frees it); NULL on allocation or read failure
+ * @param gpu_info - not used by this function
+ * @param int idx - not used by this function
+ */
+int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx )
+{
+    long file_size = 0;
+    size_t result = 0;
+    FILE * file = NULL;
+
+    /* the file is only read, so do not demand write access ("rb+") */
+    file = fopen( filename, "rb" );
+
+    if( file!=NULL )
+    {
+        /* -1 flags a failed seek/tell so that it is never used as a size */
+        file_size = ( fseek( file, 0, SEEK_END ) == 0 ) ? ftell( file ) : -1;
+        rewind( file );
+        *source = ( file_size < 0 ) ? NULL : (char*)malloc( (size_t)file_size + 1 );
+        if( *source == (char*)NULL )
+        {
+            fclose( file ); /* close the handle on the failure path too */
+            return(0);
+        }
+        result = fread( *source, 1, (size_t)file_size, file );
+        fclose( file );
+        if( result != (size_t)file_size )
+        {
+            free( *source );
+            *source = NULL; /* do not leave the caller a dangling pointer */
+            return(0);
+        }
+        (*source)[file_size] = '\0';
+        return(1);
+    }
+    return(0);
+}
+
+/**
+ * hb_binary_generated - look for a cached "./<name> - <device>.bin" kernel binary; returns 1 and sets *fhandle if the last usable device has one, else 0
+ * @param context - OpenCL context whose devices are checked
+ * @param cl_file_name - kernel source file name; the part before ".cl" is used as <name>
+ * @param fhandle - receives the binary file opened in "rb" mode; left untouched when none is found
+ */
+int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle )
+{
+    cl_uint i = 0;
+    cl_int status;
+    cl_uint numDevices;
+    cl_device_id *devices;
+    const char * str = NULL;
+    FILE * fd = NULL;
+
+    status = clGetContextInfo( context,
+                               CL_CONTEXT_NUM_DEVICES,
+                               sizeof(numDevices),
+                               &numDevices,
+                               NULL );
+    if( status != CL_SUCCESS )
+    {
+        hb_log( "OpenCL: Get context info failed" );
+        return 0;
+    }
+
+    devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
+    if( devices == NULL )
+    {
+        hb_log( "OpenCL: No device found" );
+        return 0;
+    }
+
+    /* grab the handles to all of the devices in the context. */
+    status = clGetContextInfo( context,
+                               CL_CONTEXT_DEVICES,
+                               sizeof(cl_device_id) * numDevices,
+                               devices,
+                               NULL );
+    /* without ".cl" in the name, or without a device list, there is nothing to look up */
+    str = strstr(cl_file_name, ".cl");
+    if( status != CL_SUCCESS || str == NULL )
+        numDevices = 0;
+    status = 0;
+    /* look for the binary file belonging to each device. */
+    for (i = 0; i < numDevices; i++)
+    {
+        char fileName[256] = { 0 };
+        char deviceName[1024];
+        if (devices[i] == NULL)
+            continue;
+        if (clGetDeviceInfo(devices[i],
+                            CL_DEVICE_NAME,
+                            sizeof(deviceName),
+                            deviceName,
+                            NULL) != CL_SUCCESS)
+            continue;
+        /* the precision copies only the part before ".cl"; snprintf bounds the write */
+        snprintf(fileName, sizeof(fileName), "./%.*s - %s.bin",
+                 (int)(str - cl_file_name), cl_file_name, deviceName);
+        if (fd != NULL)
+            fclose(fd); /* keep a single handle open: the last device decides */
+        fd = fopen(fileName, "rb");
+        status = fd != NULL;
+    }
+
+    free( devices );
+    devices = NULL;
+
+    /* hand the open file over to the caller, who is expected to close it */
+    if( fd != NULL )
+        *fhandle = fd;
+
+    return status;
+}
+
+/**
+ * hb_write_binary_to_file - write numBytes bytes to fileName; returns 1 only if everything was written and flushed, else 0
+ * @param fileName - path of the file to create or truncate
+ * @param birary - bytes to write
+ * @param numBytes - number of bytes to write
+ */
+int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes )
+{
+    size_t written;
+    FILE *output = fopen( fileName, "wb" );
+    if( output == NULL )
+        return 0;
+    written = fwrite( birary, sizeof(char), numBytes, output );
+    /* fclose flushes buffered data, so its result matters too */
+    if( fclose( output ) != 0 )
+        return 0;
+    return written == numBytes;
+}
+
+/**
+ * hb_generat_bin_from_kernel_source
+ * @param program -
+ * @param cl_file_name -
+ */
+int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name )
+{
+ int i = 0;
+ cl_int status;
+ cl_uint numDevices;
+ size_t *binarySizes;
+ cl_device_id *devices;
+ char **binaries;
+ char *str = NULL;
+
+ status = clGetProgramInfo( program,
+ CL_PROGRAM_NUM_DEVICES,
+ sizeof(numDevices),
+ &numDevices,
+ NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed");
+ return 0;
+ }
+
+ devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
+ if( devices == NULL )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found");
+ return 0;
+ }
+
+ /* grab the handles to all of the devices in the program. */
+ status = clGetProgramInfo( program,
+ CL_PROGRAM_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices,
+ NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed");
+ return 0;
+ }
+
+ /* figure out the sizes of each of the binaries. */
+ binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
+
+ status = clGetProgramInfo( program,
+ CL_PROGRAM_BINARY_SIZES,
+ sizeof(size_t) * numDevices,
+ binarySizes, NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed");
+ return 0;
+ }
+
+ /* copy over all of the generated binaries. */
+ binaries = (char**)malloc( sizeof(char *) * numDevices );
+ if( binaries == NULL )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed");
+ return 0;
+ }
+
+ for( i = 0; i < numDevices; i++ )
+ {
+ if( binarySizes[i] != 0 )
+ {
+ binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] );
+ if( binaries[i] == NULL )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i);
+ return 0;
+ }
+ }
+ else
+ {
+ binaries[i] = NULL;
+ }
+ }
+
+ status = clGetProgramInfo( program,
+ CL_PROGRAM_BINARIES,
+ sizeof(char *) * numDevices,
+ binaries,
+ NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed");
+ return 0;
+ }
+
+ /* dump out each binary into its own separate file. */
+ for (i = 0; i < numDevices; i++)
+ {
+ char fileName[256] = {0};
+ char cl_name[128] = {0};
+ if (binarySizes[i])
+ {
+ char deviceName[1024];
+ status = clGetDeviceInfo(devices[i],
+ CL_DEVICE_NAME,
+ sizeof(deviceName),
+ deviceName,
+ NULL);
+
+ str = (char*)strstr( cl_file_name, (char*)".cl" );
+ memcpy(cl_name, cl_file_name, str - cl_file_name);
+ cl_name[str - cl_file_name] = '\0';
+ sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);
+
+ if (!hb_write_binary_to_file(fileName, binaries[i], binarySizes[i]))
+ {
+ hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead.");
+ return 0;
+ }
+ }
+ }
+
+    // Release all resources and memory
+ for( i = 0; i < numDevices; i++ )
+ {
+ if( binaries[i] != NULL )
+ {
+ free( binaries[i] );
+ binaries[i] = NULL;
+ }
+ }
+
+ if( binaries != NULL )
+ {
+ free( binaries );
+ binaries = NULL;
+ }
+
+ if( binarySizes != NULL )
+ {
+ free( binarySizes );
+ binarySizes = NULL;
+ }
+
+ if( devices != NULL )
+ {
+ free( devices );
+ devices = NULL;
+ }
+ return 1;
+}
+
+
+/**
+ * hb_init_opencl_attr
+ * @param env -
+ */
+int hb_init_opencl_attr( OpenCLEnv * env )
+{
+ if( gpu_env.isUserCreated )
+ return 1;
+
+ gpu_env.context = env->context;
+ gpu_env.platform = env->platform;
+ gpu_env.dev = env->devices;
+ gpu_env.command_queue = env->command_queue;
+
+ gpu_env.isUserCreated = 1;
+
+ return 0;
+}
+
+/**
+ * hb_create_kernel
+ * @param kernelname -
+ * @param env -
+ */
+int hb_create_kernel( char * kernelname, KernelEnv * env )
+{
+ int status;
+ env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status );
+ env->context = gpu_env.context;
+ env->command_queue = gpu_env.command_queue;
+ return status != CL_SUCCESS ? 1 : 0;
+}
+
+/**
+ * hb_release_kernel
+ * @param env -
+ */
+int hb_release_kernel( KernelEnv * env )
+{
+ int status = clReleaseKernel( env->kernel );
+ return status != CL_SUCCESS ? 1 : 0;
+}
+
+/**
* hb_init_opencl_env
* @param gpu_info -
*/
int hb_init_opencl_env( GPUEnv *gpu_info )
{
size_t length;
- cl_int status;\r
- cl_uint numPlatforms, numDevices;\r
- cl_platform_id *platforms;\r
- cl_context_properties cps[3];\r
- char platformName[100];\r
+ cl_int status;
+ cl_uint numPlatforms, numDevices;
+ cl_platform_id *platforms;
+ cl_context_properties cps[3];
+ char platformName[100];
unsigned int i;
void *handle = INVALID_HANDLE_VALUE;
/*
* Have a look at the available platforms.
*/
- if( !gpu_info->isUserCreated )\r
- {\r
- status = clGetPlatformIDs( 0, NULL, &numPlatforms );\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log( "OpenCL: OpenCL device platform not found." );\r
- return(1);\r
- }\r
-\r
- gpu_info->platform = NULL;\r
- if( 0 < numPlatforms )\r
- {\r
- platforms = (cl_platform_id*)malloc(\r
- numPlatforms * sizeof(cl_platform_id));\r
- if( platforms == (cl_platform_id*)NULL )\r
- {\r
- return(1);\r
- }\r
- status = clGetPlatformIDs( numPlatforms, platforms, NULL );\r
-\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log( "OpenCL: Specific opencl platform not found." );\r
- return(1);\r
- }\r
-\r
- for( i = 0; i < numPlatforms; i++ )\r
- {\r
- status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,\r
- sizeof(platformName), platformName,\r
- NULL );\r
-\r
- if( status != CL_SUCCESS )\r
- {\r
- continue;\r
- }\r
- gpu_info->platform = platforms[i];\r
-\r
- if (!strcmp(platformName, "Advanced Micro Devices, Inc.") ||\r
- !strcmp(platformName, "AMD"))\r
- gpu_info->vendor = AMD;\r
- else \r
- gpu_info->vendor = others;\r
- \r
- gpu_info->platform = platforms[i];\r
-\r
- status = clGetDeviceIDs( gpu_info->platform /* platform */,\r
- CL_DEVICE_TYPE_GPU /* device_type */,\r
- 0 /* num_entries */,\r
- NULL /* devices */,\r
- &numDevices );\r
-\r
- if( status != CL_SUCCESS )\r
- {\r
- continue;\r
- }\r
-\r
- if( numDevices )\r
- break;\r
- \r
- }\r
- free( platforms );\r
- }\r
-\r
- if( NULL == gpu_info->platform )\r
- {\r
- hb_log( "OpenCL: No OpenCL-compatible GPU found." );\r
- return(1);\r
- }\r
-\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log( "OpenCL: No OpenCL-compatible GPU found." );\r
- return(1);\r
- }\r
-\r
- /*\r
- * Use available platform.\r
- */\r
- cps[0] = CL_CONTEXT_PLATFORM;\r
- cps[1] = (cl_context_properties)gpu_info->platform;\r
- cps[2] = 0;\r
- /* Check for GPU. */\r
- gpu_info->dType = CL_DEVICE_TYPE_GPU;\r
- gpu_info->context = clCreateContextFromType(\r
- cps, gpu_info->dType, NULL, NULL, &status );\r
-\r
- if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )\r
- {\r
- gpu_info->dType = CL_DEVICE_TYPE_CPU;\r
- gpu_info->context = clCreateContextFromType(\r
- cps, gpu_info->dType, NULL, NULL, &status );\r
- }\r
-\r
- if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )\r
- {\r
- gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;\r
- gpu_info->context = clCreateContextFromType(\r
- cps, gpu_info->dType, NULL, NULL, &status );\r
- }\r
-\r
- if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )\r
- {\r
- hb_log( "OpenCL: Unable to create opencl context." );\r
- return(1);\r
- }\r
-\r
- /* Detect OpenCL devices. */\r
- /* First, get the size of device list data */\r
- status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES,\r
- 0, NULL, &length );\r
- if((status != CL_SUCCESS) || (length == 0))\r
- {\r
- hb_log( "OpenCL: Unable to get the list of devices in context." );\r
- return(1);\r
- }\r
-\r
- /* Now allocate memory for device list based on the size we got earlier */\r
- gpu_info->devices = (cl_device_id*)malloc( length );\r
- if( gpu_info->devices == (cl_device_id*)NULL )\r
- {\r
- return(1);\r
- }\r
-\r
- /* Now, get the device list data */\r
- status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length,\r
- gpu_info->devices, NULL );\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log( "OpenCL: Unable to get the device list data in context." );\r
- return(1);\r
- }\r
-\r
- /* Create OpenCL command queue. */\r
- gpu_info->command_queue = clCreateCommandQueue( gpu_info->context,\r
- gpu_info->devices[0],\r
- 0, &status );\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log( "OpenCL: Unable to create opencl command queue." );\r
- return(1);\r
- }\r
- }\r
-\r
- if( clGetCommandQueueInfo( gpu_info->command_queue,\r
- CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle),\r
- &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE )\r
- {\r
-#ifdef SYS_MINGW \r
- SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL );\r
-#endif\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-\r
-/**\r
- * hb_release_opencl_env\r
- * @param gpu_info -\r
- */\r
-int hb_release_opencl_env( GPUEnv *gpu_info )\r
-{\r
- if( !isInited )\r
- return 1;\r
- int i;\r
-\r
- for( i = 0; i<gpu_env.file_count; i++ )\r
- {\r
- if( gpu_env.programs[i] ) ;\r
- {\r
- clReleaseProgram( gpu_env.programs[i] );\r
- gpu_env.programs[i] = NULL;\r
- }\r
- }\r
-\r
- if( gpu_env.command_queue )\r
- {\r
- clReleaseCommandQueue( gpu_env.command_queue );\r
- gpu_env.command_queue = NULL;\r
- }\r
-\r
- if( gpu_env.context )\r
- {\r
- clReleaseContext( gpu_env.context );\r
- gpu_env.context = NULL;\r
- }\r
-\r
- isInited = 0;\r
- gpu_info->isUserCreated = 0;\r
- return 1;\r
-}\r
-\r
-\r
-/**\r
- * hb_register_kernel_wrapper\r
- * @param kernel_name -\r
- * @param function -\r
- */\r
-int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function )\r
-{\r
- int i;\r
- for( i = 0; i < gpu_env.kernel_count; i++ )\r
- {\r
- if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )\r
- {\r
- gpu_env.kernel_functions[i] = function;\r
- return(1);\r
- }\r
- }\r
- return(0);\r
-}\r
-\r
-/**\r
- * hb_cached_of_kerner_prg\r
- * @param gpu_env -\r
- * @param cl_file_name -\r
- */\r
-int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name )\r
-{\r
- int i;\r
- for( i = 0; i < gpu_env->file_count; i++ )\r
- {\r
- if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name ) == 0 )\r
- {\r
- if( gpu_env->programs[i] != NULL )\r
- return(1);\r
- }\r
- }\r
-\r
- return(0);\r
-}\r
-\r
-/**\r
- * hb_compile_kernel_file\r
- * @param filename -\r
- * @param gpu_info -\r
- * @param indx -\r
- * @param build_option -\r
- */\r
-int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,\r
- int indx, const char *build_option )\r
-{\r
- cl_int status;\r
- size_t length;\r
- char *source_str;\r
- const char *source;\r
- size_t source_size[1];\r
- char *buildLog = NULL;\r
- int b_error, binary_status, binaryExisted;\r
- char * binary;\r
- cl_uint numDevices;\r
- cl_device_id *devices;\r
- FILE * fd;\r
- FILE * fd1;\r
- int idx;\r
-\r
- if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 )\r
- return (1);\r
-\r
- idx = gpu_info->file_count;\r
-\r
-#ifdef USE_EXTERNAL_KERNEL\r
- status = hb_convert_to_string( filename, &source_str, gpu_info, idx );\r
+ if( !gpu_info->isUserCreated )
+ {
+ status = clGetPlatformIDs( 0, NULL, &numPlatforms );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: OpenCL device platform not found." );
+ return(1);
+ }
+
+ gpu_info->platform = NULL;
+ if( 0 < numPlatforms )
+ {
+ platforms = (cl_platform_id*)malloc(
+ numPlatforms * sizeof(cl_platform_id));
+ if( platforms == (cl_platform_id*)NULL )
+ {
+ return(1);
+ }
+ status = clGetPlatformIDs( numPlatforms, platforms, NULL );
+
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Specific opencl platform not found." );
+ return(1);
+ }
+
+ for( i = 0; i < numPlatforms; i++ )
+ {
+ status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
+ sizeof(platformName), platformName,
+ NULL );
+
+ if( status != CL_SUCCESS )
+ {
+ continue;
+ }
+ gpu_info->platform = platforms[i];
+
+ if (!strcmp(platformName, "Advanced Micro Devices, Inc.") ||
+ !strcmp(platformName, "AMD"))
+ gpu_info->vendor = AMD;
+ else
+ gpu_info->vendor = others;
+
+ gpu_info->platform = platforms[i];
+
+ status = clGetDeviceIDs( gpu_info->platform /* platform */,
+ CL_DEVICE_TYPE_GPU /* device_type */,
+ 0 /* num_entries */,
+ NULL /* devices */,
+ &numDevices );
+
+ if( status != CL_SUCCESS )
+ {
+ continue;
+ }
+
+ if( numDevices )
+ break;
+
+ }
+ free( platforms );
+ }
+
+ if( NULL == gpu_info->platform )
+ {
+ hb_log( "OpenCL: No OpenCL-compatible GPU found." );
+ return(1);
+ }
+
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: No OpenCL-compatible GPU found." );
+ return(1);
+ }
+
+ /*
+ * Use available platform.
+ */
+ cps[0] = CL_CONTEXT_PLATFORM;
+ cps[1] = (cl_context_properties)gpu_info->platform;
+ cps[2] = 0;
+ /* Check for GPU. */
+ gpu_info->dType = CL_DEVICE_TYPE_GPU;
+ gpu_info->context = clCreateContextFromType(
+ cps, gpu_info->dType, NULL, NULL, &status );
+
+ if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
+ {
+ gpu_info->dType = CL_DEVICE_TYPE_CPU;
+ gpu_info->context = clCreateContextFromType(
+ cps, gpu_info->dType, NULL, NULL, &status );
+ }
+
+ if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
+ {
+ gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;
+ gpu_info->context = clCreateContextFromType(
+ cps, gpu_info->dType, NULL, NULL, &status );
+ }
+
+ if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
+ {
+ hb_log( "OpenCL: Unable to create opencl context." );
+ return(1);
+ }
+
+ /* Detect OpenCL devices. */
+ /* First, get the size of device list data */
+ status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES,
+ 0, NULL, &length );
+ if((status != CL_SUCCESS) || (length == 0))
+ {
+ hb_log( "OpenCL: Unable to get the list of devices in context." );
+ return(1);
+ }
+
+ /* Now allocate memory for device list based on the size we got earlier */
+ gpu_info->devices = (cl_device_id*)malloc( length );
+ if( gpu_info->devices == (cl_device_id*)NULL )
+ {
+ return(1);
+ }
+
+ /* Now, get the device list data */
+ status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length,
+ gpu_info->devices, NULL );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Unable to get the device list data in context." );
+ return(1);
+ }
+
+ /* Create OpenCL command queue. */
+ gpu_info->command_queue = clCreateCommandQueue( gpu_info->context,
+ gpu_info->devices[0],
+ 0, &status );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Unable to create opencl command queue." );
+ return(1);
+ }
+ }
+
+ if( clGetCommandQueueInfo( gpu_info->command_queue,
+ CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle),
+ &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE )
+ {
+#ifdef SYS_MINGW
+ SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL );
+#endif
+ }
+
+ return 0;
+}
+
+
+/**
+ * hb_release_opencl_env - release the programs, command queue and context held by the file-level gpu_env; always returns 1
+ * @param gpu_info - its isUserCreated flag is reset to 0 (not touched when nothing was initialised)
+ */
+int hb_release_opencl_env( GPUEnv *gpu_info )
+{
+    if( !isInited )
+        return 1;
+    int i;
+
+    for( i = 0; i<gpu_env.file_count; i++ )
+    {
+        if( gpu_env.programs[i] ) /* skip slots that hold no program */
+        {
+            clReleaseProgram( gpu_env.programs[i] );
+            gpu_env.programs[i] = NULL;
+        }
+    }
+
+    if( gpu_env.command_queue )
+    {
+        clReleaseCommandQueue( gpu_env.command_queue );
+        gpu_env.command_queue = NULL;
+    }
+
+    if( gpu_env.context )
+    {
+        clReleaseContext( gpu_env.context );
+        gpu_env.context = NULL;
+    }
+
+    isInited = 0;
+    gpu_info->isUserCreated = 0;
+    return 1;
+}
+
+
+/**
+ * hb_register_kernel_wrapper
+ * @param kernel_name -
+ * @param function -
+ */
+int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function )
+{
+ int i;
+ for( i = 0; i < gpu_env.kernel_count; i++ )
+ {
+ if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )
+ {
+ gpu_env.kernel_functions[i] = function;
+ return(1);
+ }
+ }
+ return(0);
+}
+
+/**
+ * hb_cached_of_kerner_prg
+ * @param gpu_env -
+ * @param cl_file_name -
+ */
+int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name )
+{
+ int i;
+ for( i = 0; i < gpu_env->file_count; i++ )
+ {
+ if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name ) == 0 )
+ {
+ if( gpu_env->programs[i] != NULL )
+ return(1);
+ }
+ }
+
+ return(0);
+}
+
+/**
+ * hb_compile_kernel_file
+ * @param filename -
+ * @param gpu_info -
+ * @param indx -
+ * @param build_option -
+ */
+int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
+ int indx, const char *build_option )
+{
+ cl_int status;
+ size_t length;
+ char *source_str;
+ const char *source;
+ size_t source_size[1];
+ char *buildLog = NULL;
+ int b_error, binary_status, binaryExisted;
+ char * binary;
+ cl_uint numDevices;
+ cl_device_id *devices;
+ FILE * fd;
+ FILE * fd1;
+ int idx;
+
+ if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 )
+ return (1);
+
+ idx = gpu_info->file_count;
+
+#ifdef USE_EXTERNAL_KERNEL
+ status = hb_convert_to_string( filename, &source_str, gpu_info, idx );
if( status == 0 )
return(0);
#else
strcat( source_str, kernel_src_yadif_filter );
#endif
- source = source_str;\r
- source_size[0] = strlen( source );\r
-\r
- if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1)\r
- {\r
- status = clGetContextInfo(gpu_info->context,\r
- CL_CONTEXT_NUM_DEVICES,\r
- sizeof(numDevices),\r
- &numDevices,\r
- NULL);\r
- if (status != CL_SUCCESS)\r
- {\r
- hb_log("OpenCL: Unable to get the number of devices in context.");\r
- return 0;\r
- }\r
-\r
- devices = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices);\r
- if (devices == NULL)\r
- return 0;\r
-\r
- length = 0;\r
- b_error = 0;\r
- b_error |= fseek(fd, 0, SEEK_END) < 0;\r
- b_error |= (length = ftell(fd)) <= 0;\r
- b_error |= fseek(fd, 0, SEEK_SET) < 0;\r
- if (b_error)\r
- return 0;\r
-\r
- binary = (char*)calloc(length + 2, sizeof(char));\r
- if (binary == NULL)\r
- return 0;\r
-\r
- b_error |= fread(binary, 1, length, fd) != length;\r
-#if 0 // this doesn't work under OS X and/or with some non-AMD GPUs\r
- if (binary[length-1] != '\n')\r
- binary[length++] = '\n;\r
-#endif\r
-\r
- if (b_error)\r
- return 0;\r
-\r
- /* grab the handles to all of the devices in the context. */\r
- status = clGetContextInfo(gpu_info->context,\r
- CL_CONTEXT_DEVICES,\r
- sizeof(cl_device_id) * numDevices,\r
- devices,\r
- NULL);\r
-\r
- gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,\r
- numDevices,\r
- devices,\r
- &length,\r
- (const unsigned char**)&binary,\r
- &binary_status,\r
- &status);\r
-\r
- fclose(fd);\r
- free(devices);\r
- fd = NULL;\r
- devices = NULL;\r
- }\r
- else\r
- {\r
- /* create a CL program using the kernel source */\r
- gpu_info->programs[idx] = clCreateProgramWithSource(\r
- gpu_info->context, 1, &source, source_size, &status );\r
- }\r
-\r
- if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){\r
- hb_log( "OpenCL: Unable to get list of devices in context." );\r
- return(0);\r
- }\r
-\r
- /* create a cl program executable for all the devices specified */\r
- if( !gpu_info->isUserCreated ) \r
- {\r
- status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices,\r
- build_option, NULL, NULL );\r
- }\r
- else\r
- {\r
- status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev),\r
- build_option, NULL, NULL );\r
- }\r
-\r
- if( status != CL_SUCCESS )\r
- {\r
- if( !gpu_info->isUserCreated ) \r
- {\r
- status = clGetProgramBuildInfo( gpu_info->programs[idx],\r
- gpu_info->devices[0],\r
- CL_PROGRAM_BUILD_LOG, 0, NULL, &length );\r
- }\r
- else\r
- {\r
- status = clGetProgramBuildInfo( gpu_info->programs[idx],\r
- gpu_info->dev,\r
- CL_PROGRAM_BUILD_LOG, 0, NULL, &length );\r
- }\r
-\r
- if( status != CL_SUCCESS )\r
- {\r
- hb_log( "OpenCL: Unable to get GPU build information." );\r
- return(0);\r
- }\r
-\r
- buildLog = (char*)malloc( length );\r
- if( buildLog == (char*)NULL )\r
- {\r
- return(0);\r
- }\r
-\r
- if( !gpu_info->isUserCreated )\r
- {\r
- status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0],\r
- CL_PROGRAM_BUILD_LOG, length, buildLog, &length );\r
- }\r
- else\r
- {\r
- status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev,\r
- CL_PROGRAM_BUILD_LOG, length, buildLog, &length );\r
- }\r
-\r
- fd1 = fopen( "kernel-build.log", "w+" );\r
- if( fd1 != NULL ) {\r
- fwrite( buildLog, sizeof(char), length, fd1 );\r
- fclose( fd1 );\r
- }\r
-\r
- free( buildLog );\r
- return(0);\r
- }\r
-\r
- strcpy( gpu_env.kernelSrcFile[idx], filename );\r
+ source = source_str;
+ source_size[0] = strlen( source );
+
+ if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1)
+ {
+ status = clGetContextInfo(gpu_info->context,
+ CL_CONTEXT_NUM_DEVICES,
+ sizeof(numDevices),
+ &numDevices,
+ NULL);
+ if (status != CL_SUCCESS)
+ {
+ hb_log("OpenCL: Unable to get the number of devices in context.");
+ return 0;
+ }
+
+ devices = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices);
+ if (devices == NULL)
+ return 0;
+
+ length = 0;
+ b_error = 0;
+ b_error |= fseek(fd, 0, SEEK_END) < 0;
+ b_error |= (length = ftell(fd)) <= 0;
+ b_error |= fseek(fd, 0, SEEK_SET) < 0;
+ if (b_error)
+ return 0;
+
+ binary = (char*)calloc(length + 2, sizeof(char));
+ if (binary == NULL)
+ return 0;
+
+ b_error |= fread(binary, 1, length, fd) != length;
+#if 0 // this doesn't work under OS X and/or with some non-AMD GPUs
+ if (binary[length-1] != '\n')
+ binary[length++] = '\n';
+#endif
+
+ if (b_error)
+ return 0;
+
+ /* grab the handles to all of the devices in the context. */
+ status = clGetContextInfo(gpu_info->context,
+ CL_CONTEXT_DEVICES,
+ sizeof(cl_device_id) * numDevices,
+ devices,
+ NULL);
+
+ gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,
+ numDevices,
+ devices,
+ &length,
+ (const unsigned char**)&binary,
+ &binary_status,
+ &status);
+
+ fclose(fd);
+ free(devices);
+ fd = NULL;
+ devices = NULL;
+ }
+ else
+ {
+ /* create a CL program using the kernel source */
+ gpu_info->programs[idx] = clCreateProgramWithSource(
+ gpu_info->context, 1, &source, source_size, &status );
+ }
+
+ if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){
+ hb_log( "OpenCL: Unable to get list of devices in context." );
+ return(0);
+ }
+
+ /* create a cl program executable for all the devices specified */
+ if( !gpu_info->isUserCreated )
+ {
+ status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices,
+ build_option, NULL, NULL );
+ }
+ else
+ {
+ status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev),
+ build_option, NULL, NULL );
+ }
+
+ if( status != CL_SUCCESS )
+ {
+ if( !gpu_info->isUserCreated )
+ {
+ status = clGetProgramBuildInfo( gpu_info->programs[idx],
+ gpu_info->devices[0],
+ CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+ }
+ else
+ {
+ status = clGetProgramBuildInfo( gpu_info->programs[idx],
+ gpu_info->dev,
+ CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+ }
+
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: Unable to get GPU build information." );
+ return(0);
+ }
+
+ buildLog = (char*)malloc( length );
+ if( buildLog == (char*)NULL )
+ {
+ return(0);
+ }
+
+ if( !gpu_info->isUserCreated )
+ {
+ status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0],
+ CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
+ }
+ else
+ {
+ status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev,
+ CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
+ }
+
+ fd1 = fopen( "kernel-build.log", "w+" );
+ if( fd1 != NULL ) {
+ fwrite( buildLog, sizeof(char), length, fd1 );
+ fclose( fd1 );
+ }
+
+ free( buildLog );
+ return(0);
+ }
+
+ strcpy( gpu_env.kernelSrcFile[idx], filename );
if (binaryExisted != 1)
{
}
gpu_info->file_count += 1;
-\r
- return(1);\r
-}\r
-\r
-\r
-/**\r
- * hb_get_kernel_env_and_func\r
- * @param kernel_name -\r
- * @param env -\r
- * @param function -\r
- */\r
-int hb_get_kernel_env_and_func( const char *kernel_name,\r
- KernelEnv *env,\r
- cl_kernel_function *function )\r
-{\r
- int i;\r
- for( i = 0; i < gpu_env.kernel_count; i++ )\r
- {\r
- if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )\r
- {\r
- env->context = gpu_env.context;\r
- env->command_queue = gpu_env.command_queue;\r
- env->program = gpu_env.programs[0];\r
- env->kernel = gpu_env.kernels[i];\r
- env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0;\r
- *function = gpu_env.kernel_functions[i];\r
- return(1);\r
- }\r
- }\r
- return(0);\r
-}\r
-\r
-/**\r
- * hb_get_kernel_env_and_func\r
- * @param kernel_name -\r
- * @param userdata -\r
- */\r
-int hb_run_kernel( const char *kernel_name, void **userdata )\r
-{\r
- KernelEnv env;\r
- cl_kernel_function function;\r
- int status;\r
- memset( &env, 0, sizeof(KernelEnv));\r
- status = hb_get_kernel_env_and_func( kernel_name, &env, &function );\r
- strcpy( env.kernel_name, kernel_name );\r
- if( status == 1 ) \r
- {\r
- return(function( userdata, &env ));\r
- }\r
-\r
- return(0);\r
-}\r
-\r
-/**\r
- * hb_init_opencl_run_env\r
- * @param argc -\r
- * @param argv -\r
- * @param build_option -\r
- */\r
-int hb_init_opencl_run_env( int argc, char **argv, const char *build_option )\r
-{\r
- int status = 0;\r
- if( MAX_CLKERNEL_NUM <= 0 )\r
- {\r
- return 1;\r
- }\r
-\r
- if((argc > MAX_CLFILE_NUM) || (argc<0))\r
- {\r
- return 1;\r
- }\r
-\r
- if( !isInited )\r
- {\r
- hb_regist_opencl_kernel();\r
-\r
- /*initialize devices, context, comand_queue*/\r
- status = hb_init_opencl_env( &gpu_env );\r
- if( status )\r
- return(1);\r
-\r
- /*initialize program, kernel_name, kernel_count*/\r
- status = hb_compile_kernel_file("hb-opencl-kernels.cl",\r
- &gpu_env, 0, build_option);\r
-\r
- if( status == 0 || gpu_env.kernel_count == 0 )\r
- {\r
- return(1);\r
+
+ return(1);
+}
+
+
+/**
+ * hb_get_kernel_env_and_func
+ *
+ * Find a kernel by name (case-insensitive, first match) and hand back the
+ * pieces needed to run it.
+ *
+ * @param kernel_name - name of the kernel to look up in gpu_env.kernel_names
+ * @param env - on success receives the context, command queue, programs[0],
+ *              the kernel for the matching entry and the isAMD flag
+ * @param function - on success receives the function pointer stored for the kernel
+ * @return 1 if the kernel name was found, 0 otherwise (outputs are then untouched)
+ */
+int hb_get_kernel_env_and_func( const char *kernel_name,
+                                KernelEnv *env,
+                                cl_kernel_function *function )
+{
+    int k;
+
+    /* Stop at the first entry whose name matches. */
+    for( k = 0; k < gpu_env.kernel_count; k++ )
+    {
+        if( strcasecmp( kernel_name, gpu_env.kernel_names[k] ) == 0 )
+            break;
+    }
+
+    if( k >= gpu_env.kernel_count )
+        return 0;
+
+    env->context       = gpu_env.context;
+    env->command_queue = gpu_env.command_queue;
+    env->program       = gpu_env.programs[0];
+    env->kernel        = gpu_env.kernels[k];
+    env->isAMD         = ( gpu_env.vendor == AMD );
+    *function          = gpu_env.kernel_functions[k];
+
+    return 1;
+}
+
+/**
+ * hb_run_kernel
+ *
+ * Look up a kernel by name and call the function stored for it.
+ *
+ * @param kernel_name - name of the kernel to run
+ * @param userdata - argument array passed through to the stored function unchanged
+ * @return the stored function's return value, or 0 when kernel_name is NULL,
+ *         the kernel is not found, or no function is stored for it
+ */
+int hb_run_kernel( const char *kernel_name, void **userdata )
+{
+    KernelEnv env;
+    cl_kernel_function function = NULL;
+
+    if( kernel_name == NULL )
+        return 0;
+
+    memset( &env, 0, sizeof(KernelEnv) );
+    if( hb_get_kernel_env_and_func( kernel_name, &env, &function ) != 1 )
+        return 0;
+
+    /* A kernel without a stored function cannot be run. */
+    if( function == NULL )
+        return 0;
+
+    /*
+     * Bounded copy of the name. env was zero-filled above and at most
+     * sizeof - 1 bytes are written, so the string stays terminated.
+     * NOTE(review): assumes KernelEnv.kernel_name is a char array member
+     * (the previous code wrote to it with strcpy) - confirm in the header.
+     */
+    strncpy( env.kernel_name, kernel_name, sizeof(env.kernel_name) - 1 );
+
+    return function( userdata, &env );
+}
+
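+/**
+ * hb_init_opencl_run_env
+ *
+ * One-time setup: calls hb_regist_opencl_kernel, initialises the OpenCL
+ * environment and compiles "hb-opencl-kernels.cl". Does nothing when the
+ * wrapper is already initialised.
+ *
+ * @param argc - must lie between 0 and MAX_CLFILE_NUM, otherwise the call fails
+ * @param argv - not referenced in the function body
+ * @param build_option - forwarded to hb_compile_kernel_file
+ * @return 0 on success or when already initialised, 1 on failure
+ */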
+int hb_init_opencl_run_env( int argc, char **argv, const char *build_option )
+{
+ int status = 0;
+ if( MAX_CLKERNEL_NUM <= 0 )
+ {
+ return 1;
+ }
+
+ if((argc > MAX_CLFILE_NUM) || (argc<0))
+ {
+ return 1;
+ }
+
+ if( !isInited )
+ {
+ hb_regist_opencl_kernel();
+
+ /*initialize devices, context, comand_queue*/
+ status = hb_init_opencl_env( &gpu_env );
+ if( status )
+ return(1);
+
+ /*initialize program, kernel_name, kernel_count*/
+ status = hb_compile_kernel_file("hb-opencl-kernels.cl",
+ &gpu_env, 0, build_option);
+
+ if( status == 0 || gpu_env.kernel_count == 0 )
+ {
+ return(1);
}
isInited = 1;
}
- return(0);\r
-}\r
-\r
-/**\r
- * hb_release_opencl_run_env\r
- */\r
-int hb_release_opencl_run_env()\r
-{\r
- return hb_release_opencl_env( &gpu_env );\r
-}\r
-\r
-/**\r
- * hb_opencl_stats\r
- */\r
-int hb_opencl_stats()\r
-{\r
- return isInited;\r
-}\r
-\r
-/**\r
- * hb_get_opencl_env\r
- */\r
-int hb_get_opencl_env()\r
-{\r
- int i = 0;\r
- cl_int status;\r
- cl_uint numDevices;\r
- cl_device_id *devices;\r
-\r
- /*initialize devices, context, comand_queue*/\r
- status = hb_init_opencl_env( &gpu_env );\r
- if( status )\r
- return(1);\r
- status = clGetContextInfo( gpu_env.context,\r
- CL_CONTEXT_NUM_DEVICES,\r
- sizeof(numDevices),\r
- &numDevices,\r
- NULL );\r
- if( status != CL_SUCCESS )\r
- return 0;\r
-\r
- devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
- if( devices == NULL )\r
- return 0;\r
-\r
- /* grab the handles to all of the devices in the context. */\r
- status = clGetContextInfo( gpu_env.context,\r
- CL_CONTEXT_DEVICES,\r
- sizeof(cl_device_id) * numDevices,\r
- devices,\r
- NULL );\r
-\r
- for (i = 0; i < numDevices; i++)\r
- {\r
- if (devices[i] != NULL)\r
- {\r
- char deviceVendor[100], deviceName[1024], driverVersion[1024];\r
- clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(deviceVendor),\r
- deviceVendor, NULL);\r
- clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(deviceName),\r
- deviceName, NULL);\r
- clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(driverVersion),\r
- driverVersion, NULL);\r
- hb_log("hb_get_opencl_env: GPU #%d, Device Vendor: %s", i + 1, deviceVendor);\r
- hb_log("hb_get_opencl_env: GPU #%d, Device Name: %s", i + 1, deviceName);\r
- hb_log("hb_get_opencl_env: GPU #%d, Driver Version: %s", i + 1, driverVersion);\r
- }\r
- }\r
-\r
- if( devices != NULL )\r
- {\r
- free( devices );\r
- devices = NULL;\r
- }\r
-\r
- return status;\r
-}\r
-\r
-/**\r
- * hb_create_buffer\r
- * @param cl_inBuf -\r
- * @param flags -\r
- * @param size -\r
- */\r
-int hb_create_buffer( cl_mem *cl_Buf, int flags, int size )\r
-{\r
- int status;\r
- *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );\r
- \r
- if( status != CL_SUCCESS )\r
- { \r
- hb_log( "OpenCL: clCreateBuffer error '%d'", status );\r
- return 0; \r
- }\r
-\r
- return 1;\r
-}\r
-\r
-\r
-/**\r
- * hb_read_opencl_buffer\r
- * @param cl_inBuf -\r
- * @param outbuf -\r
- * @param size -\r
- */\r
-int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )\r
-{\r
- int status;\r
-\r
- status = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0 );\r
- if( status != CL_SUCCESS )\r
- { \r
- hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status );\r
- return 0; \r
- }\r
-\r
+ return(0);
+}
+
+/**
+ * hb_release_opencl_run_env
+ *
+ * Release the wrapper's global OpenCL environment.
+ *
+ * @return the value returned by hb_release_opencl_env for gpu_env
+ */
+int hb_release_opencl_run_env()
+{
+    int released = hb_release_opencl_env( &gpu_env );
+
+    return released;
+}
+
+/**
+ * hb_opencl_stats
+ *
+ * Report the wrapper's initialisation flag.
+ *
+ * @return the current value of isInited
+ */
+int hb_opencl_stats()
+{
+    int inited = isInited;
+
+    return inited;
+}
+
+/**
+ * hb_get_opencl_env
+ *
+ * Initialise the global OpenCL environment and log the vendor, name and
+ * driver version of every device in its context.
+ *
+ * @return 1 if hb_init_opencl_env fails; 0 if the device count cannot be
+ *         queried or the device array cannot be allocated; otherwise the
+ *         status of the device-list query (CL_SUCCESS when it worked)
+ */
+int hb_get_opencl_env()
+{
+    int i = 0;
+    cl_int status;
+    cl_uint numDevices = 0;
+    cl_device_id *devices;
+
+    /* initialize devices, context, command_queue */
+    status = hb_init_opencl_env( &gpu_env );
+    if( status )
+        return(1);
+
+    status = clGetContextInfo( gpu_env.context,
+                               CL_CONTEXT_NUM_DEVICES,
+                               sizeof(numDevices),
+                               &numDevices,
+                               NULL );
+    if( status != CL_SUCCESS )
+        return 0;
+
+    devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
+    if( devices == NULL )
+        return 0;
+
+    /* grab the handles to all of the devices in the context. */
+    status = clGetContextInfo( gpu_env.context,
+                               CL_CONTEXT_DEVICES,
+                               sizeof(cl_device_id) * numDevices,
+                               devices,
+                               NULL );
+    if( status != CL_SUCCESS )
+    {
+        /* The array was never filled in; do not walk it. */
+        free( devices );
+        return status;
+    }
+
+    for( i = 0; (cl_uint)i < numDevices; i++ )
+    {
+        /* Start from empty strings so a failed query logs nothing rather
+         * than uninitialised bytes. */
+        char deviceVendor[100] = "", deviceName[1024] = "", driverVersion[1024] = "";
+
+        if( devices[i] == NULL )
+            continue;
+
+        clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(deviceVendor),
+                        deviceVendor, NULL);
+        clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(deviceName),
+                        deviceName, NULL);
+        clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(driverVersion),
+                        driverVersion, NULL);
+        hb_log("hb_get_opencl_env: GPU #%d, Device Vendor: %s", i + 1, deviceVendor);
+        hb_log("hb_get_opencl_env: GPU #%d, Device Name: %s", i + 1, deviceName);
+        hb_log("hb_get_opencl_env: GPU #%d, Driver Version: %s", i + 1, driverVersion);
+    }
+
+    free( devices );
+
+    return status;
+}
+
+/**
+ * hb_create_buffer
+ *
+ * Create an OpenCL buffer in the wrapper's global context. No host pointer
+ * is supplied to clCreateBuffer.
+ *
+ * @param cl_Buf - receives the value returned by clCreateBuffer
+ * @param flags - passed to clCreateBuffer as its flags argument
+ * @param size - passed to clCreateBuffer as the buffer size
+ * @return 1 on success, 0 if clCreateBuffer reports an error (which is logged)
+ */
+int hb_create_buffer( cl_mem *cl_Buf, int flags, int size )
+{
+    int status;
+    cl_mem created = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );
+
+    *cl_Buf = created;
+
+    if( status == CL_SUCCESS )
+        return 1;
+
+    hb_log( "OpenCL: clCreateBuffer error '%d'", status );
+    return 0;
+}
+
+
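+/**
+ * hb_read_opencl_buffer
+ *
+ * Blocking read of `size` bytes from the start of an OpenCL buffer into
+ * host memory, using the wrapper's global command queue.
+ *
+ * @param cl_inBuf - OpenCL buffer to read from
+ * @param outbuf - host destination for the data
+ * @param size - number of bytes to read
+ * @return 1 on success, 0 if clEnqueueReadBuffer fails (the error is logged)
+ */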
+int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )
+{
+ int status;
+
+ status = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0 );
+ if( status != CL_SUCCESS )
+ {
+ hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status );
+ return 0;
+ }
+
return 1;
}
int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb)
{
int status = clEnqueueCopyBuffer(gpu_env.command_queue,
- src_buffer,\r
- dst_buffer,\r
- src_offset, dst_offset, cb,\r
- 0, 0, 0);\r
- if( status != CL_SUCCESS )\r
- { \r
- av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status ); \r
- return 0; \r
- }\r
- return 1;\r
-}\r
-\r
-int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height)\r
-{\r
-\r
- int chrH = -(-height >> 1);\r
- unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2));\r
- if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height))\r
- {\r
- memcpy(Ybuf,temp,linesize0 * height);\r
- memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH);\r
- memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH);\r
- \r
- }\r
- av_free(temp);\r
-\r
- return 1;\r
-}\r
-\r
-int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset)\r
-{\r
- int status;\r
- void *mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * (linesize0 + linesize1)*height + offset, 0, NULL, NULL, NULL );\r
- uint8_t *temp = (uint8_t *)mapped;\r
- temp += offset;\r
- memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height);\r
- memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2);\r
- memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2);\r
- clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL );\r
- return 1;\r
-}\r
-\r
-cl_command_queue hb_get_command_queue()\r
-{\r
- return gpu_env.command_queue;\r
-}\r
-\r
-cl_context hb_get_context()\r
-{\r
- return gpu_env.context;\r
-}\r
-#endif\r
+ src_buffer,
+ dst_buffer,
+ src_offset, dst_offset, cb,
+ 0, 0, 0);
+ if( status != CL_SUCCESS )
+ {
+ av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status );
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * hb_read_opencl_frame_buffer
+ *
+ * Read a frame out of an OpenCL buffer and split it into separate Y, U and
+ * V host buffers. The device buffer is read into a temporary staging area
+ * and the planes are copied out of it back to back: Y first, then U, then V.
+ *
+ * @param cl_inBuf - OpenCL buffer holding the frame
+ * @param Ybuf - receives linesize0 * height bytes
+ * @param Ubuf - receives linesize1 * chrH bytes
+ * @param Vbuf - receives linesize2 * chrH bytes
+ * @param linesize0 - bytes per row used for the Y copy
+ * @param linesize1 - bytes per row used for the U copy
+ * @param linesize2 - bytes per row used for the V copy
+ * @param height - number of Y rows; chrH is half of it, rounded up
+ * @return 1 on success, 0 if the staging buffer cannot be allocated or the
+ *         device read fails (the output buffers are then left untouched)
+ */
+int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height)
+{
+    int chrH = -(-height >> 1);
+    int ok = 0;
+    unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2));
+
+    if( temp == NULL )
+    {
+        hb_log( "OpenCL: hb_read_opencl_frame_buffer unable to allocate staging buffer" );
+        return 0;
+    }
+
+    /*
+     * NOTE(review): the staging area reserves linesize1 bytes per chroma row
+     * for both U and V, so the V copy below stays inside it only while
+     * linesize2 <= linesize1 - confirm against callers.
+     */
+    if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height))
+    {
+        memcpy(Ybuf,temp,linesize0 * height);
+        memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH);
+        memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH);
+        ok = 1;
+    }
+    av_free(temp);
+
+    /* Report a failed read instead of claiming success unconditionally. */
+    return ok;
+}
+
+/**
+ * hb_write_opencl_frame_buffer
+ *
+ * Copy separate Y, U and V host planes into an OpenCL buffer. The buffer is
+ * mapped for writing (blocking), the planes are written back to back
+ * starting `offset` bytes into the mapping, and the buffer is unmapped.
+ *
+ * @param cl_inBuf - OpenCL buffer to write into
+ * @param Ybuf - source of linesize0 * height bytes
+ * @param Ubuf - source of linesize1 * height / 2 bytes
+ * @param Vbuf - source of linesize2 * height / 2 bytes
+ * @param linesize0 - bytes per row used for the Y copy
+ * @param linesize1 - bytes per row used for the U copy
+ * @param linesize2 - bytes per row used for the V copy
+ * @param height - number of Y rows
+ * @param offset - byte offset of the Y plane inside the mapped region
+ * @return 1 on success, 0 if the buffer cannot be mapped or unmapped
+ */
+int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset)
+{
+    cl_int status = CL_SUCCESS;
+    void *mapped;
+    uint8_t *temp;
+
+    mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, CL_MAP_WRITE, 0,
+                                 sizeof(uint8_t) * (linesize0 + linesize1)*height + offset,
+                                 0, NULL, NULL, &status );
+    if( mapped == NULL || status != CL_SUCCESS )
+    {
+        /* Nothing to write into; writing through a NULL mapping would crash. */
+        hb_log( "OpenCL: hb_write_opencl_frame_buffer map error '%d'", status );
+        return 0;
+    }
+
+    temp = (uint8_t *)mapped + offset;
+    memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height);
+    memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2);
+    memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2);
+
+    status = clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL );
+    if( status != CL_SUCCESS )
+    {
+        hb_log( "OpenCL: hb_write_opencl_frame_buffer unmap error '%d'", status );
+        return 0;
+    }
+
+    return 1;
+}
+
+/**
+ * hb_get_command_queue
+ *
+ * @return the command queue stored in the wrapper's global environment
+ */
+cl_command_queue hb_get_command_queue()
+{
+    cl_command_queue queue = gpu_env.command_queue;
+
+    return queue;
+}
+
+/**
+ * hb_get_context
+ *
+ * @return the OpenCL context stored in the wrapper's global environment
+ */
+cl_context hb_get_context()
+{
+    cl_context ctx = gpu_env.context;
+
+    return ctx;
+}
+#endif