From: jstebbins Date: Sun, 22 Sep 2013 17:18:42 +0000 (+0000) Subject: libhb: remove windows ^M line endings X-Git-Tag: 0.10.0~734 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=10fda25f44d362349694a42127e8ba4974a4a6ad;p=handbrake libhb: remove windows ^M line endings git-svn-id: svn://svn.handbrake.fr/HandBrake/trunk@5797 b64f7644-9d1e-0410-96f1-a4d463321fa5 --- diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c index 9a7e9888d..43da091e1 100644 --- a/libhb/openclwrapper.c +++ b/libhb/openclwrapper.c @@ -1,84 +1,84 @@ -/* openclwrapper.c - - Copyright (c) 2003-2012 HandBrake Team - This file is part of the HandBrake source code - Homepage: . - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao - Li Cao - */ - -#ifdef USE_OPENCL - -#include -#include -#include -#include "openclwrapper.h" -#include "openclkernels.h" - -//#define USE_EXTERNAL_KERNEL -#ifdef SYS_MINGW -#include -#endif - -#if defined(__APPLE__) -#include -#else -#include -#endif - -#if defined(_MSC_VER) -#define strcasecmp strcmpi -#endif - -#define MAX_KERNEL_STRING_LEN 64 -#define MAX_CLFILE_NUM 50 -#define MAX_CLKERNEL_NUM 200 -#define MAX_CLFILE_PATH 255 -#define MAX_KERNEL_NUM 50 -#define MAX_KERNEL_NAME_LEN 64 - -#ifndef INVALID_HANDLE_VALUE -#define INVALID_HANDLE_VALUE NULL -#endif - -//#define THREAD_PRIORITY_TIME_CRITICAL 15 - -enum VENDOR -{ - AMD = 0, - Intel, - NVIDIA, - others -}; -typedef struct _GPUEnv -{ - //share vb in all modules in hb library - cl_platform_id platform; - cl_device_type dType; - cl_context context; - cl_device_id * devices; - cl_device_id dev; - cl_command_queue command_queue; - cl_kernel kernels[MAX_CLFILE_NUM]; - cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file - char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256 - int file_count; // only one kernel file - - char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1]; - cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM]; - int kernel_count; - int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper - enum VENDOR vendor; -}GPUEnv; - -typedef struct -{ - char kernelName[MAX_KERNEL_NAME_LEN+1]; - char * kernelStr; -}hb_kernel_node; +/* openclwrapper.c + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + */ + +#ifdef USE_OPENCL + +#include +#include +#include +#include "openclwrapper.h" +#include "openclkernels.h" + +//#define USE_EXTERNAL_KERNEL +#ifdef SYS_MINGW +#include +#endif + +#if defined(__APPLE__) +#include +#else +#include +#endif + +#if defined(_MSC_VER) +#define strcasecmp strcmpi +#endif + +#define MAX_KERNEL_STRING_LEN 64 +#define MAX_CLFILE_NUM 50 +#define MAX_CLKERNEL_NUM 200 +#define MAX_CLFILE_PATH 255 +#define MAX_KERNEL_NUM 50 +#define MAX_KERNEL_NAME_LEN 64 + +#ifndef INVALID_HANDLE_VALUE +#define INVALID_HANDLE_VALUE NULL +#endif + +//#define THREAD_PRIORITY_TIME_CRITICAL 15 + +enum VENDOR +{ + AMD = 0, + Intel, + NVIDIA, + others +}; +typedef struct _GPUEnv +{ + //share vb in all modules in hb library + cl_platform_id platform; + cl_device_type dType; + cl_context context; + cl_device_id * devices; + cl_device_id dev; + cl_command_queue command_queue; + cl_kernel kernels[MAX_CLFILE_NUM]; + cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file + char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256 + int file_count; // only one kernel file + + char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1]; + cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM]; + int kernel_count; + int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper + enum VENDOR vendor; +}GPUEnv; + +typedef struct +{ + char kernelName[MAX_KERNEL_NAME_LEN+1]; + char * kernelStr; +}hb_kernel_node; static GPUEnv gpu_env; static int isInited = 0; @@ -86,83 +86,83 @@ static int useBuffers = 0; static hb_kernel_node gKernels[MAX_KERNEL_NUM]; #define ADD_KERNEL_CFG( idx, s, p ){\ - strcpy( gKernels[idx].kernelName, s );\ - gKernels[idx].kernelStr = p;\ - strcpy( gpu_env.kernel_names[idx], s );\ - gpu_env.kernel_count++; } - - -/** - * hb_confirm_gpu_type - */ -int hb_confirm_gpu_type() -{ - int status = 1; - unsigned int i, j; - cl_uint numPlatforms = 0; - status = clGetPlatformIDs(0,NULL,&numPlatforms); - if(status != 0) - { - goto end; - } - if(numPlatforms > 0) - { - cl_platform_id* platforms = (cl_platform_id* )malloc (numPlatforms * sizeof(cl_platform_id)); - status = clGetPlatformIDs (numPlatforms, platforms, NULL); - if (status != 0) - { - goto end; - } - for (i=0; i < numPlatforms; i++) - { - char pbuff[100]; - cl_uint numDevices; - status = clGetPlatformInfo( platforms[i], - CL_PLATFORM_VENDOR, - sizeof (pbuff), - pbuff, - NULL); - if (status) - continue; - status = clGetDeviceIDs( platforms[i], - CL_DEVICE_TYPE_GPU , - 0 , - NULL , - &numDevices); - - cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id)); - status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); - for (j = 0; j < numDevices; j++) - { - char dbuff[100]; - status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); - if (!strcmp(dbuff, "Advanced Micro Devices, Inc.") || - !strcmp(dbuff, "Intel(R) Corporation") || -#ifdef __APPLE__ - !strcmp(dbuff, "AMD") || - /* MacBook Pro, AMD ATI Radeon HD 6750M, OS X 10.8.3 */ - !strcmp(dbuff, "NVIDIA") || - /* MacBook Pro, NVIDIA GeForce GT 330M, OS X 10.7.4 */ -#endif - !strcmp(dbuff, "NVIDIA Corporation")) - { - return 0; - } - } - - if ( status != CL_SUCCESS ) - continue; - if( numDevices ) - break; - } - free( platforms ); - } - end: - return -1; -} - -/** - * hb_regist_opencl_kernel + strcpy( gKernels[idx].kernelName, s );\ + gKernels[idx].kernelStr = p;\ + strcpy( gpu_env.kernel_names[idx], s );\ + gpu_env.kernel_count++; } + + +/** + * hb_confirm_gpu_type + */ +int hb_confirm_gpu_type() +{ + int status = 1; + unsigned int i, j; + cl_uint numPlatforms = 0; + status = clGetPlatformIDs(0,NULL,&numPlatforms); + if(status != 0) + { + goto end; + } + if(numPlatforms > 0) + { + cl_platform_id* platforms = (cl_platform_id* )malloc (numPlatforms * sizeof(cl_platform_id)); + status = clGetPlatformIDs (numPlatforms, platforms, NULL); + if (status != 0) + { + goto end; + } + for (i=0; i < numPlatforms; i++) + { + char pbuff[100]; + cl_uint numDevices; + status = clGetPlatformInfo( platforms[i], + CL_PLATFORM_VENDOR, + sizeof (pbuff), + pbuff, + NULL); + if (status) + continue; + status = clGetDeviceIDs( platforms[i], + CL_DEVICE_TYPE_GPU , + 0 , + NULL , + &numDevices); + + cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id)); + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); + for (j = 0; j < numDevices; j++) + { + char dbuff[100]; + status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); + if (!strcmp(dbuff, "Advanced Micro Devices, Inc.") || + !strcmp(dbuff, "Intel(R) Corporation") || +#ifdef __APPLE__ + !strcmp(dbuff, "AMD") || + /* MacBook Pro, AMD ATI Radeon HD 6750M, OS X 10.8.3 */ + !strcmp(dbuff, "NVIDIA") || + /* MacBook Pro, NVIDIA GeForce GT 330M, OS X 10.7.4 */ +#endif + !strcmp(dbuff, "NVIDIA Corporation")) + { + return 0; + } + } + + if ( status != CL_SUCCESS ) + continue; + if( numDevices ) + break; + } + free( platforms ); + } + end: + return -1; +} + +/** + * hb_regist_opencl_kernel */ int hb_regist_opencl_kernel() { @@ -178,339 +178,339 @@ int hb_regist_opencl_kernel() return 0; } - -/** - * hb_regist_opencl_kernel - * @param filename - - * @param source - - * @param gpu_info - - * @param int idx - - */ -int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx ) -{ - int file_size; - size_t result; - FILE * file = NULL; - file_size = 0; - result = 0; - file = fopen( filename, "rb+" ); - - if( file!=NULL ) - { - fseek( file, 0, SEEK_END ); - - file_size = ftell( file ); - rewind( file ); - *source = (char*)malloc( sizeof(char) * file_size + 1 ); - if( *source == (char*)NULL ) - { - return(0); - } - result = fread( *source, 1, file_size, file ); - if( result != file_size ) - { - free( *source ); - return(0); - } - (*source)[file_size] = '\0'; - fclose( file ); - - return(1); - } - return(0); -} - -/** - * hb_binary_generated - * @param context - - * @param cl_file_name - - * @param fhandle - - */ -int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle ) -{ - int i = 0; - cl_int status; - cl_uint numDevices; - cl_device_id *devices; - char * str = NULL; - FILE * fd = NULL; - - status = clGetContextInfo( context, - CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), - &numDevices, - NULL ); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Get context info failed" ); - return 0; - } - - devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); - if( devices == NULL ) - { - hb_log( "OpenCL: No device found" ); - return 0; - } - - /* grab the handles to all of the devices in the context. */ - status = clGetContextInfo( context, - CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, - NULL ); - - status = 0; - /* dump out each binary into its own separate file. */ - for (i = 0; i < numDevices; i++) - { - char fileName[256] = { 0 }; - char cl_name[128] = { 0 }; - if (devices[i]) - { - char deviceName[1024]; - status = clGetDeviceInfo(devices[i], - CL_DEVICE_NAME, - sizeof(deviceName), - deviceName, - NULL); - - str = (char*)strstr(cl_file_name, ".cl"); - memcpy(cl_name, cl_file_name, str - cl_file_name); - cl_name[str - cl_file_name] = '\0'; - sprintf(fileName, "./%s - %s.bin", cl_name, deviceName); - fd = fopen(fileName, "rb"); - status = fd != NULL; - } - } - - if( devices != NULL ) - { - free( devices ); - devices = NULL; - } - - if( fd != NULL ) - *fhandle = fd; - - return status; -} - -/** - * hb_write_binary_to_file - * @param fileName - - * @param birary - - * @param numBytes - - */ -int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes ) -{ - FILE *output = NULL; - output = fopen( fileName, "wb" ); - if( output == NULL ) - return 0; - - fwrite( birary, sizeof(char), numBytes, output ); - fclose( output ); - - return 1; -} - -/** - * hb_generat_bin_from_kernel_source - * @param program - - * @param cl_file_name - - */ -int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name ) -{ - int i = 0; - cl_int status; - cl_uint numDevices; - size_t *binarySizes; - cl_device_id *devices; - char **binaries; - char *str = NULL; - - status = clGetProgramInfo( program, - CL_PROGRAM_NUM_DEVICES, - sizeof(numDevices), - &numDevices, - NULL ); - if( status != CL_SUCCESS ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed"); - return 0; - } - - devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); - if( devices == NULL ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found"); - return 0; - } - - /* grab the handles to all of the devices in the program. */ - status = clGetProgramInfo( program, - CL_PROGRAM_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, - NULL ); - if( status != CL_SUCCESS ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed"); - return 0; - } - - /* figure out the sizes of each of the binaries. */ - binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices ); - - status = clGetProgramInfo( program, - CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * numDevices, - binarySizes, NULL ); - if( status != CL_SUCCESS ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed"); - return 0; - } - - /* copy over all of the generated binaries. */ - binaries = (char**)malloc( sizeof(char *) * numDevices ); - if( binaries == NULL ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed"); - return 0; - } - - for( i = 0; i < numDevices; i++ ) - { - if( binarySizes[i] != 0 ) - { - binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] ); - if( binaries[i] == NULL ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i); - return 0; - } - } - else - { - binaries[i] = NULL; - } - } - - status = clGetProgramInfo( program, - CL_PROGRAM_BINARIES, - sizeof(char *) * numDevices, - binaries, - NULL ); - if( status != CL_SUCCESS ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed"); - return 0; - } - - /* dump out each binary into its own separate file. */ - for (i = 0; i < numDevices; i++) - { - char fileName[256] = {0}; - char cl_name[128] = {0}; - if (binarySizes[i]) - { - char deviceName[1024]; - status = clGetDeviceInfo(devices[i], - CL_DEVICE_NAME, - sizeof(deviceName), - deviceName, - NULL); - - str = (char*)strstr( cl_file_name, (char*)".cl" ); - memcpy(cl_name, cl_file_name, str - cl_file_name); - cl_name[str - cl_file_name] = '\0'; - sprintf(fileName, "./%s - %s.bin", cl_name, deviceName); - - if (!hb_write_binary_to_file(fileName, binaries[i], binarySizes[i])) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead."); - return 0; - } - } - } - - // Release all resouces and memory - for( i = 0; i < numDevices; i++ ) - { - if( binaries[i] != NULL ) - { - free( binaries[i] ); - binaries[i] = NULL; - } - } - - if( binaries != NULL ) - { - free( binaries ); - binaries = NULL; - } - - if( binarySizes != NULL ) - { - free( binarySizes ); - binarySizes = NULL; - } - - if( devices != NULL ) - { - free( devices ); - devices = NULL; - } - return 1; -} - - -/** - * hb_init_opencl_attr - * @param env - - */ -int hb_init_opencl_attr( OpenCLEnv * env ) -{ - if( gpu_env.isUserCreated ) - return 1; - - gpu_env.context = env->context; - gpu_env.platform = env->platform; - gpu_env.dev = env->devices; - gpu_env.command_queue = env->command_queue; - - gpu_env.isUserCreated = 1; - - return 0; -} - -/** - * hb_create_kernel - * @param kernelname - - * @param env - - */ -int hb_create_kernel( char * kernelname, KernelEnv * env ) -{ - int status; - env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status ); - env->context = gpu_env.context; - env->command_queue = gpu_env.command_queue; - return status != CL_SUCCESS ? 1 : 0; -} - -/** - * hb_release_kernel - * @param env - - */ -int hb_release_kernel( KernelEnv * env ) -{ - int status = clReleaseKernel( env->kernel ); - return status != CL_SUCCESS ? 1 : 0; -} - -/** + +/** + * hb_regist_opencl_kernel + * @param filename - + * @param source - + * @param gpu_info - + * @param int idx - + */ +int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx ) +{ + int file_size; + size_t result; + FILE * file = NULL; + file_size = 0; + result = 0; + file = fopen( filename, "rb+" ); + + if( file!=NULL ) + { + fseek( file, 0, SEEK_END ); + + file_size = ftell( file ); + rewind( file ); + *source = (char*)malloc( sizeof(char) * file_size + 1 ); + if( *source == (char*)NULL ) + { + return(0); + } + result = fread( *source, 1, file_size, file ); + if( result != file_size ) + { + free( *source ); + return(0); + } + (*source)[file_size] = '\0'; + fclose( file ); + + return(1); + } + return(0); +} + +/** + * hb_binary_generated + * @param context - + * @param cl_file_name - + * @param fhandle - + */ +int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle ) +{ + int i = 0; + cl_int status; + cl_uint numDevices; + cl_device_id *devices; + char * str = NULL; + FILE * fd = NULL; + + status = clGetContextInfo( context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: Get context info failed" ); + return 0; + } + + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + { + hb_log( "OpenCL: No device found" ); + return 0; + } + + /* grab the handles to all of the devices in the context. */ + status = clGetContextInfo( context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + + status = 0; + /* dump out each binary into its own separate file. */ + for (i = 0; i < numDevices; i++) + { + char fileName[256] = { 0 }; + char cl_name[128] = { 0 }; + if (devices[i]) + { + char deviceName[1024]; + status = clGetDeviceInfo(devices[i], + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL); + + str = (char*)strstr(cl_file_name, ".cl"); + memcpy(cl_name, cl_file_name, str - cl_file_name); + cl_name[str - cl_file_name] = '\0'; + sprintf(fileName, "./%s - %s.bin", cl_name, deviceName); + fd = fopen(fileName, "rb"); + status = fd != NULL; + } + } + + if( devices != NULL ) + { + free( devices ); + devices = NULL; + } + + if( fd != NULL ) + *fhandle = fd; + + return status; +} + +/** + * hb_write_binary_to_file + * @param fileName - + * @param birary - + * @param numBytes - + */ +int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes ) +{ + FILE *output = NULL; + output = fopen( fileName, "wb" ); + if( output == NULL ) + return 0; + + fwrite( birary, sizeof(char), numBytes, output ); + fclose( output ); + + return 1; +} + +/** + * hb_generat_bin_from_kernel_source + * @param program - + * @param cl_file_name - + */ +int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name ) +{ + int i = 0; + cl_int status; + cl_uint numDevices; + size_t *binarySizes; + cl_device_id *devices; + char **binaries; + char *str = NULL; + + status = clGetProgramInfo( program, + CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed"); + return 0; + } + + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + { + hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found"); + return 0; + } + + /* grab the handles to all of the devices in the program. */ + status = clGetProgramInfo( program, + CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed"); + return 0; + } + + /* figure out the sizes of each of the binaries. */ + binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices ); + + status = clGetProgramInfo( program, + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, + binarySizes, NULL ); + if( status != CL_SUCCESS ) + { + hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed"); + return 0; + } + + /* copy over all of the generated binaries. */ + binaries = (char**)malloc( sizeof(char *) * numDevices ); + if( binaries == NULL ) + { + hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed"); + return 0; + } + + for( i = 0; i < numDevices; i++ ) + { + if( binarySizes[i] != 0 ) + { + binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] ); + if( binaries[i] == NULL ) + { + hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i); + return 0; + } + } + else + { + binaries[i] = NULL; + } + } + + status = clGetProgramInfo( program, + CL_PROGRAM_BINARIES, + sizeof(char *) * numDevices, + binaries, + NULL ); + if( status != CL_SUCCESS ) + { + hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed"); + return 0; + } + + /* dump out each binary into its own separate file. */ + for (i = 0; i < numDevices; i++) + { + char fileName[256] = {0}; + char cl_name[128] = {0}; + if (binarySizes[i]) + { + char deviceName[1024]; + status = clGetDeviceInfo(devices[i], + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL); + + str = (char*)strstr( cl_file_name, (char*)".cl" ); + memcpy(cl_name, cl_file_name, str - cl_file_name); + cl_name[str - cl_file_name] = '\0'; + sprintf(fileName, "./%s - %s.bin", cl_name, deviceName); + + if (!hb_write_binary_to_file(fileName, binaries[i], binarySizes[i])) + { + hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead."); + return 0; + } + } + } + + // Release all resouces and memory + for( i = 0; i < numDevices; i++ ) + { + if( binaries[i] != NULL ) + { + free( binaries[i] ); + binaries[i] = NULL; + } + } + + if( binaries != NULL ) + { + free( binaries ); + binaries = NULL; + } + + if( binarySizes != NULL ) + { + free( binarySizes ); + binarySizes = NULL; + } + + if( devices != NULL ) + { + free( devices ); + devices = NULL; + } + return 1; +} + + +/** + * hb_init_opencl_attr + * @param env - + */ +int hb_init_opencl_attr( OpenCLEnv * env ) +{ + if( gpu_env.isUserCreated ) + return 1; + + gpu_env.context = env->context; + gpu_env.platform = env->platform; + gpu_env.dev = env->devices; + gpu_env.command_queue = env->command_queue; + + gpu_env.isUserCreated = 1; + + return 0; +} + +/** + * hb_create_kernel + * @param kernelname - + * @param env - + */ +int hb_create_kernel( char * kernelname, KernelEnv * env ) +{ + int status; + env->kernel = clCreateKernel( gpu_env.programs[0], kernelname, &status ); + env->context = gpu_env.context; + env->command_queue = gpu_env.command_queue; + return status != CL_SUCCESS ? 1 : 0; +} + +/** + * hb_release_kernel + * @param env - + */ +int hb_release_kernel( KernelEnv * env ) +{ + int status = clReleaseKernel( env->kernel ); + return status != CL_SUCCESS ? 1 : 0; +} + +/** * hb_init_opencl_env * @param gpu_info - */ @@ -519,11 +519,11 @@ static int init_once = 0; int hb_init_opencl_env( GPUEnv *gpu_info ) { size_t length; - cl_int status; - cl_uint numPlatforms, numDevices; - cl_platform_id *platforms; - cl_context_properties cps[3]; - char platformName[100]; + cl_int status; + cl_uint numPlatforms, numDevices; + cl_platform_id *platforms; + cl_context_properties cps[3]; + char platformName[100]; unsigned int i; void *handle = INVALID_HANDLE_VALUE; @@ -535,270 +535,270 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) /* * Have a look at the available platforms. */ - if( !gpu_info->isUserCreated ) - { - status = clGetPlatformIDs( 0, NULL, &numPlatforms ); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: OpenCL device platform not found." ); - return(1); - } - - gpu_info->platform = NULL; - if( 0 < numPlatforms ) - { - platforms = (cl_platform_id*)malloc( - numPlatforms * sizeof(cl_platform_id)); - if( platforms == (cl_platform_id*)NULL ) - { - return(1); - } - status = clGetPlatformIDs( numPlatforms, platforms, NULL ); - - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Specific opencl platform not found." ); - return(1); - } - - for( i = 0; i < numPlatforms; i++ ) - { - status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, - sizeof(platformName), platformName, - NULL ); - - if( status != CL_SUCCESS ) - { - continue; - } - gpu_info->platform = platforms[i]; - - if (!strcmp(platformName, "Advanced Micro Devices, Inc.") || - !strcmp(platformName, "AMD")) - gpu_info->vendor = AMD; - else - gpu_info->vendor = others; - - gpu_info->platform = platforms[i]; - - status = clGetDeviceIDs( gpu_info->platform /* platform */, - CL_DEVICE_TYPE_GPU /* device_type */, - 0 /* num_entries */, - NULL /* devices */, - &numDevices ); - - if( status != CL_SUCCESS ) - { - continue; - } - - if( numDevices ) - break; - - } - free( platforms ); - } - - if( NULL == gpu_info->platform ) - { - hb_log( "OpenCL: No OpenCL-compatible GPU found." ); - return(1); - } - - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: No OpenCL-compatible GPU found." ); - return(1); - } - - /* - * Use available platform. - */ - cps[0] = CL_CONTEXT_PLATFORM; - cps[1] = (cl_context_properties)gpu_info->platform; - cps[2] = 0; - /* Check for GPU. */ - gpu_info->dType = CL_DEVICE_TYPE_GPU; - gpu_info->context = clCreateContextFromType( - cps, gpu_info->dType, NULL, NULL, &status ); - - if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) - { - gpu_info->dType = CL_DEVICE_TYPE_CPU; - gpu_info->context = clCreateContextFromType( - cps, gpu_info->dType, NULL, NULL, &status ); - } - - if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) - { - gpu_info->dType = CL_DEVICE_TYPE_DEFAULT; - gpu_info->context = clCreateContextFromType( - cps, gpu_info->dType, NULL, NULL, &status ); - } - - if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) - { - hb_log( "OpenCL: Unable to create opencl context." ); - return(1); - } - - /* Detect OpenCL devices. */ - /* First, get the size of device list data */ - status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, - 0, NULL, &length ); - if((status != CL_SUCCESS) || (length == 0)) - { - hb_log( "OpenCL: Unable to get the list of devices in context." ); - return(1); - } - - /* Now allocate memory for device list based on the size we got earlier */ - gpu_info->devices = (cl_device_id*)malloc( length ); - if( gpu_info->devices == (cl_device_id*)NULL ) - { - return(1); - } - - /* Now, get the device list data */ - status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length, - gpu_info->devices, NULL ); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Unable to get the device list data in context." ); - return(1); - } - - /* Create OpenCL command queue. */ - gpu_info->command_queue = clCreateCommandQueue( gpu_info->context, - gpu_info->devices[0], - 0, &status ); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Unable to create opencl command queue." ); - return(1); - } - } - - if( clGetCommandQueueInfo( gpu_info->command_queue, - CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle), - &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE ) - { -#ifdef SYS_MINGW - SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL ); -#endif - } - - return 0; -} - - -/** - * hb_release_opencl_env - * @param gpu_info - - */ -int hb_release_opencl_env( GPUEnv *gpu_info ) -{ - if( !isInited ) - return 1; - int i; - - for( i = 0; iisUserCreated = 0; - return 1; -} - - -/** - * hb_register_kernel_wrapper - * @param kernel_name - - * @param function - - */ -int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ) -{ - int i; - for( i = 0; i < gpu_env.kernel_count; i++ ) - { - if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 ) - { - gpu_env.kernel_functions[i] = function; - return(1); - } - } - return(0); -} - -/** - * hb_cached_of_kerner_prg - * @param gpu_env - - * @param cl_file_name - - */ -int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name ) -{ - int i; - for( i = 0; i < gpu_env->file_count; i++ ) - { - if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name ) == 0 ) - { - if( gpu_env->programs[i] != NULL ) - return(1); - } - } - - return(0); -} - -/** - * hb_compile_kernel_file - * @param filename - - * @param gpu_info - - * @param indx - - * @param build_option - - */ -int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, - int indx, const char *build_option ) -{ - cl_int status; - size_t length; - char *source_str; - const char *source; - size_t source_size[1]; - char *buildLog = NULL; - int b_error, binary_status, binaryExisted; - char * binary; - cl_uint numDevices; - cl_device_id *devices; - FILE * fd; - FILE * fd1; - int idx; - - if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 ) - return (1); - - idx = gpu_info->file_count; - -#ifdef USE_EXTERNAL_KERNEL - status = hb_convert_to_string( filename, &source_str, gpu_info, idx ); + if( !gpu_info->isUserCreated ) + { + status = clGetPlatformIDs( 0, NULL, &numPlatforms ); + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: OpenCL device platform not found." ); + return(1); + } + + gpu_info->platform = NULL; + if( 0 < numPlatforms ) + { + platforms = (cl_platform_id*)malloc( + numPlatforms * sizeof(cl_platform_id)); + if( platforms == (cl_platform_id*)NULL ) + { + return(1); + } + status = clGetPlatformIDs( numPlatforms, platforms, NULL ); + + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: Specific opencl platform not found." ); + return(1); + } + + for( i = 0; i < numPlatforms; i++ ) + { + status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, + sizeof(platformName), platformName, + NULL ); + + if( status != CL_SUCCESS ) + { + continue; + } + gpu_info->platform = platforms[i]; + + if (!strcmp(platformName, "Advanced Micro Devices, Inc.") || + !strcmp(platformName, "AMD")) + gpu_info->vendor = AMD; + else + gpu_info->vendor = others; + + gpu_info->platform = platforms[i]; + + status = clGetDeviceIDs( gpu_info->platform /* platform */, + CL_DEVICE_TYPE_GPU /* device_type */, + 0 /* num_entries */, + NULL /* devices */, + &numDevices ); + + if( status != CL_SUCCESS ) + { + continue; + } + + if( numDevices ) + break; + + } + free( platforms ); + } + + if( NULL == gpu_info->platform ) + { + hb_log( "OpenCL: No OpenCL-compatible GPU found." ); + return(1); + } + + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: No OpenCL-compatible GPU found." ); + return(1); + } + + /* + * Use available platform. + */ + cps[0] = CL_CONTEXT_PLATFORM; + cps[1] = (cl_context_properties)gpu_info->platform; + cps[2] = 0; + /* Check for GPU. */ + gpu_info->dType = CL_DEVICE_TYPE_GPU; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + + if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) + { + gpu_info->dType = CL_DEVICE_TYPE_CPU; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + } + + if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) + { + gpu_info->dType = CL_DEVICE_TYPE_DEFAULT; + gpu_info->context = clCreateContextFromType( + cps, gpu_info->dType, NULL, NULL, &status ); + } + + if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) + { + hb_log( "OpenCL: Unable to create opencl context." ); + return(1); + } + + /* Detect OpenCL devices. */ + /* First, get the size of device list data */ + status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, + 0, NULL, &length ); + if((status != CL_SUCCESS) || (length == 0)) + { + hb_log( "OpenCL: Unable to get the list of devices in context." ); + return(1); + } + + /* Now allocate memory for device list based on the size we got earlier */ + gpu_info->devices = (cl_device_id*)malloc( length ); + if( gpu_info->devices == (cl_device_id*)NULL ) + { + return(1); + } + + /* Now, get the device list data */ + status = clGetContextInfo( gpu_info->context, CL_CONTEXT_DEVICES, length, + gpu_info->devices, NULL ); + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: Unable to get the device list data in context." ); + return(1); + } + + /* Create OpenCL command queue. */ + gpu_info->command_queue = clCreateCommandQueue( gpu_info->context, + gpu_info->devices[0], + 0, &status ); + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: Unable to create opencl command queue." ); + return(1); + } + } + + if( clGetCommandQueueInfo( gpu_info->command_queue, + CL_QUEUE_THREAD_HANDLE_AMD, sizeof(handle), + &handle, NULL ) == CL_SUCCESS && handle != INVALID_HANDLE_VALUE ) + { +#ifdef SYS_MINGW + SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL ); +#endif + } + + return 0; +} + + +/** + * hb_release_opencl_env + * @param gpu_info - + */ +int hb_release_opencl_env( GPUEnv *gpu_info ) +{ + if( !isInited ) + return 1; + int i; + + for( i = 0; iisUserCreated = 0; + return 1; +} + + +/** + * hb_register_kernel_wrapper + * @param kernel_name - + * @param function - + */ +int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ) +{ + int i; + for( i = 0; i < gpu_env.kernel_count; i++ ) + { + if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 ) + { + gpu_env.kernel_functions[i] = function; + return(1); + } + } + return(0); +} + +/** + * hb_cached_of_kerner_prg + * @param gpu_env - + * @param cl_file_name - + */ +int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name ) +{ + int i; + for( i = 0; i < gpu_env->file_count; i++ ) + { + if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name ) == 0 ) + { + if( gpu_env->programs[i] != NULL ) + return(1); + } + } + + return(0); +} + +/** + * hb_compile_kernel_file + * @param filename - + * @param gpu_info - + * @param indx - + * @param build_option - + */ +int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, + int indx, const char *build_option ) +{ + cl_int status; + size_t length; + char *source_str; + const char *source; + size_t source_size[1]; + char *buildLog = NULL; + int b_error, binary_status, binaryExisted; + char * binary; + cl_uint numDevices; + cl_device_id *devices; + FILE * fd; + FILE * fd1; + int idx; + + if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 ) + return (1); + + idx = gpu_info->file_count; + +#ifdef USE_EXTERNAL_KERNEL + status = hb_convert_to_string( filename, &source_str, gpu_info, idx ); if( status == 0 ) return(0); #else @@ -815,140 +815,140 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, strcat( source_str, kernel_src_yadif_filter ); #endif - source = source_str; - source_size[0] = strlen( source ); - - if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1) - { - status = clGetContextInfo(gpu_info->context, - CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), - &numDevices, - NULL); - if (status != CL_SUCCESS) - { - hb_log("OpenCL: Unable to get the number of devices in context."); - return 0; - } - - devices = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices); - if (devices == NULL) - return 0; - - length = 0; - b_error = 0; - b_error |= fseek(fd, 0, SEEK_END) < 0; - b_error |= (length = ftell(fd)) <= 0; - b_error |= fseek(fd, 0, SEEK_SET) < 0; - if (b_error) - return 0; - - binary = (char*)calloc(length + 2, sizeof(char)); - if (binary == NULL) - return 0; - - b_error |= fread(binary, 1, length, fd) != length; -#if 0 // this doesn't work under OS X and/or with some non-AMD GPUs - if (binary[length-1] != '\n') - binary[length++] = '\n; -#endif - - if (b_error) - return 0; - - /* grab the handles to all of the devices in the context. */ - status = clGetContextInfo(gpu_info->context, - CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, - NULL); - - gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context, - numDevices, - devices, - &length, - (const unsigned char**)&binary, - &binary_status, - &status); - - fclose(fd); - free(devices); - fd = NULL; - devices = NULL; - } - else - { - /* create a CL program using the kernel source */ - gpu_info->programs[idx] = clCreateProgramWithSource( - gpu_info->context, 1, &source, source_size, &status ); - } - - if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){ - hb_log( "OpenCL: Unable to get list of devices in context." ); - return(0); - } - - /* create a cl program executable for all the devices specified */ - if( !gpu_info->isUserCreated ) - { - status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices, - build_option, NULL, NULL ); - } - else - { - status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev), - build_option, NULL, NULL ); - } - - if( status != CL_SUCCESS ) - { - if( !gpu_info->isUserCreated ) - { - status = clGetProgramBuildInfo( gpu_info->programs[idx], - gpu_info->devices[0], - CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); - } - else - { - status = clGetProgramBuildInfo( gpu_info->programs[idx], - gpu_info->dev, - CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); - } - - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Unable to get GPU build information." ); - return(0); - } - - buildLog = (char*)malloc( length ); - if( buildLog == (char*)NULL ) - { - return(0); - } - - if( !gpu_info->isUserCreated ) - { - status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0], - CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); - } - else - { - status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev, - CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); - } - - fd1 = fopen( "kernel-build.log", "w+" ); - if( fd1 != NULL ) { - fwrite( buildLog, sizeof(char), length, fd1 ); - fclose( fd1 ); - } - - free( buildLog ); - return(0); - } - - strcpy( gpu_env.kernelSrcFile[idx], filename ); + source = source_str; + source_size[0] = strlen( source ); + + if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1) + { + status = clGetContextInfo(gpu_info->context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL); + if (status != CL_SUCCESS) + { + hb_log("OpenCL: Unable to get the number of devices in context."); + return 0; + } + + devices = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices); + if (devices == NULL) + return 0; + + length = 0; + b_error = 0; + b_error |= fseek(fd, 0, SEEK_END) < 0; + b_error |= (length = ftell(fd)) <= 0; + b_error |= fseek(fd, 0, SEEK_SET) < 0; + if (b_error) + return 0; + + binary = (char*)calloc(length + 2, sizeof(char)); + if (binary == NULL) + return 0; + + b_error |= fread(binary, 1, length, fd) != length; +#if 0 // this doesn't work under OS X and/or with some non-AMD GPUs + if (binary[length-1] != '\n') + binary[length++] = '\n'; +#endif + + if (b_error) + return 0; + + /* grab the handles to all of the devices in the context. */ + status = clGetContextInfo(gpu_info->context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL); + + gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context, + numDevices, + devices, + &length, + (const unsigned char**)&binary, + &binary_status, + &status); + + fclose(fd); + free(devices); + fd = NULL; + devices = NULL; + } + else + { + /* create a CL program using the kernel source */ + gpu_info->programs[idx] = clCreateProgramWithSource( + gpu_info->context, 1, &source, source_size, &status ); + } + + if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){ + hb_log( "OpenCL: Unable to get list of devices in context." ); + return(0); + } + + /* create a cl program executable for all the devices specified */ + if( !gpu_info->isUserCreated ) + { + status = clBuildProgram( gpu_info->programs[idx], 1, gpu_info->devices, + build_option, NULL, NULL ); + } + else + { + status = clBuildProgram( gpu_info->programs[idx], 1, &(gpu_info->dev), + build_option, NULL, NULL ); + } + + if( status != CL_SUCCESS ) + { + if( !gpu_info->isUserCreated ) + { + status = clGetProgramBuildInfo( gpu_info->programs[idx], + gpu_info->devices[0], + CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + } + else + { + status = clGetProgramBuildInfo( gpu_info->programs[idx], + gpu_info->dev, + CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + } + + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: Unable to get GPU build information." ); + return(0); + } + + buildLog = (char*)malloc( length ); + if( buildLog == (char*)NULL ) + { + return(0); + } + + if( !gpu_info->isUserCreated ) + { + status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->devices[0], + CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); + } + else + { + status = clGetProgramBuildInfo( gpu_info->programs[idx], gpu_info->dev, + CL_PROGRAM_BUILD_LOG, length, buildLog, &length ); + } + + fd1 = fopen( "kernel-build.log", "w+" ); + if( fd1 != NULL ) { + fwrite( buildLog, sizeof(char), length, fd1 ); + fclose( fd1 ); + } + + free( buildLog ); + return(0); + } + + strcpy( gpu_env.kernelSrcFile[idx], filename ); if (binaryExisted != 1) { @@ -956,94 +956,94 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, } gpu_info->file_count += 1; - - return(1); -} - - -/** - * hb_get_kernel_env_and_func - * @param kernel_name - - * @param env - - * @param function - - */ -int hb_get_kernel_env_and_func( const char *kernel_name, - KernelEnv *env, - cl_kernel_function *function ) -{ - int i; - for( i = 0; i < gpu_env.kernel_count; i++ ) - { - if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 ) - { - env->context = gpu_env.context; - env->command_queue = gpu_env.command_queue; - env->program = gpu_env.programs[0]; - env->kernel = gpu_env.kernels[i]; - env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0; - *function = gpu_env.kernel_functions[i]; - return(1); - } - } - return(0); -} - -/** - * hb_get_kernel_env_and_func - * @param kernel_name - - * @param userdata - - */ -int hb_run_kernel( const char *kernel_name, void **userdata ) -{ - KernelEnv env; - cl_kernel_function function; - int status; - memset( &env, 0, sizeof(KernelEnv)); - status = hb_get_kernel_env_and_func( kernel_name, &env, &function ); - strcpy( env.kernel_name, kernel_name ); - if( status == 1 ) - { - return(function( userdata, &env )); - } - - return(0); -} - -/** - * hb_init_opencl_run_env - * @param argc - - * @param argv - - * @param build_option - - */ -int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ) -{ - int status = 0; - if( MAX_CLKERNEL_NUM <= 0 ) - { - return 1; - } - - if((argc > MAX_CLFILE_NUM) || (argc<0)) - { - return 1; - } - - if( !isInited ) - { - hb_regist_opencl_kernel(); - - /*initialize devices, context, comand_queue*/ - status = hb_init_opencl_env( &gpu_env ); - if( status ) - return(1); - - /*initialize program, kernel_name, kernel_count*/ - status = hb_compile_kernel_file("hb-opencl-kernels.cl", - &gpu_env, 0, build_option); - - if( status == 0 || gpu_env.kernel_count == 0 ) - { - return(1); + + return(1); +} + + +/** + * hb_get_kernel_env_and_func + * @param kernel_name - + * @param env - + * @param function - + */ +int hb_get_kernel_env_and_func( const char *kernel_name, + KernelEnv *env, + cl_kernel_function *function ) +{ + int i; + for( i = 0; i < gpu_env.kernel_count; i++ ) + { + if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 ) + { + env->context = gpu_env.context; + env->command_queue = gpu_env.command_queue; + env->program = gpu_env.programs[0]; + env->kernel = gpu_env.kernels[i]; + env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0; + *function = gpu_env.kernel_functions[i]; + return(1); + } + } + return(0); +} + +/** + * hb_get_kernel_env_and_func + * @param kernel_name - + * @param userdata - + */ +int hb_run_kernel( const char *kernel_name, void **userdata ) +{ + KernelEnv env; + cl_kernel_function function; + int status; + memset( &env, 0, sizeof(KernelEnv)); + status = hb_get_kernel_env_and_func( kernel_name, &env, &function ); + strcpy( env.kernel_name, kernel_name ); + if( status == 1 ) + { + return(function( userdata, &env )); + } + + return(0); +} + +/** + * hb_init_opencl_run_env + * @param argc - + * @param argv - + * @param build_option - + */ +int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ) +{ + int status = 0; + if( MAX_CLKERNEL_NUM <= 0 ) + { + return 1; + } + + if((argc > MAX_CLFILE_NUM) || (argc<0)) + { + return 1; + } + + if( !isInited ) + { + hb_regist_opencl_kernel(); + + /*initialize devices, context, comand_queue*/ + status = hb_init_opencl_env( &gpu_env ); + if( status ) + return(1); + + /*initialize program, kernel_name, kernel_count*/ + status = hb_compile_kernel_file("hb-opencl-kernels.cl", + &gpu_env, 0, build_option); + + if( status == 0 || gpu_env.kernel_count == 0 ) + { + return(1); } @@ -1051,122 +1051,122 @@ int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ) isInited = 1; } - return(0); -} - -/** - * hb_release_opencl_run_env - */ -int hb_release_opencl_run_env() -{ - return hb_release_opencl_env( &gpu_env ); -} - -/** - * hb_opencl_stats - */ -int hb_opencl_stats() -{ - return isInited; -} - -/** - * hb_get_opencl_env - */ -int hb_get_opencl_env() -{ - int i = 0; - cl_int status; - cl_uint numDevices; - cl_device_id *devices; - - /*initialize devices, context, comand_queue*/ - status = hb_init_opencl_env( &gpu_env ); - if( status ) - return(1); - status = clGetContextInfo( gpu_env.context, - CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), - &numDevices, - NULL ); - if( status != CL_SUCCESS ) - return 0; - - devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); - if( devices == NULL ) - return 0; - - /* grab the handles to all of the devices in the context. */ - status = clGetContextInfo( gpu_env.context, - CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, - NULL ); - - for (i = 0; i < numDevices; i++) - { - if (devices[i] != NULL) - { - char deviceVendor[100], deviceName[1024], driverVersion[1024]; - clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(deviceVendor), - deviceVendor, NULL); - clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(deviceName), - deviceName, NULL); - clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(driverVersion), - driverVersion, NULL); - hb_log("hb_get_opencl_env: GPU #%d, Device Vendor: %s", i + 1, deviceVendor); - hb_log("hb_get_opencl_env: GPU #%d, Device Name: %s", i + 1, deviceName); - hb_log("hb_get_opencl_env: GPU #%d, Driver Version: %s", i + 1, driverVersion); - } - } - - if( devices != NULL ) - { - free( devices ); - devices = NULL; - } - - return status; -} - -/** - * hb_create_buffer - * @param cl_inBuf - - * @param flags - - * @param size - - */ -int hb_create_buffer( cl_mem *cl_Buf, int flags, int size ) -{ - int status; - *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status ); - - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: clCreateBuffer error '%d'", status ); - return 0; - } - - return 1; -} - - -/** - * hb_read_opencl_buffer - * @param cl_inBuf - - * @param outbuf - - * @param size - - */ -int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size ) -{ - int status; - - status = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0 ); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status ); - return 0; - } - + return(0); +} + +/** + * hb_release_opencl_run_env + */ +int hb_release_opencl_run_env() +{ + return hb_release_opencl_env( &gpu_env ); +} + +/** + * hb_opencl_stats + */ +int hb_opencl_stats() +{ + return isInited; +} + +/** + * hb_get_opencl_env + */ +int hb_get_opencl_env() +{ + int i = 0; + cl_int status; + cl_uint numDevices; + cl_device_id *devices; + + /*initialize devices, context, comand_queue*/ + status = hb_init_opencl_env( &gpu_env ); + if( status ) + return(1); + status = clGetContextInfo( gpu_env.context, + CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), + &numDevices, + NULL ); + if( status != CL_SUCCESS ) + return 0; + + devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); + if( devices == NULL ) + return 0; + + /* grab the handles to all of the devices in the context. */ + status = clGetContextInfo( gpu_env.context, + CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + devices, + NULL ); + + for (i = 0; i < numDevices; i++) + { + if (devices[i] != NULL) + { + char deviceVendor[100], deviceName[1024], driverVersion[1024]; + clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(deviceVendor), + deviceVendor, NULL); + clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(deviceName), + deviceName, NULL); + clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(driverVersion), + driverVersion, NULL); + hb_log("hb_get_opencl_env: GPU #%d, Device Vendor: %s", i + 1, deviceVendor); + hb_log("hb_get_opencl_env: GPU #%d, Device Name: %s", i + 1, deviceName); + hb_log("hb_get_opencl_env: GPU #%d, Driver Version: %s", i + 1, driverVersion); + } + } + + if( devices != NULL ) + { + free( devices ); + devices = NULL; + } + + return status; +} + +/** + * hb_create_buffer + * @param cl_inBuf - + * @param flags - + * @param size - + */ +int hb_create_buffer( cl_mem *cl_Buf, int flags, int size ) +{ + int status; + *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status ); + + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: clCreateBuffer error '%d'", status ); + return 0; + } + + return 1; +} + + +/** + * hb_read_opencl_buffer + * @param cl_inBuf - + * @param outbuf - + * @param size - + */ +int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size ) +{ + int status; + + status = clEnqueueReadBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0 ); + if( status != CL_SUCCESS ) + { + hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status ); + return 0; + } + return 1; } @@ -1207,55 +1207,55 @@ int hb_use_buffers() int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb) { int status = clEnqueueCopyBuffer(gpu_env.command_queue, - src_buffer, - dst_buffer, - src_offset, dst_offset, cb, - 0, 0, 0); - if( status != CL_SUCCESS ) - { - av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status ); - return 0; - } - return 1; -} - -int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height) -{ - - int chrH = -(-height >> 1); - unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2)); - if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height)) - { - memcpy(Ybuf,temp,linesize0 * height); - memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH); - memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH); - - } - av_free(temp); - - return 1; -} - -int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset) -{ - int status; - void *mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * (linesize0 + linesize1)*height + offset, 0, NULL, NULL, NULL ); - uint8_t *temp = (uint8_t *)mapped; - temp += offset; - memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height); - memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2); - memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2); - clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL ); - return 1; -} - -cl_command_queue hb_get_command_queue() -{ - return gpu_env.command_queue; -} - -cl_context hb_get_context() -{ - return gpu_env.context; -} -#endif + src_buffer, + dst_buffer, + src_offset, dst_offset, cb, + 0, 0, 0); + if( status != CL_SUCCESS ) + { + av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status ); + return 0; + } + return 1; +} + +int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height) +{ + + int chrH = -(-height >> 1); + unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2)); + if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height)) + { + memcpy(Ybuf,temp,linesize0 * height); + memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH); + memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH); + + } + av_free(temp); + + return 1; +} + +int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset) +{ + int status; + void *mapped = clEnqueueMapBuffer( gpu_env.command_queue, cl_inBuf, CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * (linesize0 + linesize1)*height + offset, 0, NULL, NULL, NULL ); + uint8_t *temp = (uint8_t *)mapped; + temp += offset; + memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height); + memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2); + memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2); + clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL ); + return 1; +} + +cl_command_queue hb_get_command_queue() +{ + return gpu_env.command_queue; +} + +cl_context hb_get_context() +{ + return gpu_env.context; +} +#endif diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h index 8436c3a9c..c7606afc0 100644 --- a/libhb/openclwrapper.h +++ b/libhb/openclwrapper.h @@ -1,72 +1,72 @@ -/* openclwrapper.h - - Copyright (c) 2003-2012 HandBrake Team - This file is part of the HandBrake source code - Homepage: . - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao - Li Cao - - - */ -#ifndef __OPENCL_WRAPPER_H -#define __OPENCL_WRAPPER_H -#ifdef USE_OPENCL -#include "common.h" - -//support AMD opencl -#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E -#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) - -typedef struct _KernelEnv -{ - cl_context context; - cl_command_queue command_queue; - cl_program program; - cl_kernel kernel; - char kernel_name[150]; - int isAMD; -}KernelEnv; - -typedef struct _OpenCLEnv -{ - cl_platform_id platform; - cl_context context; - cl_device_id devices; - cl_command_queue command_queue; -}OpenCLEnv; - - -//user defined, this is function wrapper which is used to set the input parameters , -//luanch kernel and copy data from GPU to CPU or CPU to GPU. -typedef int (*cl_kernel_function)( void **userdata, KernelEnv *kenv ); - -// registe a wapper for running the kernel specified by the kernel name -int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ); - -// run kernel , user call this function to luanch kernel. -// kernel_name: this kernel name is used to find the kernel in opencl runtime environment -// userdata: this userdata is the all parameters for running the kernel specified by kernel name -int hb_run_kernel( const char *kernel_name, void **userdata ); - -// init the run time environment , this function must be called befor calling any function related to opencl -// the argc must be set zero , argv must be set NULL, build_option is the options for build the kernel. -int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ); - -//relase all resource about the opencl , this function must be called after calling any functions related to opencl -int hb_release_opencl_run_env(); - -// get the opencl status , 0: not init ; 1, inited; this function is used the check whether or not the opencl run time has been created -int hb_opencl_stats(); - -// update opencl run time environments , such as commandqueue , platforme, context. program -int hb_init_opencl_attr( OpenCLEnv * env ); - -// create kernel object by a kernel name on the specified opencl run time indicated by env parameter -int hb_create_kernel( char * kernelname, KernelEnv * env ); - +/* openclwrapper.h + + Copyright (c) 2003-2012 HandBrake Team + This file is part of the HandBrake source code + Homepage: . + It may be used under the terms of the GNU General Public License v2. + For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html + + Authors: Peng Gao + Li Cao + + + */ +#ifndef __OPENCL_WRAPPER_H +#define __OPENCL_WRAPPER_H +#ifdef USE_OPENCL +#include "common.h" + +//support AMD opencl +#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +typedef struct _KernelEnv +{ + cl_context context; + cl_command_queue command_queue; + cl_program program; + cl_kernel kernel; + char kernel_name[150]; + int isAMD; +}KernelEnv; + +typedef struct _OpenCLEnv +{ + cl_platform_id platform; + cl_context context; + cl_device_id devices; + cl_command_queue command_queue; +}OpenCLEnv; + + +//user defined, this is function wrapper which is used to set the input parameters , +//luanch kernel and copy data from GPU to CPU or CPU to GPU. +typedef int (*cl_kernel_function)( void **userdata, KernelEnv *kenv ); + +// registe a wapper for running the kernel specified by the kernel name +int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ); + +// run kernel , user call this function to luanch kernel. +// kernel_name: this kernel name is used to find the kernel in opencl runtime environment +// userdata: this userdata is the all parameters for running the kernel specified by kernel name +int hb_run_kernel( const char *kernel_name, void **userdata ); + +// init the run time environment , this function must be called befor calling any function related to opencl +// the argc must be set zero , argv must be set NULL, build_option is the options for build the kernel. +int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ); + +//relase all resource about the opencl , this function must be called after calling any functions related to opencl +int hb_release_opencl_run_env(); + +// get the opencl status , 0: not init ; 1, inited; this function is used the check whether or not the opencl run time has been created +int hb_opencl_stats(); + +// update opencl run time environments , such as commandqueue , platforme, context. program +int hb_init_opencl_attr( OpenCLEnv * env ); + +// create kernel object by a kernel name on the specified opencl run time indicated by env parameter +int hb_create_kernel( char * kernelname, KernelEnv * env ); + // release kernel object which is generated by calling the hb_create_kernel api int hb_release_kernel( KernelEnv * env );