From 449984e8b846cda8c18e7daec813dbbd83c6760e Mon Sep 17 00:00:00 2001 From: sr55 Date: Mon, 25 Feb 2013 21:16:26 +0000 Subject: [PATCH] OpenCL: 1. Tabs to Spaces. 2. Couple of Typos git-svn-id: svn://svn.handbrake.fr/HandBrake/branches/opencl@5270 b64f7644-9d1e-0410-96f1-a4d463321fa5 --- libhb/oclnv12toyuv.c | 7 +- libhb/oclscale.c | 4 +- libhb/openclkernels.h | 32 ++-- libhb/openclwrapper.c | 132 ++++++++--------- libhb/scale.c | 116 +++++++-------- libhb/scale.h | 2 +- libhb/scale_kernel.c | 330 +++++++++++++++++++++--------------------- libhb/vadxva2.c | 26 ++-- 8 files changed, 324 insertions(+), 325 deletions(-) diff --git a/libhb/oclnv12toyuv.c b/libhb/oclnv12toyuv.c index 6e7a60148..138a345b5 100644 --- a/libhb/oclnv12toyuv.c +++ b/libhb/oclnv12toyuv.c @@ -125,7 +125,7 @@ static int hb_nv12toyuv( void **userdata, KernelEnv *kenv ) if( kenv->isAMD ) { void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_nv12, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL ); - //memcpy( data, bufi, in_bytes ); + for ( i = 0; i < dxva2->height; i++ ) { memcpy( data+i*dxva2->width, bufi1+i*p, dxva2->width ); @@ -191,7 +191,7 @@ static int hb_nv12toyuv_reg_kernel( void ) int st = hb_register_kernel_wrapper( "nv12toyuv", hb_nv12toyuv ); if( !st ) { - hb_log( "register kernel[%s] faild", "nv12toyuv" ); + hb_log( "register kernel[%s] failed", "nv12toyuv" ); return -1; } return 0; @@ -201,7 +201,6 @@ static int hb_nv12toyuv_reg_kernel( void ) * bufi is input frame of nv12, w is input frame width, h is input frame height */ int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ) -//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 ) { void *userdata[7]; userdata[0] = (void*)w; @@ -215,7 +214,7 @@ int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxv return -1; if( hb_run_kernel( "nv12toyuv", userdata ) ) { - hb_log( "run kernel[nv12toyuv] faild" ); + hb_log( "run kernel[nv12toyuv] failed" ); return -1; } return 0; diff --git a/libhb/oclscale.c b/libhb/oclscale.c index 3cace8559..11f718449 100644 --- a/libhb/oclscale.c +++ b/libhb/oclscale.c @@ -273,7 +273,7 @@ int hb_ocl_scale( cl_mem in_buf, uint8_t *in_data, uint8_t *out_data, int in_w, int st = hb_register_kernel_wrapper( "frame_h_scale", hb_ocl_scale_func ); if( !st ) { - hb_log( "register kernel[%s] faild", "frame_h_scale" ); + hb_log( "register kernel[%s] failed", "frame_h_scale" ); return 0; } init_flag++; @@ -295,7 +295,7 @@ int hb_ocl_scale( cl_mem in_buf, uint8_t *in_data, uint8_t *out_data, int in_w, data[5] = (void*)out_h; data[6] = os; if( !hb_run_kernel( "frame_h_scale", data ) ) - hb_log( "run kernel[%s] faild", "frame_scale" ); + hb_log( "run kernel[%s] failed", "frame_scale" ); return 0; } #endif diff --git a/libhb/openclkernels.h b/libhb/openclkernels.h index 093f23a6b..c8399972b 100644 --- a/libhb/openclkernels.h +++ b/libhb/openclkernels.h @@ -326,16 +326,16 @@ char *kernel_src_vscalealldither = KERNEL( int srcStride, int srcChrStride) { - const unsigned char hb_dither_8x8_128[8][8] = { - { 36, 68, 60, 92, 34, 66, 58, 90, }, - { 100, 4, 124, 28, 98, 2, 122, 26, }, - { 52, 84, 44, 76, 50, 82, 42, 74, }, - { 116, 20, 108, 12, 114, 18, 106, 10, }, - { 32, 64, 56, 88, 38, 70, 62, 94, }, - { 96, 0, 120, 24, 102, 6, 126, 30, }, - { 48, 80, 40, 72, 54, 86, 46, 78, }, - { 112, 16, 104, 8, 118, 22, 110, 14, }, - }; + const unsigned char hb_dither_8x8_128[8][8] = { + { 36, 68, 60, 92, 34, 66, 58, 90, }, + { 100, 4, 124, 28, 98, 2, 122, 26, }, + { 52, 84, 44, 76, 50, 82, 42, 74, }, + { 116, 20, 108, 12, 114, 18, 106, 10, }, + { 32, 64, 56, 88, 38, 70, 62, 94, }, + { 96, 0, 120, 24, 102, 6, 126, 30, }, + { 48, 80, 40, 72, 54, 86, 46, 78, }, + { 112, 16, 104, 8, 118, 22, 110, 14, }, + }; int w = get_global_id(0); @@ -414,9 +414,9 @@ char *kernel_src_vscaleallnodither = KERNEL( int srcStride, int srcChrStride) { - const unsigned char hb_sws_pb_64[8] = { - 64, 64, 64, 64, 64, 64, 64, 64 - }; + const unsigned char hb_sws_pb_64[8] = { + 64, 64, 64, 64, 64, 64, 64, 64 + }; int w = get_global_id(0); int h = get_global_id(1); @@ -492,9 +492,9 @@ char *kernel_src_vscalefast = KERNEL( int srcStride, int srcChrStride) { - const unsigned char hb_sws_pb_64[8] = { - 64, 64, 64, 64, 64, 64, 64, 64 - }; + const unsigned char hb_sws_pb_64[8] = { + 64, 64, 64, 64, 64, 64, 64, 64 + }; int w = get_global_id(0); int h = get_global_id(1); diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c index b1d6c172d..4db1e8da7 100644 --- a/libhb/openclwrapper.c +++ b/libhb/openclwrapper.c @@ -119,24 +119,24 @@ int hb_confirm_gpu_type() pbuff, NULL); if (status) - continue; + continue; status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU , 0 , NULL , &numDevices); - cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id)); - status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); - for (j = 0; j < numDevices; j++) - { - char dbuff[100]; - status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); - if(!strcmp(dbuff, "Advanced Micro Devices, Inc.") || !strcmp(dbuff, "NVIDIA Corporation")) - { - return 0; - } - } + cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id)); + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); + for (j = 0; j < numDevices; j++) + { + char dbuff[100]; + status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); + if(!strcmp(dbuff, "Advanced Micro Devices, Inc.") || !strcmp(dbuff, "NVIDIA Corporation")) + { + return 0; + } + } if (status != CL_SUCCESS) continue; @@ -160,7 +160,7 @@ int hb_regist_opencl_kernel() ADD_KERNEL_CFG( 0, "frame_h_scale", NULL ) ADD_KERNEL_CFG( 1, "frame_v_scale", NULL ) ADD_KERNEL_CFG( 2, "nv12toyuv", NULL ) - ADD_KERNEL_CFG( 3, "scale_opencl", NULL ) + ADD_KERNEL_CFG( 3, "scale_opencl", NULL ) return 0; } @@ -221,15 +221,15 @@ int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** &numDevices, NULL ); if( status != CL_SUCCESS ) - { - hb_log( "Notice: Get context info failed" ); + { + hb_log( "Notice: Get context info failed" ); return 0; } devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); if( devices == NULL ) - { - hb_log( "Notice: No device found" ); + { + hb_log( "Notice: No device found" ); return 0; } @@ -304,14 +304,14 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ &numDevices, NULL ); if( status != CL_SUCCESS ) - { - hb_log( "Notice: Get program info failed, when generate binary file from kernel source" ); + { + hb_log( "Notice: Get program info failed, when generate binary file from kernel source" ); return 0; } devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); if( devices == NULL ) - { - hb_log( "Notice: No device found, when generate binary file from kernel source" ); + { + hb_log( "Notice: No device found, when generate binary file from kernel source" ); return 0; } /* grab the handles to all of the devices in the program. */ @@ -321,8 +321,8 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ devices, NULL ); if( status != CL_SUCCESS ) - { - hb_log( "Notice: Get program info failed, when generate binary file from kernel source" ); + { + hb_log( "Notice: Get program info failed, when generate binary file from kernel source" ); return 0; } /* figure out the sizes of each of the binaries. */ @@ -333,15 +333,15 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ sizeof(size_t) * numDevices, binarySizes, NULL ); if( status != CL_SUCCESS ) - { - hb_log( "Notice: Get program info failed, when generate binary file from kernel source" ); + { + hb_log( "Notice: Get program info failed, when generate binary file from kernel source" ); return 0; } /* copy over all of the generated binaries. */ binaries = (char**)malloc( sizeof(char *) * numDevices ); if( binaries == NULL ) - { - hb_log( "Notice: malloc for binaries failed, when generate binary file from kernel source" ); + { + hb_log( "Notice: malloc for binaries failed, when generate binary file from kernel source" ); return 0; } @@ -351,10 +351,10 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ { binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] ); if( binaries[i] == NULL ) - { - hb_log( "Notice: malloc for binary[%d] failed, when generate binary file from kernel source", i ); + { + hb_log( "Notice: malloc for binary[%d] failed, when generate binary file from kernel source", i ); return 0; - } + } } else { @@ -368,8 +368,8 @@ int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_ binaries, NULL ); if( status != CL_SUCCESS ) - { - hb_log( "Notice: Get program info failed, when generate binary file from kernel source" ); + { + hb_log( "Notice: Get program info failed, when generate binary file from kernel source" ); return 0; } /* dump out each binary into its own separate file. */ @@ -482,7 +482,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) status = clGetPlatformIDs( 0, NULL, &numPlatforms ); if( status != CL_SUCCESS ) { - hb_log( "Notice: OpenCL device platform not found." ); + hb_log( "Notice: OpenCL device platform not found." ); return(1); } gpu_info->platform = NULL; @@ -498,7 +498,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) if( status != CL_SUCCESS ) { - hb_log( "Notice: Specific opencl platform not found." ); + hb_log( "Notice: Specific opencl platform not found." ); return(1); } @@ -528,9 +528,9 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) &numDevices ); if( status != CL_SUCCESS ) - { + { continue; - } + } if( numDevices ) break; @@ -540,7 +540,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) } if( NULL == gpu_info->platform ) { - hb_log( "Notice: No OpenCL-compatible GPU found." ); + hb_log( "Notice: No OpenCL-compatible GPU found." ); return(1); } if( status != CL_SUCCESS ) @@ -573,7 +573,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) } if((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS)) { - hb_log( "Notice: Unable to create opencl context." ); + hb_log( "Notice: Unable to create opencl context." ); return(1); } /* Detect OpenCL devices. */ @@ -582,7 +582,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) 0, NULL, &length ); if((status != CL_SUCCESS) || (length == 0)) { - hb_log( "Notice: Unable to get the list of devices in context." ); + hb_log( "Notice: Unable to get the list of devices in context." ); return(1); } /* Now allocate memory for device list based on the size we got earlier */ @@ -596,7 +596,7 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) gpu_info->devices, NULL ); if( status != CL_SUCCESS ) { - hb_log( "Notice: Unable to get the device list data in context." ); + hb_log( "Notice: Unable to get the device list data in context." ); return(1); } @@ -605,10 +605,10 @@ int hb_init_opencl_env( GPUEnv *gpu_info ) gpu_info->devices[0], 0, &status ); if( status != CL_SUCCESS ) - { - hb_log( "Notice: Unable to create opencl command queue." ); + { + hb_log( "Notice: Unable to create opencl command queue." ); return(1); - } + } } if( clGetCommandQueueInfo( gpu_info->command_queue, @@ -710,7 +710,7 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, if( status == 0 ) return(0); #else - int kernel_src_size = strlen( kernel_src_hscale ) + strlen( kernel_src_vscale ) + strlen( kernel_src_nvtoyuv ) + strlen( kernel_src_hscaleall ) + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast ); + int kernel_src_size = strlen( kernel_src_hscale ) + strlen( kernel_src_vscale ) + strlen( kernel_src_nvtoyuv ) + strlen( kernel_src_hscaleall ) + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast ); source_str = (char*)malloc( kernel_src_size + 2 ); strcpy( source_str, kernel_src_hscale ); strcat( source_str, kernel_src_vscale ); @@ -735,9 +735,9 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, NULL ); if( status != CL_SUCCESS ) { - hb_log( "Notice: Unable to get the number of devices in context." ); + hb_log( "Notice: Unable to get the number of devices in context." ); return 0; - } + } devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); if( devices == NULL ) @@ -787,9 +787,9 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, gpu_info->context, 1, &source, source_size, &status ); } if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){ - hb_log( "Notice: Unable to get list of devices in context." ); + hb_log( "Notice: Unable to get list of devices in context." ); return(0); - } + } /* create a cl program executable for all the devices specified */ if( !gpu_info->isUserCreated ) @@ -812,7 +812,7 @@ int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, if( status != CL_SUCCESS ) { - hb_log( "Notice: Unable to get GPU build information." ); + hb_log( "Notice: Unable to get GPU build information." ); return(0); } buildLog = (char*)malloc( length ); @@ -937,7 +937,7 @@ int hb_get_opencl_env() cl_int status; size_t numDevices; cl_device_id *devices; - /*initialize devices, context, comand_queue*/ + /*initialize devices, context, comand_queue*/ status = hb_init_opencl_env( &gpu_env ); if( status ) return(1); @@ -970,7 +970,7 @@ int hb_get_opencl_env() deviceName, NULL ); hb_log( "GPU Device Name: %s", deviceName ); - char driverVersion[1024]; + char driverVersion[1024]; status = clGetDeviceInfo( devices[i], CL_DRIVER_VERSION, sizeof(deviceName), @@ -990,27 +990,27 @@ int hb_get_opencl_env() int hb_create_buffer(cl_mem *cl_Buf,int flags,int size) { - int status; - *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status ); - + int status; + *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status ); + if( status != CL_SUCCESS ) - { - hb_log("clCreateBuffer error '%d'",status); - return 0; - } - return 1; + { + hb_log("clCreateBuffer error '%d'",status); + return 0; + } + return 1; } int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size) { - int status; + int status; - status = clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0); + status = clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0); if( status != CL_SUCCESS ) - { - hb_log("av_read_opencl_buffer error '%d'",status); - return 0; - } - return 1; + { + hb_log("av_read_opencl_buffer error '%d'",status); + return 0; + } + return 1; } #endif diff --git a/libhb/scale.c b/libhb/scale.c index 8fe7dc70e..5c92105fc 100644 --- a/libhb/scale.c +++ b/libhb/scale.c @@ -90,20 +90,20 @@ static int initScaleFilter(int16_t **outFilter, int32_t **filterPos, const int64_t fone = 1LL << 54; int ret = -1; - *filterPos = (int32_t *)av_malloc((dstW + 3) * sizeof(**filterPos)); - if (*filterPos == NULL && ((dstW + 3) * sizeof(**filterPos)) != 0) { - hb_log("Cannot allocate memory."); + *filterPos = (int32_t *)av_malloc((dstW + 3) * sizeof(**filterPos)); + if (*filterPos == NULL && ((dstW + 3) * sizeof(**filterPos)) != 0) { + hb_log("Cannot allocate memory."); goto fail; - } + } if (FFABS(xInc - 0x10000) < 10) { // unscaled int i; filterSize = 1; - filter = (int64_t *)av_mallocz(dstW * sizeof(*filter) * filterSize); - if (filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0) { + filter = (int64_t *)av_mallocz(dstW * sizeof(*filter) * filterSize); + if (filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0) { hb_log("Cannot allocate memory."); goto fail; - } + } for (i = 0; i < dstW; i++) { @@ -114,11 +114,11 @@ static int initScaleFilter(int16_t **outFilter, int32_t **filterPos, int i; int64_t xDstInSrc; filterSize = 1; - filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize); - if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){ - hb_log("Cannot allocate memory."); + filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize); + if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){ + hb_log("Cannot allocate memory."); goto fail; - } + } xDstInSrc = xInc / 2 - 0x8000; for (i = 0; i < dstW; i++) { @@ -133,11 +133,11 @@ static int initScaleFilter(int16_t **outFilter, int32_t **filterPos, int i; int64_t xDstInSrc; filterSize = 2; - filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize); - if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){ - hb_log("Cannot allocate memory."); + filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize); + if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){ + hb_log("Cannot allocate memory."); goto fail; - } + } xDstInSrc = xInc / 2 - 0x8000; for (i = 0; i < dstW; i++) { @@ -190,10 +190,10 @@ static int initScaleFilter(int16_t **outFilter, int32_t **filterPos, filterSize = FFMAX(filterSize, 1); filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize); - if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){ - hb_log("Cannot allocate memory."); + if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){ + hb_log("Cannot allocate memory."); goto fail; - } + } xDstInSrc = xInc - 0x10000; for (i = 0; i < dstW; i++) { @@ -298,12 +298,12 @@ static int initScaleFilter(int16_t **outFilter, int32_t **filterPos, if (dstFilter) filter2Size += dstFilter->length - 1; assert(filter2Size > 0); - filter2 = (int64_t *)av_mallocz(filter2Size * dstW * sizeof(*filter2)); - if(filter2 == NULL && (filter2Size * dstW * sizeof(*filter2)) != 0) - { - hb_log("Can't alloc memory."); - goto fail; - } + filter2 = (int64_t *)av_mallocz(filter2Size * dstW * sizeof(*filter2)); + if(filter2 == NULL && (filter2Size * dstW * sizeof(*filter2)) != 0) + { + hb_log("Can't alloc memory."); + goto fail; + } for (i = 0; i < dstW; i++) { int j, k; @@ -372,7 +372,7 @@ static int initScaleFilter(int16_t **outFilter, int32_t **filterPos, *outFilterSize = filterSize; if (flags & SWS_PRINT_INFO) - hb_log("SwScaler: reducing / aligning filtersize %d -> %d",filter2Size,filterSize); + hb_log("SwScaler: reducing / aligning filtersize %d -> %d",filter2Size,filterSize); for (i = 0; i < dstW; i++) { int j; @@ -416,12 +416,12 @@ static int initScaleFilter(int16_t **outFilter, int32_t **filterPos, // Note the +1 is for the MMX scaler which reads over the end // FF_ALLOCZ_OR_GOTO(NULL, *outFilter, // *outFilterSize * (dstW + 3) * sizeof(int16_t), fail); - *outFilter = (int16_t *)av_mallocz(*outFilterSize * (dstW + 3) * sizeof(int16_t)); - if( *outFilter == NULL && (*outFilterSize * (dstW + 3) * sizeof(int16_t)) != 0) - { - hb_log("Can't alloc memory"); - goto fail; - } + *outFilter = (int16_t *)av_mallocz(*outFilterSize * (dstW + 3) * sizeof(int16_t)); + if( *outFilter == NULL && (*outFilterSize * (dstW + 3) * sizeof(int16_t)) != 0) + { + hb_log("Can't alloc memory"); + goto fail; + } for (i = 0; i < dstW; i++) { int j; @@ -836,17 +836,17 @@ int scale_init_context(ScaleContext *c, ScaleFilter *srcFilter, ScaleFilter *dst enum PixelFormat srcFormat = c->srcFormat; enum PixelFormat dstFormat = c->dstFormat; - cpu_flags = 0; + cpu_flags = 0; flags = c->flags; if(srcFormat != c->srcFormat || dstFormat != c->dstFormat){ - hb_log("deprecated pixel format used, make sure you did set range correctly."); + hb_log("deprecated pixel format used, make sure you did set range correctly."); c->srcFormat = srcFormat; c->dstFormat = dstFormat; } if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1) { - hb_log("%dx%d -> %dx%d is invalid scaling dimension.",srcW,srcH,dstW,dstH); + hb_log("%dx%d -> %dx%d is invalid scaling dimension.",srcW,srcH,dstW,dstH); return -1; } @@ -874,7 +874,7 @@ int scale_init_context(ScaleContext *c, ScaleFilter *srcFilter, ScaleFilter *dst c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW; c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH; - const int filterAlign = 1; + const int filterAlign = 1; if (initScaleFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, @@ -882,7 +882,7 @@ int scale_init_context(ScaleContext *c, ScaleFilter *srcFilter, ScaleFilter *dst (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumH, dstFilter->lumH, c->param) < 0) - goto fail; + goto fail; if (initScaleFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, @@ -897,7 +897,7 @@ int scale_init_context(ScaleContext *c, ScaleFilter *srcFilter, ScaleFilter *dst (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumV, dstFilter->lumV, c->param) < 0) - goto fail; + goto fail; if (initScaleFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, c->chrSrcH, c->chrDstH, @@ -916,7 +916,7 @@ ScaleContext *scale_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int flags, ScaleFilter *srcFilter, ScaleFilter *dstFilter, const double *param) { - ScaleContext *sc = (ScaleContext*)malloc(sizeof(ScaleContext)); + ScaleContext *sc = (ScaleContext*)malloc(sizeof(ScaleContext)); sc->flags = flags; sc->srcW = srcW; sc->srcH = srcH; @@ -926,8 +926,8 @@ ScaleContext *scale_getContext(int srcW, int srcH, enum PixelFormat srcFormat, sc->dstRange = handle_scale_jpeg(&dstFormat); sc->srcFormat = srcFormat; sc->dstFormat = dstFormat; - sc->hyscale_fast = 0; - sc->hcscale_fast = 0; + sc->hyscale_fast = 0; + sc->hcscale_fast = 0; if (param) { sc->param[0] = param[0]; @@ -948,7 +948,7 @@ int scale_opencl(ScaleContext *c, int *srcStride, int *dstStride) { - int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); + int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); av_scale_frame(c,cl_outbuf,cl_inbuf,srcStride,dstStride,&should_dither); @@ -957,34 +957,34 @@ int scale_opencl(ScaleContext *c, void scale_init( int width, int height, int dstwidth, int dstheight ) { - int srcW = width; - int srcH = height; - int dstW = dstwidth; - int dstH = dstheight; - enum PixelFormat inputfmt = AV_PIX_FMT_YUV420P; - enum PixelFormat outputfmt = AV_PIX_FMT_YUV420P; - int flags = SWS_BILINEAR; - - g_scale = scale_getContext(srcW,srcH,inputfmt,dstW,dstH,outputfmt,flags,NULL,NULL,NULL); + int srcW = width; + int srcH = height; + int dstW = dstwidth; + int dstH = dstheight; + enum PixelFormat inputfmt = AV_PIX_FMT_YUV420P; + enum PixelFormat outputfmt = AV_PIX_FMT_YUV420P; + int flags = SWS_BILINEAR; + + g_scale = scale_getContext(srcW,srcH,inputfmt,dstW,dstH,outputfmt,flags,NULL,NULL,NULL); } void scale_release() { - sws_freeContext( g_scale ); + sws_freeContext( g_scale ); } #ifdef USE_OPENCL int scale_run( cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height ) { - g_scale->cl_src = inbuf; - g_scale->cl_dst = outbuf; + g_scale->cl_src = inbuf; + g_scale->cl_dst = outbuf; - int src_stride[4] = { linesizey, linesizeuv, linesizeuv, 0 }; - int dst_stride[4] = { g_scale->dstW, g_scale->chrDstW, g_scale->chrDstW, 0 }; - int ret = -1; + int src_stride[4] = { linesizey, linesizeuv, linesizeuv, 0 }; + int dst_stride[4] = { g_scale->dstW, g_scale->chrDstW, g_scale->chrDstW, 0 }; + int ret = -1; - ret = scale_opencl( g_scale, inbuf, outbuf, src_stride, dst_stride ); + ret = scale_opencl( g_scale, inbuf, outbuf, src_stride, dst_stride ); - return ret; + return ret; } #endif #endif diff --git a/libhb/scale.h b/libhb/scale.h index a28ccdf38..f80ebb15b 100644 --- a/libhb/scale.h +++ b/libhb/scale.h @@ -94,7 +94,7 @@ typedef struct ScaleContext { int dstBpc, srcBpc; int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image. int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image. - int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image. + int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image. int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image. int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user. int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top). diff --git a/libhb/scale_kernel.c b/libhb/scale_kernel.c index d55d5b121..27d444116 100644 --- a/libhb/scale_kernel.c +++ b/libhb/scale_kernel.c @@ -9,9 +9,9 @@ #include "openclwrapper.h" #define OCLCHECK( method, ...) \ - status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \ - hb_log(" error %s %d",# method, status); assert(0); return status; } - + status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \ + hb_log(" error %s %d",# method, status); assert(0); return status; } + #define CREATEBUF( out, flags, size, ptr)\ out = clCreateBuffer( kenv->context, (flags), (size), ptr, &status );\ if( status != CL_SUCCESS ) { hb_log( "clCreateBuffer faild %d", status ); return -1; } @@ -23,177 +23,177 @@ /****************************************************************************************************************************/ static int CreateCLBuffer( ScaleContext *c, KernelEnv *kenv ) { - cl_int status; - - if(!c->hyscale_fast || !c->hcscale_fast) - { - CREATEBUF(c->cl_hLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short),c->hLumFilter); - CREATEBUF(c->cl_hLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int),c->hLumFilterPos); - CREATEBUF(c->cl_hChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short),c->hChrFilter); - CREATEBUF(c->cl_hChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int),c->hChrFilterPos); - } - if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 ) - { - CREATEBUF(c->cl_vLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*c->vLumFilterSize*sizeof(cl_short),c->vLumFilter); - CREATEBUF(c->cl_vChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*c->vChrFilterSize*sizeof(cl_short),c->vChrFilter); - } - CREATEBUF(c->cl_vLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*sizeof(cl_int),c->vLumFilterPos); - CREATEBUF(c->cl_vChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*sizeof(cl_int),c->vChrFilterPos); - - return 1; + cl_int status; + + if(!c->hyscale_fast || !c->hcscale_fast) + { + CREATEBUF(c->cl_hLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short),c->hLumFilter); + CREATEBUF(c->cl_hLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int),c->hLumFilterPos); + CREATEBUF(c->cl_hChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short),c->hChrFilter); + CREATEBUF(c->cl_hChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int),c->hChrFilterPos); + } + if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 ) + { + CREATEBUF(c->cl_vLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*c->vLumFilterSize*sizeof(cl_short),c->vLumFilter); + CREATEBUF(c->cl_vChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*c->vChrFilterSize*sizeof(cl_short),c->vChrFilter); + } + CREATEBUF(c->cl_vLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*sizeof(cl_int),c->vLumFilterPos); + CREATEBUF(c->cl_vChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*sizeof(cl_int),c->vChrFilterPos); + + return 1; } int av_scale_frame_func( void **userdata, KernelEnv *kenv ) { - ScaleContext *c = (ScaleContext *)userdata[0]; - - c->cl_src = (cl_mem)userdata[2]; - c->cl_dst = (cl_mem)userdata[1]; - - /*frame size*/ - int *tmp = (int *)userdata[3]; - int srcStride = tmp[0]; - int srcChrStride = tmp[1]; - int srcW = c->srcW; - int srcH = c->srcH; - - tmp = (int *)userdata[4]; - int dstStride = tmp[0]; - int dstChrStride = tmp[1]; - int dstW = c->dstW; - int dstH = c->dstH; - - /* local variable */ - cl_int status; - size_t global_work_size[2]; - - int intermediaSize; - - int st = CreateCLBuffer(c,kenv); + ScaleContext *c = (ScaleContext *)userdata[0]; + + c->cl_src = (cl_mem)userdata[2]; + c->cl_dst = (cl_mem)userdata[1]; + + /*frame size*/ + int *tmp = (int *)userdata[3]; + int srcStride = tmp[0]; + int srcChrStride = tmp[1]; + int srcW = c->srcW; + int srcH = c->srcH; + + tmp = (int *)userdata[4]; + int dstStride = tmp[0]; + int dstChrStride = tmp[1]; + int dstW = c->dstW; + int dstH = c->dstH; + + /* local variable */ + cl_int status; + size_t global_work_size[2]; + + int intermediaSize; + + int st = CreateCLBuffer(c,kenv); if( !st ) { hb_log( "CreateBuffer[%s] faild %d", "scale_opencl",st ); return -1; } - intermediaSize = dstStride * srcH + dstChrStride * srcH; - - CREATEBUF(c->cl_intermediaBuf,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,intermediaSize*sizeof(cl_short),NULL); - - static int init_chr_status = 0; - static cl_kernel chr_kernel; - - if(init_chr_status == 0){ - - if(!(c->flags & 1)) - { - chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL ); - //Set the Kernel Argument; - OCLCHECK(clSetKernelArg,chr_kernel,2,sizeof(cl_mem),(void*)&c->cl_hLumFilter); - OCLCHECK(clSetKernelArg,chr_kernel,3,sizeof(cl_mem),(void*)&c->cl_hLumFilterPos); - OCLCHECK(clSetKernelArg,chr_kernel,4,sizeof(int),(void*)&c->hLumFilterSize); - OCLCHECK(clSetKernelArg,chr_kernel,5,sizeof(cl_mem),(void*)&c->cl_hChrFilter); - OCLCHECK(clSetKernelArg,chr_kernel,6,sizeof(cl_mem),(void*)&c->cl_hChrFilterPos); - OCLCHECK(clSetKernelArg,chr_kernel,7,sizeof(int),(void*)&c->hChrFilterSize); - } - - /*Set the arguments*/ - OCLCHECK(clSetKernelArg,chr_kernel,8,sizeof(dstW),(void*)&dstW); - OCLCHECK(clSetKernelArg,chr_kernel,9,sizeof(srcH),(void*)&srcH); - OCLCHECK(clSetKernelArg,chr_kernel,10,sizeof(srcW),(void*)&srcW); - OCLCHECK(clSetKernelArg,chr_kernel,11,sizeof(srcH),(void*)&srcH); - OCLCHECK(clSetKernelArg,chr_kernel,12,sizeof(dstStride),(void*)&dstStride); - OCLCHECK(clSetKernelArg,chr_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride); - OCLCHECK(clSetKernelArg,chr_kernel,14,sizeof(srcStride),(void*)&srcStride); - OCLCHECK(clSetKernelArg,chr_kernel,15,sizeof(srcChrStride),(void*)&srcChrStride); - init_chr_status = 1; - } - - kenv->kernel = chr_kernel; - OCLCHECK(clSetKernelArg,chr_kernel,0,sizeof(cl_mem),(void*)&c->cl_intermediaBuf); - OCLCHECK(clSetKernelArg,chr_kernel,1,sizeof(cl_mem),(void*)&c->cl_src); - /*Run the Kernel*/ - global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256; - global_work_size[1] = c->chrSrcH; - - OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - - static int init_lum_status = 0; - static cl_kernel lum_kernel; - - if( init_lum_status == 0 ){ - //Vertical: - /*Create Kernel*/ - if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 ) - lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL ); - else - lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL ); - - if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 ) - { - OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilter); - OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(int),(void*)&c->vLumFilterSize); - OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(cl_mem),(void*)&c->cl_vChrFilter); - OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(int),(void*)&c->vChrFilterSize); - OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos); - OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos); - OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstW),(void*)&dstW); - OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstH),(void*)&dstH); - OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(srcW),(void*)&srcW); - OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(srcH),(void*)&srcH); - OCLCHECK(clSetKernelArg,lum_kernel,12,sizeof(dstStride),(void*)&dstStride); - OCLCHECK(clSetKernelArg,lum_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride); - OCLCHECK(clSetKernelArg,lum_kernel,14,sizeof(dstStride),(void*)&dstStride); - OCLCHECK(clSetKernelArg,lum_kernel,15,sizeof(dstChrStride),(void*)&dstChrStride); - }else{ - - OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos); - OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos); - OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(dstW),(void*)&dstW); - OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(dstH),(void*)&dstH); - OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(srcW),(void*)&srcW); - OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(srcH),(void*)&srcH); - OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstStride),(void*)&dstStride); - OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstChrStride),(void*)&dstChrStride); - OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(dstStride),(void*)&dstStride); - OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(dstChrStride),(void*)&dstChrStride); - } - init_lum_status = 1; - } - - kenv->kernel = lum_kernel; - OCLCHECK(clSetKernelArg,kenv->kernel,0,sizeof(cl_mem),(void*)&c->cl_dst); - OCLCHECK(clSetKernelArg,kenv->kernel,1,sizeof(cl_mem),(void*)&c->cl_intermediaBuf); - - /*Run the Kernel*/ - global_work_size[0] = c->chrDstW; - global_work_size[1] = c->chrDstH; - - OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue,kenv->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL); - - clReleaseMemObject( c->cl_intermediaBuf ); - - return 1; + intermediaSize = dstStride * srcH + dstChrStride * srcH; + + CREATEBUF(c->cl_intermediaBuf,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,intermediaSize*sizeof(cl_short),NULL); + + static int init_chr_status = 0; + static cl_kernel chr_kernel; + + if(init_chr_status == 0){ + + if(!(c->flags & 1)) + { + chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL ); + //Set the Kernel Argument; + OCLCHECK(clSetKernelArg,chr_kernel,2,sizeof(cl_mem),(void*)&c->cl_hLumFilter); + OCLCHECK(clSetKernelArg,chr_kernel,3,sizeof(cl_mem),(void*)&c->cl_hLumFilterPos); + OCLCHECK(clSetKernelArg,chr_kernel,4,sizeof(int),(void*)&c->hLumFilterSize); + OCLCHECK(clSetKernelArg,chr_kernel,5,sizeof(cl_mem),(void*)&c->cl_hChrFilter); + OCLCHECK(clSetKernelArg,chr_kernel,6,sizeof(cl_mem),(void*)&c->cl_hChrFilterPos); + OCLCHECK(clSetKernelArg,chr_kernel,7,sizeof(int),(void*)&c->hChrFilterSize); + } + + /*Set the arguments*/ + OCLCHECK(clSetKernelArg,chr_kernel,8,sizeof(dstW),(void*)&dstW); + OCLCHECK(clSetKernelArg,chr_kernel,9,sizeof(srcH),(void*)&srcH); + OCLCHECK(clSetKernelArg,chr_kernel,10,sizeof(srcW),(void*)&srcW); + OCLCHECK(clSetKernelArg,chr_kernel,11,sizeof(srcH),(void*)&srcH); + OCLCHECK(clSetKernelArg,chr_kernel,12,sizeof(dstStride),(void*)&dstStride); + OCLCHECK(clSetKernelArg,chr_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride); + OCLCHECK(clSetKernelArg,chr_kernel,14,sizeof(srcStride),(void*)&srcStride); + OCLCHECK(clSetKernelArg,chr_kernel,15,sizeof(srcChrStride),(void*)&srcChrStride); + init_chr_status = 1; + } + + kenv->kernel = chr_kernel; + OCLCHECK(clSetKernelArg,chr_kernel,0,sizeof(cl_mem),(void*)&c->cl_intermediaBuf); + OCLCHECK(clSetKernelArg,chr_kernel,1,sizeof(cl_mem),(void*)&c->cl_src); + /*Run the Kernel*/ + global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256; + global_work_size[1] = c->chrSrcH; + + OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + + static int init_lum_status = 0; + static cl_kernel lum_kernel; + + if( init_lum_status == 0 ){ + //Vertical: + /*Create Kernel*/ + if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 ) + lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL ); + else + lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL ); + + if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 ) + { + OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilter); + OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(int),(void*)&c->vLumFilterSize); + OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(cl_mem),(void*)&c->cl_vChrFilter); + OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(int),(void*)&c->vChrFilterSize); + OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos); + OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos); + OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstW),(void*)&dstW); + OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstH),(void*)&dstH); + OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(srcW),(void*)&srcW); + OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(srcH),(void*)&srcH); + OCLCHECK(clSetKernelArg,lum_kernel,12,sizeof(dstStride),(void*)&dstStride); + OCLCHECK(clSetKernelArg,lum_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride); + OCLCHECK(clSetKernelArg,lum_kernel,14,sizeof(dstStride),(void*)&dstStride); + OCLCHECK(clSetKernelArg,lum_kernel,15,sizeof(dstChrStride),(void*)&dstChrStride); + }else{ + + OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos); + OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos); + OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(dstW),(void*)&dstW); + OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(dstH),(void*)&dstH); + OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(srcW),(void*)&srcW); + OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(srcH),(void*)&srcH); + OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstStride),(void*)&dstStride); + OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstChrStride),(void*)&dstChrStride); + OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(dstStride),(void*)&dstStride); + OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(dstChrStride),(void*)&dstChrStride); + } + init_lum_status = 1; + } + + kenv->kernel = lum_kernel; + OCLCHECK(clSetKernelArg,kenv->kernel,0,sizeof(cl_mem),(void*)&c->cl_dst); + OCLCHECK(clSetKernelArg,kenv->kernel,1,sizeof(cl_mem),(void*)&c->cl_intermediaBuf); + + /*Run the Kernel*/ + global_work_size[0] = c->chrDstW; + global_work_size[1] = c->chrDstH; + + OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue,kenv->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL); + + clReleaseMemObject( c->cl_intermediaBuf ); + + return 1; } void av_scale_frame(ScaleContext *c, - void *dst, - void *src, - int *srcStride, - int *dstStride, - int *should_dither) + void *dst, + void *src, + int *srcStride, + int *dstStride, + int *should_dither) { - - static int regflg = 0; - void *userdata[CL_PARAM_NUM]; - userdata[0] = (void *)c; - userdata[1] = (void *)dst; - userdata[2] = (void *)src; - userdata[3] = (void *)srcStride; - userdata[4] = (void *)dstStride; - userdata[5] = (void *)should_dither; - - if( regflg==0 ) + + static int regflg = 0; + void *userdata[CL_PARAM_NUM]; + userdata[0] = (void *)c; + userdata[1] = (void *)dst; + userdata[2] = (void *)src; + userdata[3] = (void *)srcStride; + userdata[4] = (void *)dstStride; + userdata[5] = (void *)should_dither; + + if( regflg==0 ) { int st = hb_register_kernel_wrapper( "scale_opencl", av_scale_frame_func); if( !st ) @@ -204,11 +204,11 @@ void av_scale_frame(ScaleContext *c, regflg++; } - if( !hb_run_kernel( "scale_opencl", userdata )) - { - hb_log("run kernel function[%s] faild", "scale_opencl_func" ); - return; - } + if( !hb_run_kernel( "scale_opencl", userdata )) + { + hb_log("run kernel function[%s] faild", "scale_opencl_func" ); + return; + } } #endif diff --git a/libhb/vadxva2.c b/libhb/vadxva2.c index e7a1d1546..d67a5b377 100644 --- a/libhb/vadxva2.c +++ b/libhb/vadxva2.c @@ -580,7 +580,7 @@ void hb_init_filter( cl_mem src, int srcwidth, int srcheight, uint8_t* dst, int { T_FilterLink fl = {0}; int STEP = srcwidth * srcheight * 3 / 2; - int OUTSTEP = dstwidth * dstheight * 3 / 2; + int OUTSTEP = dstwidth * dstheight * 3 / 2; int HEIGHT = srcheight; int LINESIZEY = srcwidth; int LINESIZEUV = srcwidth / 2; @@ -597,10 +597,10 @@ void hb_init_filter( cl_mem src, int srcwidth, int srcheight, uint8_t* dst, int scale_run( src, fl.cl_outbuf, LINESIZEY, LINESIZEUV, HEIGHT ); - hb_read_opencl_buffer( fl.cl_outbuf, dst, OUTSTEP ); - CL_FREE( cl_outbuf ); + hb_read_opencl_buffer( fl.cl_outbuf, dst, OUTSTEP ); + CL_FREE( cl_outbuf ); - return; + return; } #endif /** @@ -636,14 +636,14 @@ int hb_va_extract( hb_va_dxva2_t *dxva2, uint8_t *dst, AVFrame *frame, int job_w { hb_ocl_nv12toyuv( plane, lock.Pitch, dxva2->width, dxva2->height, crop, dxva2 ); - static int init_flag = 0; - if(init_flag == 0){ - scale_init( dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], job_w, job_h ); - init_flag = 1; - } + static int init_flag = 0; + if(init_flag == 0){ + scale_init( dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], job_w, job_h ); + init_flag = 1; + } - hb_init_filter( dxva2->cl_mem_yuv, dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], dst, job_w, job_h, crop ); - } + hb_init_filter( dxva2->cl_mem_yuv, dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], dst, job_w, job_h, crop ); + } else #endif { @@ -743,7 +743,7 @@ void hb_va_new_dxva2( hb_va_dxva2_t *dxva2, AVCodecContext *p_context ) char* hb_get_pix_fmt_name( int pix_fmt ) { - static const char *ppsz_name[AV_PIX_FMT_NB] = + static const char *ppsz_name[AV_PIX_FMT_NB] = { [AV_PIX_FMT_VDPAU_H264] = "AV_PIX_FMT_VDPAU_H264", [AV_PIX_FMT_VAAPI_IDCT] = "AV_PIX_FMT_VAAPI_IDCT", @@ -754,7 +754,7 @@ char* hb_get_pix_fmt_name( int pix_fmt ) [AV_PIX_FMT_YUV420P] = "AV_PIX_FMT_YUV420P", }; - return ppsz_name[pix_fmt]; + return ppsz_name[pix_fmt]; } enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *p_context, const enum PixelFormat *pi_fmt ) -- 2.49.0