if( kenv->isAMD )
{
void *data = clEnqueueMapBuffer( kenv->command_queue, dxva2->cl_mem_nv12, CL_MAP_WRITE_INVALIDATE_REGION, CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL );
- //memcpy( data, bufi, in_bytes );
+
for ( i = 0; i < dxva2->height; i++ )
{
memcpy( data+i*dxva2->width, bufi1+i*p, dxva2->width );
int st = hb_register_kernel_wrapper( "nv12toyuv", hb_nv12toyuv );
if( !st )
{
- hb_log( "register kernel[%s] faild", "nv12toyuv" );
+ hb_log( "register kernel[%s] failed", "nv12toyuv" );
return -1;
}
return 0;
* bufi is input frame of nv12, w is input frame width, h is input frame height
*/
int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2 )
-//int hb_ocl_nv12toyuv( uint8_t *bufi, int w, int h, int *crop, hb_va_dxva2_t *dxva2 )
{
void *userdata[7];
userdata[0] = (void*)w;
return -1;
if( hb_run_kernel( "nv12toyuv", userdata ) )
{
- hb_log( "run kernel[nv12toyuv] faild" );
+ hb_log( "run kernel[nv12toyuv] failed" );
return -1;
}
return 0;
int st = hb_register_kernel_wrapper( "frame_h_scale", hb_ocl_scale_func );\r
if( !st )\r
{\r
- hb_log( "register kernel[%s] faild", "frame_h_scale" );\r
+ hb_log( "register kernel[%s] failed", "frame_h_scale" );\r
return 0;\r
}\r
init_flag++;\r
data[5] = (void*)out_h;\r
data[6] = os;\r
if( !hb_run_kernel( "frame_h_scale", data ) )\r
- hb_log( "run kernel[%s] faild", "frame_scale" );\r
+ hb_log( "run kernel[%s] failed", "frame_scale" );\r
return 0;\r
}\r
#endif\r
int srcStride,\r
int srcChrStride)\r
{\r
- const unsigned char hb_dither_8x8_128[8][8] = {\r
- { 36, 68, 60, 92, 34, 66, 58, 90, },\r
- { 100, 4, 124, 28, 98, 2, 122, 26, },\r
- { 52, 84, 44, 76, 50, 82, 42, 74, },\r
- { 116, 20, 108, 12, 114, 18, 106, 10, },\r
- { 32, 64, 56, 88, 38, 70, 62, 94, },\r
- { 96, 0, 120, 24, 102, 6, 126, 30, },\r
- { 48, 80, 40, 72, 54, 86, 46, 78, },\r
- { 112, 16, 104, 8, 118, 22, 110, 14, },\r
- };\r
+ const unsigned char hb_dither_8x8_128[8][8] = {\r
+ { 36, 68, 60, 92, 34, 66, 58, 90, },\r
+ { 100, 4, 124, 28, 98, 2, 122, 26, },\r
+ { 52, 84, 44, 76, 50, 82, 42, 74, },\r
+ { 116, 20, 108, 12, 114, 18, 106, 10, },\r
+ { 32, 64, 56, 88, 38, 70, 62, 94, },\r
+ { 96, 0, 120, 24, 102, 6, 126, 30, },\r
+ { 48, 80, 40, 72, 54, 86, 46, 78, },\r
+ { 112, 16, 104, 8, 118, 22, 110, 14, },\r
+ };\r
\r
\r
int w = get_global_id(0);\r
int srcStride,\r
int srcChrStride)\r
{\r
- const unsigned char hb_sws_pb_64[8] = {\r
- 64, 64, 64, 64, 64, 64, 64, 64\r
- };\r
+ const unsigned char hb_sws_pb_64[8] = {\r
+ 64, 64, 64, 64, 64, 64, 64, 64\r
+ };\r
\r
int w = get_global_id(0);\r
int h = get_global_id(1);\r
int srcStride,\r
int srcChrStride)\r
{\r
- const unsigned char hb_sws_pb_64[8] = {\r
- 64, 64, 64, 64, 64, 64, 64, 64\r
- };\r
+ const unsigned char hb_sws_pb_64[8] = {\r
+ 64, 64, 64, 64, 64, 64, 64, 64\r
+ };\r
\r
int w = get_global_id(0);\r
int h = get_global_id(1);\r
pbuff,\r
NULL); \r
if (status) \r
- continue; \r
+ continue; \r
status = clGetDeviceIDs(platforms[i], \r
CL_DEVICE_TYPE_GPU , \r
0 , \r
NULL , \r
&numDevices); \r
\r
- cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id));\r
- status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);\r
- for (j = 0; j < numDevices; j++)\r
- {\r
- char dbuff[100];\r
- status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); \r
- if(!strcmp(dbuff, "Advanced Micro Devices, Inc.") || !strcmp(dbuff, "NVIDIA Corporation"))\r
- {\r
- return 0;\r
- }\r
- }\r
+ cl_device_id *devices = (cl_device_id *)malloc(numDevices * sizeof(cl_device_id));\r
+ status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);\r
+ for (j = 0; j < numDevices; j++)\r
+ {\r
+ char dbuff[100];\r
+ status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuff), dbuff, NULL); \r
+ if(!strcmp(dbuff, "Advanced Micro Devices, Inc.") || !strcmp(dbuff, "NVIDIA Corporation"))\r
+ {\r
+ return 0;\r
+ }\r
+ }\r
\r
if (status != CL_SUCCESS)\r
continue;\r
ADD_KERNEL_CFG( 0, "frame_h_scale", NULL )\r
ADD_KERNEL_CFG( 1, "frame_v_scale", NULL )\r
ADD_KERNEL_CFG( 2, "nv12toyuv", NULL )\r
- ADD_KERNEL_CFG( 3, "scale_opencl", NULL )\r
+ ADD_KERNEL_CFG( 3, "scale_opencl", NULL )\r
\r
return 0;\r
}\r
&numDevices,\r
NULL );\r
if( status != CL_SUCCESS )\r
- {\r
- hb_log( "Notice: Get context info failed" );\r
+ {\r
+ hb_log( "Notice: Get context info failed" );\r
return 0;\r
}\r
\r
devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
if( devices == NULL )\r
- {\r
- hb_log( "Notice: No device found" );\r
+ {\r
+ hb_log( "Notice: No device found" );\r
return 0;\r
}\r
\r
&numDevices,\r
NULL );\r
if( status != CL_SUCCESS )\r
- {\r
- hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
+ {\r
+ hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
if( devices == NULL )\r
- {\r
- hb_log( "Notice: No device found, when generate binary file from kernel source" );\r
+ {\r
+ hb_log( "Notice: No device found, when generate binary file from kernel source" );\r
return 0;\r
}\r
/* grab the handles to all of the devices in the program. */\r
devices,\r
NULL );\r
if( status != CL_SUCCESS )\r
- {\r
- hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
+ {\r
+ hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
/* figure out the sizes of each of the binaries. */\r
sizeof(size_t) * numDevices,\r
binarySizes, NULL );\r
if( status != CL_SUCCESS )\r
- {\r
- hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
+ {\r
+ hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
/* copy over all of the generated binaries. */\r
binaries = (char**)malloc( sizeof(char *) * numDevices );\r
if( binaries == NULL )\r
- {\r
- hb_log( "Notice: malloc for binaries failed, when generate binary file from kernel source" );\r
+ {\r
+ hb_log( "Notice: malloc for binaries failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
\r
{\r
binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] );\r
if( binaries[i] == NULL )\r
- {\r
- hb_log( "Notice: malloc for binary[%d] failed, when generate binary file from kernel source", i );\r
+ {\r
+ hb_log( "Notice: malloc for binary[%d] failed, when generate binary file from kernel source", i );\r
return 0;\r
- }\r
+ }\r
}\r
else\r
{\r
binaries,\r
NULL );\r
if( status != CL_SUCCESS )\r
- {\r
- hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
+ {\r
+ hb_log( "Notice: Get program info failed, when generate binary file from kernel source" );\r
return 0;\r
}\r
/* dump out each binary into its own separate file. */\r
status = clGetPlatformIDs( 0, NULL, &numPlatforms );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: OpenCL device platform not found." );\r
+ hb_log( "Notice: OpenCL device platform not found." );\r
return(1);\r
}\r
gpu_info->platform = NULL;\r
\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Specific opencl platform not found." );\r
+ hb_log( "Notice: Specific opencl platform not found." );\r
return(1);\r
}\r
\r
&numDevices );\r
\r
if( status != CL_SUCCESS )\r
- {\r
+ {\r
continue;\r
- }\r
+ }\r
\r
if( numDevices )\r
break;\r
}\r
if( NULL == gpu_info->platform )\r
{\r
- hb_log( "Notice: No OpenCL-compatible GPU found." );\r
+ hb_log( "Notice: No OpenCL-compatible GPU found." );\r
return(1);\r
}\r
if( status != CL_SUCCESS )\r
}\r
if((gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS))\r
{\r
- hb_log( "Notice: Unable to create opencl context." );\r
+ hb_log( "Notice: Unable to create opencl context." );\r
return(1);\r
}\r
/* Detect OpenCL devices. */\r
0, NULL, &length );\r
if((status != CL_SUCCESS) || (length == 0))\r
{\r
- hb_log( "Notice: Unable to get the list of devices in context." );\r
+ hb_log( "Notice: Unable to get the list of devices in context." );\r
return(1);\r
}\r
/* Now allocate memory for device list based on the size we got earlier */\r
gpu_info->devices, NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Unable to get the device list data in context." );\r
+ hb_log( "Notice: Unable to get the device list data in context." );\r
return(1);\r
}\r
\r
gpu_info->devices[0],\r
0, &status );\r
if( status != CL_SUCCESS )\r
- {\r
- hb_log( "Notice: Unable to create opencl command queue." );\r
+ {\r
+ hb_log( "Notice: Unable to create opencl command queue." );\r
return(1);\r
- }\r
+ }\r
}\r
\r
if( clGetCommandQueueInfo( gpu_info->command_queue,\r
if( status == 0 )\r
return(0);\r
#else\r
- int kernel_src_size = strlen( kernel_src_hscale ) + strlen( kernel_src_vscale ) + strlen( kernel_src_nvtoyuv ) + strlen( kernel_src_hscaleall ) + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast );\r
+ int kernel_src_size = strlen( kernel_src_hscale ) + strlen( kernel_src_vscale ) + strlen( kernel_src_nvtoyuv ) + strlen( kernel_src_hscaleall ) + strlen( kernel_src_hscalefast ) + strlen( kernel_src_vscalealldither ) + strlen( kernel_src_vscaleallnodither ) + strlen( kernel_src_vscalefast );\r
source_str = (char*)malloc( kernel_src_size + 2 );\r
strcpy( source_str, kernel_src_hscale );\r
strcat( source_str, kernel_src_vscale );\r
NULL );\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Unable to get the number of devices in context." );\r
+ hb_log( "Notice: Unable to get the number of devices in context." );\r
return 0;\r
- }\r
+ }\r
\r
devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );\r
if( devices == NULL )\r
gpu_info->context, 1, &source, source_size, &status );\r
}\r
if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){\r
- hb_log( "Notice: Unable to get list of devices in context." );\r
+ hb_log( "Notice: Unable to get list of devices in context." );\r
return(0);\r
- }\r
+ }\r
\r
/* create a cl program executable for all the devices specified */\r
if( !gpu_info->isUserCreated )\r
\r
if( status != CL_SUCCESS )\r
{\r
- hb_log( "Notice: Unable to get GPU build information." );\r
+ hb_log( "Notice: Unable to get GPU build information." );\r
return(0);\r
}\r
buildLog = (char*)malloc( length );\r
cl_int status;\r
size_t numDevices;\r
cl_device_id *devices;\r
- /*initialize devices, context, comand_queue*/\r
+ /*initialize devices, context, comand_queue*/\r
status = hb_init_opencl_env( &gpu_env );\r
if( status )\r
return(1);\r
deviceName,\r
NULL );\r
hb_log( "GPU Device Name: %s", deviceName );\r
- char driverVersion[1024];\r
+ char driverVersion[1024];\r
status = clGetDeviceInfo( devices[i],\r
CL_DRIVER_VERSION,\r
sizeof(deviceName),\r
\r
int hb_create_buffer(cl_mem *cl_Buf,int flags,int size)\r
{\r
- int status;\r
- *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );\r
- \r
+ int status;\r
+ *cl_Buf = clCreateBuffer( gpu_env.context, (flags), (size), NULL, &status );\r
+ \r
if( status != CL_SUCCESS )\r
- { \r
- hb_log("clCreateBuffer error '%d'",status);\r
- return 0; \r
- }\r
- return 1;\r
+ { \r
+ hb_log("clCreateBuffer error '%d'",status);\r
+ return 0; \r
+ }\r
+ return 1;\r
}\r
\r
int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size)\r
{\r
- int status;\r
+ int status;\r
\r
- status = clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0); \r
+ status = clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, CL_TRUE, 0, size, outbuf, 0, 0, 0); \r
if( status != CL_SUCCESS )\r
- { \r
- hb_log("av_read_opencl_buffer error '%d'",status);\r
- return 0; \r
- }\r
- return 1;\r
+ { \r
+ hb_log("av_read_opencl_buffer error '%d'",status);\r
+ return 0; \r
+ }\r
+ return 1;\r
}\r
#endif\r
const int64_t fone = 1LL << 54;\r
int ret = -1;\r
\r
- *filterPos = (int32_t *)av_malloc((dstW + 3) * sizeof(**filterPos));\r
- if (*filterPos == NULL && ((dstW + 3) * sizeof(**filterPos)) != 0) {\r
- hb_log("Cannot allocate memory."); \r
+ *filterPos = (int32_t *)av_malloc((dstW + 3) * sizeof(**filterPos));\r
+ if (*filterPos == NULL && ((dstW + 3) * sizeof(**filterPos)) != 0) {\r
+ hb_log("Cannot allocate memory."); \r
goto fail;\r
- }\r
+ }\r
\r
if (FFABS(xInc - 0x10000) < 10) { // unscaled\r
int i;\r
filterSize = 1;\r
- filter = (int64_t *)av_mallocz(dstW * sizeof(*filter) * filterSize);\r
- if (filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0) {\r
+ filter = (int64_t *)av_mallocz(dstW * sizeof(*filter) * filterSize);\r
+ if (filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0) {\r
hb_log("Cannot allocate memory."); \r
goto fail;\r
- }\r
+ }\r
\r
\r
for (i = 0; i < dstW; i++) {\r
int i;\r
int64_t xDstInSrc;\r
filterSize = 1;\r
- filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);\r
- if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){\r
- hb_log("Cannot allocate memory."); \r
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);\r
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){\r
+ hb_log("Cannot allocate memory."); \r
goto fail;\r
- }\r
+ }\r
\r
xDstInSrc = xInc / 2 - 0x8000;\r
for (i = 0; i < dstW; i++) {\r
int i;\r
int64_t xDstInSrc;\r
filterSize = 2;\r
- filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);\r
- if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){\r
- hb_log("Cannot allocate memory."); \r
+ filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);\r
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){\r
+ hb_log("Cannot allocate memory."); \r
goto fail;\r
- }\r
+ }\r
\r
xDstInSrc = xInc / 2 - 0x8000;\r
for (i = 0; i < dstW; i++) {\r
filterSize = FFMAX(filterSize, 1);\r
\r
filter = (int64_t *)av_malloc(dstW * sizeof(*filter) * filterSize);\r
- if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){\r
- hb_log("Cannot allocate memory."); \r
+ if(filter == NULL && (dstW * sizeof(*filter) * filterSize) != 0){\r
+ hb_log("Cannot allocate memory."); \r
goto fail;\r
- }\r
+ }\r
\r
xDstInSrc = xInc - 0x10000;\r
for (i = 0; i < dstW; i++) {\r
if (dstFilter)\r
filter2Size += dstFilter->length - 1;\r
assert(filter2Size > 0);\r
- filter2 = (int64_t *)av_mallocz(filter2Size * dstW * sizeof(*filter2));\r
- if(filter2 == NULL && (filter2Size * dstW * sizeof(*filter2)) != 0)\r
- {\r
- hb_log("Can't alloc memory.");\r
- goto fail;\r
- }\r
+ filter2 = (int64_t *)av_mallocz(filter2Size * dstW * sizeof(*filter2));\r
+ if(filter2 == NULL && (filter2Size * dstW * sizeof(*filter2)) != 0)\r
+ {\r
+ hb_log("Can't alloc memory.");\r
+ goto fail;\r
+ }\r
\r
for (i = 0; i < dstW; i++) {\r
int j, k;\r
*outFilterSize = filterSize;\r
\r
if (flags & SWS_PRINT_INFO)\r
- hb_log("SwScaler: reducing / aligning filtersize %d -> %d",filter2Size,filterSize);\r
+ hb_log("SwScaler: reducing / aligning filtersize %d -> %d",filter2Size,filterSize);\r
for (i = 0; i < dstW; i++) {\r
int j;\r
\r
// Note the +1 is for the MMX scaler which reads over the end\r
// FF_ALLOCZ_OR_GOTO(NULL, *outFilter,\r
// *outFilterSize * (dstW + 3) * sizeof(int16_t), fail);\r
- *outFilter = (int16_t *)av_mallocz(*outFilterSize * (dstW + 3) * sizeof(int16_t));\r
- if( *outFilter == NULL && (*outFilterSize * (dstW + 3) * sizeof(int16_t)) != 0)\r
- {\r
- hb_log("Can't alloc memory");\r
- goto fail;\r
- }\r
+ *outFilter = (int16_t *)av_mallocz(*outFilterSize * (dstW + 3) * sizeof(int16_t));\r
+ if( *outFilter == NULL && (*outFilterSize * (dstW + 3) * sizeof(int16_t)) != 0)\r
+ {\r
+ hb_log("Can't alloc memory");\r
+ goto fail;\r
+ }\r
\r
for (i = 0; i < dstW; i++) {\r
int j;\r
enum PixelFormat srcFormat = c->srcFormat;\r
enum PixelFormat dstFormat = c->dstFormat;\r
\r
- cpu_flags = 0;\r
+ cpu_flags = 0;\r
flags = c->flags;\r
\r
if(srcFormat != c->srcFormat || dstFormat != c->dstFormat){\r
- hb_log("deprecated pixel format used, make sure you did set range correctly.");\r
+ hb_log("deprecated pixel format used, make sure you did set range correctly.");\r
c->srcFormat = srcFormat;\r
c->dstFormat = dstFormat;\r
}\r
\r
if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1) {\r
- hb_log("%dx%d -> %dx%d is invalid scaling dimension.",srcW,srcH,dstW,dstH);\r
+ hb_log("%dx%d -> %dx%d is invalid scaling dimension.",srcW,srcH,dstW,dstH);\r
return -1;\r
}\r
\r
c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;\r
c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;\r
\r
- const int filterAlign = 1;\r
+ const int filterAlign = 1;\r
\r
if (initScaleFilter(&c->hLumFilter, &c->hLumFilterPos,\r
&c->hLumFilterSize, c->lumXInc,\r
(flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,\r
cpu_flags, srcFilter->lumH, dstFilter->lumH,\r
c->param) < 0)\r
- goto fail;\r
+ goto fail;\r
\r
if (initScaleFilter(&c->hChrFilter, &c->hChrFilterPos,\r
&c->hChrFilterSize, c->chrXInc,\r
(flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,\r
cpu_flags, srcFilter->lumV, dstFilter->lumV,\r
c->param) < 0)\r
- goto fail;\r
+ goto fail;\r
\r
if (initScaleFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,\r
c->chrYInc, c->chrSrcH, c->chrDstH,\r
int flags, ScaleFilter *srcFilter,\r
ScaleFilter *dstFilter, const double *param)\r
{\r
- ScaleContext *sc = (ScaleContext*)malloc(sizeof(ScaleContext));\r
+ ScaleContext *sc = (ScaleContext*)malloc(sizeof(ScaleContext));\r
sc->flags = flags;\r
sc->srcW = srcW;\r
sc->srcH = srcH;\r
sc->dstRange = handle_scale_jpeg(&dstFormat);\r
sc->srcFormat = srcFormat;\r
sc->dstFormat = dstFormat;\r
- sc->hyscale_fast = 0;\r
- sc->hcscale_fast = 0;\r
+ sc->hyscale_fast = 0;\r
+ sc->hcscale_fast = 0;\r
\r
if (param) {\r
sc->param[0] = param[0];\r
int *srcStride,\r
int *dstStride)\r
{\r
- int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); \r
+ int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); \r
\r
av_scale_frame(c,cl_outbuf,cl_inbuf,srcStride,dstStride,&should_dither);\r
\r
\r
void scale_init( int width, int height, int dstwidth, int dstheight )\r
{\r
- int srcW = width;\r
- int srcH = height;\r
- int dstW = dstwidth;\r
- int dstH = dstheight;\r
- enum PixelFormat inputfmt = AV_PIX_FMT_YUV420P;\r
- enum PixelFormat outputfmt = AV_PIX_FMT_YUV420P;\r
- int flags = SWS_BILINEAR;\r
-\r
- g_scale = scale_getContext(srcW,srcH,inputfmt,dstW,dstH,outputfmt,flags,NULL,NULL,NULL);\r
+ int srcW = width;\r
+ int srcH = height;\r
+ int dstW = dstwidth;\r
+ int dstH = dstheight;\r
+ enum PixelFormat inputfmt = AV_PIX_FMT_YUV420P;\r
+ enum PixelFormat outputfmt = AV_PIX_FMT_YUV420P;\r
+ int flags = SWS_BILINEAR;\r
+\r
+ g_scale = scale_getContext(srcW,srcH,inputfmt,dstW,dstH,outputfmt,flags,NULL,NULL,NULL);\r
}\r
\r
void scale_release()\r
{\r
- sws_freeContext( g_scale );\r
+ sws_freeContext( g_scale );\r
}\r
#ifdef USE_OPENCL\r
int scale_run( cl_mem inbuf, cl_mem outbuf, int linesizey, int linesizeuv, int height )\r
{\r
- g_scale->cl_src = inbuf;\r
- g_scale->cl_dst = outbuf;\r
+ g_scale->cl_src = inbuf;\r
+ g_scale->cl_dst = outbuf;\r
\r
- int src_stride[4] = { linesizey, linesizeuv, linesizeuv, 0 };\r
- int dst_stride[4] = { g_scale->dstW, g_scale->chrDstW, g_scale->chrDstW, 0 };\r
- int ret = -1;\r
+ int src_stride[4] = { linesizey, linesizeuv, linesizeuv, 0 };\r
+ int dst_stride[4] = { g_scale->dstW, g_scale->chrDstW, g_scale->chrDstW, 0 };\r
+ int ret = -1;\r
\r
- ret = scale_opencl( g_scale, inbuf, outbuf, src_stride, dst_stride );\r
+ ret = scale_opencl( g_scale, inbuf, outbuf, src_stride, dst_stride );\r
\r
- return ret;\r
+ return ret;\r
}\r
#endif\r
#endif\r
int dstBpc, srcBpc;\r
int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.\r
int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.\r
- int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.\r
+ int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.\r
int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.\r
int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.\r
int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).\r
#include "openclwrapper.h"\r
\r
#define OCLCHECK( method, ...) \\r
- status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \\r
- hb_log(" error %s %d",# method, status); assert(0); return status; }\r
- \r
+ status = method(__VA_ARGS__); if(status != CL_SUCCESS) { \\r
+ hb_log(" error %s %d",# method, status); assert(0); return status; }\r
+ \r
/*
 * CREATEBUF - create an OpenCL buffer and store the handle in `out`.
 *
 *   out   : lvalue that receives the result of clCreateBuffer()
 *   flags : flags argument forwarded to clCreateBuffer()
 *   size  : size argument forwarded to clCreateBuffer()
 *   ptr   : host pointer argument forwarded to clCreateBuffer() (may be NULL)
 *
 * The macro is not self-contained: the enclosing function must have a
 * `kenv` pointer (its `context` member is used) and a `status` variable in
 * scope, and must return an int, because on any status other than
 * CL_SUCCESS the macro logs the status and executes `return -1` in the
 * caller.
 *
 * The body is wrapped in do { } while( 0 ) so that an invocation followed by
 * ';' is exactly one statement and stays correct under an unbraced if/else.
 *
 * NOTE(review): the failure value is -1, so a caller that tests the result
 * of a function using this macro with `!st` will not see the failure -
 * confirm the intended convention at the call sites.
 */
#define CREATEBUF( out, flags, size, ptr ) \
    do { \
        (out) = clCreateBuffer( kenv->context, (flags), (size), (ptr), &status ); \
        if( status != CL_SUCCESS ) \
        { \
            hb_log( "clCreateBuffer failed %d", status ); \
            return -1; \
        } \
    } while( 0 )
/****************************************************************************************************************************/\r
static int CreateCLBuffer( ScaleContext *c, KernelEnv *kenv )\r
{\r
- cl_int status;\r
- \r
- if(!c->hyscale_fast || !c->hcscale_fast)\r
- {\r
- CREATEBUF(c->cl_hLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short),c->hLumFilter);\r
- CREATEBUF(c->cl_hLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int),c->hLumFilterPos);\r
- CREATEBUF(c->cl_hChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short),c->hChrFilter);\r
- CREATEBUF(c->cl_hChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int),c->hChrFilterPos);\r
- }\r
- if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )\r
- {\r
- CREATEBUF(c->cl_vLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*c->vLumFilterSize*sizeof(cl_short),c->vLumFilter);\r
- CREATEBUF(c->cl_vChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*c->vChrFilterSize*sizeof(cl_short),c->vChrFilter);\r
- }\r
- CREATEBUF(c->cl_vLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*sizeof(cl_int),c->vLumFilterPos);\r
- CREATEBUF(c->cl_vChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*sizeof(cl_int),c->vChrFilterPos);\r
- \r
- return 1;\r
+ cl_int status;\r
+ \r
+ if(!c->hyscale_fast || !c->hcscale_fast)\r
+ {\r
+ CREATEBUF(c->cl_hLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*c->hLumFilterSize*sizeof(cl_short),c->hLumFilter);\r
+ CREATEBUF(c->cl_hLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->dstW*sizeof(cl_int),c->hLumFilterPos);\r
+ CREATEBUF(c->cl_hChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*c->hChrFilterSize*sizeof(cl_short),c->hChrFilter);\r
+ CREATEBUF(c->cl_hChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, c->chrDstW*sizeof(cl_int),c->hChrFilterPos);\r
+ }\r
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )\r
+ {\r
+ CREATEBUF(c->cl_vLumFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*c->vLumFilterSize*sizeof(cl_short),c->vLumFilter);\r
+ CREATEBUF(c->cl_vChrFilter,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*c->vChrFilterSize*sizeof(cl_short),c->vChrFilter);\r
+ }\r
+ CREATEBUF(c->cl_vLumFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->dstH*sizeof(cl_int),c->vLumFilterPos);\r
+ CREATEBUF(c->cl_vChrFilterPos,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,c->chrDstH*sizeof(cl_int),c->vChrFilterPos);\r
+ \r
+ return 1;\r
}\r
\r
int av_scale_frame_func( void **userdata, KernelEnv *kenv )\r
{\r
- ScaleContext *c = (ScaleContext *)userdata[0];\r
-\r
- c->cl_src = (cl_mem)userdata[2];\r
- c->cl_dst = (cl_mem)userdata[1];\r
-\r
- /*frame size*/\r
- int *tmp = (int *)userdata[3];\r
- int srcStride = tmp[0];\r
- int srcChrStride = tmp[1];\r
- int srcW = c->srcW;\r
- int srcH = c->srcH;\r
- \r
- tmp = (int *)userdata[4];\r
- int dstStride = tmp[0];\r
- int dstChrStride = tmp[1];\r
- int dstW = c->dstW;\r
- int dstH = c->dstH;\r
- \r
- /* local variable */\r
- cl_int status;\r
- size_t global_work_size[2];\r
-\r
- int intermediaSize;\r
-\r
- int st = CreateCLBuffer(c,kenv);\r
+ ScaleContext *c = (ScaleContext *)userdata[0];\r
+\r
+ c->cl_src = (cl_mem)userdata[2];\r
+ c->cl_dst = (cl_mem)userdata[1];\r
+\r
+ /*frame size*/\r
+ int *tmp = (int *)userdata[3];\r
+ int srcStride = tmp[0];\r
+ int srcChrStride = tmp[1];\r
+ int srcW = c->srcW;\r
+ int srcH = c->srcH;\r
+ \r
+ tmp = (int *)userdata[4];\r
+ int dstStride = tmp[0];\r
+ int dstChrStride = tmp[1];\r
+ int dstW = c->dstW;\r
+ int dstH = c->dstH;\r
+ \r
+ /* local variable */\r
+ cl_int status;\r
+ size_t global_work_size[2];\r
+\r
+ int intermediaSize;\r
+\r
+ int st = CreateCLBuffer(c,kenv);\r
if( !st )\r
{\r
hb_log( "CreateBuffer[%s] faild %d", "scale_opencl",st );\r
return -1;\r
}\r
\r
- intermediaSize = dstStride * srcH + dstChrStride * srcH;\r
-\r
- CREATEBUF(c->cl_intermediaBuf,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,intermediaSize*sizeof(cl_short),NULL);\r
-\r
- static int init_chr_status = 0;\r
- static cl_kernel chr_kernel;\r
-\r
- if(init_chr_status == 0){\r
-\r
- if(!(c->flags & 1))\r
- {\r
- chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL );\r
- //Set the Kernel Argument;\r
- OCLCHECK(clSetKernelArg,chr_kernel,2,sizeof(cl_mem),(void*)&c->cl_hLumFilter);\r
- OCLCHECK(clSetKernelArg,chr_kernel,3,sizeof(cl_mem),(void*)&c->cl_hLumFilterPos);\r
- OCLCHECK(clSetKernelArg,chr_kernel,4,sizeof(int),(void*)&c->hLumFilterSize);\r
- OCLCHECK(clSetKernelArg,chr_kernel,5,sizeof(cl_mem),(void*)&c->cl_hChrFilter);\r
- OCLCHECK(clSetKernelArg,chr_kernel,6,sizeof(cl_mem),(void*)&c->cl_hChrFilterPos);\r
- OCLCHECK(clSetKernelArg,chr_kernel,7,sizeof(int),(void*)&c->hChrFilterSize);\r
- }\r
- \r
- /*Set the arguments*/\r
- OCLCHECK(clSetKernelArg,chr_kernel,8,sizeof(dstW),(void*)&dstW);\r
- OCLCHECK(clSetKernelArg,chr_kernel,9,sizeof(srcH),(void*)&srcH);\r
- OCLCHECK(clSetKernelArg,chr_kernel,10,sizeof(srcW),(void*)&srcW);\r
- OCLCHECK(clSetKernelArg,chr_kernel,11,sizeof(srcH),(void*)&srcH);\r
- OCLCHECK(clSetKernelArg,chr_kernel,12,sizeof(dstStride),(void*)&dstStride);\r
- OCLCHECK(clSetKernelArg,chr_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);\r
- OCLCHECK(clSetKernelArg,chr_kernel,14,sizeof(srcStride),(void*)&srcStride);\r
- OCLCHECK(clSetKernelArg,chr_kernel,15,sizeof(srcChrStride),(void*)&srcChrStride);\r
- init_chr_status = 1;\r
- }\r
-\r
- kenv->kernel = chr_kernel;\r
- OCLCHECK(clSetKernelArg,chr_kernel,0,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);\r
- OCLCHECK(clSetKernelArg,chr_kernel,1,sizeof(cl_mem),(void*)&c->cl_src);\r
- /*Run the Kernel*/\r
- global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256;\r
- global_work_size[1] = c->chrSrcH;\r
-\r
- OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);\r
-\r
- static int init_lum_status = 0;\r
- static cl_kernel lum_kernel;\r
-\r
- if( init_lum_status == 0 ){\r
- //Vertical:\r
- /*Create Kernel*/\r
- if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )\r
- lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL );\r
- else\r
- lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL );\r
-\r
- if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )\r
- {\r
- OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilter);\r
- OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(int),(void*)&c->vLumFilterSize);\r
- OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(cl_mem),(void*)&c->cl_vChrFilter);\r
- OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(int),(void*)&c->vChrFilterSize);\r
- OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);\r
- OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);\r
- OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstW),(void*)&dstW);\r
- OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstH),(void*)&dstH);\r
- OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(srcW),(void*)&srcW);\r
- OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(srcH),(void*)&srcH);\r
- OCLCHECK(clSetKernelArg,lum_kernel,12,sizeof(dstStride),(void*)&dstStride);\r
- OCLCHECK(clSetKernelArg,lum_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);\r
- OCLCHECK(clSetKernelArg,lum_kernel,14,sizeof(dstStride),(void*)&dstStride);\r
- OCLCHECK(clSetKernelArg,lum_kernel,15,sizeof(dstChrStride),(void*)&dstChrStride);\r
- }else{\r
- \r
- OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);\r
- OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);\r
- OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(dstW),(void*)&dstW);\r
- OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(dstH),(void*)&dstH);\r
- OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(srcW),(void*)&srcW);\r
- OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(srcH),(void*)&srcH);\r
- OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstStride),(void*)&dstStride);\r
- OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstChrStride),(void*)&dstChrStride);\r
- OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(dstStride),(void*)&dstStride);\r
- OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(dstChrStride),(void*)&dstChrStride);\r
- }\r
- init_lum_status = 1;\r
- }\r
- \r
- kenv->kernel = lum_kernel;\r
- OCLCHECK(clSetKernelArg,kenv->kernel,0,sizeof(cl_mem),(void*)&c->cl_dst);\r
- OCLCHECK(clSetKernelArg,kenv->kernel,1,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);\r
- \r
- /*Run the Kernel*/\r
- global_work_size[0] = c->chrDstW;\r
- global_work_size[1] = c->chrDstH;\r
-\r
- OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue,kenv->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL);\r
-\r
- clReleaseMemObject( c->cl_intermediaBuf );\r
- \r
- return 1;\r
+ intermediaSize = dstStride * srcH + dstChrStride * srcH;\r
+\r
+ CREATEBUF(c->cl_intermediaBuf,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,intermediaSize*sizeof(cl_short),NULL);\r
+\r
+ static int init_chr_status = 0;\r
+ static cl_kernel chr_kernel;\r
+\r
+ if(init_chr_status == 0){\r
+\r
+ if(!(c->flags & 1))\r
+ {\r
+ chr_kernel = clCreateKernel( kenv->program, "hscale_all_opencl", NULL );\r
+ //Set the Kernel Argument;\r
+ OCLCHECK(clSetKernelArg,chr_kernel,2,sizeof(cl_mem),(void*)&c->cl_hLumFilter);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,3,sizeof(cl_mem),(void*)&c->cl_hLumFilterPos);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,4,sizeof(int),(void*)&c->hLumFilterSize);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,5,sizeof(cl_mem),(void*)&c->cl_hChrFilter);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,6,sizeof(cl_mem),(void*)&c->cl_hChrFilterPos);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,7,sizeof(int),(void*)&c->hChrFilterSize);\r
+ }\r
+ \r
+ /*Set the arguments*/\r
+ OCLCHECK(clSetKernelArg,chr_kernel,8,sizeof(dstW),(void*)&dstW);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,9,sizeof(srcH),(void*)&srcH);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,10,sizeof(srcW),(void*)&srcW);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,11,sizeof(srcH),(void*)&srcH);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,12,sizeof(dstStride),(void*)&dstStride);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,14,sizeof(srcStride),(void*)&srcStride);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,15,sizeof(srcChrStride),(void*)&srcChrStride);\r
+ init_chr_status = 1;\r
+ }\r
+\r
+ kenv->kernel = chr_kernel;\r
+ OCLCHECK(clSetKernelArg,chr_kernel,0,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);\r
+ OCLCHECK(clSetKernelArg,chr_kernel,1,sizeof(cl_mem),(void*)&c->cl_src);\r
+ /*Run the Kernel*/\r
+ global_work_size[0] = c->chrDstW;//dstW >> 1; //must times 256;\r
+ global_work_size[1] = c->chrSrcH;\r
+\r
+ OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue, kenv->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);\r
+\r
+ static int init_lum_status = 0;\r
+ static cl_kernel lum_kernel;\r
+\r
+ if( init_lum_status == 0 ){\r
+ //Vertical:\r
+ /*Create Kernel*/\r
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )\r
+ lum_kernel = clCreateKernel( kenv->program, "vscale_all_nodither_opencl", NULL );\r
+ else\r
+ lum_kernel = clCreateKernel( kenv->program, "vscale_fast_opencl", NULL );\r
+\r
+ if( c->vLumFilterSize > 1 && c->vChrFilterSize > 1 )\r
+ {\r
+ OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilter);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(int),(void*)&c->vLumFilterSize);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(cl_mem),(void*)&c->cl_vChrFilter);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(int),(void*)&c->vChrFilterSize);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstW),(void*)&dstW);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstH),(void*)&dstH);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(srcW),(void*)&srcW);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(srcH),(void*)&srcH);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,12,sizeof(dstStride),(void*)&dstStride);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,13,sizeof(dstChrStride),(void*)&dstChrStride);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,14,sizeof(dstStride),(void*)&dstStride);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,15,sizeof(dstChrStride),(void*)&dstChrStride);\r
+ }else{\r
+ \r
+ OCLCHECK(clSetKernelArg,lum_kernel,2,sizeof(cl_mem),(void*)&c->cl_vLumFilterPos);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,3,sizeof(cl_mem),(void*)&c->cl_vChrFilterPos);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,4,sizeof(dstW),(void*)&dstW);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,5,sizeof(dstH),(void*)&dstH);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,6,sizeof(srcW),(void*)&srcW);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,7,sizeof(srcH),(void*)&srcH);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,8,sizeof(dstStride),(void*)&dstStride);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,9,sizeof(dstChrStride),(void*)&dstChrStride);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,10,sizeof(dstStride),(void*)&dstStride);\r
+ OCLCHECK(clSetKernelArg,lum_kernel,11,sizeof(dstChrStride),(void*)&dstChrStride);\r
+ }\r
+ init_lum_status = 1;\r
+ }\r
+ \r
+ kenv->kernel = lum_kernel;\r
+ OCLCHECK(clSetKernelArg,kenv->kernel,0,sizeof(cl_mem),(void*)&c->cl_dst);\r
+ OCLCHECK(clSetKernelArg,kenv->kernel,1,sizeof(cl_mem),(void*)&c->cl_intermediaBuf);\r
+ \r
+ /*Run the Kernel*/\r
+ global_work_size[0] = c->chrDstW;\r
+ global_work_size[1] = c->chrDstH;\r
+\r
+ OCLCHECK(clEnqueueNDRangeKernel,kenv->command_queue,kenv->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL);\r
+\r
+ clReleaseMemObject( c->cl_intermediaBuf );\r
+ \r
+ return 1;\r
}\r
\r
+/*\r
+ * Run the "scale_opencl" kernel wrapper for one frame.\r
+ *\r
+ * The six arguments are packed, in declaration order, into slots 0..5 of a\r
+ * CL_PARAM_NUM-sized void* array and passed to hb_run_kernel().  While the\r
+ * wrapper has not yet been registered successfully, av_scale_frame_func is\r
+ * registered under the name "scale_opencl" first.\r
+ *\r
+ * Nothing is returned; a failed run is only reported through hb_log().\r
+ */\r
void av_scale_frame(ScaleContext *c,\r
-                    void *dst,\r
-                    void *src,\r
-                    int *srcStride,\r
-                    int *dstStride,\r
-                    int *should_dither)\r
+                    void *dst,\r
+                    void *src,\r
+                    int *srcStride,\r
+                    int *dstStride,\r
+                    int *should_dither)\r
{\r
-    \r
-    static int regflg = 0;\r
-    void *userdata[CL_PARAM_NUM];\r
-    userdata[0] = (void *)c;\r
-    userdata[1] = (void *)dst;\r
-    userdata[2] = (void *)src;\r
-    userdata[3] = (void *)srcStride;\r
-    userdata[4] = (void *)dstStride;\r
-    userdata[5] = (void *)should_dither;\r
-\r
-    if( regflg==0 )\r
+    \r
+    static int regflg = 0;\r
+    void *userdata[CL_PARAM_NUM];\r
+    userdata[0] = (void *)c;\r
+    userdata[1] = (void *)dst;\r
+    userdata[2] = (void *)src;\r
+    userdata[3] = (void *)srcStride;\r
+    userdata[4] = (void *)dstStride;\r
+    userdata[5] = (void *)should_dither;\r
+\r
+    if( regflg==0 )\r
     {\r
         int st = hb_register_kernel_wrapper( "scale_opencl", av_scale_frame_func);\r
-        if( !st )\r
+        /* The other registration sites in this file treat a zero return as\r
+         * failure, so latch the flag only on success: a failed registration\r
+         * is retried on the next call and a successful one is not repeated. */\r
+        if( st )\r
             regflg++;\r
     }\r
 \r
-    if( !hb_run_kernel( "scale_opencl", userdata ))\r
-    {\r
-        hb_log("run kernel function[%s] faild", "scale_opencl_func" );\r
-        return;\r
-    } \r
+    if( !hb_run_kernel( "scale_opencl", userdata ))\r
+    {\r
+        hb_log("run kernel function[%s] failed", "scale_opencl_func" );\r
+        return;\r
+    } \r
}\r
\r
#endif\r
{
T_FilterLink fl = {0};
int STEP = srcwidth * srcheight * 3 / 2;
- int OUTSTEP = dstwidth * dstheight * 3 / 2;
+ int OUTSTEP = dstwidth * dstheight * 3 / 2;
int HEIGHT = srcheight;
int LINESIZEY = srcwidth;
int LINESIZEUV = srcwidth / 2;
scale_run( src, fl.cl_outbuf, LINESIZEY, LINESIZEUV, HEIGHT );
- hb_read_opencl_buffer( fl.cl_outbuf, dst, OUTSTEP );
- CL_FREE( cl_outbuf );
+ hb_read_opencl_buffer( fl.cl_outbuf, dst, OUTSTEP );
+ CL_FREE( cl_outbuf );
- return;
+ return;
}
#endif
/**
{
hb_ocl_nv12toyuv( plane, lock.Pitch, dxva2->width, dxva2->height, crop, dxva2 );
- static int init_flag = 0;
- if(init_flag == 0){
- scale_init( dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], job_w, job_h );
- init_flag = 1;
- }
+ static int init_flag = 0;
+ if(init_flag == 0){
+ scale_init( dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], job_w, job_h );
+ init_flag = 1;
+ }
- hb_init_filter( dxva2->cl_mem_yuv, dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], dst, job_w, job_h, crop );
- }
+ hb_init_filter( dxva2->cl_mem_yuv, dxva2->width - crop[2] - crop[3], dxva2->height - crop[0] - crop[1], dst, job_w, job_h, crop );
+ }
else
#endif
{
char* hb_get_pix_fmt_name( int pix_fmt )
{
- static const char *ppsz_name[AV_PIX_FMT_NB] =
+ static const char *ppsz_name[AV_PIX_FMT_NB] =
{
[AV_PIX_FMT_VDPAU_H264] = "AV_PIX_FMT_VDPAU_H264",
[AV_PIX_FMT_VAAPI_IDCT] = "AV_PIX_FMT_VAAPI_IDCT",
[AV_PIX_FMT_YUV420P] = "AV_PIX_FMT_YUV420P",
};
- return ppsz_name[pix_fmt];
+ return ppsz_name[pix_fmt];
}
enum PixelFormat hb_ffmpeg_get_format( AVCodecContext *p_context, const enum PixelFormat *pi_fmt )