From: John Stebbins Date: Mon, 12 Jun 2017 17:21:46 +0000 (-0700) Subject: opencl: removit it (#777) X-Git-Tag: 1.1.0~528 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e8e6af1ec69651c540fd04ae5417c77a8dc1c9ae;p=handbrake opencl: removit it (#777) It was only used for scaling, it fails far too often and is only faster on a limited selectoin of hardware. --- diff --git a/libhb/common.h b/libhb/common.h index 6aac0bacd..04a969221 100644 --- a/libhb/common.h +++ b/libhb/common.h @@ -626,7 +626,6 @@ struct hb_job_s uint32_t frames_to_skip; // decode but discard this many frames // initially (for frame accurate positioning // to non-I frames). - int use_opencl; PRIVATE int use_decomb; PRIVATE int use_detelecine; @@ -1022,9 +1021,6 @@ struct hb_title_s #define HBTF_NO_IDR (1 << 0) #define HBTF_SCAN_COMPLETE (1 << 1) #define HBTF_RAW_VIDEO (1 << 2) - - // whether OpenCL scaling is supported for this source - int opencl_support; }; // Update win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_state_s.cs when changing this struct diff --git a/libhb/cropscale.c b/libhb/cropscale.c index b64154377..02e59eb29 100644 --- a/libhb/cropscale.c +++ b/libhb/cropscale.c @@ -10,7 +10,6 @@ #include "hb.h" #include "hbffmpeg.h" #include "common.h" -#include "opencl.h" struct hb_filter_private_s { @@ -23,9 +22,6 @@ struct hb_filter_private_s int height_out; int crop[4]; - /* OpenCL */ - hb_oclscale_t *os; //ocl scaler handler - struct SwsContext * context; }; @@ -72,13 +68,6 @@ static int hb_crop_scale_init( hb_filter_object_t * filter, pv->width_out = init->geometry.width - (init->crop[2] + init->crop[3]); pv->height_out = init->geometry.height - (init->crop[0] + init->crop[1]); - /* OpenCL */ - if (pv->job->use_opencl && pv->job->title->opencl_support) - { - pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) ); - memset( pv->os, 0, sizeof( hb_oclscale_t ) ); - } - memcpy( pv->crop, init->crop, sizeof( int[4] ) ); hb_dict_extract_int(&pv->width_out, filter->settings, "width"); hb_dict_extract_int(&pv->height_out, filter->settings, "height"); @@ -135,21 +124,6 @@ static void hb_crop_scale_close( hb_filter_object_t * filter ) return; } - /* OpenCL */ - if (pv->job->use_opencl && pv->job->title->opencl_support && pv->os) - { - if (hb_ocl != NULL) - { - HB_OCL_BUF_FREE(hb_ocl, pv->os->bicubic_x_weights); - HB_OCL_BUF_FREE(hb_ocl, pv->os->bicubic_y_weights); - if (pv->os->initialized == 1) - { - hb_ocl->clReleaseKernel(pv->os->m_kernel); - } - } - free(pv->os); - } - if( pv->context ) { sws_freeContext( pv->context ); @@ -159,7 +133,6 @@ static void hb_crop_scale_close( hb_filter_object_t * filter ) filter->private_data = NULL; } -/* OpenCL */ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) { hb_buffer_t * out; @@ -173,51 +146,40 @@ static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in ) // correct place for cropped frame hb_picture_crop(crop_data, crop_stride, in, pv->crop[0], pv->crop[2]); - // Use bicubic OpenCL scaling when selected and when downsampling < 4:1; - if ((pv->job->use_opencl && pv->job->title->opencl_support) && - (pv->width_out * 4 > pv->width_in) && - (in->cl.buffer != NULL) && (out->cl.buffer != NULL)) + if (pv->context == NULL || + pv->width_in != in->f.width || + pv->height_in != in->f.height || + pv->pix_fmt != in->f.fmt) { - /* OpenCL */ - hb_ocl_scale(in, out, pv->crop, pv->os); - } - else - { - if (pv->context == NULL || - pv->width_in != in->f.width || - pv->height_in != in->f.height || - pv->pix_fmt != in->f.fmt) + // Something changed, need a new scaling context. + if (pv->context != NULL) { - // Something changed, need a new scaling context. - if (pv->context != NULL) - { - sws_freeContext(pv->context); - } - - pv->context = hb_sws_get_context( - in->f.width - (pv->crop[2] + pv->crop[3]), - in->f.height - (pv->crop[0] + pv->crop[1]), - in->f.fmt, out->f.width, out->f.height, - out->f.fmt, SWS_LANCZOS|SWS_ACCURATE_RND, - hb_ff_get_colorspace(pv->job->title->color_matrix)); - pv->width_in = in->f.width; - pv->height_in = in->f.height; - pv->pix_fmt = in->f.fmt; + sws_freeContext(pv->context); } - if (pv->context == NULL) - { - hb_buffer_close(&out); - return NULL; - } + pv->context = hb_sws_get_context( + in->f.width - (pv->crop[2] + pv->crop[3]), + in->f.height - (pv->crop[0] + pv->crop[1]), + in->f.fmt, out->f.width, out->f.height, + out->f.fmt, SWS_LANCZOS|SWS_ACCURATE_RND, + hb_ff_get_colorspace(pv->job->title->color_matrix)); + pv->width_in = in->f.width; + pv->height_in = in->f.height; + pv->pix_fmt = in->f.fmt; + } - // Scale crop into out according to the context set up above - sws_scale(pv->context, - (const uint8_t* const*)crop_data, crop_stride, - 0, in->f.height - (pv->crop[0] + pv->crop[1]), - out_data, out_stride); + if (pv->context == NULL) + { + hb_buffer_close(&out); + return NULL; } + // Scale crop into out according to the context set up above + sws_scale(pv->context, + (const uint8_t* const*)crop_data, crop_stride, + 0, in->f.height - (pv->crop[0] + pv->crop[1]), + out_data, out_stride); + out->s = in->s; return out; } diff --git a/libhb/fifo.c b/libhb/fifo.c index 4308d7d63..798179415 100644 --- a/libhb/fifo.c +++ b/libhb/fifo.c @@ -8,7 +8,6 @@ */ #include "hb.h" -#include "openclwrapper.h" #ifdef USE_QSV #include "qsv_libav.h" #endif @@ -292,20 +291,7 @@ void hb_buffer_pool_free( void ) if( b->data ) { freed += b->alloc; - - if (b->cl.buffer != NULL) - { - /* OpenCL */ - if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0) - { - hb_log("hb_buffer_pool_free: bad free: %p -> buffer %p map %p", - b, b->cl.buffer, b->data); - } - } - else - { - free(b->data); - } + free(b->data); } free( b ); count++; @@ -350,7 +336,7 @@ static hb_fifo_t *size_to_pool( int size ) return NULL; } -hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) +hb_buffer_t * hb_buffer_init_internal( int size ) { hb_buffer_t * b; // Certain libraries (hrm ffmpeg) expect buffers passed to them to @@ -365,20 +351,6 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) { b = hb_fifo_get( buffer_pool ); - /* OpenCL */ - if (b != NULL && needsMapped && b->cl.buffer == NULL) - { - // We need a mapped OpenCL buffer and that is not - // what we got out of the pool. - // Ditch it; it will get replaced with what we need. - if (b->data != NULL) - { - free(b->data); - } - free(b); - b = NULL; - } - if( b ) { /* @@ -387,11 +359,6 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) */ uint8_t *data = b->data; - /* OpenCL */ - cl_mem buffer = b->cl.buffer; - cl_event last_event = b->cl.last_event; - int loc = b->cl.buffer_location; - memset( b, 0, sizeof(hb_buffer_t) ); b->alloc = buffer_pool->buffer_size; b->size = size; @@ -401,11 +368,6 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) b->s.renderOffset = AV_NOPTS_VALUE; b->s.scr_sequence = -1; - /* OpenCL */ - b->cl.buffer = buffer; - b->cl.last_event = last_event; - b->cl.buffer_location = loc; - #if defined(HB_BUFFER_DEBUG) hb_lock(buffers.lock); hb_list_add(buffers.alloc_list, b); @@ -429,34 +391,14 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) if (size) { - /* OpenCL */ - b->cl.last_event = NULL; - b->cl.buffer_location = HOST; - - /* OpenCL */ - if (needsMapped) - { - int status = hb_cl_create_mapped_buffer(&b->cl.buffer, &b->data, b->alloc); - if (!status) - { - hb_error("Failed to map CL buffer"); - free(b); - return NULL; - } - } - else - { - b->cl.buffer = NULL; - #if defined( SYS_DARWIN ) || defined( SYS_FREEBSD ) || defined( SYS_MINGW ) - b->data = malloc( b->alloc ); + b->data = malloc( b->alloc ); #elif defined( SYS_CYGWIN ) - /* FIXME */ - b->data = malloc( b->alloc + 17 ); + /* FIXME */ + b->data = malloc( b->alloc + 17 ); #else - b->data = memalign( 16, b->alloc ); + b->data = memalign( 16, b->alloc ); #endif - } if( !b->data ) { @@ -485,7 +427,7 @@ hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped ) hb_buffer_t * hb_buffer_init( int size ) { - return hb_buffer_init_internal(size, 0); + return hb_buffer_init_internal(size); } hb_buffer_t * hb_buffer_eof_init(void) @@ -637,8 +579,7 @@ hb_buffer_t * hb_frame_buffer_init( int pix_fmt, int width, int height ) } } - /* OpenCL */ - buf = hb_buffer_init_internal(size , hb_use_buffers()); + buf = hb_buffer_init_internal(size); if( buf == NULL ) return NULL; @@ -697,21 +638,11 @@ void hb_buffer_swap_copy( hb_buffer_t *src, hb_buffer_t *dst ) int size = dst->size; int alloc = dst->alloc; - /* OpenCL */ - cl_mem buffer = dst->cl.buffer; - cl_event last_event = dst->cl.last_event; - int loc = dst->cl.buffer_location; - *dst = *src; src->data = data; src->size = size; src->alloc = alloc; - - /* OpenCL */ - src->cl.buffer = buffer; - src->cl.last_event = last_event; - src->cl.buffer_location = loc; } // Frees the specified buffer list. @@ -771,19 +702,7 @@ void hb_buffer_close( hb_buffer_t ** _b ) // free the buf if( b->data ) { - if (b->cl.buffer != NULL) - { - /* OpenCL */ - if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0) - { - hb_log("hb_buffer_close: bad free %p -> buffer %p map %p", - b, b->cl.buffer, b->data); - } - } - else - { - free(b->data); - } + free(b->data); hb_lock(buffers.lock); buffers.allocated -= b->alloc; hb_unlock(buffers.lock); diff --git a/libhb/hb.c b/libhb/hb.c index a680df91f..3df3ec3ac 100644 --- a/libhb/hb.c +++ b/libhb/hb.c @@ -8,7 +8,6 @@ */ #include "hb.h" -#include "opencl.h" #include "hbffmpeg.h" #include "encx264.h" #include "libavfilter/avfilter.h" @@ -65,8 +64,6 @@ struct hb_handle_s // power management opaque pointer void * system_sleep_opaque; - - int enable_opencl; }; hb_work_object_t * hb_objects = NULL; @@ -143,11 +140,6 @@ int hb_avcodec_open(AVCodecContext *avctx, AVCodec *codec, return ret; } -int hb_get_opencl_enabled(hb_handle_t *h) -{ - return h->enable_opencl; -} - int hb_avcodec_close(AVCodecContext *avctx) { int ret; @@ -416,14 +408,6 @@ void hb_log_level_set(hb_handle_t *h, int level) global_verbosity_level = level; } -/* - * Enable or disable support for OpenCL detection. - */ -void hb_opencl_set_enable(hb_handle_t *h, int enable_opencl) -{ - h->enable_opencl = enable_opencl; -} - /** * libhb initialization routine. * @param verbose HB_DEBUG_NONE or HB_DEBUG_ALL. @@ -642,12 +626,6 @@ void hb_scan( hb_handle_t * h, const char * path, int title_index, } hb_log(" - logical processor count: %d", hb_get_cpu_count()); - /* Print OpenCL info here so that it's in all scan and encode logs */ - if (hb_get_opencl_enabled(h)) - { - hb_opencl_info_print(); - } - #ifdef USE_QSV /* Print QSV info here so that it's in all scan and encode logs */ hb_qsv_info_print(); @@ -1947,9 +1925,6 @@ void hb_global_close() hb_presets_free(); - /* OpenCL library (dynamically loaded) */ - hb_ocl_close(); - /* Find and remove temp folder */ memset( dirname, 0, 1024 ); hb_get_temporary_directory( dirname ); diff --git a/libhb/hb.h b/libhb/hb.h index fd8061431..f6d7eb5f4 100644 --- a/libhb/hb.h +++ b/libhb/hb.h @@ -32,7 +32,6 @@ void hb_register( hb_work_object_t * ); void hb_register_logger( void (*log_cb)(const char* message) ); hb_handle_t * hb_init( int verbose ); void hb_log_level_set(hb_handle_t *h, int level); -void hb_opencl_set_enable(hb_handle_t *h, int enable_opencl); /* hb_get_version() */ const char * hb_get_full_description(); @@ -49,8 +48,6 @@ int hb_check_update( hb_handle_t * h, char ** version ); char * hb_dvd_name( char * path ); void hb_dvd_set_dvdnav( int enable ); -int hb_get_opencl_enabled(hb_handle_t *h); - /* hb_scan() Scan the specified path. Can be a DVD device, a VIDEO_TS folder or a VOB file. If title_index is 0, scan all titles. */ diff --git a/libhb/hb_json.c b/libhb/hb_json.c index 32975785d..3d42078f7 100644 --- a/libhb/hb_json.c +++ b/libhb/hb_json.c @@ -396,8 +396,8 @@ hb_dict_t* hb_job_to_dict( const hb_job_t * job ) "s:{s:o, s:o, s:o,}," // PAR {Num, Den} "s:{s:o, s:o}," - // Video {Encoder, OpenCL, QSV {Decode, AsyncDepth}} - "s:{s:o, s:o, s:{s:o, s:o}}," + // Video {Encoder, QSV {Decode, AsyncDepth}} + "s:{s:o, s:{s:o, s:o}}," // Audio {CopyMask, FallbackEncoder, AudioList []} "s:{s:[], s:o, s:[]}," // Subtitles {Search {Enable, Forced, Default, Burn}, SubtitleList []} @@ -421,7 +421,6 @@ hb_dict_t* hb_job_to_dict( const hb_job_t * job ) "Den", hb_value_int(job->par.den), "Video", "Encoder", hb_value_int(job->vcodec), - "OpenCL", hb_value_bool(job->use_opencl), "QSV", "Decode", hb_value_bool(job->qsv.decode), "AsyncDepth", hb_value_int(job->qsv.async_depth), @@ -860,10 +859,10 @@ hb_job_t* hb_dict_to_job( hb_handle_t * h, hb_dict_t *dict ) "s?{s:i, s:i}," // Video {Codec, Quality, Bitrate, Preset, Tune, Profile, Level, Options // TwoPass, Turbo, ColorMatrixCode, - // OpenCL, QSV {Decode, AsyncDepth}} + // QSV {Decode, AsyncDepth}} "s:{s:o, s?f, s?i, s?s, s?s, s?s, s?s, s?s," " s?b, s?b, s?i," - " s?b, s?{s?b, s?i}}," + " s?{s?b, s?i}}," // Audio {CopyMask, FallbackEncoder, AudioList} "s?{s?o, s?o, s?o}," // Subtitle {Search {Enable, Forced, Default, Burn}, SubtitleList} @@ -905,7 +904,6 @@ hb_job_t* hb_dict_to_job( hb_handle_t * h, hb_dict_t *dict ) "TwoPass", unpack_b(&job->twopass), "Turbo", unpack_b(&job->fastfirstpass), "ColorMatrixCode", unpack_i(&job->color_matrix_code), - "OpenCL", unpack_b(&job->use_opencl), "QSV", "Decode", unpack_b(&job->qsv.decode), "AsyncDepth", unpack_i(&job->qsv.async_depth), diff --git a/libhb/internal.h b/libhb/internal.h index 5d462f37f..487304383 100644 --- a/libhb/internal.h +++ b/libhb/internal.h @@ -146,14 +146,6 @@ struct hb_buffer_s } qsv_details; #endif - /* OpenCL */ - struct cl_data - { - cl_mem buffer; - cl_event last_event; - enum { HOST, DEVICE } buffer_location; - } cl; - // libav may attach AV_PKT_DATA_PALETTE side data to some AVPackets // Store this data here when read and pass to decoder. hb_buffer_t * palette; diff --git a/libhb/oclscale.c b/libhb/oclscale.c deleted file mode 100644 index 936bd7c4d..000000000 --- a/libhb/oclscale.c +++ /dev/null @@ -1,302 +0,0 @@ -/* oclscale.c - - Copyright (c) 2003-2017 HandBrake Team - This file is part of the HandBrake source code - Homepage: . - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao - Li Cao - - */ - -#include -#include "common.h" -#include "opencl.h" -#include "openclwrapper.h" -#define FILTER_LEN 4 - -#define _A -0.5f - -cl_float cubic(cl_float x) -{ - if (x < 0) - x = -x; - - if (x < 1) - return (_A + 2.0f) * (x * x * x) - (_A + 3.0f) * (x * x) + 0 + 1; - else if (x < 2) - return (_A) * (x * x * x) - (5.0f * _A) * (x * x) + (8.0f * _A) * x - (4.0f * _A); - else - return 0; -} - - -cl_float *hb_bicubic_weights(cl_float scale, int length) -{ - cl_float *weights = (cl_float*) malloc(length * sizeof(cl_float) * 4); - - int i; // C rocks - cl_float *out = weights; - for (i = 0; i < length; ++i) - { - cl_float x = i / scale; - cl_float dx = x - (int)x; - *out++ = cubic(-dx - 1.0f); - *out++ = cubic(-dx); - *out++ = cubic(-dx + 1.0f); - *out++ = cubic(-dx + 2.0f); - } - return weights; -} - -int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv); - -/** -* executive scale using opencl -* get filter args -* create output buffer -* create horizontal filter buffer -* create vertical filter buffer -* create kernels -*/ -int hb_ocl_scale_func( void **data, KernelEnv *kenv ) -{ - cl_int status; - - cl_mem in_buf = data[0]; - cl_mem out_buf = data[1]; - int crop_top = (intptr_t)data[2]; - int crop_bottom = (intptr_t)data[3]; - int crop_left = (intptr_t)data[4]; - int crop_right = (intptr_t)data[5]; - cl_int in_frame_w = (intptr_t)data[6]; - cl_int in_frame_h = (intptr_t)data[7]; - cl_int out_frame_w = (intptr_t)data[8]; - cl_int out_frame_h = (intptr_t)data[9]; - hb_oclscale_t *os = data[10]; - hb_buffer_t *in = data[11]; - hb_buffer_t *out = data[12]; - - if (hb_ocl == NULL) - { - hb_error("hb_ocl_scale_func: OpenCL support not available"); - return 0; - } - - if (os->initialized == 0) - { - hb_log( "Scaling With OpenCL" ); - if (kenv->isAMD != 0) - hb_log( "Using Zero Copy"); - // create the block kernel - cl_int status; - os->m_kernel = hb_ocl->clCreateKernel(kenv->program, "frame_scale", &status); - - os->initialized = 1; - } - - { - // Use the new kernel - cl_event events[5]; - int eventCount = 0; - - if (kenv->isAMD == 0) { - status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, - in->cl.buffer, in->data, 0, - NULL, &events[eventCount++]); - status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, - out->cl.buffer, out->data, 0, - NULL, &events[eventCount++]); - } - - cl_int srcPlaneOffset0 = in->plane[0].data - in->data; - cl_int srcPlaneOffset1 = in->plane[1].data - in->data; - cl_int srcPlaneOffset2 = in->plane[2].data - in->data; - cl_int srcRowWords0 = in->plane[0].stride; - cl_int srcRowWords1 = in->plane[1].stride; - cl_int srcRowWords2 = in->plane[2].stride; - cl_int dstPlaneOffset0 = out->plane[0].data - out->data; - cl_int dstPlaneOffset1 = out->plane[1].data - out->data; - cl_int dstPlaneOffset2 = out->plane[2].data - out->data; - cl_int dstRowWords0 = out->plane[0].stride; - cl_int dstRowWords1 = out->plane[1].stride; - cl_int dstRowWords2 = out->plane[2].stride; - - if (crop_top != 0 || crop_bottom != 0 || crop_left != 0 || crop_right != 0) { - srcPlaneOffset0 += crop_left + crop_top * srcRowWords0; - srcPlaneOffset1 += crop_left / 2 + (crop_top / 2) * srcRowWords1; - srcPlaneOffset2 += crop_left / 2 + (crop_top / 2) * srcRowWords2; - in_frame_w = in_frame_w - crop_right - crop_left; - in_frame_h = in_frame_h - crop_bottom - crop_top; - } - - cl_float xscale = (out_frame_w * 1.0f) / in_frame_w; - cl_float yscale = (out_frame_h * 1.0f) / in_frame_h; - setupScaleWeights(xscale, yscale, out_frame_w, out_frame_h, os, kenv); - - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights); - HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights); - - size_t workOffset[] = { 0, 0, 0 }; - size_t globalWorkSize[] = { 1, 1, 1 }; - size_t localWorkSize[] = { 1, 1, 1 }; - - int xgroups = (out_frame_w + 63) / 64; - int ygroups = (out_frame_h + 15) / 16; - - localWorkSize[0] = 64; - localWorkSize[1] = 1; - localWorkSize[2] = 1; - globalWorkSize[0] = xgroups * 64; - globalWorkSize[1] = ygroups; - globalWorkSize[2] = 3; - - HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue, - os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize, - eventCount, eventCount == 0 ? NULL : &events[0], &events[eventCount]); - ++eventCount; - - if (kenv->isAMD == 0) { - in->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer, - CL_FALSE, CL_MAP_READ|CL_MAP_WRITE, - 0, in->alloc, - eventCount ? 1 : 0, - eventCount ? &events[eventCount - 1] : NULL, - &events[eventCount], &status); - out->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer, - CL_FALSE, CL_MAP_READ|CL_MAP_WRITE, - 0, out->alloc, - eventCount ? 1 : 0, - eventCount ? &events[eventCount - 1] : NULL, - &events[eventCount + 1], &status); - eventCount += 2; - } - - hb_ocl->clFlush(kenv->command_queue); - hb_ocl->clWaitForEvents(eventCount, &events[0]); - int i; - for (i = 0; i < eventCount; ++i) - { - hb_ocl->clReleaseEvent(events[i]); - } - } - - return 1; -} - -int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv) -{ - cl_int status; - - if (hb_ocl == NULL) - { - hb_error("setupScaleWeights: OpenCL support not available"); - return 1; - } - - if (os->xscale != xscale || os->width < width) - { - cl_float *xweights = hb_bicubic_weights(xscale, width); - HB_OCL_BUF_FREE (hb_ocl, os->bicubic_x_weights); - HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_x_weights, CL_MEM_READ_ONLY, - sizeof(cl_float) * width * 4); - HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights, - CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL); - os->width = width; - os->xscale = xscale; - free(xweights); - } - - if ((os->yscale != yscale) || (os->height < height)) - { - cl_float *yweights = hb_bicubic_weights(yscale, height); - HB_OCL_BUF_FREE (hb_ocl, os->bicubic_y_weights); - HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_y_weights, CL_MEM_READ_ONLY, - sizeof(cl_float) * height * 4); - HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights, - CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL); - os->height = height; - os->yscale = yscale; - free(yweights); - } - return 0; -} - - -/** -* function describe: this function is used to scaling video frame. it uses the gausi scaling algorithm -* parameter: -* inputFrameBuffer: the source video frame opencl buffer -* outputdata: the destination video frame buffer -* inputWidth: the width of the source video frame -* inputHeight: the height of the source video frame -* outputWidth: the width of destination video frame -* outputHeight: the height of destination video frame -*/ - - -static int s_scale_init_flag = 0; - -int do_scale_init() -{ - if ( s_scale_init_flag==0 ) - { - int st = hb_register_kernel_wrapper( "frame_scale", hb_ocl_scale_func ); - if( !st ) - { - hb_log( "register kernel[%s] failed", "frame_scale" ); - return 0; - } - s_scale_init_flag++; - } - return 1; -} - - -int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop, hb_oclscale_t *os) -{ - void *data[13]; - - if (do_scale_init() == 0) - return 0; - - data[0] = in->cl.buffer; - data[1] = out->cl.buffer; - data[2] = (void*)(intptr_t)(crop[0]); - data[3] = (void*)(intptr_t)(crop[1]); - data[4] = (void*)(intptr_t)(crop[2]); - data[5] = (void*)(intptr_t)(crop[3]); - data[6] = (void*)(intptr_t)(in->f.width); - data[7] = (void*)(intptr_t)(in->f.height); - data[8] = (void*)(intptr_t)(out->f.width); - data[9] = (void*)(intptr_t)(out->f.height); - data[10] = os; - data[11] = in; - data[12] = out; - - if( !hb_run_kernel( "frame_scale", data ) ) - hb_log( "run kernel[%s] failed", "frame_scale" ); - return 0; -} diff --git a/libhb/opencl.c b/libhb/opencl.c deleted file mode 100644 index 479c61399..000000000 --- a/libhb/opencl.c +++ /dev/null @@ -1,401 +0,0 @@ -/* opencl.c - - Copyright (c) 2003-2017 HandBrake Team - This file is part of the HandBrake source code - Homepage: . - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - */ - -#ifdef _WIN32 -#include -#define HB_OCL_DLOPEN LoadLibraryW(L"OpenCL") -#define HB_OCL_DLSYM GetProcAddress -#define HB_OCL_DLCLOSE FreeLibrary -#else -#include -#ifdef __APPLE__ -#define HB_OCL_DLOPEN dlopen("/System/Library/Frameworks/OpenCL.framework/OpenCL", RTLD_NOW) -#else -#define HB_OCL_DLOPEN dlopen("libOpenCL.so", RTLD_NOW) -#endif -#define HB_OCL_DLSYM dlsym -#define HB_OCL_DLCLOSE dlclose -#endif - -#include "common.h" -#include "opencl.h" - -hb_opencl_library_t *hb_ocl = NULL; - -int hb_ocl_init() -{ - if (hb_ocl == NULL) - { - if ((hb_ocl = hb_opencl_library_init()) == NULL) - { - return -1; - } - } - return 0; -} - -void hb_ocl_close() -{ - hb_opencl_library_close(&hb_ocl); -} - -hb_opencl_library_t* hb_opencl_library_init() -{ - hb_opencl_library_t *opencl; - if ((opencl = calloc(1, sizeof(hb_opencl_library_t))) == NULL) - { - hb_error("hb_opencl_library_init: memory allocation failure"); - goto fail; - } - - opencl->library = HB_OCL_DLOPEN; - if (opencl->library == NULL) - { - goto fail; - } - -#define HB_OCL_LOAD(func) \ -{ \ - if ((opencl->func = (void*)HB_OCL_DLSYM(opencl->library, #func)) == NULL) \ - { \ - hb_log("hb_opencl_library_init: failed to load function '%s'", #func); \ - goto fail; \ - } \ -} - HB_OCL_LOAD(clBuildProgram); - HB_OCL_LOAD(clCreateBuffer); - HB_OCL_LOAD(clCreateCommandQueue); - HB_OCL_LOAD(clCreateContextFromType); - HB_OCL_LOAD(clCreateKernel); - HB_OCL_LOAD(clCreateProgramWithBinary); - HB_OCL_LOAD(clCreateProgramWithSource); - HB_OCL_LOAD(clEnqueueCopyBuffer); - HB_OCL_LOAD(clEnqueueMapBuffer); - HB_OCL_LOAD(clEnqueueNDRangeKernel); - HB_OCL_LOAD(clEnqueueReadBuffer); - HB_OCL_LOAD(clEnqueueUnmapMemObject); - HB_OCL_LOAD(clEnqueueWriteBuffer); - HB_OCL_LOAD(clFlush); - HB_OCL_LOAD(clGetCommandQueueInfo); - HB_OCL_LOAD(clGetContextInfo); - HB_OCL_LOAD(clGetDeviceIDs); - HB_OCL_LOAD(clGetDeviceInfo); - HB_OCL_LOAD(clGetPlatformIDs); - HB_OCL_LOAD(clGetPlatformInfo); - HB_OCL_LOAD(clGetProgramBuildInfo); - HB_OCL_LOAD(clGetProgramInfo); - HB_OCL_LOAD(clReleaseCommandQueue); - HB_OCL_LOAD(clReleaseContext); - HB_OCL_LOAD(clReleaseEvent); - HB_OCL_LOAD(clReleaseKernel); - HB_OCL_LOAD(clReleaseMemObject); - HB_OCL_LOAD(clReleaseProgram); - HB_OCL_LOAD(clSetKernelArg); - HB_OCL_LOAD(clWaitForEvents); - - //success - return opencl; - -fail: - hb_opencl_library_close(&opencl); - return NULL; -} - -void hb_opencl_library_close(hb_opencl_library_t **_opencl) -{ - if (_opencl == NULL) - { - return; - } - hb_opencl_library_t *opencl = *_opencl; - - if (opencl != NULL) - { - if (opencl->library != NULL) - { - HB_OCL_DLCLOSE(opencl->library); - } - free(opencl); - } - *_opencl = NULL; -} - -static int hb_opencl_device_is_supported(hb_opencl_device_t* device) -{ - // we only support OpenCL on GPUs for now - // Ivy Bridge supports OpenCL on GPU, but it's too slow to be usable - // FIXME: disable on NVIDIA to to a bug - if ((device != NULL) && - (device->type & CL_DEVICE_TYPE_GPU) && - (device->ocl_vendor != HB_OCL_VENDOR_NVIDIA) && - (device->ocl_vendor != HB_OCL_VENDOR_INTEL || - hb_get_cpu_platform() != HB_CPU_PLATFORM_INTEL_IVB)) - { - int major, minor; - // check OpenCL version: - // OpenCL - if (sscanf(device->version, "OpenCL %d.%d", &major, &minor) != 2) - { - return 0; - } - return (major > HB_OCL_MINVERSION_MAJOR) || (major == HB_OCL_MINVERSION_MAJOR && - minor >= HB_OCL_MINVERSION_MINOR); - } - return 0; -} - -static hb_opencl_device_t* hb_opencl_device_get(hb_opencl_library_t *opencl, - cl_device_id device_id) -{ - if (opencl == NULL || opencl->clGetDeviceInfo == NULL) - { - hb_error("hb_opencl_device_get: OpenCL support not available"); - return NULL; - } - else if (device_id == NULL) - { - hb_error("hb_opencl_device_get: invalid device ID"); - return NULL; - } - - hb_opencl_device_t *device = calloc(1, sizeof(hb_opencl_device_t)); - if (device == NULL) - { - hb_error("hb_opencl_device_get: memory allocation failure"); - return NULL; - } - - cl_int status = CL_SUCCESS; - device->id = device_id; - - status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_VENDOR, sizeof(device->vendor), - device->vendor, NULL); - status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_NAME, sizeof(device->name), - device->name, NULL); - status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_VERSION, sizeof(device->version), - device->version, NULL); - status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_TYPE, sizeof(device->type), - &device->type, NULL); - status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_PLATFORM, sizeof(device->platform), - &device->platform, NULL); - status |= opencl->clGetDeviceInfo(device->id, CL_DRIVER_VERSION, sizeof(device->driver), - device->driver, NULL); - if (status != CL_SUCCESS) - { - free(device); - return NULL; - } - - if (!strcmp(device->vendor, "Advanced Micro Devices, Inc.") || - !strcmp(device->vendor, "AMD")) - { - device->ocl_vendor = HB_OCL_VENDOR_AMD; - } - else if (!strncmp(device->vendor, "NVIDIA", 6 /* strlen("NVIDIA") */)) - { - device->ocl_vendor = HB_OCL_VENDOR_NVIDIA; - } - else if (!strncmp(device->vendor, "Intel", 5 /* strlen("Intel") */)) - { - device->ocl_vendor = HB_OCL_VENDOR_INTEL; - } - else - { - device->ocl_vendor = HB_OCL_VENDOR_OTHER; - } - - return device; -} - -static void hb_opencl_devices_list_close(hb_list_t **_list) -{ - if (_list != NULL) - { - hb_list_t *list = *_list; - hb_opencl_device_t *device; - while (list != NULL && hb_list_count(list) > 0) - { - if ((device = hb_list_item(list, 0)) != NULL) - { - hb_list_rem(list, device); - free(device); - } - } - } - hb_list_close(_list); -} - -static hb_list_t* hb_opencl_devices_list_get(hb_opencl_library_t *opencl, - cl_device_type device_type) -{ - if (opencl == NULL || - opencl->library == NULL || - opencl->clGetDeviceIDs == NULL || - opencl->clGetDeviceInfo == NULL || - opencl->clGetPlatformIDs == NULL) - { - hb_error("hb_opencl_devices_list_get: OpenCL support not available"); - return NULL; - } - - hb_list_t *list = hb_list_init(); - if (list == NULL) - { - hb_error("hb_opencl_devices_list_get: memory allocation failure"); - return NULL; - } - - cl_device_id *device_ids = NULL; - hb_opencl_device_t *device = NULL; - cl_platform_id *platform_ids = NULL; - cl_uint i, j, num_platforms, num_devices; - - if (opencl->clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || !num_platforms) - { - goto fail; - } - if ((platform_ids = malloc(sizeof(cl_platform_id) * num_platforms)) == NULL) - { - hb_error("hb_opencl_devices_list_get: memory allocation failure"); - goto fail; - } - if (opencl->clGetPlatformIDs(num_platforms, platform_ids, NULL) != CL_SUCCESS) - { - goto fail; - } - for (i = 0; i < num_platforms; i++) - { - if (opencl->clGetDeviceIDs(platform_ids[i], device_type, 0, NULL, &num_devices) != CL_SUCCESS || !num_devices) - { - // non-fatal - continue; - } - if ((device_ids = malloc(sizeof(cl_device_id) * num_devices)) == NULL) - { - hb_error("hb_opencl_devices_list_get: memory allocation failure"); - goto fail; - } - if (opencl->clGetDeviceIDs(platform_ids[i], device_type, num_devices, device_ids, NULL) != CL_SUCCESS) - { - // non-fatal - continue; - } - for (j = 0; j < num_devices; j++) - { - if ((device = hb_opencl_device_get(opencl, device_ids[j])) != NULL) - { - hb_list_add(list, device); - } - } - } - - goto end; - -fail: - hb_opencl_devices_list_close(&list); - -end: - free(platform_ids); - free(device_ids); - return list; -} - -int hb_opencl_available() -{ - static int opencl_available = -1; - if (opencl_available >= 0) - { - return opencl_available; - } - opencl_available = 0; - - /* - * Check whether we can load the OpenCL library, then check devices and make - * sure we support running OpenCL code on at least one of them. - */ - hb_opencl_library_t *opencl; - if ((opencl = hb_opencl_library_init()) != NULL) - { - int i; - hb_list_t *device_list; - hb_opencl_device_t *device; - if ((device_list = hb_opencl_devices_list_get(opencl, CL_DEVICE_TYPE_ALL)) != NULL) - { - for (i = 0; i < hb_list_count(device_list); i++) - { - if ((device = hb_list_item(device_list, i)) != NULL && - (hb_opencl_device_is_supported(device))) - { - opencl_available = 1; - break; - } - } - hb_opencl_devices_list_close(&device_list); - } - hb_opencl_library_close(&opencl); - } - return opencl_available; -} - -void hb_opencl_info_print() -{ - /* - * Note: this function should not log any warnings or errors. - * Its only purpose is to list OpenCL-capable devices, so let's initialize - * only what we absolutely need here, rather than calling library_open(). - */ - hb_opencl_library_t ocl, *opencl = &ocl; - if ((opencl->library = (void*)HB_OCL_DLOPEN) == NULL || - (opencl->clGetDeviceIDs = (void*)HB_OCL_DLSYM(opencl->library, "clGetDeviceIDs" )) == NULL || - (opencl->clGetDeviceInfo = (void*)HB_OCL_DLSYM(opencl->library, "clGetDeviceInfo" )) == NULL || - (opencl->clGetPlatformIDs = (void*)HB_OCL_DLSYM(opencl->library, "clGetPlatformIDs")) == NULL) - { - // zero or insufficient OpenCL support - hb_log("OpenCL: library not available"); - goto end; - } - - int i, idx; - hb_list_t *device_list; - hb_opencl_device_t *device; - if ((device_list = hb_opencl_devices_list_get(opencl, CL_DEVICE_TYPE_ALL)) != NULL) - { - for (i = 0, idx = 1; i < hb_list_count(device_list); i++) - { - if ((device = hb_list_item(device_list, i)) != NULL) - { - // don't list CPU devices (always unsupported) - if (!(device->type & CL_DEVICE_TYPE_CPU)) - { - hb_log("OpenCL device #%d: %s %s", idx++, device->vendor, device->name); - hb_log(" - OpenCL version: %s", device->version + 7 /* strlen("OpenCL ") */); - hb_log(" - driver version: %s", device->driver); - hb_log(" - device type: %s%s", - device->type & CL_DEVICE_TYPE_CPU ? "CPU" : - device->type & CL_DEVICE_TYPE_GPU ? "GPU" : - device->type & CL_DEVICE_TYPE_CUSTOM ? "Custom" : - device->type & CL_DEVICE_TYPE_ACCELERATOR ? "Accelerator" : "Unknown", - device->type & CL_DEVICE_TYPE_DEFAULT ? " (default)" : ""); - hb_log(" - supported: %s", - hb_opencl_device_is_supported(device) ? "YES" : "no"); - } - } - } - hb_opencl_devices_list_close(&device_list); - } - -end: - /* - * Close only the initialized part - */ - if (opencl->library != NULL) - { - HB_OCL_DLCLOSE(opencl->library); - } -} diff --git a/libhb/opencl.h b/libhb/opencl.h deleted file mode 100644 index c68dfb8ce..000000000 --- a/libhb/opencl.h +++ /dev/null @@ -1,749 +0,0 @@ -/* opencl.h - - Copyright (c) 2003-2017 HandBrake Team - This file is part of the HandBrake source code - Homepage: . - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - */ - -#ifndef HB_OPENCL_H -#define HB_OPENCL_H - -#include "extras/cl.h" -#include "openclwrapper.h" - -// we only support OpenCL 1.1 or later -#define HB_OCL_MINVERSION_MAJOR 1 -#define HB_OCL_MINVERSION_MINOR 1 - -#define HB_OCL_FUNC_TYPE(name) hb_opencl_##name##_func -#define HB_OCL_FUNC_DECL(name) HB_OCL_FUNC_TYPE(name) name -#define HB_OCL_API(ret, attr, name) typedef ret (attr* HB_OCL_FUNC_TYPE(name)) - -#ifdef __APPLE__ -#pragma mark - -#pragma mark OpenCL API -#endif // __APPLE__ - -/* Platform API */ -HB_OCL_API(cl_int, CL_API_CALL, clGetPlatformIDs) -(cl_uint /* num_entries */, - cl_platform_id * /* platforms */, - cl_uint * /* num_platforms */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetPlatformInfo) -(cl_platform_id /* platform */, - cl_platform_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -/* Device APIs */ -HB_OCL_API(cl_int, CL_API_CALL, clGetDeviceIDs) -(cl_platform_id /* platform */, - cl_device_type /* device_type */, - cl_uint /* num_entries */, - cl_device_id * /* devices */, - cl_uint * /* num_devices */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetDeviceInfo) -(cl_device_id /* device */, - cl_device_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clCreateSubDevices) -(cl_device_id /* in_device */, - const cl_device_partition_property * /* properties */, - cl_uint /* num_devices */, - cl_device_id * /* out_devices */, - cl_uint * /* num_devices_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clRetainDevice) -(cl_device_id /* device */); - -HB_OCL_API(cl_int, CL_API_CALL, clReleaseDevice) -(cl_device_id /* device */); - -/* Context APIs */ -HB_OCL_API(cl_context, CL_API_CALL, clCreateContext) -(const cl_context_properties * /* properties */, - cl_uint /* num_devices */, - const cl_device_id * /* devices */, - void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), - void * /* user_data */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_context, CL_API_CALL, clCreateContextFromType) -(const cl_context_properties * /* properties */, - cl_device_type /* device_type */, - void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), - void * /* user_data */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clRetainContext) -(cl_context /* context */); - -HB_OCL_API(cl_int, CL_API_CALL, clReleaseContext) -(cl_context /* context */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetContextInfo) -(cl_context /* context */, - cl_context_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -/* Command Queue APIs */ -HB_OCL_API(cl_command_queue, CL_API_CALL, clCreateCommandQueue) -(cl_context /* context */, - cl_device_id /* device */, - cl_command_queue_properties /* properties */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clRetainCommandQueue) -(cl_command_queue /* command_queue */); - -HB_OCL_API(cl_int, CL_API_CALL, clReleaseCommandQueue) -(cl_command_queue /* command_queue */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetCommandQueueInfo) -(cl_command_queue /* command_queue */, - cl_command_queue_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -/* Memory Object APIs */ -HB_OCL_API(cl_mem, CL_API_CALL, clCreateBuffer) -(cl_context /* context */, - cl_mem_flags /* flags */, - size_t /* size */, - void * /* host_ptr */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_mem, CL_API_CALL, clCreateSubBuffer) -(cl_mem /* buffer */, - cl_mem_flags /* flags */, - cl_buffer_create_type /* buffer_create_type */, - const void * /* buffer_create_info */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_mem, CL_API_CALL, clCreateImage) -(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - const cl_image_desc * /* image_desc */, - void * /* host_ptr */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clRetainMemObject) -(cl_mem /* memobj */); - -HB_OCL_API(cl_int, CL_API_CALL, clReleaseMemObject) -(cl_mem /* memobj */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetSupportedImageFormats) -(cl_context /* context */, - cl_mem_flags /* flags */, - cl_mem_object_type /* image_type */, - cl_uint /* num_entries */, - cl_image_format * /* image_formats */, - cl_uint * /* num_image_formats */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetMemObjectInfo) -(cl_mem /* memobj */, - cl_mem_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetImageInfo) -(cl_mem /* image */, - cl_image_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clSetMemObjectDestructorCallback) -(cl_mem /* memobj */, - void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), - void * /*user_data */ ); - -/* Sampler APIs */ -HB_OCL_API(cl_sampler, CL_API_CALL, clCreateSampler) -(cl_context /* context */, - cl_bool /* normalized_coords */, - cl_addressing_mode /* addressing_mode */, - cl_filter_mode /* filter_mode */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clRetainSampler) -(cl_sampler /* sampler */); - -HB_OCL_API(cl_int, CL_API_CALL, clReleaseSampler) -(cl_sampler /* sampler */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetSamplerInfo) -(cl_sampler /* sampler */, - cl_sampler_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -/* Program Object APIs */ -HB_OCL_API(cl_program, CL_API_CALL, clCreateProgramWithSource) -(cl_context /* context */, - cl_uint /* count */, - const char ** /* strings */, - const size_t * /* lengths */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_program, CL_API_CALL, clCreateProgramWithBinary) -(cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const size_t * /* lengths */, - const unsigned char ** /* binaries */, - cl_int * /* binary_status */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_program, CL_API_CALL, clCreateProgramWithBuiltInKernels) -(cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* kernel_names */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clRetainProgram) -(cl_program /* program */); - -HB_OCL_API(cl_int, CL_API_CALL, clReleaseProgram) -(cl_program /* program */); - -HB_OCL_API(cl_int, CL_API_CALL, clBuildProgram) -(cl_program /* program */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */); - -HB_OCL_API(cl_int, CL_API_CALL, clCompileProgram) -(cl_program /* program */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - cl_uint /* num_input_headers */, - const cl_program * /* input_headers */, - const char ** /* header_include_names */, - void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */); - -HB_OCL_API(cl_program, CL_API_CALL, clLinkProgram) -(cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - cl_uint /* num_input_programs */, - const cl_program * /* input_programs */, - void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */, - cl_int * /* errcode_ret */ ); - - -HB_OCL_API(cl_int, CL_API_CALL, clUnloadPlatformCompiler) -(cl_platform_id /* platform */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetProgramInfo) -(cl_program /* program */, - cl_program_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetProgramBuildInfo) -(cl_program /* program */, - cl_device_id /* device */, - cl_program_build_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -/* Kernel Object APIs */ -HB_OCL_API(cl_kernel, CL_API_CALL, clCreateKernel) -(cl_program /* program */, - const char * /* kernel_name */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clCreateKernelsInProgram) -(cl_program /* program */, - cl_uint /* num_kernels */, - cl_kernel * /* kernels */, - cl_uint * /* num_kernels_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clRetainKernel) -(cl_kernel /* kernel */); - -HB_OCL_API(cl_int, CL_API_CALL, clReleaseKernel) -(cl_kernel /* kernel */); - -HB_OCL_API(cl_int, CL_API_CALL, clSetKernelArg) -(cl_kernel /* kernel */, - cl_uint /* arg_index */, - size_t /* arg_size */, - const void * /* arg_value */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetKernelInfo) -(cl_kernel /* kernel */, - cl_kernel_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetKernelArgInfo) -(cl_kernel /* kernel */, - cl_uint /* arg_indx */, - cl_kernel_arg_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetKernelWorkGroupInfo) -(cl_kernel /* kernel */, - cl_device_id /* device */, - cl_kernel_work_group_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -/* Event Object APIs */ -HB_OCL_API(cl_int, CL_API_CALL, clWaitForEvents) -(cl_uint /* num_events */, - const cl_event * /* event_list */); - -HB_OCL_API(cl_int, CL_API_CALL, clGetEventInfo) -(cl_event /* event */, - cl_event_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -HB_OCL_API(cl_event, CL_API_CALL, clCreateUserEvent) -(cl_context /* context */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clRetainEvent) -(cl_event /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clReleaseEvent) -(cl_event /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clSetUserEventStatus) -(cl_event /* event */, - cl_int /* execution_status */); - -HB_OCL_API(cl_int, CL_API_CALL, clSetEventCallback) -(cl_event /* event */, - cl_int /* command_exec_callback_type */, - void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), - void * /* user_data */); - -/* Profiling APIs */ -HB_OCL_API(cl_int, CL_API_CALL, clGetEventProfilingInfo) -(cl_event /* event */, - cl_profiling_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */); - -/* Flush and Finish APIs */ -HB_OCL_API(cl_int, CL_API_CALL, clFlush) -(cl_command_queue /* command_queue */); - -HB_OCL_API(cl_int, CL_API_CALL, clFinish) -(cl_command_queue /* command_queue */); - -/* Enqueued Commands APIs */ -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueReadBuffer) -(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - size_t /* offset */, - size_t /* size */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueReadBufferRect) -(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - const size_t * /* buffer_offset */, - const size_t * /* host_offset */, - const size_t * /* region */, - size_t /* buffer_row_pitch */, - size_t /* buffer_slice_pitch */, - size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueWriteBuffer) -(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_write */, - size_t /* offset */, - size_t /* size */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueWriteBufferRect) -(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_write */, - const size_t * /* buffer_offset */, - const size_t * /* host_offset */, - const size_t * /* region */, - size_t /* buffer_row_pitch */, - size_t /* buffer_slice_pitch */, - size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueFillBuffer) -(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - const void * /* pattern */, - size_t /* pattern_size */, - size_t /* offset */, - size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBuffer) -(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_buffer */, - size_t /* src_offset */, - size_t /* dst_offset */, - size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBufferRect) -(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_buffer */, - const size_t * /* src_origin */, - const size_t * /* dst_origin */, - const size_t * /* region */, - size_t /* src_row_pitch */, - size_t /* src_slice_pitch */, - size_t /* dst_row_pitch */, - size_t /* dst_slice_pitch */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueReadImage) -(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_read */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* row_pitch */, - size_t /* slice_pitch */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueWriteImage) -(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_write */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* input_row_pitch */, - size_t /* input_slice_pitch */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueFillImage) -(cl_command_queue /* command_queue */, - cl_mem /* image */, - const void * /* fill_color */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyImage) -(cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_image */, - const size_t * /* src_origin[3] */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyImageToBuffer) -(cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_buffer */, - const size_t * /* src_origin[3] */, - const size_t * /* region[3] */, - size_t /* dst_offset */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBufferToImage) -(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_image */, - size_t /* src_offset */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(void *, CL_API_CALL, clEnqueueMapBuffer) -(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - size_t /* offset */, - size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */); - -HB_OCL_API(void *, CL_API_CALL, clEnqueueMapImage) -(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t * /* image_row_pitch */, - size_t * /* image_slice_pitch */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueUnmapMemObject) -(cl_command_queue /* command_queue */, - cl_mem /* memobj */, - void * /* mapped_ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueMigrateMemObjects) -(cl_command_queue /* command_queue */, - cl_uint /* num_mem_objects */, - const cl_mem * /* mem_objects */, - cl_mem_migration_flags /* flags */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueNDRangeKernel) -(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* work_dim */, - const size_t * /* global_work_offset */, - const size_t * /* global_work_size */, - const size_t * /* local_work_size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueTask) -(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueNativeKernel) -(cl_command_queue /* command_queue */, - void (CL_CALLBACK * /*user_func*/)(void *), - void * /* args */, - size_t /* cb_args */, - cl_uint /* num_mem_objects */, - const cl_mem * /* mem_list */, - const void ** /* args_mem_loc */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueMarkerWithWaitList) -(cl_command_queue /* command_queue */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -HB_OCL_API(cl_int, CL_API_CALL, clEnqueueBarrierWithWaitList) -(cl_command_queue /* command_queue */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - - -/* Extension function access - * - * Returns the extension function address for the given function name, - * or NULL if a valid function can not be found. The client must - * check to make sure the address is not NULL, before using or - * calling the returned function address. - */ -HB_OCL_API(void *, CL_API_CALL, clGetExtensionFunctionAddressForPlatform) -(cl_platform_id /* platform */, - const char * /* func_name */); - -#ifdef __APPLE__ -#pragma mark - -#endif // __APPLE__ - -typedef struct hb_opencl_library_s -{ - void *library; - - /* Pointers to select OpenCL API functions */ - HB_OCL_FUNC_DECL(clBuildProgram); - HB_OCL_FUNC_DECL(clCreateBuffer); - HB_OCL_FUNC_DECL(clCreateCommandQueue); - HB_OCL_FUNC_DECL(clCreateContextFromType); - HB_OCL_FUNC_DECL(clCreateKernel); - HB_OCL_FUNC_DECL(clCreateProgramWithBinary); - HB_OCL_FUNC_DECL(clCreateProgramWithSource); - HB_OCL_FUNC_DECL(clEnqueueCopyBuffer); - HB_OCL_FUNC_DECL(clEnqueueMapBuffer); - HB_OCL_FUNC_DECL(clEnqueueNDRangeKernel); - HB_OCL_FUNC_DECL(clEnqueueReadBuffer); - HB_OCL_FUNC_DECL(clEnqueueUnmapMemObject); - HB_OCL_FUNC_DECL(clEnqueueWriteBuffer); - HB_OCL_FUNC_DECL(clFlush); - HB_OCL_FUNC_DECL(clGetCommandQueueInfo); - HB_OCL_FUNC_DECL(clGetContextInfo); - HB_OCL_FUNC_DECL(clGetDeviceIDs); - HB_OCL_FUNC_DECL(clGetDeviceInfo); - HB_OCL_FUNC_DECL(clGetPlatformIDs); - HB_OCL_FUNC_DECL(clGetPlatformInfo); - HB_OCL_FUNC_DECL(clGetProgramBuildInfo); - HB_OCL_FUNC_DECL(clGetProgramInfo); - HB_OCL_FUNC_DECL(clReleaseCommandQueue); - HB_OCL_FUNC_DECL(clReleaseContext); - HB_OCL_FUNC_DECL(clReleaseEvent); - HB_OCL_FUNC_DECL(clReleaseKernel); - HB_OCL_FUNC_DECL(clReleaseMemObject); - HB_OCL_FUNC_DECL(clReleaseProgram); - HB_OCL_FUNC_DECL(clSetKernelArg); - HB_OCL_FUNC_DECL(clWaitForEvents); -} hb_opencl_library_t; - -hb_opencl_library_t* hb_opencl_library_init(); -void hb_opencl_library_close(hb_opencl_library_t **_opencl); - -/* - * Convenience pointer to a single shared OpenCL library wrapper. - * - * It can be initialized and closed via hb_ocl_init/close(). - */ -extern hb_opencl_library_t *hb_ocl; -int hb_ocl_init(); -void hb_ocl_close(); - -typedef struct hb_opencl_device_s -{ - cl_platform_id platform; - cl_device_type type; - cl_device_id id; - char version[128]; - char driver[128]; - char vendor[128]; - char name[128]; - enum - { - HB_OCL_VENDOR_AMD, - HB_OCL_VENDOR_NVIDIA, - HB_OCL_VENDOR_INTEL, - HB_OCL_VENDOR_OTHER, - } ocl_vendor; -} hb_opencl_device_t; - -int hb_opencl_available(); -void hb_opencl_info_print(); - -/* OpenCL scaling */ -typedef struct hb_oclscale_s -{ - int initialized; - // bicubic scale weights - cl_mem bicubic_x_weights; - cl_mem bicubic_y_weights; - cl_float xscale; - cl_float yscale; - int width; - int height; - // horizontal scaling and vertical scaling kernel handle - cl_kernel m_kernel; - int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem -} hb_oclscale_t; - -int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop, - hb_oclscale_t *os); - -/* Utilities */ -#define HB_OCL_BUF_CREATE(ocl_lib, out, flags, size) \ -{ \ - out = ocl_lib->clCreateBuffer(kenv->context, flags, size, NULL, &status); \ - if (CL_SUCCESS != status) \ - { \ - return -1; \ - } \ -} - -#define HB_OCL_BUF_FREE(ocl_lib, buf) \ -{ \ - if (buf != NULL) \ - { \ - ocl_lib->clReleaseMemObject(buf); \ - buf = NULL; \ - } \ -} - -#define HB_OCL_CHECK(method, ...) \ -{ \ - status = method(__VA_ARGS__); \ - if (status != CL_SUCCESS) \ - { \ - hb_error("%s:%d (%s) error: %d\n",__FUNCTION__,__LINE__,#method,status);\ - return status; \ - } \ -} - -#endif//HB_OPENCL_H diff --git a/libhb/openclkernels.h b/libhb/openclkernels.h deleted file mode 100644 index 3e172fa06..000000000 --- a/libhb/openclkernels.h +++ /dev/null @@ -1,771 +0,0 @@ -/* openclkernels.h - - Copyright (c) 2003-2017 HandBrake Team - This file is part of the HandBrake source code - Homepage: . - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao - Li Cao - - */ - -#ifndef USE_EXTERNAL_KERNEL - -#define KERNEL( ... )# __VA_ARGS__ - - -char *kernel_src_hscale = KERNEL ( - - typedef unsigned char fixed8; - -/******************************************************************************************************* -dst: Horizontal scale destination; -src: YUV content in opencl buf; -hf_Y: Horizontal filter coefficients for Y planes; -hf_UV: Horizontal filter coefficients for UV planes; -hi_Y: Horizontal filter index for Y planes; -hi_UV: Horizontal filter index for UV planes; -stride: Src width; -filter_len: Length of filter; -********************************************************************************************************/ - kernel void frame_h_scale ( - global fixed8 *src, - global float *hf_Y, - global float *hf_UV, - global int *hi_Y, - global int *hi_UV, - global fixed8 *dst, - int stride, //src_width - int filter_len - ) - { - int x = get_global_id( 0 ); - int y = get_global_id( 1 ); - int width = get_global_size( 0 ); - int height = get_global_size( 1 ); - float result_Y = 0, result_U = 0, result_V = 0; - int i = 0; - - global fixed8 *src_Y = src; - global fixed8 *src_U = src_Y + stride * height; - global fixed8 *src_V = src_U + (stride >> 1) * (height >> 1); - - global fixed8 *dst_Y = dst; - global fixed8 *dst_U = dst_Y + width * height; - global fixed8 *dst_V = dst_U + (width >> 1) * (height >> 1); - - int xy = y * width + x; - global fixed8 *rowdata_Y = src_Y + (y * stride); - for( int i = 0; i < filter_len; i++ ) - { - result_Y += ( hf_Y[x + i * width] * rowdata_Y[hi_Y[x] + i]); - } - dst_Y[xy] = result_Y; - - if( y < (height >> 1) && x < (width >> 1) ) - { - int xy = y * (width >> 1) + x; - global fixed8 *rowdata_U = src_U + (y * (stride >> 1)); - global fixed8 *rowdata_V = src_V + (y * (stride >> 1)); - for( i = 0; i < filter_len; i++ ) - { - result_U += ( hf_UV[x + i * (width >> 1)] * rowdata_U[hi_UV[x] + i]); - result_V += ( hf_UV[x + i * (width >> 1)] * rowdata_V[hi_UV[x] + i]); - } - dst_U[xy] = result_U; - dst_V[xy] = result_V; - } - } - ); - -/******************************************************************************************************* -dst: Vertical scale destination; -src: YUV content in opencl buf; -hf_Y: Vertical filter coefficients for Y planes; -hf_UV: Vertical filter coefficients for UV planes; -hi_Y: Vertical filter index for Y planes; -hi_UV: Vertical filter index for UV planes; -stride: Src height; -filter_len: Length of filter; -********************************************************************************************************/ -char *kernel_src_vscale = KERNEL ( - - kernel void frame_v_scale ( - global fixed8 *src, - global float *vf_Y, - global float *vf_UV, - global int *vi_Y, - global int *vi_UV, - global fixed8 *dst, - int src_height, - int filter_len - ) - { - int x = get_global_id( 0 ); - int y = get_global_id( 1 ); - int width = get_global_size( 0 ); - int height = get_global_size( 1 ); - float result_Y = 0, result_U = 0, result_V = 0; - int i = 0; - - global fixed8 *src_Y = src; - global fixed8 *src_U = src_Y + src_height * width; - global fixed8 *src_V = src_U + (src_height >> 1) * (width >> 1); - - global fixed8 *dst_Y = dst; - global fixed8 *dst_U = dst_Y + height * width; - global fixed8 *dst_V = dst_U + (height >> 1) * (width >> 1); - - int xy = y * width + x; - for( i = 0; i < filter_len; i++ ) - { - result_Y += vf_Y[y + i * height] * src_Y[(vi_Y[y] + i) * width + x]; - } - dst_Y[xy] = result_Y; - - if( y < (height >> 1) && x < (width >> 1) ) - { - int xy = y * (width >> 1) + x; - for( i = 0; i < filter_len; i++ ) - { - result_U += vf_UV[y + i * (height >> 1)] * src_U[(vi_UV[y] + i) * (width >> 1) + x]; - result_V += vf_UV[y + i * (height >> 1)] * src_V[(vi_UV[y] + i) * (width >> 1) + x]; - } - dst_U[xy] = result_U; - dst_V[xy] = result_V; - } - } - ); - -/******************************************************************************************************* -input: Input buffer; -output: Output buffer; -w: Width of frame; -h: Height of frame; -********************************************************************************************************/ -char *kernel_src_nvtoyuv = KERNEL ( - - kernel void nv12toyuv ( global char *input, global char* output, int w, int h ) - { - int x = get_global_id( 0 ); - int y = get_global_id( 1 ); - int idx = y * (w >> 1) + x; - vstore4((vload4( 0, input + (idx << 2))), 0, output + (idx << 2)); //Y - char2 uv = vload2( 0, input + (idx << 1) + w * h ); - output[idx + w * h] = uv.s0; - output[idx + w * h + ((w * h) >> 2)] = uv.s1; - } - ); - -/******************************************************************************************************* -dst: Horizontal scale destination; -src: YUV content in opencl buf; -yfilter: Opencl memory of horizontal filter coefficients for luma/alpha planes; -yfilterPos: Opencl memory of horizontal filter starting positions for each dst[i] for luma/alpha planes; -yfilterSize: Horizontal filter size for luma/alpha pixels; -cfilter: Opencl memory of horizontal filter coefficients for chroma planes; -cfilterPos: Opencl memory of horizontal filter starting positions for each dst[i] for chroma planes; -cfilterSize: Horizontal filter size for chroma pixels; -dstStride: Width of destination luma/alpha planes; -dstChrStride: Width of destination chroma planes; -********************************************************************************************************/ - -char *kernel_src_hscaleall = KERNEL ( - - kernel void hscale_all_opencl ( - global short *dst, - const global unsigned char *src, - const global short *yfilter, - const global int *yfilterPos, - int yfilterSize, - const global short *cfilter, - const global int *cfilterPos, - int cfilterSize, - int dstWidth, - int dstHeight, - int srcWidth, - int srcHeight, - int dstStride, - int dstChrStride, - int srcStride, - int srcChrStride) - { - int w = get_global_id(0); - int h = get_global_id(1); - - int chrWidth = get_global_size(0); - int chrHeight = get_global_size(1); - - int srcPos1 = h * srcStride + yfilterPos[w]; - int srcPos2 = h * srcStride + yfilterPos[w + chrWidth]; - int srcPos3 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w]; - int srcPos4 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w + chrWidth]; - int srcc1Pos = srcStride * srcHeight + (h) * (srcChrStride) + cfilterPos[w]; - int srcc2Pos = srcc1Pos + ((srcChrStride)*(chrHeight)); - - int val1 = 0; - int val2 = 0; - int val3 = 0; - int val4 = 0; - int val5 = 0; - int val6 = 0; - - int filterPos1 = yfilterSize * w; - int filterPos2 = yfilterSize * (w + chrWidth); - int cfilterPos1 = cfilterSize * w; - - int j; - for (j = 0; j < yfilterSize; j++) - { - val1 += src[srcPos1 + j] * yfilter[filterPos1+ j]; - val2 += src[srcPos2 + j] * yfilter[filterPos2 + j]; - val3 += src[srcPos3 + j] * yfilter[filterPos1 + j]; - val4 += src[srcPos4 + j] * yfilter[filterPos2 + j]; - val5 += src[srcc1Pos+j] * cfilter[cfilterPos1 + j]; - val6 += src[srcc2Pos+j] * cfilter[cfilterPos1 + j]; - } - int dstPos1 = h *dstStride; - int dstPos2 = (h + chrHeight) * dstStride; - - dst[dstPos1 + w] = ((val1 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val1 >> 7)); - dst[dstPos1 + w + chrWidth] = ((val2 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val2 >> 7)); - dst[dstPos2 + w] = ((val3 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val3 >> 7)); - dst[dstPos2 + w + chrWidth] = ((val4 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val4 >> 7)); - - int dstPos3 = h * (dstChrStride) + w + dstStride * dstHeight; - int dstPos4 = h * (dstChrStride) + w + dstStride * dstHeight + ((dstChrStride) * chrHeight); - dst[dstPos3] = ((val5 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val5 >> 7)); - dst[dstPos4] = ((val6 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val6 >> 7)); - } - ); - -char *kernel_src_hscalefast = KERNEL ( - - kernel void hscale_fast_opencl ( - global short *dst, - const global unsigned char *src, - int xInc, - int chrXInc, - int dstWidth, - int dstHeight, - int srcWidth, - int srcHeight, - int dstStride, - int dstChrStride, - int srcStride, - int srcChrStride) - { - - int w = get_global_id(0); - int h = get_global_id(1); - - int chrWidth = get_global_size(0); - int chrHeight = get_global_size(1); - int xpos1 = 0; - int xpos2 = 0; - int xx = xpos1 >> 16; - int xalpha = (xpos1 & 0xFFFF) >> 9; - dst[h * dstStride + w] = (src[h * srcStride + xx] << 7) + (src[h * srcStride + xx + 1] -src[h * srcStride + xx]) * xalpha; - int lowpart = h + (chrHeight); - dst[lowpart * dstStride + w] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha; - - int inv_i = w * xInc >> 16; - if( inv_i >= srcWidth - 1) - { - dst[h*dstStride + w] = src[h*srcStride + srcWidth-1]*128; - dst[lowpart*dstStride + w] = src[lowpart*srcStride + srcWidth - 1] * 128; - } - - int rightpart = w + (chrWidth); - xx = xpos2 >> 16; - xalpha = (xpos2 & 0xFFFF) >> 9; - dst[h * dstStride + rightpart] = (src[h *srcStride + xx] << 7) + (src[h * srcStride + xx + 1] - src[h * srcStride + xx]) * xalpha; - dst[lowpart * dstStride + rightpart] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha; - inv_i = rightpart * xInc >> 16; - if( inv_i >= srcWidth - 1) - { - dst[h * dstStride + rightpart] = src[h * srcStride + srcWidth - 1] * 128; - dst[lowpart * dstStride + rightpart] = src[lowpart * srcStride + srcWidth - 1] * 128; - } - - int xpos = 0; - xpos = chrXInc * w; - xx = xpos >> 16; - xalpha = (xpos & 0xFFFF) >> 9; - src += srcStride * srcHeight; - dst += dstStride * dstHeight; - dst[h * (dstChrStride) + w] = (src[h * (srcChrStride) + xx] * (xalpha^127) + src[h * (srcChrStride) + xx + 1] * xalpha); - inv_i = w * xInc >> 16; - if( inv_i >= (srcWidth >> 1) - 1) - { - dst[h * (dstChrStride) + w] = src[h * (srcChrStride) + (srcWidth >> 1) -1]*128; - } - - xpos = chrXInc * (w); - xx = xpos >> 16; - src += srcChrStride * srcHeight >> 1; - dst += (dstChrStride * chrHeight); - dst[h * (dstChrStride) + w] = (src[h * (srcChrStride) + xx] * (xalpha^127) + src[h * (srcChrStride) + xx + 1 ] * xalpha); - - if( inv_i >= (srcWidth >> 1) - 1) - { - //v channel: - dst[h * (dstChrStride) + w] = src[h * (srcChrStride) + (srcWidth >> 1) -1] * 128; - } - } - ); - -char *kernel_src_vscalealldither = KERNEL ( - - kernel void vscale_all_dither_opencl ( - global unsigned char *dst, - const global short *src, - const global short *yfilter, - int yfilterSize, - const global short *cfilter, - int cfilterSize, - const global int *yfilterPos, - const global int *cfilterPos, - int dstWidth, - int dstHeight, - int srcWidth, - int srcHeight, - int dstStride, - int dstChrStride, - int srcStride, - int srcChrStride) - { - const unsigned char hb_dither_8x8_128[8][8] = { - { 36, 68, 60, 92, 34, 66, 58, 90, }, - { 100, 4, 124, 28, 98, 2, 122, 26, }, - { 52, 84, 44, 76, 50, 82, 42, 74, }, - { 116, 20, 108, 12, 114, 18, 106, 10, }, - { 32, 64, 56, 88, 38, 70, 62, 94, }, - { 96, 0, 120, 24, 102, 6, 126, 30, }, - { 48, 80, 40, 72, 54, 86, 46, 78, }, - { 112, 16, 104, 8, 118, 22, 110, 14, }, - }; - - - int w = get_global_id(0); - int h = get_global_id(1); - - int chrWidth = get_global_size(0); - int chrHeight = get_global_size(1); - const unsigned char *local_up_dither; - const unsigned char *local_down_dither; - - local_up_dither = hb_dither_8x8_128[h & 7]; - local_down_dither = hb_dither_8x8_128[(h + chrHeight) & 7]; - - //yscale; - int srcPos1 = (yfilterPos[h]) * srcStride + w; - int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth); - int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w; - int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth; - int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w); - int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w; - - int val1 = (local_up_dither[w & 7] << 12); //y offset is 0; - int val2 = (local_up_dither[(w + chrWidth) & 7] << 12); - int val3 = (local_down_dither[w &7] << 12); - int val4 = (local_down_dither[(w + chrWidth) & 7] << 12); - int val5 = (local_up_dither[w & 7] << 12); - int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chrome channel. - - int j; - int filterPos1 = h * yfilterSize; - int filterPos2 = ( h + chrHeight ) * yfilterSize; - for(j = 0; j < yfilterSize; j++) - { - val1 += src[srcPos1] * yfilter[filterPos1 + j]; - srcPos1 += srcStride; - val2 += src[srcPos2] * yfilter[filterPos1 + j]; - srcPos2 += srcStride; - val3 += src[srcPos3] * yfilter[filterPos2 + j]; - srcPos3 += srcStride; - val4 += src[srcPos4] * yfilter[filterPos2 + j]; - srcPos4 += srcStride; - val5 += src[src1Pos] * cfilter[filterPos1 + j]; - val6 += src[src2Pos] * cfilter[filterPos1 + j]; - src1Pos += dstChrStride; - src2Pos += dstChrStride; - } - dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19)); - dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19)); - dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19)); - dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19)); - - int dst1Pos = dstStride * dstHeight + h*(dstChrStride)+(w); - int dst2Pos = (dstChrStride * chrHeight) + dst1Pos; - dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19)); - dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19)); - } - ); - -char *kernel_src_vscaleallnodither = KERNEL ( - - kernel void vscale_all_nodither_opencl ( - global unsigned char *dst, - const global short *src, - const global short *yfilter, - int yfilterSize, - const global short *cfilter, - int cfilterSize, - const global int *yfilterPos, - const global int *cfilterPos, - int dstWidth, - int dstHeight, - int srcWidth, - int srcHeight, - int dstStride, - int dstChrStride, - int srcStride, - int srcChrStride) - { - const unsigned char hb_sws_pb_64[8] = { - 64, 64, 64, 64, 64, 64, 64, 64 - }; - - int w = get_global_id(0); - int h = get_global_id(1); - - int chrWidth = get_global_size(0); - int chrHeight = get_global_size(1); - const unsigned char *local_up_dither; - const unsigned char *local_down_dither; - - local_up_dither = hb_sws_pb_64; - local_down_dither = hb_sws_pb_64; - - - //yscale; - int srcPos1 = (yfilterPos[h]) * srcStride + w; - int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth); - int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w; - int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth; - int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w); - int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w; - - int val1 = (local_up_dither[w & 7] << 12); //y offset is 0; - int val2 = (local_up_dither[(w + chrWidth) & 7] << 12); - int val3 = (local_down_dither[w &7] << 12); - int val4 = (local_down_dither[(w + chrWidth) & 7] << 12); - int val5 = (local_up_dither[w & 7] << 12); - int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chrome channel. - - - int j; - int filterPos1 = h * yfilterSize; - int filterPos2 = ( h + chrHeight ) * yfilterSize; - for(j = 0; j < yfilterSize; j++) - { - val1 += src[srcPos1] * yfilter[filterPos1 + j]; - srcPos1 += srcStride; - val2 += src[srcPos2] * yfilter[filterPos1 + j]; - srcPos2 += srcStride; - val3 += src[srcPos3] * yfilter[filterPos2 + j]; - srcPos3 += srcStride; - val4 += src[srcPos4] * yfilter[filterPos2 + j]; - srcPos4 += srcStride; - val5 += src[src1Pos] * cfilter[filterPos1 + j]; - val6 += src[src2Pos] * cfilter[filterPos1 + j]; - src1Pos += dstChrStride; - src2Pos += dstChrStride; - } - dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19)); - dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19)); - dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19)); - dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));; - - int dst1Pos = dstStride * dstHeight + h * (dstChrStride) + (w); - int dst2Pos = (dstChrStride * chrHeight) + dst1Pos; - dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19)); - dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19)); - } - ); - -char *kernel_src_vscalefast = KERNEL ( - - kernel void vscale_fast_opencl ( - global unsigned char *dst, - const global short *src, - const global int *yfilterPos, - const global int *cfilterPos, - int dstWidth, - int dstHeight, - int srcWidth, - int srcHeight, - int dstStride, - int dstChrStride, - int srcStride, - int srcChrStride) - { - const unsigned char hb_sws_pb_64[8] = { - 64, 64, 64, 64, 64, 64, 64, 64 - }; - - int w = get_global_id(0); - int h = get_global_id(1); - - int chrWidth = get_global_size(0); - int chrHeight = get_global_size(1); - - const unsigned char *local_up_dither; - const unsigned char *local_down_dither; - - local_up_dither = hb_sws_pb_64; - local_down_dither = hb_sws_pb_64; - - - int rightpart = w + chrWidth; - int bh = h + chrHeight; // bottom part - short val1 = (src[(yfilterPos[h]) * dstStride + w] + local_up_dither[(w + 0) & 7]) >> 7; //lum offset is 0; - short val2 = (src[(yfilterPos[h]) * dstStride + rightpart] + local_up_dither[rightpart & 7]) >> 7; - short val3 = (src[(yfilterPos[bh]) * dstStride + w] + local_down_dither[w & 7]) >> 7; - short val4 = (src[(yfilterPos[bh]) * dstStride + rightpart] + local_down_dither[rightpart & 7]) >> 7; - dst[h * dstStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1)); - dst[h * dstStride + rightpart] = ((val2&(~0xFF)) ? ((-val2) >> 31) : (val2)); - dst[bh * dstStride + w] = ((val3&(~0xFF)) ? ((-val3) >> 31) : (val3)); - dst[bh * dstStride + rightpart] = ((val4&(~0xFF)) ? ((-val4) >> 31) : (val4)); - - src += dstStride * srcHeight; - dst += dstStride * dstHeight; - val1 = (src[cfilterPos[h] * (dstChrStride) + w] + local_up_dither[ w & 7]) >> 7; - dst[h * (dstChrStride) + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1)); - - src += dstChrStride * (srcHeight >> 1); - dst += dstChrStride * chrHeight; - val1 = (src[cfilterPos[h] * dstChrStride + w] + local_up_dither[ (w + 3) & 7] ) >> 7; - dst[h * dstChrStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1)); - - } - ); - -char *kernel_src_scale = KERNEL ( - -__kernel __attribute__((reqd_work_group_size(64, 1, 1))) void frame_scale(__global uchar *dst, - __global const uchar *src, - const float xscale, - const float yscale, - const int srcPlaneOffset0, - const int srcPlaneOffset1, - const int srcPlaneOffset2, - const int dstPlaneOffset0, - const int dstPlaneOffset1, - const int dstPlaneOffset2, - const int srcRowWords0, - const int srcRowWords1, - const int srcRowWords2, - const int dstRowWords0, - const int dstRowWords1, - const int dstRowWords2, - const int srcWidth, - const int srcHeight, - const int dstWidth, - const int dstHeight, - __global const float4* restrict xweights, - __global const float4* restrict yweights - ) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - const int z = get_global_id(2); - - // Abort work items outside the dst image bounds. - - if ((get_group_id(0) * 64 >= (dstWidth >> ((z == 0) ? 0 : 1))) || (get_group_id(1) * 16 >= (dstHeight >> ((z == 0) ? 0 : 1)))) - return; - - const int srcPlaneOffset = (z == 0) ? srcPlaneOffset0 : ((z == 1) ? srcPlaneOffset1 : srcPlaneOffset2); - const int dstPlaneOffset = (z == 0) ? dstPlaneOffset0 : ((z == 1) ? dstPlaneOffset1 : dstPlaneOffset2); - const int srcRowWords = (z == 0) ? srcRowWords0: ((z == 1) ? srcRowWords1 : srcRowWords2); - const int dstRowWords = (z == 0) ? dstRowWords0: ((z == 1) ? dstRowWords1 : dstRowWords2); - - __local uchar pixels[64 * 36]; - const int localRowPixels = 64; - const int groupHeight = 16; // src pixel height output by the workgroup - const int ypad = 2; - const int localx = get_local_id(0); - - const int globalStartRow = floor((get_group_id(1) * groupHeight) / yscale); - const int globalRowCount = ceil(groupHeight / yscale) + 2 * ypad; - - float4 weights = xweights[x]; - int4 woffs = floor(x / xscale); - woffs += (int4)(-1, 0, 1, 2); - woffs = clamp(woffs, 0, (srcWidth >> ((z == 0) ? 0 : 1)) - 1); - const int maxy = (srcHeight >> ((z == 0) ? 0 : 1)) - 1; - - // Scale x from global into LDS - - for (int i = 0; i <= globalRowCount; ++i) { - int4 offs = srcPlaneOffset + clamp(globalStartRow - ypad + i, 0, maxy) * srcRowWords; - offs += woffs; - pixels[localx + i * localRowPixels] = convert_uchar(clamp(round(dot(weights, - (float4)(src[offs.x], src[offs.y], src[offs.z], src[offs.w]))), 0.0f, 255.0f)); - } - - barrier(CLK_LOCAL_MEM_FENCE); - - // Scale y from LDS into global - - if (x >= dstWidth >> ((z == 0) ? 0 : 1)) - return; - - int off = dstPlaneOffset + x + (get_group_id(1) * groupHeight) * dstRowWords; - - for (int i = 0; i < groupHeight; ++i) { - if (y >= dstHeight >> ((z == 0) ? 0 : 1)) - break; - int localy = floor((get_group_id(1) * groupHeight + i) / yscale); - localy = localy - globalStartRow + ypad; - int loff = localx + localy * localRowPixels; - dst[off] = convert_uchar(clamp(round(dot(yweights[get_group_id(1) * groupHeight + i], - (float4)(pixels[loff - localRowPixels], pixels[loff], pixels[loff + localRowPixels] - , pixels[loff + localRowPixels * 2]))), 0.0f, 255.0f)); - off += dstRowWords; - } -} -); - - -char *kernel_src_yadif_filter = KERNEL( - void filter_v6( - global unsigned char *dst, - global unsigned char *prev, - global unsigned char *cur, - global unsigned char *next, - int x, - int y, - int width, - int height, - int parity, - int inlinesize, - int outlinesize, - int inmode, - int uvflag - ) - { - - int flag = uvflag * (y >=height) * height; - int prefs = select(-(inlinesize), inlinesize,((y+1) - flag) >1; - int e = cur[index + prefs]; - int temporal_diff0 = abs((prev2[index]) - (next2[index])); - int temporal_diff1 =(abs(prev[index + mrefs] - c) + abs(prev[index + prefs] - e) )>>1; - int temporal_diff2 =(abs(next[index + mrefs] - c) + abs(next[index + prefs] - e) )>>1; - int diff = max(max(temporal_diff0>>1, temporal_diff1), temporal_diff2); - int spatial_pred = (c+e)>>1; - int spatial_score = abs(cur[index + mrefs-1] - cur[index + prefs-1]) + abs(c-e) + abs(cur[index + mrefs+1] - cur[index + prefs+1]) - 1; - //check -1 - score = abs(cur[index + mrefs-2] - cur[index + prefs]) - + abs(cur[index + mrefs-1] - cur[index + prefs+1]) - + abs(cur[index + mrefs] - cur[index + prefs+2]); - if (score < spatial_score) - { - spatial_score= score; - spatial_pred= (cur[index + mrefs-1] + cur[index + prefs+1])>>1; - } - //check -2 - score = abs(cur[index + mrefs-3] - cur[index + prefs+1]) - + abs(cur[index + mrefs-2] - cur[index + prefs+2]) - + abs(cur[index + mrefs-1] - cur[index + prefs+3]); - if (score < spatial_score) - { - spatial_score= score; - spatial_pred= (cur[index + mrefs-2] + cur[index + prefs+2])>>1; - } - //check 1 - score = abs(cur[index + mrefs] - cur[index + prefs-2]) - + abs(cur[index + mrefs+1] - cur[index + prefs-1]) - + abs(cur[index + mrefs+2] - cur[index + prefs]); - if (score < spatial_score) - { - spatial_score= score; - spatial_pred= (cur[index + mrefs+1] + cur[index + prefs-1])>>1; - } - //check 2 - score = abs(cur[index + mrefs+1] - cur[index + prefs-3]) - + abs(cur[index + mrefs+2] - cur[index + prefs-2]) - + abs(cur[index + mrefs+3] - cur[index + prefs-1]); - if (score < spatial_score) - { - spatial_score= score; - spatial_pred= (cur[index + mrefs+2] + cur[index + prefs-2])>>1; - } - if (mode < 2) - { - int b = (prev2[index + (mrefs<<1)] + next2[index + (mrefs<<1)])>>1; - int f = (prev2[index + (prefs<<1)] + next2[index + (prefs<<1)])>>1; - int diffmax = max(max(d-e, d-c), min(b-c, f-e)); - int diffmin = min(min(d-e, d-c), max(b-c, f-e)); - - diff = max(max(diff, diffmin), -diffmax); - } - if (spatial_pred > d + diff) - { - spatial_pred = d + diff; - } - else if (spatial_pred < d - diff) - { - spatial_pred = d - diff; - } - - dst[outindex] = spatial_pred; - } - - kernel void yadif_filter( - global unsigned char *dst, - global unsigned char *prev, - global unsigned char *cur, - global unsigned char *next, - int parity, - int inlinesizeY, - int inlinesizeUV, - int outlinesizeY, - int outlinesizeUV, - int mode) - { - int x=get_global_id(0); - int y=(get_global_id(1)<<1) + (!parity); - int width=(get_global_size(0)<<1)/3; - int height=get_global_size(1)<<1; - - - global unsigned char *dst_Y=dst; - global unsigned char *dst_U=dst_Y+height*outlinesizeY; - - global unsigned char *prev_Y=prev; - global unsigned char *prev_U=prev_Y+height*inlinesizeY; - - global unsigned char *cur_Y=cur; - global unsigned char *cur_U=cur_Y+height*inlinesizeY; - - global unsigned char *next_Y=next; - global unsigned char *next_U=next_Y+height*inlinesizeY; - - if(x < width) - { - filter_v6(dst_Y,prev_Y,cur_Y,next_Y,x,y,width,height,parity,inlinesizeY,outlinesizeY,mode,0); - } - else - { - x = x - width; - filter_v6(dst_U,prev_U,cur_U,next_U,x,y,width>>1,height>>1,parity,inlinesizeUV,outlinesizeUV,mode,1); - } - } - ); - -#endif diff --git a/libhb/openclwrapper.c b/libhb/openclwrapper.c deleted file mode 100644 index 1cf52fdc2..000000000 --- a/libhb/openclwrapper.c +++ /dev/null @@ -1,1257 +0,0 @@ -/* openclwrapper.c - - Copyright (c) 2003-2017 HandBrake Team - This file is part of the HandBrake source code - Homepage: . - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao - Li Cao - */ - -#include -#include -#include -#include "extras/cl.h" -#include "opencl.h" -#include "openclwrapper.h" -#include "openclkernels.h" - -//#define USE_EXTERNAL_KERNEL -#ifdef SYS_MINGW -#include -#endif - -#if defined(_MSC_VER) -#define strcasecmp strcmpi -#endif - -#define MAX_KERNEL_STRING_LEN 64 -#define MAX_CLFILE_NUM 50 -#define MAX_CLKERNEL_NUM 200 -#define MAX_CLFILE_PATH 255 -#define MAX_KERNEL_NUM 50 -#define MAX_KERNEL_NAME_LEN 64 - -#ifndef INVALID_HANDLE_VALUE -#define INVALID_HANDLE_VALUE NULL -#endif - -//#define THREAD_PRIORITY_TIME_CRITICAL 15 - -enum VENDOR -{ - AMD = 0, - Intel, - NVIDIA, - others -}; -typedef struct _GPUEnv -{ - //share vb in all modules in hb library - cl_platform_id platform; - cl_device_type dType; - cl_context context; - cl_device_id * devices; - cl_device_id dev; - cl_command_queue command_queue; - cl_kernel kernels[MAX_CLFILE_NUM]; - cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file - char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256 - int file_count; // only one kernel file - - char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1]; - cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM]; - int kernel_count; - int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper - enum VENDOR vendor; -}GPUEnv; - -typedef struct -{ - char kernelName[MAX_KERNEL_NAME_LEN+1]; - char * kernelStr; -}hb_kernel_node; - -static GPUEnv gpu_env; -static int isInited = 0; -static int useBuffers = 0; -static hb_kernel_node gKernels[MAX_KERNEL_NUM]; - -#define HB_OCL_ADD_KERNEL_CFG(idx, s, p) \ -{ \ - strcpy(gKernels[idx].kernelName, s); \ - gKernels[idx].kernelStr = p; \ - strcpy(gpu_env.kernel_names[idx], s); \ - gpu_env.kernel_count++; \ -} - -/** - * hb_regist_opencl_kernel - */ -int hb_regist_opencl_kernel() -{ - //if( !gpu_env.isUserCreated ) - // memset( &gpu_env, 0, sizeof(gpu_env) ); - //Comment for posterity: When in doubt just zero out a structure full of pointers to allocated resources. - - gpu_env.file_count = 0; //argc; - gpu_env.kernel_count = 0UL; - - HB_OCL_ADD_KERNEL_CFG(0, "frame_scale", NULL); - HB_OCL_ADD_KERNEL_CFG(1, "yadif_filter", NULL); - - return 0; -} - -/** - * hb_regist_opencl_kernel - * @param filename - - * @param source - - * @param gpu_info - - * @param int idx - - */ -int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx ) -{ - int file_size; - size_t result; - FILE * file = NULL; - file_size = 0; - result = 0; - file = fopen( filename, "rb+" ); - - if( file!=NULL ) - { - fseek( file, 0, SEEK_END ); - - file_size = ftell( file ); - rewind( file ); - *source = (char*)malloc( sizeof(char) * file_size + 1 ); - if( *source == (char*)NULL ) - { - fclose( file ); - return(0); - } - result = fread( *source, 1, file_size, file ); - if( result != file_size ) - { - free( *source ); - fclose( file ); - return(0); - } - (*source)[file_size] = '\0'; - fclose( file ); - - return(1); - } - return(0); -} - -/** - * hb_binary_generated - * @param context - - * @param cl_file_name - - * @param fhandle - - */ -int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle ) -{ - int i = 0; - cl_int status; - cl_uint numDevices; - cl_device_id *devices; - char * str = NULL; - FILE * fd = NULL; - - if (hb_ocl == NULL) - { - hb_error("hb_binary_generated: OpenCL support not available"); - return 0; - } - - status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), &numDevices, NULL); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Get context info failed" ); - return 0; - } - - devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); - if( devices == NULL ) - { - hb_log( "OpenCL: No device found" ); - return 0; - } - - /* grab the handles to all of the devices in the context. */ - status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, NULL); - - status = 0; - /* dump out each binary into its own separate file. */ - for (i = 0; i < numDevices; i++) - { - char fileName[256] = { 0 }; - char cl_name[128] = { 0 }; - if (devices[i]) - { - char deviceName[1024]; - status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME, - sizeof(deviceName), deviceName, NULL); - - str = (char*)strstr(cl_file_name, ".cl"); - memcpy(cl_name, cl_file_name, str - cl_file_name); - cl_name[str - cl_file_name] = '\0'; - sprintf(fileName, "./%s - %s.bin", cl_name, deviceName); - fd = fopen(fileName, "rb"); - status = fd != NULL; - } - } - - if( devices != NULL ) - { - free( devices ); - devices = NULL; - } - - if( fd != NULL ) - *fhandle = fd; - - return status; -} - -/** - * hb_write_binary_to_file - * @param fileName - - * @param birary - - * @param numBytes - - */ -int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes ) -{ - FILE *output = NULL; - output = fopen( fileName, "wb" ); - if( output == NULL ) - return 0; - - fwrite( birary, sizeof(char), numBytes, output ); - fclose( output ); - - return 1; -} - -/** - * hb_generat_bin_from_kernel_source - * @param program - - * @param cl_file_name - - */ -int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name ) -{ - int i = 0; - cl_int status = CL_SUCCESS; - cl_uint numDevices = 0; - size_t *binarySizes = NULL; - cl_device_id *devices = NULL; - char **binaries = NULL; - char *str = NULL; - int ret_value = 1; - - if (hb_ocl == NULL) - { - hb_error("hb_generat_bin_from_kernel_source: OpenCL support not available"); - return 0; - } - - status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, - sizeof(numDevices), &numDevices, NULL); - if( status != CL_SUCCESS ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed"); - return 0; - } - - devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices ); - if( devices == NULL ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found"); - ret_value = 0; - goto to_exit; - } - - /* grab the handles to all of the devices in the program. */ - status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, NULL); - if( status != CL_SUCCESS ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed"); - ret_value = 0; - goto to_exit; - } - - /* figure out the sizes of each of the binaries. */ - binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices ); - - status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * numDevices, - binarySizes, NULL); - if( status != CL_SUCCESS ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed"); - ret_value = 0; - goto to_exit; - } - - /* copy over all of the generated binaries. */ - binaries = (char**)malloc( sizeof(char *) * numDevices ); - if( binaries == NULL ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed"); - ret_value = 0; - goto to_exit; - } - - for( i = 0; i < numDevices; i++ ) - { - if( binarySizes[i] != 0 ) - { - binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] ); - if( binaries[i] == NULL ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i); - ret_value = 0; - goto to_exit; - } - } - else - { - binaries[i] = NULL; - } - } - - status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARIES, - sizeof(char *) * numDevices, - binaries, NULL); - if( status != CL_SUCCESS ) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed"); - ret_value = 0; - goto to_exit; - } - - /* dump out each binary into its own separate file. */ - for (i = 0; i < numDevices; i++) - { - char fileName[256] = {0}; - char cl_name[128] = {0}; - if (binarySizes[i]) - { - char deviceName[1024]; - status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME, - sizeof(deviceName), deviceName, - NULL); - - str = (char*)strstr( cl_file_name, (char*)".cl" ); - memcpy(cl_name, cl_file_name, str - cl_file_name); - cl_name[str - cl_file_name] = '\0'; - sprintf(fileName, "./%s - %s.bin", cl_name, deviceName); - - if (!hb_write_binary_to_file(fileName, binaries[i], binarySizes[i])) - { - hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead."); - ret_value = 0; - goto to_exit; - } - } - } - -to_exit: - // Release all resouces and memory - for( i = 0; i < numDevices; i++ ) - { - if( binaries[i] != NULL ) - { - free( binaries[i] ); - binaries[i] = NULL; - } - } - - if( binaries != NULL ) - { - free( binaries ); - binaries = NULL; - } - - if( binarySizes != NULL ) - { - free( binarySizes ); - binarySizes = NULL; - } - - if( devices != NULL ) - { - free( devices ); - devices = NULL; - } - return ret_value; -} - - -/** - * hb_init_opencl_attr - * @param env - - */ -int hb_init_opencl_attr( OpenCLEnv * env ) -{ - if( gpu_env.isUserCreated ) - return 1; - - gpu_env.context = env->context; - gpu_env.platform = env->platform; - gpu_env.dev = env->devices; - gpu_env.command_queue = env->command_queue; - - gpu_env.isUserCreated = 1; - - return 0; -} - -/** - * hb_create_kernel - * @param kernelname - - * @param env - - */ -int hb_create_kernel( char * kernelname, KernelEnv * env ) -{ - int status; - - if (hb_ocl == NULL) - { - hb_error("hb_create_kernel: OpenCL support not available"); - return 0; - } - - env->kernel = hb_ocl->clCreateKernel(gpu_env.programs[0], kernelname, &status); - env->context = gpu_env.context; - env->command_queue = gpu_env.command_queue; - return status != CL_SUCCESS ? 1 : 0; -} - -/** - * hb_release_kernel - * @param env - - */ -int hb_release_kernel( KernelEnv * env ) -{ - if (hb_ocl == NULL) - { - hb_error("hb_release_kernel: OpenCL support not available"); - return 0; - } - - int status = hb_ocl->clReleaseKernel(env->kernel); - return status != CL_SUCCESS ? 1 : 0; -} - -/** - * hb_init_opencl_env - * @param gpu_info - - */ - -static int init_once = 0; -int hb_init_opencl_env( GPUEnv *gpu_info ) -{ - size_t length; - cl_int status; - cl_uint numPlatforms, numDevices; - cl_platform_id *platforms; - cl_context_properties cps[3]; - char platformName[100]; - unsigned int i; - void *handle = INVALID_HANDLE_VALUE; - - if (init_once != 0) - return 0; - else - init_once = 1; - - if (hb_ocl == NULL) - { - hb_error("hb_init_opencl_env: OpenCL support not available"); - return 1; - } - - /* - * Have a look at the available platforms. - */ - if( !gpu_info->isUserCreated ) - { - status = hb_ocl->clGetPlatformIDs(0, NULL, &numPlatforms); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: OpenCL device platform not found." ); - return(1); - } - - gpu_info->platform = NULL; - if( 0 < numPlatforms ) - { - platforms = (cl_platform_id*)malloc( - numPlatforms * sizeof(cl_platform_id)); - if( platforms == (cl_platform_id*)NULL ) - { - return(1); - } - status = hb_ocl->clGetPlatformIDs(numPlatforms, platforms, NULL); - - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Specific opencl platform not found." ); - return(1); - } - - for( i = 0; i < numPlatforms; i++ ) - { - status = hb_ocl->clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, - sizeof(platformName), platformName, NULL); - - if( status != CL_SUCCESS ) - { - continue; - } - gpu_info->platform = platforms[i]; - - if (!strcmp(platformName, "Advanced Micro Devices, Inc.") || - !strcmp(platformName, "AMD")) - gpu_info->vendor = AMD; - else - gpu_info->vendor = others; - - gpu_info->platform = platforms[i]; - - status = hb_ocl->clGetDeviceIDs(gpu_info->platform /* platform */, - CL_DEVICE_TYPE_GPU /* device_type */, - 0 /* num_entries */, - NULL /* devices */, &numDevices); - - if( status != CL_SUCCESS ) - { - continue; - } - - if( numDevices ) - break; - - } - free( platforms ); - } - - if( NULL == gpu_info->platform ) - { - hb_log( "OpenCL: No OpenCL-compatible GPU found." ); - return(1); - } - - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: No OpenCL-compatible GPU found." ); - return(1); - } - - /* - * Use available platform. - */ - cps[0] = CL_CONTEXT_PLATFORM; - cps[1] = (cl_context_properties)gpu_info->platform; - cps[2] = 0; - /* Check for GPU. */ - gpu_info->dType = CL_DEVICE_TYPE_GPU; - gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType, - NULL, NULL, &status); - - if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) - { - gpu_info->dType = CL_DEVICE_TYPE_CPU; - gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType, - NULL, NULL, &status); - } - - if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) - { - gpu_info->dType = CL_DEVICE_TYPE_DEFAULT; - gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType, - NULL, NULL, &status); - } - - if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) ) - { - hb_log( "OpenCL: Unable to create opencl context." ); - return(1); - } - - /* Detect OpenCL devices. */ - /* First, get the size of device list data */ - status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, - 0, NULL, &length); - if((status != CL_SUCCESS) || (length == 0)) - { - hb_log( "OpenCL: Unable to get the list of devices in context." ); - return(1); - } - - /* Now allocate memory for device list based on the size we got earlier */ - gpu_info->devices = (cl_device_id*)malloc( length ); - if( gpu_info->devices == (cl_device_id*)NULL ) - { - return(1); - } - - /* Now, get the device list data */ - status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, - length, gpu_info->devices, NULL); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Unable to get the device list data in context." ); - return(1); - } - - /* Create OpenCL command queue. */ - gpu_info->command_queue = hb_ocl->clCreateCommandQueue(gpu_info->context, - gpu_info->devices[0], - 0, &status); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: Unable to create opencl command queue." ); - return(1); - } - } - - if ((CL_SUCCESS == hb_ocl->clGetCommandQueueInfo(gpu_info->command_queue, - CL_QUEUE_THREAD_HANDLE_AMD, - sizeof(handle), &handle, NULL)) && - (INVALID_HANDLE_VALUE != handle)) - { -#ifdef SYS_MINGW - SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL ); -#endif - } - - return 0; -} - - -/** - * hb_release_opencl_env - * @param gpu_info - - */ -int hb_release_opencl_env( GPUEnv *gpu_info ) -{ - if( !isInited ) - return 1; - int i; - - if (hb_ocl == NULL) - { - hb_error("hb_release_opencl_env: OpenCL support not available"); - return 0; - } - - for( i = 0; iclReleaseProgram(gpu_env.programs[i]); - gpu_env.programs[i] = NULL; - } - } - - if( gpu_env.command_queue ) - { - hb_ocl->clReleaseCommandQueue(gpu_env.command_queue); - gpu_env.command_queue = NULL; - } - - if( gpu_env.context ) - { - hb_ocl->clReleaseContext(gpu_env.context); - gpu_env.context = NULL; - } - - isInited = 0; - useBuffers = 0; - gpu_info->isUserCreated = 0; - - return 1; -} - - -/** - * hb_register_kernel_wrapper - * @param kernel_name - - * @param function - - */ -int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ) -{ - int i; - for( i = 0; i < gpu_env.kernel_count; i++ ) - { - if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 ) - { - gpu_env.kernel_functions[i] = function; - return(1); - } - } - return(0); -} - -/** - * hb_cached_of_kerner_prg - * @param gpu_env - - * @param cl_file_name - - */ -int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name ) -{ - int i; - for( i = 0; i < gpu_env->file_count; i++ ) - { - if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name ) == 0 ) - { - if( gpu_env->programs[i] != NULL ) - return(1); - } - } - - return(0); -} - -/** - * hb_compile_kernel_file - * @param filename - - * @param gpu_info - - * @param indx - - * @param build_option - - */ -int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info, - int indx, const char *build_option ) -{ - cl_int status; - size_t length; - char *source_str = NULL; - const char *source; - size_t source_size[1]; - char *buildLog = NULL; - int b_error, binary_status, binaryExisted; - char *binary; - cl_uint numDevices; - cl_device_id *devices = NULL; - FILE *fd = NULL; - FILE *fd1 = NULL; - int idx; - int ret_value = 1; - - if (hb_cached_of_kerner_prg(gpu_info, filename) == 1) - return 1; - - idx = gpu_info->file_count; - -#ifdef USE_EXTERNAL_KERNEL - status = hb_convert_to_string(filename, &source_str, gpu_info, idx); - if (status == 0) - return 0; -#else - int kernel_src_size = strlen(kernel_src_scale) + strlen(kernel_src_yadif_filter); - - source_str = (char*)malloc( kernel_src_size + 2 ); - strcpy( source_str, kernel_src_scale ); - strcat( source_str, kernel_src_yadif_filter ); -#endif - - source = source_str; - source_size[0] = strlen( source ); - - if (hb_ocl == NULL) - { - hb_error("OpenCL: Support is not available"); - ret_value = 0; - goto to_exit; - } - - if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1) - { - status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), &numDevices, NULL); - if (status != CL_SUCCESS) - { - hb_log("OpenCL: Unable to get the number of devices in context."); - ret_value = 0; - goto to_exit; - } - - devices = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices); - if (devices == NULL) - { - ret_value = 0; - goto to_exit; - } - - length = 0; - b_error = 0; - b_error |= fseek(fd, 0, SEEK_END) < 0; - b_error |= (length = ftell(fd)) <= 0; - b_error |= fseek(fd, 0, SEEK_SET) < 0; - if (b_error) - { - ret_value = 0; - goto to_exit; - } - - binary = (char*)calloc(length + 2, sizeof(char)); - if (binary == NULL) - { - ret_value = 0; - goto to_exit; - } - - b_error |= fread(binary, 1, length, fd) != length; - - if (b_error) - { - ret_value = 0; - goto to_exit; - } - - /* grab the handles to all of the devices in the context. */ - status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, - devices, NULL); - - gpu_info->programs[idx] = hb_ocl->clCreateProgramWithBinary(gpu_info->context, - numDevices, - devices, - &length, - (const unsigned char**)&binary, - &binary_status, - &status); - - } - else - { - /* create a CL program using the kernel source */ - gpu_info->programs[idx] = hb_ocl->clCreateProgramWithSource(gpu_info->context, 1, - &source, source_size, - &status); - } - - if ((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)) - { - hb_log( "OpenCL: Unable to get list of devices in context." ); - ret_value = 0; - goto to_exit; - } - - /* create a cl program executable for all the devices specified */ - if (!gpu_info->isUserCreated) - { - status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices, - build_option, NULL, NULL); - } - else - { - status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev), - build_option, NULL, NULL); - } - - if (status != CL_SUCCESS) - { - if (!gpu_info->isUserCreated) - { - status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx], - gpu_info->devices[0], - CL_PROGRAM_BUILD_LOG, - 0, NULL, &length); - } - else - { - status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx], - gpu_info->dev, - CL_PROGRAM_BUILD_LOG, - 0, NULL, &length); - } - - if (status != CL_SUCCESS) - { - hb_log( "OpenCL: Unable to get GPU build information." ); - ret_value = 0; - goto to_exit; - } - - buildLog = (char*)malloc(length); - if (buildLog == (char*)NULL) - { - ret_value = 0; - goto to_exit; - } - - if (!gpu_info->isUserCreated) - { - status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx], - gpu_info->devices[0], - CL_PROGRAM_BUILD_LOG, - length, buildLog, &length); - } - else - { - status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx], - gpu_info->dev, - CL_PROGRAM_BUILD_LOG, - length, buildLog, &length); - } - - fd1 = fopen("kernel-build.log", "w+"); - if (fd1 != NULL) { - fwrite(buildLog, sizeof(char), length, fd1); - fclose(fd1); - } - - ret_value = 0; - goto to_exit; - } - - strcpy(gpu_env.kernelSrcFile[idx], filename); - - gpu_info->file_count += 1; - -to_exit: - if (source_str != NULL) - { - free(source_str); - source_str = NULL; - // only used as pointer to source_str - source = NULL; - } - - if (devices != NULL) - { - free(devices); - devices = NULL; - } - - if (binary != NULL) - { - free(binary); - binary = NULL; - } - - if (buildLog != NULL) - { - free(buildLog); - buildLog = NULL; - } - - if (fd != NULL) - { - fclose(fd); - fd = NULL; - } - - return ret_value; -} - - -/** - * hb_get_kernel_env_and_func - * @param kernel_name - - * @param env - - * @param function - - */ -int hb_get_kernel_env_and_func( const char *kernel_name, - KernelEnv *env, - cl_kernel_function *function ) -{ - int i; - for( i = 0; i < gpu_env.kernel_count; i++ ) - { - if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 ) - { - env->context = gpu_env.context; - env->command_queue = gpu_env.command_queue; - env->program = gpu_env.programs[0]; - env->kernel = gpu_env.kernels[i]; - env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0; - *function = gpu_env.kernel_functions[i]; - return(1); - } - } - return(0); -} - -/** - * hb_get_kernel_env_and_func - * @param kernel_name - - * @param userdata - - */ -int hb_run_kernel( const char *kernel_name, void **userdata ) -{ - KernelEnv env; - cl_kernel_function function; - int status; - memset( &env, 0, sizeof(KernelEnv)); - status = hb_get_kernel_env_and_func( kernel_name, &env, &function ); - strcpy( env.kernel_name, kernel_name ); - if( status == 1 ) - { - return(function( userdata, &env )); - } - - return(0); -} - -/** - * hb_init_opencl_run_env - * @param argc - - * @param argv - - * @param build_option - - */ -int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ) -{ - int status = 0; - if( MAX_CLKERNEL_NUM <= 0 ) - { - return 1; - } - - if((argc > MAX_CLFILE_NUM) || (argc<0)) - { - return 1; - } - - if( !isInited ) - { - hb_regist_opencl_kernel(); - - /*initialize devices, context, comand_queue*/ - status = hb_init_opencl_env( &gpu_env ); - if( status ) - return(1); - - /*initialize program, kernel_name, kernel_count*/ - status = hb_compile_kernel_file("hb-opencl-kernels.cl", - &gpu_env, 0, build_option); - - if( status == 0 || gpu_env.kernel_count == 0 ) - { - return(1); - - } - - useBuffers = 1; - isInited = 1; - } - - return(0); -} - -/** - * hb_release_opencl_run_env - */ -int hb_release_opencl_run_env() -{ - return hb_release_opencl_env( &gpu_env ); -} - -/** - * hb_opencl_stats - */ -int hb_opencl_stats() -{ - return isInited; -} - -/** - * hb_get_opencl_env - */ -int hb_get_opencl_env() -{ - /* initialize devices, context, command_queue */ - return hb_init_opencl_env(&gpu_env); -} - -/** - * hb_create_buffer - * @param cl_inBuf - - * @param flags - - * @param size - - */ -int hb_create_buffer( cl_mem *cl_Buf, int flags, int size ) -{ - int status; - - if (hb_ocl == NULL) - { - hb_error("hb_create_buffer: OpenCL support not available"); - return 0; - } - - *cl_Buf = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status); - - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: clCreateBuffer error '%d'", status ); - return 0; - } - - return 1; -} - - -/** - * hb_read_opencl_buffer - * @param cl_inBuf - - * @param outbuf - - * @param size - - */ -int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size ) -{ - int status; - - if (hb_ocl == NULL) - { - hb_error("hb_read_opencl_suffer: OpenCL support not available"); - return 0; - } - - status = hb_ocl->clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf, - CL_TRUE, 0, size, outbuf, 0, 0, 0); - if( status != CL_SUCCESS ) - { - hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status ); - return 0; - } - - return 1; -} - -int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size) -{ - int status; - int flags = CL_MEM_ALLOC_HOST_PTR; - - if (hb_ocl == NULL) - { - hb_error("hb_cl_create_mapped_buffer: OpenCL support not available"); - return 0; - } - - //cl_event event; - *mem = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status); - *addr = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE, - CL_MAP_READ|CL_MAP_WRITE, 0, size, 0, - NULL, NULL/*&event*/, &status); - - //hb_log("\t **** context: %.8x cmdqueue: %.8x cl_mem: %.8x mapaddr: %.8x size: %d status: %d", gpu_env.context, gpu_env.command_queue, mem, addr, size, status); - - return (status == CL_SUCCESS) ? 1 : 0; -} - -int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr) -{ - cl_event event; - - if (hb_ocl == NULL) - { - hb_error("hb_cl_free_mapped_buffer: OpenCL support not available"); - return 0; - } - - int status = hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, mem, - addr, 0, NULL, &event); - if (status == CL_SUCCESS) - hb_ocl->clWaitForEvents(1, &event); - else - hb_log("hb_free_mapped_buffer: error %d", status); - - status = hb_ocl->clReleaseMemObject(mem); - if (status != CL_SUCCESS) - hb_log("hb_free_mapped_buffer: release error %d",status); - - return (status == CL_SUCCESS) ? 1 : 0; -} - -void hb_opencl_init() -{ - hb_get_opencl_env(); -} - -int hb_use_buffers() -{ - return useBuffers; -} - -int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb) -{ - if (hb_ocl == NULL) - { - hb_error("hb_copy_buffer: OpenCL support not available"); - return 0; - } - - int status = hb_ocl->clEnqueueCopyBuffer(gpu_env.command_queue, - src_buffer, dst_buffer, - src_offset, dst_offset, - cb, 0, 0, 0); - if( status != CL_SUCCESS ) - { - av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status ); - return 0; - } - return 1; -} - -int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height) -{ - - int chrH = -(-height >> 1); - unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2)); - if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height)) - { - memcpy(Ybuf,temp,linesize0 * height); - memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH); - memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH); - - } - av_free(temp); - - return 1; -} - -int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset) -{ - if (hb_ocl == NULL) - { - hb_error("hb_write_opencl_frame_buffer: OpenCL support not available"); - return 0; - } - - void *mapped = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, cl_inBuf, - CL_TRUE,CL_MAP_WRITE, 0, - sizeof(uint8_t) * (linesize0 + linesize1) * height + offset, - 0, NULL, NULL, NULL); - uint8_t *temp = (uint8_t *)mapped; - temp += offset; - memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height); - memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2); - memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2); - hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL); - return 1; -} - -cl_command_queue hb_get_command_queue() -{ - return gpu_env.command_queue; -} - -cl_context hb_get_context() -{ - return gpu_env.context; -} diff --git a/libhb/openclwrapper.h b/libhb/openclwrapper.h deleted file mode 100644 index 1b7e1941b..000000000 --- a/libhb/openclwrapper.h +++ /dev/null @@ -1,90 +0,0 @@ -/* openclwrapper.h - - Copyright (c) 2003-2017 HandBrake Team - This file is part of the HandBrake source code - Homepage: . - It may be used under the terms of the GNU General Public License v2. - For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html - - Authors: Peng Gao - Li Cao - - - */ -#ifndef HB_OPENCL_WRAPPER_H -#define HB_OPENCL_WRAPPER_H - -#include "common.h" -#include "extras/cl.h" - -//support AMD opencl -#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E -#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) - -typedef struct _KernelEnv -{ - cl_context context; - cl_command_queue command_queue; - cl_program program; - cl_kernel kernel; - char kernel_name[150]; - int isAMD; -}KernelEnv; - -typedef struct _OpenCLEnv -{ - cl_platform_id platform; - cl_context context; - cl_device_id devices; - cl_command_queue command_queue; -}OpenCLEnv; - - -//user defined, this is function wrapper which is used to set the input parameters , -//luanch kernel and copy data from GPU to CPU or CPU to GPU. -typedef int (*cl_kernel_function)( void **userdata, KernelEnv *kenv ); - -// registe a wapper for running the kernel specified by the kernel name -int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function ); - -// run kernel , user call this function to luanch kernel. -// kernel_name: this kernel name is used to find the kernel in opencl runtime environment -// userdata: this userdata is the all parameters for running the kernel specified by kernel name -int hb_run_kernel( const char *kernel_name, void **userdata ); - -// init the run time environment , this function must be called befor calling any function related to opencl -// the argc must be set zero , argv must be set NULL, build_option is the options for build the kernel. -int hb_init_opencl_run_env( int argc, char **argv, const char *build_option ); - -//relase all resource about the opencl , this function must be called after calling any functions related to opencl -int hb_release_opencl_run_env(); - -// get the opencl status , 0: not init ; 1, inited; this function is used the check whether or not the opencl run time has been created -int hb_opencl_stats(); - -// update opencl run time environments , such as commandqueue , platforme, context. program -int hb_init_opencl_attr( OpenCLEnv * env ); - -// create kernel object by a kernel name on the specified opencl run time indicated by env parameter -int hb_create_kernel( char * kernelname, KernelEnv * env ); - -// release kernel object which is generated by calling the hb_create_kernel api -int hb_release_kernel( KernelEnv * env ); - -void hb_opencl_init(); - -int hb_get_opencl_env(); - -int hb_create_buffer(cl_mem *cl_Buf,int flags,int size); - -int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size); - -int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size); - -int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr); - -int hb_use_buffers(); - -int hb_confirm_gpu_type(); - -#endif // HB_OPENCL_WRAPPER_H diff --git a/libhb/preset.c b/libhb/preset.c index 5ad3dbf76..d31805a63 100644 --- a/libhb/preset.c +++ b/libhb/preset.c @@ -1681,15 +1681,6 @@ int hb_preset_apply_video(const hb_dict_t *preset, hb_dict_t *job_dict) hb_value_xform(value, HB_VALUE_TYPE_INT)); } - if ((value = hb_dict_get(preset, "VideoScaler")) != NULL) - { - const char *s = hb_value_get_string(value); - if (!strcasecmp(s, "opencl")) - { - hb_dict_set(video_dict, "OpenCL", hb_value_bool(1)); - } - } - return 0; } @@ -2221,6 +2212,11 @@ void hb_presets_clean(hb_value_t *preset) presets_clean(preset, hb_preset_template); } +static void import_video_scaler_25_0_0(hb_value_t *preset) +{ + hb_dict_set(preset, "VideoScaler", hb_value_string("swscale")); +} + static void import_anamorphic_20_0_0(hb_value_t *preset) { hb_value_t *val = hb_dict_get(preset, "PicturePAR"); @@ -2833,9 +2829,16 @@ static void import_video_0_0_0(hb_value_t *preset) } } +static void import_25_0_0(hb_value_t *preset) +{ + import_video_scaler_25_0_0(preset); +} + static void import_20_0_0(hb_value_t *preset) { import_anamorphic_20_0_0(preset); + + import_25_0_0(preset); } static void import_12_0_0(hb_value_t *preset) @@ -2931,6 +2934,11 @@ static int preset_import(hb_value_t *preset, int major, int minor, int micro) import_20_0_0(preset); result = 1; } + else if (cmpVersion(major, minor, micro, 25, 0, 0) <= 0) + { + import_25_0_0(preset); + result = 1; + } preset_clean(preset, hb_preset_template); } return result; diff --git a/libhb/scan.c b/libhb/scan.c index 9d8b84049..e117e58fc 100644 --- a/libhb/scan.c +++ b/libhb/scan.c @@ -8,7 +8,6 @@ */ #include "hb.h" -#include "opencl.h" #include "hbffmpeg.h" typedef struct @@ -1013,13 +1012,6 @@ skip_preview: title->video_decode_support = vid_info.video_decode_support; - // TODO: check video dimensions - hb_handle_t *hb_handle = (hb_handle_t *)data->h; - if (hb_get_opencl_enabled(hb_handle)) - { - title->opencl_support = !!hb_opencl_available(); - } - // compute the aspect ratio based on the storage dimensions and PAR. hb_reduce(&title->dar.num, &title->dar.den, title->geometry.par.num * title->geometry.width, diff --git a/libhb/work.c b/libhb/work.c index 9062111e5..a35cc76cf 100644 --- a/libhb/work.c +++ b/libhb/work.c @@ -9,8 +9,6 @@ #include "hb.h" #include "libavformat/avformat.h" -#include "openclwrapper.h" -#include "opencl.h" #include "decomb.h" #ifdef USE_QSV @@ -1232,12 +1230,6 @@ static int sanitize_qsv( hb_job_t * job ) hb_dict_extract_int(&vpp_settings[5], filter->settings, "crop-right"); - // VPP crop/scale takes precedence over OpenCL scale too - if (job->use_opencl) - { - hb_release_opencl_run_env(); - job->use_opencl = 0; - } hb_list_rem(job->list_filter, filter); hb_filter_close(&filter); break; @@ -1374,28 +1366,6 @@ static void do_job(hb_job_t *job) w = hb_get_work(job->h, WORK_READER); hb_list_add(job->list_work, w); - /* - * OpenCL - * - * Note: we delay hb_ocl_init until here, since they're no point it loading - * the library if we aren't going to use it. But we only call hb_ocl_close - * in hb_global_close, since un/reloading the library each run is wasteful. - */ - if (job->use_opencl) - { - if (hb_ocl_init() || hb_init_opencl_run_env(0, NULL, "-I.")) - { - hb_log("work: failed to initialize OpenCL environment, using fallback"); - hb_release_opencl_run_env(); - job->use_opencl = 0; - } - } - else - { - // we're not (re-)using OpenCL here, we can release the environment - hb_release_opencl_run_env(); - } - hb_log( "starting job" ); // This must be performed before initializing filters because @@ -1848,11 +1818,6 @@ cleanup: hb_buffer_pool_free(); - if (job->use_opencl) - { - hb_release_opencl_run_env(); - } - hb_job_close(&job); } diff --git a/preset/preset_builtin.list b/preset/preset_builtin.list index df9ef896b..607780e65 100644 --- a/preset/preset_builtin.list +++ b/preset/preset_builtin.list @@ -1,6 +1,6 @@
- + diff --git a/test/test.c b/test/test.c index da5db9341..73411aa63 100644 --- a/test/test.c +++ b/test/test.c @@ -33,7 +33,6 @@ #include "hb.h" #include "lang.h" #include "parsecsv.h" -#include "openclwrapper.h" #ifdef USE_QSV #include "qsv_common.h" @@ -177,7 +176,6 @@ static int start_at_frame = 0; static int64_t stop_at_pts = 0; static int stop_at_frame = 0; static uint64_t min_title_duration = 10; -static int use_opencl = -1; #ifdef USE_QSV static int qsv_async_depth = -1; static int qsv_decode = -1; @@ -482,11 +480,6 @@ int main( int argc, char ** argv ) hb_dvd_set_dvdnav( dvdnav ); - if (use_opencl == 1) - { - hb_opencl_set_enable(h, use_opencl); - } - /* Show version */ fprintf( stderr, "%s - %s - %s\n", HB_PROJECT_TITLE, HB_PROJECT_BUILD_TITLE, HB_PROJECT_URL_WEBSITE ); @@ -654,8 +647,6 @@ static void PrintTitleInfo( hb_title_t * title, int feature ) fprintf( stderr, " + autocrop: %d/%d/%d/%d\n", title->crop[0], title->crop[1], title->crop[2], title->crop[3] ); - fprintf(stderr, " + support opencl: %s\n", title->opencl_support ? "yes" : "no"); - fprintf( stderr, " + chapters:\n" ); for( i = 0; i < hb_list_count( title->list_chapter ); i++ ) { @@ -3820,11 +3811,6 @@ static hb_dict_t * PreparePreset(const char *preset_name) hb_dict_set(preset, "VideoQSVDecode", hb_value_int(qsv_decode)); } #endif - if (use_opencl != -1) - { - hb_dict_set(preset, "VideoScaler", - hb_value_string(use_opencl ? "opencl" : "swscale")); - } if (maxWidth > 0) { hb_dict_set(preset, "PictureWidth", hb_value_int(maxWidth));