uint32_t frames_to_skip; // decode but discard this many frames
// initially (for frame accurate positioning
// to non-I frames).
- int use_opencl;
PRIVATE int use_decomb;
PRIVATE int use_detelecine;
#define HBTF_NO_IDR (1 << 0)
#define HBTF_SCAN_COMPLETE (1 << 1)
#define HBTF_RAW_VIDEO (1 << 2)
-
- // whether OpenCL scaling is supported for this source
- int opencl_support;
};
// Update win/CS/HandBrake.Interop/HandBrakeInterop/HbLib/hb_state_s.cs when changing this struct
#include "hb.h"
#include "hbffmpeg.h"
#include "common.h"
-#include "opencl.h"
struct hb_filter_private_s
{
int height_out;
int crop[4];
- /* OpenCL */
- hb_oclscale_t *os; //ocl scaler handler
-
struct SwsContext * context;
};
pv->width_out = init->geometry.width - (init->crop[2] + init->crop[3]);
pv->height_out = init->geometry.height - (init->crop[0] + init->crop[1]);
- /* OpenCL */
- if (pv->job->use_opencl && pv->job->title->opencl_support)
- {
- pv->os = ( hb_oclscale_t * )malloc( sizeof( hb_oclscale_t ) );
- memset( pv->os, 0, sizeof( hb_oclscale_t ) );
- }
-
memcpy( pv->crop, init->crop, sizeof( int[4] ) );
hb_dict_extract_int(&pv->width_out, filter->settings, "width");
hb_dict_extract_int(&pv->height_out, filter->settings, "height");
return;
}
- /* OpenCL */
- if (pv->job->use_opencl && pv->job->title->opencl_support && pv->os)
- {
- if (hb_ocl != NULL)
- {
- HB_OCL_BUF_FREE(hb_ocl, pv->os->bicubic_x_weights);
- HB_OCL_BUF_FREE(hb_ocl, pv->os->bicubic_y_weights);
- if (pv->os->initialized == 1)
- {
- hb_ocl->clReleaseKernel(pv->os->m_kernel);
- }
- }
- free(pv->os);
- }
-
if( pv->context )
{
sws_freeContext( pv->context );
filter->private_data = NULL;
}
-/* OpenCL */
static hb_buffer_t* crop_scale( hb_filter_private_t * pv, hb_buffer_t * in )
{
hb_buffer_t * out;
// correct place for cropped frame
hb_picture_crop(crop_data, crop_stride, in, pv->crop[0], pv->crop[2]);
- // Use bicubic OpenCL scaling when selected and when downsampling < 4:1;
- if ((pv->job->use_opencl && pv->job->title->opencl_support) &&
- (pv->width_out * 4 > pv->width_in) &&
- (in->cl.buffer != NULL) && (out->cl.buffer != NULL))
+ if (pv->context == NULL ||
+ pv->width_in != in->f.width ||
+ pv->height_in != in->f.height ||
+ pv->pix_fmt != in->f.fmt)
{
- /* OpenCL */
- hb_ocl_scale(in, out, pv->crop, pv->os);
- }
- else
- {
- if (pv->context == NULL ||
- pv->width_in != in->f.width ||
- pv->height_in != in->f.height ||
- pv->pix_fmt != in->f.fmt)
+ // Something changed, need a new scaling context.
+ if (pv->context != NULL)
{
- // Something changed, need a new scaling context.
- if (pv->context != NULL)
- {
- sws_freeContext(pv->context);
- }
-
- pv->context = hb_sws_get_context(
- in->f.width - (pv->crop[2] + pv->crop[3]),
- in->f.height - (pv->crop[0] + pv->crop[1]),
- in->f.fmt, out->f.width, out->f.height,
- out->f.fmt, SWS_LANCZOS|SWS_ACCURATE_RND,
- hb_ff_get_colorspace(pv->job->title->color_matrix));
- pv->width_in = in->f.width;
- pv->height_in = in->f.height;
- pv->pix_fmt = in->f.fmt;
+ sws_freeContext(pv->context);
}
- if (pv->context == NULL)
- {
- hb_buffer_close(&out);
- return NULL;
- }
+ pv->context = hb_sws_get_context(
+ in->f.width - (pv->crop[2] + pv->crop[3]),
+ in->f.height - (pv->crop[0] + pv->crop[1]),
+ in->f.fmt, out->f.width, out->f.height,
+ out->f.fmt, SWS_LANCZOS|SWS_ACCURATE_RND,
+ hb_ff_get_colorspace(pv->job->title->color_matrix));
+ pv->width_in = in->f.width;
+ pv->height_in = in->f.height;
+ pv->pix_fmt = in->f.fmt;
+ }
- // Scale crop into out according to the context set up above
- sws_scale(pv->context,
- (const uint8_t* const*)crop_data, crop_stride,
- 0, in->f.height - (pv->crop[0] + pv->crop[1]),
- out_data, out_stride);
+ if (pv->context == NULL)
+ {
+ hb_buffer_close(&out);
+ return NULL;
}
+ // Scale crop into out according to the context set up above
+ sws_scale(pv->context,
+ (const uint8_t* const*)crop_data, crop_stride,
+ 0, in->f.height - (pv->crop[0] + pv->crop[1]),
+ out_data, out_stride);
+
out->s = in->s;
return out;
}
*/
#include "hb.h"
-#include "openclwrapper.h"
#ifdef USE_QSV
#include "qsv_libav.h"
#endif
if( b->data )
{
freed += b->alloc;
-
- if (b->cl.buffer != NULL)
- {
- /* OpenCL */
- if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0)
- {
- hb_log("hb_buffer_pool_free: bad free: %p -> buffer %p map %p",
- b, b->cl.buffer, b->data);
- }
- }
- else
- {
- free(b->data);
- }
+ free(b->data);
}
free( b );
count++;
return NULL;
}
-hb_buffer_t * hb_buffer_init_internal( int size , int needsMapped )
+hb_buffer_t * hb_buffer_init_internal( int size )
{
hb_buffer_t * b;
// Certain libraries (hrm ffmpeg) expect buffers passed to them to
{
b = hb_fifo_get( buffer_pool );
- /* OpenCL */
- if (b != NULL && needsMapped && b->cl.buffer == NULL)
- {
- // We need a mapped OpenCL buffer and that is not
- // what we got out of the pool.
- // Ditch it; it will get replaced with what we need.
- if (b->data != NULL)
- {
- free(b->data);
- }
- free(b);
- b = NULL;
- }
-
if( b )
{
/*
*/
uint8_t *data = b->data;
- /* OpenCL */
- cl_mem buffer = b->cl.buffer;
- cl_event last_event = b->cl.last_event;
- int loc = b->cl.buffer_location;
-
memset( b, 0, sizeof(hb_buffer_t) );
b->alloc = buffer_pool->buffer_size;
b->size = size;
b->s.renderOffset = AV_NOPTS_VALUE;
b->s.scr_sequence = -1;
- /* OpenCL */
- b->cl.buffer = buffer;
- b->cl.last_event = last_event;
- b->cl.buffer_location = loc;
-
#if defined(HB_BUFFER_DEBUG)
hb_lock(buffers.lock);
hb_list_add(buffers.alloc_list, b);
if (size)
{
- /* OpenCL */
- b->cl.last_event = NULL;
- b->cl.buffer_location = HOST;
-
- /* OpenCL */
- if (needsMapped)
- {
- int status = hb_cl_create_mapped_buffer(&b->cl.buffer, &b->data, b->alloc);
- if (!status)
- {
- hb_error("Failed to map CL buffer");
- free(b);
- return NULL;
- }
- }
- else
- {
- b->cl.buffer = NULL;
-
#if defined( SYS_DARWIN ) || defined( SYS_FREEBSD ) || defined( SYS_MINGW )
- b->data = malloc( b->alloc );
+ b->data = malloc( b->alloc );
#elif defined( SYS_CYGWIN )
- /* FIXME */
- b->data = malloc( b->alloc + 17 );
+ /* FIXME */
+ b->data = malloc( b->alloc + 17 );
#else
- b->data = memalign( 16, b->alloc );
+ b->data = memalign( 16, b->alloc );
#endif
- }
if( !b->data )
{
hb_buffer_t * hb_buffer_init( int size )
{
- return hb_buffer_init_internal(size, 0);
+ return hb_buffer_init_internal(size);
}
hb_buffer_t * hb_buffer_eof_init(void)
}
}
- /* OpenCL */
- buf = hb_buffer_init_internal(size , hb_use_buffers());
+ buf = hb_buffer_init_internal(size);
if( buf == NULL )
return NULL;
int size = dst->size;
int alloc = dst->alloc;
- /* OpenCL */
- cl_mem buffer = dst->cl.buffer;
- cl_event last_event = dst->cl.last_event;
- int loc = dst->cl.buffer_location;
-
*dst = *src;
src->data = data;
src->size = size;
src->alloc = alloc;
-
- /* OpenCL */
- src->cl.buffer = buffer;
- src->cl.last_event = last_event;
- src->cl.buffer_location = loc;
}
// Frees the specified buffer list.
// free the buf
if( b->data )
{
- if (b->cl.buffer != NULL)
- {
- /* OpenCL */
- if (hb_cl_free_mapped_buffer(b->cl.buffer, b->data) == 0)
- {
- hb_log("hb_buffer_close: bad free %p -> buffer %p map %p",
- b, b->cl.buffer, b->data);
- }
- }
- else
- {
- free(b->data);
- }
+ free(b->data);
hb_lock(buffers.lock);
buffers.allocated -= b->alloc;
hb_unlock(buffers.lock);
*/
#include "hb.h"
-#include "opencl.h"
#include "hbffmpeg.h"
#include "encx264.h"
#include "libavfilter/avfilter.h"
// power management opaque pointer
void * system_sleep_opaque;
-
- int enable_opencl;
};
hb_work_object_t * hb_objects = NULL;
return ret;
}
-int hb_get_opencl_enabled(hb_handle_t *h)
-{
- return h->enable_opencl;
-}
-
int hb_avcodec_close(AVCodecContext *avctx)
{
int ret;
global_verbosity_level = level;
}
-/*
- * Enable or disable support for OpenCL detection.
- */
-void hb_opencl_set_enable(hb_handle_t *h, int enable_opencl)
-{
- h->enable_opencl = enable_opencl;
-}
-
/**
* libhb initialization routine.
* @param verbose HB_DEBUG_NONE or HB_DEBUG_ALL.
}
hb_log(" - logical processor count: %d", hb_get_cpu_count());
- /* Print OpenCL info here so that it's in all scan and encode logs */
- if (hb_get_opencl_enabled(h))
- {
- hb_opencl_info_print();
- }
-
#ifdef USE_QSV
/* Print QSV info here so that it's in all scan and encode logs */
hb_qsv_info_print();
hb_presets_free();
- /* OpenCL library (dynamically loaded) */
- hb_ocl_close();
-
/* Find and remove temp folder */
memset( dirname, 0, 1024 );
hb_get_temporary_directory( dirname );
void hb_register_logger( void (*log_cb)(const char* message) );
hb_handle_t * hb_init( int verbose );
void hb_log_level_set(hb_handle_t *h, int level);
-void hb_opencl_set_enable(hb_handle_t *h, int enable_opencl);
/* hb_get_version() */
const char * hb_get_full_description();
char * hb_dvd_name( char * path );
void hb_dvd_set_dvdnav( int enable );
-int hb_get_opencl_enabled(hb_handle_t *h);
-
/* hb_scan()
Scan the specified path. Can be a DVD device, a VIDEO_TS folder or
a VOB file. If title_index is 0, scan all titles. */
"s:{s:o, s:o, s:o,},"
// PAR {Num, Den}
"s:{s:o, s:o},"
- // Video {Encoder, OpenCL, QSV {Decode, AsyncDepth}}
- "s:{s:o, s:o, s:{s:o, s:o}},"
+ // Video {Encoder, QSV {Decode, AsyncDepth}}
+ "s:{s:o, s:{s:o, s:o}},"
// Audio {CopyMask, FallbackEncoder, AudioList []}
"s:{s:[], s:o, s:[]},"
// Subtitles {Search {Enable, Forced, Default, Burn}, SubtitleList []}
"Den", hb_value_int(job->par.den),
"Video",
"Encoder", hb_value_int(job->vcodec),
- "OpenCL", hb_value_bool(job->use_opencl),
"QSV",
"Decode", hb_value_bool(job->qsv.decode),
"AsyncDepth", hb_value_int(job->qsv.async_depth),
"s?{s:i, s:i},"
// Video {Codec, Quality, Bitrate, Preset, Tune, Profile, Level, Options
// TwoPass, Turbo, ColorMatrixCode,
- // OpenCL, QSV {Decode, AsyncDepth}}
+ // QSV {Decode, AsyncDepth}}
"s:{s:o, s?f, s?i, s?s, s?s, s?s, s?s, s?s,"
" s?b, s?b, s?i,"
- " s?b, s?{s?b, s?i}},"
+ " s?{s?b, s?i}},"
// Audio {CopyMask, FallbackEncoder, AudioList}
"s?{s?o, s?o, s?o},"
// Subtitle {Search {Enable, Forced, Default, Burn}, SubtitleList}
"TwoPass", unpack_b(&job->twopass),
"Turbo", unpack_b(&job->fastfirstpass),
"ColorMatrixCode", unpack_i(&job->color_matrix_code),
- "OpenCL", unpack_b(&job->use_opencl),
"QSV",
"Decode", unpack_b(&job->qsv.decode),
"AsyncDepth", unpack_i(&job->qsv.async_depth),
} qsv_details;
#endif
- /* OpenCL */
- struct cl_data
- {
- cl_mem buffer;
- cl_event last_event;
- enum { HOST, DEVICE } buffer_location;
- } cl;
-
// libav may attach AV_PKT_DATA_PALETTE side data to some AVPackets
// Store this data here when read and pass to decoder.
hb_buffer_t * palette;
+++ /dev/null
-/* oclscale.c
-
- Copyright (c) 2003-2017 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
- Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
-
- */
-
-#include <math.h>
-#include "common.h"
-#include "opencl.h"
-#include "openclwrapper.h"
-#define FILTER_LEN 4
-
-#define _A -0.5f
-
-cl_float cubic(cl_float x)
-{
- if (x < 0)
- x = -x;
-
- if (x < 1)
- return (_A + 2.0f) * (x * x * x) - (_A + 3.0f) * (x * x) + 0 + 1;
- else if (x < 2)
- return (_A) * (x * x * x) - (5.0f * _A) * (x * x) + (8.0f * _A) * x - (4.0f * _A);
- else
- return 0;
-}
-
-
-cl_float *hb_bicubic_weights(cl_float scale, int length)
-{
- cl_float *weights = (cl_float*) malloc(length * sizeof(cl_float) * 4);
-
- int i; // C rocks
- cl_float *out = weights;
- for (i = 0; i < length; ++i)
- {
- cl_float x = i / scale;
- cl_float dx = x - (int)x;
- *out++ = cubic(-dx - 1.0f);
- *out++ = cubic(-dx);
- *out++ = cubic(-dx + 1.0f);
- *out++ = cubic(-dx + 2.0f);
- }
- return weights;
-}
-
-int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv);
-
-/**
-* executive scale using opencl
-* get filter args
-* create output buffer
-* create horizontal filter buffer
-* create vertical filter buffer
-* create kernels
-*/
-int hb_ocl_scale_func( void **data, KernelEnv *kenv )
-{
- cl_int status;
-
- cl_mem in_buf = data[0];
- cl_mem out_buf = data[1];
- int crop_top = (intptr_t)data[2];
- int crop_bottom = (intptr_t)data[3];
- int crop_left = (intptr_t)data[4];
- int crop_right = (intptr_t)data[5];
- cl_int in_frame_w = (intptr_t)data[6];
- cl_int in_frame_h = (intptr_t)data[7];
- cl_int out_frame_w = (intptr_t)data[8];
- cl_int out_frame_h = (intptr_t)data[9];
- hb_oclscale_t *os = data[10];
- hb_buffer_t *in = data[11];
- hb_buffer_t *out = data[12];
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_ocl_scale_func: OpenCL support not available");
- return 0;
- }
-
- if (os->initialized == 0)
- {
- hb_log( "Scaling With OpenCL" );
- if (kenv->isAMD != 0)
- hb_log( "Using Zero Copy");
- // create the block kernel
- cl_int status;
- os->m_kernel = hb_ocl->clCreateKernel(kenv->program, "frame_scale", &status);
-
- os->initialized = 1;
- }
-
- {
- // Use the new kernel
- cl_event events[5];
- int eventCount = 0;
-
- if (kenv->isAMD == 0) {
- status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue,
- in->cl.buffer, in->data, 0,
- NULL, &events[eventCount++]);
- status = hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue,
- out->cl.buffer, out->data, 0,
- NULL, &events[eventCount++]);
- }
-
- cl_int srcPlaneOffset0 = in->plane[0].data - in->data;
- cl_int srcPlaneOffset1 = in->plane[1].data - in->data;
- cl_int srcPlaneOffset2 = in->plane[2].data - in->data;
- cl_int srcRowWords0 = in->plane[0].stride;
- cl_int srcRowWords1 = in->plane[1].stride;
- cl_int srcRowWords2 = in->plane[2].stride;
- cl_int dstPlaneOffset0 = out->plane[0].data - out->data;
- cl_int dstPlaneOffset1 = out->plane[1].data - out->data;
- cl_int dstPlaneOffset2 = out->plane[2].data - out->data;
- cl_int dstRowWords0 = out->plane[0].stride;
- cl_int dstRowWords1 = out->plane[1].stride;
- cl_int dstRowWords2 = out->plane[2].stride;
-
- if (crop_top != 0 || crop_bottom != 0 || crop_left != 0 || crop_right != 0) {
- srcPlaneOffset0 += crop_left + crop_top * srcRowWords0;
- srcPlaneOffset1 += crop_left / 2 + (crop_top / 2) * srcRowWords1;
- srcPlaneOffset2 += crop_left / 2 + (crop_top / 2) * srcRowWords2;
- in_frame_w = in_frame_w - crop_right - crop_left;
- in_frame_h = in_frame_h - crop_bottom - crop_top;
- }
-
- cl_float xscale = (out_frame_w * 1.0f) / in_frame_w;
- cl_float yscale = (out_frame_h * 1.0f) / in_frame_h;
- setupScaleWeights(xscale, yscale, out_frame_w, out_frame_h, os, kenv);
-
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 0, sizeof(cl_mem), &out_buf);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 1, sizeof(cl_mem), &in_buf);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 2, sizeof(cl_float), &xscale);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 3, sizeof(cl_float), &yscale);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 4, sizeof(cl_int), &srcPlaneOffset0);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 5, sizeof(cl_int), &srcPlaneOffset1);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 6, sizeof(cl_int), &srcPlaneOffset2);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 7, sizeof(cl_int), &dstPlaneOffset0);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 8, sizeof(cl_int), &dstPlaneOffset1);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 9, sizeof(cl_int), &dstPlaneOffset2);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 10, sizeof(cl_int), &srcRowWords0);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 11, sizeof(cl_int), &srcRowWords1);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 12, sizeof(cl_int), &srcRowWords2);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 13, sizeof(cl_int), &dstRowWords0);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 14, sizeof(cl_int), &dstRowWords1);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 15, sizeof(cl_int), &dstRowWords2);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 16, sizeof(cl_int), &in_frame_w);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 17, sizeof(cl_int), &in_frame_h);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 18, sizeof(cl_int), &out_frame_w);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 19, sizeof(cl_int), &out_frame_h);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 20, sizeof(cl_mem), &os->bicubic_x_weights);
- HB_OCL_CHECK(hb_ocl->clSetKernelArg, os->m_kernel, 21, sizeof(cl_mem), &os->bicubic_y_weights);
-
- size_t workOffset[] = { 0, 0, 0 };
- size_t globalWorkSize[] = { 1, 1, 1 };
- size_t localWorkSize[] = { 1, 1, 1 };
-
- int xgroups = (out_frame_w + 63) / 64;
- int ygroups = (out_frame_h + 15) / 16;
-
- localWorkSize[0] = 64;
- localWorkSize[1] = 1;
- localWorkSize[2] = 1;
- globalWorkSize[0] = xgroups * 64;
- globalWorkSize[1] = ygroups;
- globalWorkSize[2] = 3;
-
- HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue,
- os->m_kernel, 3, workOffset, globalWorkSize, localWorkSize,
- eventCount, eventCount == 0 ? NULL : &events[0], &events[eventCount]);
- ++eventCount;
-
- if (kenv->isAMD == 0) {
- in->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, in->cl.buffer,
- CL_FALSE, CL_MAP_READ|CL_MAP_WRITE,
- 0, in->alloc,
- eventCount ? 1 : 0,
- eventCount ? &events[eventCount - 1] : NULL,
- &events[eventCount], &status);
- out->data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue, out->cl.buffer,
- CL_FALSE, CL_MAP_READ|CL_MAP_WRITE,
- 0, out->alloc,
- eventCount ? 1 : 0,
- eventCount ? &events[eventCount - 1] : NULL,
- &events[eventCount + 1], &status);
- eventCount += 2;
- }
-
- hb_ocl->clFlush(kenv->command_queue);
- hb_ocl->clWaitForEvents(eventCount, &events[0]);
- int i;
- for (i = 0; i < eventCount; ++i)
- {
- hb_ocl->clReleaseEvent(events[i]);
- }
- }
-
- return 1;
-}
-
-int setupScaleWeights(cl_float xscale, cl_float yscale, int width, int height, hb_oclscale_t *os, KernelEnv *kenv)
-{
- cl_int status;
-
- if (hb_ocl == NULL)
- {
- hb_error("setupScaleWeights: OpenCL support not available");
- return 1;
- }
-
- if (os->xscale != xscale || os->width < width)
- {
- cl_float *xweights = hb_bicubic_weights(xscale, width);
- HB_OCL_BUF_FREE (hb_ocl, os->bicubic_x_weights);
- HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_x_weights, CL_MEM_READ_ONLY,
- sizeof(cl_float) * width * 4);
- HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_x_weights,
- CL_TRUE, 0, sizeof(cl_float) * width * 4, xweights, 0, NULL, NULL);
- os->width = width;
- os->xscale = xscale;
- free(xweights);
- }
-
- if ((os->yscale != yscale) || (os->height < height))
- {
- cl_float *yweights = hb_bicubic_weights(yscale, height);
- HB_OCL_BUF_FREE (hb_ocl, os->bicubic_y_weights);
- HB_OCL_BUF_CREATE(hb_ocl, os->bicubic_y_weights, CL_MEM_READ_ONLY,
- sizeof(cl_float) * height * 4);
- HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue, os->bicubic_y_weights,
- CL_TRUE, 0, sizeof(cl_float) * height * 4, yweights, 0, NULL, NULL);
- os->height = height;
- os->yscale = yscale;
- free(yweights);
- }
- return 0;
-}
-
-
-/**
-* function describe: this function is used to scaling video frame. it uses the gausi scaling algorithm
-* parameter:
-* inputFrameBuffer: the source video frame opencl buffer
-* outputdata: the destination video frame buffer
-* inputWidth: the width of the source video frame
-* inputHeight: the height of the source video frame
-* outputWidth: the width of destination video frame
-* outputHeight: the height of destination video frame
-*/
-
-
-static int s_scale_init_flag = 0;
-
-int do_scale_init()
-{
- if ( s_scale_init_flag==0 )
- {
- int st = hb_register_kernel_wrapper( "frame_scale", hb_ocl_scale_func );
- if( !st )
- {
- hb_log( "register kernel[%s] failed", "frame_scale" );
- return 0;
- }
- s_scale_init_flag++;
- }
- return 1;
-}
-
-
-int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop, hb_oclscale_t *os)
-{
- void *data[13];
-
- if (do_scale_init() == 0)
- return 0;
-
- data[0] = in->cl.buffer;
- data[1] = out->cl.buffer;
- data[2] = (void*)(intptr_t)(crop[0]);
- data[3] = (void*)(intptr_t)(crop[1]);
- data[4] = (void*)(intptr_t)(crop[2]);
- data[5] = (void*)(intptr_t)(crop[3]);
- data[6] = (void*)(intptr_t)(in->f.width);
- data[7] = (void*)(intptr_t)(in->f.height);
- data[8] = (void*)(intptr_t)(out->f.width);
- data[9] = (void*)(intptr_t)(out->f.height);
- data[10] = os;
- data[11] = in;
- data[12] = out;
-
- if( !hb_run_kernel( "frame_scale", data ) )
- hb_log( "run kernel[%s] failed", "frame_scale" );
- return 0;
-}
+++ /dev/null
-/* opencl.c
-
- Copyright (c) 2003-2017 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
- */
-
-#ifdef _WIN32
-#include <windows.h>
-#define HB_OCL_DLOPEN LoadLibraryW(L"OpenCL")
-#define HB_OCL_DLSYM GetProcAddress
-#define HB_OCL_DLCLOSE FreeLibrary
-#else
-#include <dlfcn.h>
-#ifdef __APPLE__
-#define HB_OCL_DLOPEN dlopen("/System/Library/Frameworks/OpenCL.framework/OpenCL", RTLD_NOW)
-#else
-#define HB_OCL_DLOPEN dlopen("libOpenCL.so", RTLD_NOW)
-#endif
-#define HB_OCL_DLSYM dlsym
-#define HB_OCL_DLCLOSE dlclose
-#endif
-
-#include "common.h"
-#include "opencl.h"
-
-hb_opencl_library_t *hb_ocl = NULL;
-
-int hb_ocl_init()
-{
- if (hb_ocl == NULL)
- {
- if ((hb_ocl = hb_opencl_library_init()) == NULL)
- {
- return -1;
- }
- }
- return 0;
-}
-
-void hb_ocl_close()
-{
- hb_opencl_library_close(&hb_ocl);
-}
-
-hb_opencl_library_t* hb_opencl_library_init()
-{
- hb_opencl_library_t *opencl;
- if ((opencl = calloc(1, sizeof(hb_opencl_library_t))) == NULL)
- {
- hb_error("hb_opencl_library_init: memory allocation failure");
- goto fail;
- }
-
- opencl->library = HB_OCL_DLOPEN;
- if (opencl->library == NULL)
- {
- goto fail;
- }
-
-#define HB_OCL_LOAD(func) \
-{ \
- if ((opencl->func = (void*)HB_OCL_DLSYM(opencl->library, #func)) == NULL) \
- { \
- hb_log("hb_opencl_library_init: failed to load function '%s'", #func); \
- goto fail; \
- } \
-}
- HB_OCL_LOAD(clBuildProgram);
- HB_OCL_LOAD(clCreateBuffer);
- HB_OCL_LOAD(clCreateCommandQueue);
- HB_OCL_LOAD(clCreateContextFromType);
- HB_OCL_LOAD(clCreateKernel);
- HB_OCL_LOAD(clCreateProgramWithBinary);
- HB_OCL_LOAD(clCreateProgramWithSource);
- HB_OCL_LOAD(clEnqueueCopyBuffer);
- HB_OCL_LOAD(clEnqueueMapBuffer);
- HB_OCL_LOAD(clEnqueueNDRangeKernel);
- HB_OCL_LOAD(clEnqueueReadBuffer);
- HB_OCL_LOAD(clEnqueueUnmapMemObject);
- HB_OCL_LOAD(clEnqueueWriteBuffer);
- HB_OCL_LOAD(clFlush);
- HB_OCL_LOAD(clGetCommandQueueInfo);
- HB_OCL_LOAD(clGetContextInfo);
- HB_OCL_LOAD(clGetDeviceIDs);
- HB_OCL_LOAD(clGetDeviceInfo);
- HB_OCL_LOAD(clGetPlatformIDs);
- HB_OCL_LOAD(clGetPlatformInfo);
- HB_OCL_LOAD(clGetProgramBuildInfo);
- HB_OCL_LOAD(clGetProgramInfo);
- HB_OCL_LOAD(clReleaseCommandQueue);
- HB_OCL_LOAD(clReleaseContext);
- HB_OCL_LOAD(clReleaseEvent);
- HB_OCL_LOAD(clReleaseKernel);
- HB_OCL_LOAD(clReleaseMemObject);
- HB_OCL_LOAD(clReleaseProgram);
- HB_OCL_LOAD(clSetKernelArg);
- HB_OCL_LOAD(clWaitForEvents);
-
- //success
- return opencl;
-
-fail:
- hb_opencl_library_close(&opencl);
- return NULL;
-}
-
-void hb_opencl_library_close(hb_opencl_library_t **_opencl)
-{
- if (_opencl == NULL)
- {
- return;
- }
- hb_opencl_library_t *opencl = *_opencl;
-
- if (opencl != NULL)
- {
- if (opencl->library != NULL)
- {
- HB_OCL_DLCLOSE(opencl->library);
- }
- free(opencl);
- }
- *_opencl = NULL;
-}
-
-static int hb_opencl_device_is_supported(hb_opencl_device_t* device)
-{
- // we only support OpenCL on GPUs for now
- // Ivy Bridge supports OpenCL on GPU, but it's too slow to be usable
- // FIXME: disable on NVIDIA to to a bug
- if ((device != NULL) &&
- (device->type & CL_DEVICE_TYPE_GPU) &&
- (device->ocl_vendor != HB_OCL_VENDOR_NVIDIA) &&
- (device->ocl_vendor != HB_OCL_VENDOR_INTEL ||
- hb_get_cpu_platform() != HB_CPU_PLATFORM_INTEL_IVB))
- {
- int major, minor;
- // check OpenCL version:
- // OpenCL<space><major_version.minor_version><space><vendor-specific information>
- if (sscanf(device->version, "OpenCL %d.%d", &major, &minor) != 2)
- {
- return 0;
- }
- return (major > HB_OCL_MINVERSION_MAJOR) || (major == HB_OCL_MINVERSION_MAJOR &&
- minor >= HB_OCL_MINVERSION_MINOR);
- }
- return 0;
-}
-
-static hb_opencl_device_t* hb_opencl_device_get(hb_opencl_library_t *opencl,
- cl_device_id device_id)
-{
- if (opencl == NULL || opencl->clGetDeviceInfo == NULL)
- {
- hb_error("hb_opencl_device_get: OpenCL support not available");
- return NULL;
- }
- else if (device_id == NULL)
- {
- hb_error("hb_opencl_device_get: invalid device ID");
- return NULL;
- }
-
- hb_opencl_device_t *device = calloc(1, sizeof(hb_opencl_device_t));
- if (device == NULL)
- {
- hb_error("hb_opencl_device_get: memory allocation failure");
- return NULL;
- }
-
- cl_int status = CL_SUCCESS;
- device->id = device_id;
-
- status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_VENDOR, sizeof(device->vendor),
- device->vendor, NULL);
- status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_NAME, sizeof(device->name),
- device->name, NULL);
- status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_VERSION, sizeof(device->version),
- device->version, NULL);
- status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_TYPE, sizeof(device->type),
- &device->type, NULL);
- status |= opencl->clGetDeviceInfo(device->id, CL_DEVICE_PLATFORM, sizeof(device->platform),
- &device->platform, NULL);
- status |= opencl->clGetDeviceInfo(device->id, CL_DRIVER_VERSION, sizeof(device->driver),
- device->driver, NULL);
- if (status != CL_SUCCESS)
- {
- free(device);
- return NULL;
- }
-
- if (!strcmp(device->vendor, "Advanced Micro Devices, Inc.") ||
- !strcmp(device->vendor, "AMD"))
- {
- device->ocl_vendor = HB_OCL_VENDOR_AMD;
- }
- else if (!strncmp(device->vendor, "NVIDIA", 6 /* strlen("NVIDIA") */))
- {
- device->ocl_vendor = HB_OCL_VENDOR_NVIDIA;
- }
- else if (!strncmp(device->vendor, "Intel", 5 /* strlen("Intel") */))
- {
- device->ocl_vendor = HB_OCL_VENDOR_INTEL;
- }
- else
- {
- device->ocl_vendor = HB_OCL_VENDOR_OTHER;
- }
-
- return device;
-}
-
-static void hb_opencl_devices_list_close(hb_list_t **_list)
-{
- if (_list != NULL)
- {
- hb_list_t *list = *_list;
- hb_opencl_device_t *device;
- while (list != NULL && hb_list_count(list) > 0)
- {
- if ((device = hb_list_item(list, 0)) != NULL)
- {
- hb_list_rem(list, device);
- free(device);
- }
- }
- }
- hb_list_close(_list);
-}
-
-static hb_list_t* hb_opencl_devices_list_get(hb_opencl_library_t *opencl,
- cl_device_type device_type)
-{
- if (opencl == NULL ||
- opencl->library == NULL ||
- opencl->clGetDeviceIDs == NULL ||
- opencl->clGetDeviceInfo == NULL ||
- opencl->clGetPlatformIDs == NULL)
- {
- hb_error("hb_opencl_devices_list_get: OpenCL support not available");
- return NULL;
- }
-
- hb_list_t *list = hb_list_init();
- if (list == NULL)
- {
- hb_error("hb_opencl_devices_list_get: memory allocation failure");
- return NULL;
- }
-
- cl_device_id *device_ids = NULL;
- hb_opencl_device_t *device = NULL;
- cl_platform_id *platform_ids = NULL;
- cl_uint i, j, num_platforms, num_devices;
-
- if (opencl->clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || !num_platforms)
- {
- goto fail;
- }
- if ((platform_ids = malloc(sizeof(cl_platform_id) * num_platforms)) == NULL)
- {
- hb_error("hb_opencl_devices_list_get: memory allocation failure");
- goto fail;
- }
- if (opencl->clGetPlatformIDs(num_platforms, platform_ids, NULL) != CL_SUCCESS)
- {
- goto fail;
- }
- for (i = 0; i < num_platforms; i++)
- {
- if (opencl->clGetDeviceIDs(platform_ids[i], device_type, 0, NULL, &num_devices) != CL_SUCCESS || !num_devices)
- {
- // non-fatal
- continue;
- }
- if ((device_ids = malloc(sizeof(cl_device_id) * num_devices)) == NULL)
- {
- hb_error("hb_opencl_devices_list_get: memory allocation failure");
- goto fail;
- }
- if (opencl->clGetDeviceIDs(platform_ids[i], device_type, num_devices, device_ids, NULL) != CL_SUCCESS)
- {
- // non-fatal
- continue;
- }
- for (j = 0; j < num_devices; j++)
- {
- if ((device = hb_opencl_device_get(opencl, device_ids[j])) != NULL)
- {
- hb_list_add(list, device);
- }
- }
- }
-
- goto end;
-
-fail:
- hb_opencl_devices_list_close(&list);
-
-end:
- free(platform_ids);
- free(device_ids);
- return list;
-}
-
-int hb_opencl_available()
-{
- static int opencl_available = -1;
- if (opencl_available >= 0)
- {
- return opencl_available;
- }
- opencl_available = 0;
-
- /*
- * Check whether we can load the OpenCL library, then check devices and make
- * sure we support running OpenCL code on at least one of them.
- */
- hb_opencl_library_t *opencl;
- if ((opencl = hb_opencl_library_init()) != NULL)
- {
- int i;
- hb_list_t *device_list;
- hb_opencl_device_t *device;
- if ((device_list = hb_opencl_devices_list_get(opencl, CL_DEVICE_TYPE_ALL)) != NULL)
- {
- for (i = 0; i < hb_list_count(device_list); i++)
- {
- if ((device = hb_list_item(device_list, i)) != NULL &&
- (hb_opencl_device_is_supported(device)))
- {
- opencl_available = 1;
- break;
- }
- }
- hb_opencl_devices_list_close(&device_list);
- }
- hb_opencl_library_close(&opencl);
- }
- return opencl_available;
-}
-
-void hb_opencl_info_print()
-{
- /*
- * Note: this function should not log any warnings or errors.
- * Its only purpose is to list OpenCL-capable devices, so let's initialize
- * only what we absolutely need here, rather than calling library_open().
- */
- hb_opencl_library_t ocl, *opencl = &ocl;
- if ((opencl->library = (void*)HB_OCL_DLOPEN) == NULL ||
- (opencl->clGetDeviceIDs = (void*)HB_OCL_DLSYM(opencl->library, "clGetDeviceIDs" )) == NULL ||
- (opencl->clGetDeviceInfo = (void*)HB_OCL_DLSYM(opencl->library, "clGetDeviceInfo" )) == NULL ||
- (opencl->clGetPlatformIDs = (void*)HB_OCL_DLSYM(opencl->library, "clGetPlatformIDs")) == NULL)
- {
- // zero or insufficient OpenCL support
- hb_log("OpenCL: library not available");
- goto end;
- }
-
- int i, idx;
- hb_list_t *device_list;
- hb_opencl_device_t *device;
- if ((device_list = hb_opencl_devices_list_get(opencl, CL_DEVICE_TYPE_ALL)) != NULL)
- {
- for (i = 0, idx = 1; i < hb_list_count(device_list); i++)
- {
- if ((device = hb_list_item(device_list, i)) != NULL)
- {
- // don't list CPU devices (always unsupported)
- if (!(device->type & CL_DEVICE_TYPE_CPU))
- {
- hb_log("OpenCL device #%d: %s %s", idx++, device->vendor, device->name);
- hb_log(" - OpenCL version: %s", device->version + 7 /* strlen("OpenCL ") */);
- hb_log(" - driver version: %s", device->driver);
- hb_log(" - device type: %s%s",
- device->type & CL_DEVICE_TYPE_CPU ? "CPU" :
- device->type & CL_DEVICE_TYPE_GPU ? "GPU" :
- device->type & CL_DEVICE_TYPE_CUSTOM ? "Custom" :
- device->type & CL_DEVICE_TYPE_ACCELERATOR ? "Accelerator" : "Unknown",
- device->type & CL_DEVICE_TYPE_DEFAULT ? " (default)" : "");
- hb_log(" - supported: %s",
- hb_opencl_device_is_supported(device) ? "YES" : "no");
- }
- }
- }
- hb_opencl_devices_list_close(&device_list);
- }
-
-end:
- /*
- * Close only the initialized part
- */
- if (opencl->library != NULL)
- {
- HB_OCL_DLCLOSE(opencl->library);
- }
-}
+++ /dev/null
-/* opencl.h
-
- Copyright (c) 2003-2017 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
- */
-
-#ifndef HB_OPENCL_H
-#define HB_OPENCL_H
-
-#include "extras/cl.h"
-#include "openclwrapper.h"
-
-// we only support OpenCL 1.1 or later
-#define HB_OCL_MINVERSION_MAJOR 1
-#define HB_OCL_MINVERSION_MINOR 1
-
-#define HB_OCL_FUNC_TYPE(name) hb_opencl_##name##_func
-#define HB_OCL_FUNC_DECL(name) HB_OCL_FUNC_TYPE(name) name
-#define HB_OCL_API(ret, attr, name) typedef ret (attr* HB_OCL_FUNC_TYPE(name))
-
-#ifdef __APPLE__
-#pragma mark -
-#pragma mark OpenCL API
-#endif // __APPLE__
-
-/* Platform API */
-HB_OCL_API(cl_int, CL_API_CALL, clGetPlatformIDs)
-(cl_uint /* num_entries */,
- cl_platform_id * /* platforms */,
- cl_uint * /* num_platforms */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetPlatformInfo)
-(cl_platform_id /* platform */,
- cl_platform_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-/* Device APIs */
-HB_OCL_API(cl_int, CL_API_CALL, clGetDeviceIDs)
-(cl_platform_id /* platform */,
- cl_device_type /* device_type */,
- cl_uint /* num_entries */,
- cl_device_id * /* devices */,
- cl_uint * /* num_devices */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetDeviceInfo)
-(cl_device_id /* device */,
- cl_device_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clCreateSubDevices)
-(cl_device_id /* in_device */,
- const cl_device_partition_property * /* properties */,
- cl_uint /* num_devices */,
- cl_device_id * /* out_devices */,
- cl_uint * /* num_devices_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clRetainDevice)
-(cl_device_id /* device */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clReleaseDevice)
-(cl_device_id /* device */);
-
-/* Context APIs */
-HB_OCL_API(cl_context, CL_API_CALL, clCreateContext)
-(const cl_context_properties * /* properties */,
- cl_uint /* num_devices */,
- const cl_device_id * /* devices */,
- void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
- void * /* user_data */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_context, CL_API_CALL, clCreateContextFromType)
-(const cl_context_properties * /* properties */,
- cl_device_type /* device_type */,
- void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
- void * /* user_data */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clRetainContext)
-(cl_context /* context */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clReleaseContext)
-(cl_context /* context */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetContextInfo)
-(cl_context /* context */,
- cl_context_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-/* Command Queue APIs */
-HB_OCL_API(cl_command_queue, CL_API_CALL, clCreateCommandQueue)
-(cl_context /* context */,
- cl_device_id /* device */,
- cl_command_queue_properties /* properties */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clRetainCommandQueue)
-(cl_command_queue /* command_queue */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clReleaseCommandQueue)
-(cl_command_queue /* command_queue */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetCommandQueueInfo)
-(cl_command_queue /* command_queue */,
- cl_command_queue_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-/* Memory Object APIs */
-HB_OCL_API(cl_mem, CL_API_CALL, clCreateBuffer)
-(cl_context /* context */,
- cl_mem_flags /* flags */,
- size_t /* size */,
- void * /* host_ptr */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_mem, CL_API_CALL, clCreateSubBuffer)
-(cl_mem /* buffer */,
- cl_mem_flags /* flags */,
- cl_buffer_create_type /* buffer_create_type */,
- const void * /* buffer_create_info */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_mem, CL_API_CALL, clCreateImage)
-(cl_context /* context */,
- cl_mem_flags /* flags */,
- const cl_image_format * /* image_format */,
- const cl_image_desc * /* image_desc */,
- void * /* host_ptr */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clRetainMemObject)
-(cl_mem /* memobj */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clReleaseMemObject)
-(cl_mem /* memobj */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetSupportedImageFormats)
-(cl_context /* context */,
- cl_mem_flags /* flags */,
- cl_mem_object_type /* image_type */,
- cl_uint /* num_entries */,
- cl_image_format * /* image_formats */,
- cl_uint * /* num_image_formats */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetMemObjectInfo)
-(cl_mem /* memobj */,
- cl_mem_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetImageInfo)
-(cl_mem /* image */,
- cl_image_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clSetMemObjectDestructorCallback)
-(cl_mem /* memobj */,
- void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
- void * /*user_data */ );
-
-/* Sampler APIs */
-HB_OCL_API(cl_sampler, CL_API_CALL, clCreateSampler)
-(cl_context /* context */,
- cl_bool /* normalized_coords */,
- cl_addressing_mode /* addressing_mode */,
- cl_filter_mode /* filter_mode */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clRetainSampler)
-(cl_sampler /* sampler */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clReleaseSampler)
-(cl_sampler /* sampler */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetSamplerInfo)
-(cl_sampler /* sampler */,
- cl_sampler_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-/* Program Object APIs */
-HB_OCL_API(cl_program, CL_API_CALL, clCreateProgramWithSource)
-(cl_context /* context */,
- cl_uint /* count */,
- const char ** /* strings */,
- const size_t * /* lengths */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_program, CL_API_CALL, clCreateProgramWithBinary)
-(cl_context /* context */,
- cl_uint /* num_devices */,
- const cl_device_id * /* device_list */,
- const size_t * /* lengths */,
- const unsigned char ** /* binaries */,
- cl_int * /* binary_status */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_program, CL_API_CALL, clCreateProgramWithBuiltInKernels)
-(cl_context /* context */,
- cl_uint /* num_devices */,
- const cl_device_id * /* device_list */,
- const char * /* kernel_names */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clRetainProgram)
-(cl_program /* program */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clReleaseProgram)
-(cl_program /* program */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clBuildProgram)
-(cl_program /* program */,
- cl_uint /* num_devices */,
- const cl_device_id * /* device_list */,
- const char * /* options */,
- void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
- void * /* user_data */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clCompileProgram)
-(cl_program /* program */,
- cl_uint /* num_devices */,
- const cl_device_id * /* device_list */,
- const char * /* options */,
- cl_uint /* num_input_headers */,
- const cl_program * /* input_headers */,
- const char ** /* header_include_names */,
- void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
- void * /* user_data */);
-
-HB_OCL_API(cl_program, CL_API_CALL, clLinkProgram)
-(cl_context /* context */,
- cl_uint /* num_devices */,
- const cl_device_id * /* device_list */,
- const char * /* options */,
- cl_uint /* num_input_programs */,
- const cl_program * /* input_programs */,
- void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
- void * /* user_data */,
- cl_int * /* errcode_ret */ );
-
-
-HB_OCL_API(cl_int, CL_API_CALL, clUnloadPlatformCompiler)
-(cl_platform_id /* platform */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetProgramInfo)
-(cl_program /* program */,
- cl_program_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetProgramBuildInfo)
-(cl_program /* program */,
- cl_device_id /* device */,
- cl_program_build_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-/* Kernel Object APIs */
-HB_OCL_API(cl_kernel, CL_API_CALL, clCreateKernel)
-(cl_program /* program */,
- const char * /* kernel_name */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clCreateKernelsInProgram)
-(cl_program /* program */,
- cl_uint /* num_kernels */,
- cl_kernel * /* kernels */,
- cl_uint * /* num_kernels_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clRetainKernel)
-(cl_kernel /* kernel */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clReleaseKernel)
-(cl_kernel /* kernel */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clSetKernelArg)
-(cl_kernel /* kernel */,
- cl_uint /* arg_index */,
- size_t /* arg_size */,
- const void * /* arg_value */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetKernelInfo)
-(cl_kernel /* kernel */,
- cl_kernel_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetKernelArgInfo)
-(cl_kernel /* kernel */,
- cl_uint /* arg_indx */,
- cl_kernel_arg_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetKernelWorkGroupInfo)
-(cl_kernel /* kernel */,
- cl_device_id /* device */,
- cl_kernel_work_group_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-/* Event Object APIs */
-HB_OCL_API(cl_int, CL_API_CALL, clWaitForEvents)
-(cl_uint /* num_events */,
- const cl_event * /* event_list */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clGetEventInfo)
-(cl_event /* event */,
- cl_event_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-HB_OCL_API(cl_event, CL_API_CALL, clCreateUserEvent)
-(cl_context /* context */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clRetainEvent)
-(cl_event /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clReleaseEvent)
-(cl_event /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clSetUserEventStatus)
-(cl_event /* event */,
- cl_int /* execution_status */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clSetEventCallback)
-(cl_event /* event */,
- cl_int /* command_exec_callback_type */,
- void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
- void * /* user_data */);
-
-/* Profiling APIs */
-HB_OCL_API(cl_int, CL_API_CALL, clGetEventProfilingInfo)
-(cl_event /* event */,
- cl_profiling_info /* param_name */,
- size_t /* param_value_size */,
- void * /* param_value */,
- size_t * /* param_value_size_ret */);
-
-/* Flush and Finish APIs */
-HB_OCL_API(cl_int, CL_API_CALL, clFlush)
-(cl_command_queue /* command_queue */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clFinish)
-(cl_command_queue /* command_queue */);
-
-/* Enqueued Commands APIs */
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueReadBuffer)
-(cl_command_queue /* command_queue */,
- cl_mem /* buffer */,
- cl_bool /* blocking_read */,
- size_t /* offset */,
- size_t /* size */,
- void * /* ptr */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueReadBufferRect)
-(cl_command_queue /* command_queue */,
- cl_mem /* buffer */,
- cl_bool /* blocking_read */,
- const size_t * /* buffer_offset */,
- const size_t * /* host_offset */,
- const size_t * /* region */,
- size_t /* buffer_row_pitch */,
- size_t /* buffer_slice_pitch */,
- size_t /* host_row_pitch */,
- size_t /* host_slice_pitch */,
- void * /* ptr */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueWriteBuffer)
-(cl_command_queue /* command_queue */,
- cl_mem /* buffer */,
- cl_bool /* blocking_write */,
- size_t /* offset */,
- size_t /* size */,
- const void * /* ptr */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueWriteBufferRect)
-(cl_command_queue /* command_queue */,
- cl_mem /* buffer */,
- cl_bool /* blocking_write */,
- const size_t * /* buffer_offset */,
- const size_t * /* host_offset */,
- const size_t * /* region */,
- size_t /* buffer_row_pitch */,
- size_t /* buffer_slice_pitch */,
- size_t /* host_row_pitch */,
- size_t /* host_slice_pitch */,
- const void * /* ptr */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueFillBuffer)
-(cl_command_queue /* command_queue */,
- cl_mem /* buffer */,
- const void * /* pattern */,
- size_t /* pattern_size */,
- size_t /* offset */,
- size_t /* size */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBuffer)
-(cl_command_queue /* command_queue */,
- cl_mem /* src_buffer */,
- cl_mem /* dst_buffer */,
- size_t /* src_offset */,
- size_t /* dst_offset */,
- size_t /* size */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBufferRect)
-(cl_command_queue /* command_queue */,
- cl_mem /* src_buffer */,
- cl_mem /* dst_buffer */,
- const size_t * /* src_origin */,
- const size_t * /* dst_origin */,
- const size_t * /* region */,
- size_t /* src_row_pitch */,
- size_t /* src_slice_pitch */,
- size_t /* dst_row_pitch */,
- size_t /* dst_slice_pitch */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueReadImage)
-(cl_command_queue /* command_queue */,
- cl_mem /* image */,
- cl_bool /* blocking_read */,
- const size_t * /* origin[3] */,
- const size_t * /* region[3] */,
- size_t /* row_pitch */,
- size_t /* slice_pitch */,
- void * /* ptr */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueWriteImage)
-(cl_command_queue /* command_queue */,
- cl_mem /* image */,
- cl_bool /* blocking_write */,
- const size_t * /* origin[3] */,
- const size_t * /* region[3] */,
- size_t /* input_row_pitch */,
- size_t /* input_slice_pitch */,
- const void * /* ptr */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueFillImage)
-(cl_command_queue /* command_queue */,
- cl_mem /* image */,
- const void * /* fill_color */,
- const size_t * /* origin[3] */,
- const size_t * /* region[3] */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyImage)
-(cl_command_queue /* command_queue */,
- cl_mem /* src_image */,
- cl_mem /* dst_image */,
- const size_t * /* src_origin[3] */,
- const size_t * /* dst_origin[3] */,
- const size_t * /* region[3] */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyImageToBuffer)
-(cl_command_queue /* command_queue */,
- cl_mem /* src_image */,
- cl_mem /* dst_buffer */,
- const size_t * /* src_origin[3] */,
- const size_t * /* region[3] */,
- size_t /* dst_offset */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBufferToImage)
-(cl_command_queue /* command_queue */,
- cl_mem /* src_buffer */,
- cl_mem /* dst_image */,
- size_t /* src_offset */,
- const size_t * /* dst_origin[3] */,
- const size_t * /* region[3] */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(void *, CL_API_CALL, clEnqueueMapBuffer)
-(cl_command_queue /* command_queue */,
- cl_mem /* buffer */,
- cl_bool /* blocking_map */,
- cl_map_flags /* map_flags */,
- size_t /* offset */,
- size_t /* size */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(void *, CL_API_CALL, clEnqueueMapImage)
-(cl_command_queue /* command_queue */,
- cl_mem /* image */,
- cl_bool /* blocking_map */,
- cl_map_flags /* map_flags */,
- const size_t * /* origin[3] */,
- const size_t * /* region[3] */,
- size_t * /* image_row_pitch */,
- size_t * /* image_slice_pitch */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */,
- cl_int * /* errcode_ret */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueUnmapMemObject)
-(cl_command_queue /* command_queue */,
- cl_mem /* memobj */,
- void * /* mapped_ptr */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueMigrateMemObjects)
-(cl_command_queue /* command_queue */,
- cl_uint /* num_mem_objects */,
- const cl_mem * /* mem_objects */,
- cl_mem_migration_flags /* flags */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueNDRangeKernel)
-(cl_command_queue /* command_queue */,
- cl_kernel /* kernel */,
- cl_uint /* work_dim */,
- const size_t * /* global_work_offset */,
- const size_t * /* global_work_size */,
- const size_t * /* local_work_size */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueTask)
-(cl_command_queue /* command_queue */,
- cl_kernel /* kernel */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueNativeKernel)
-(cl_command_queue /* command_queue */,
- void (CL_CALLBACK * /*user_func*/)(void *),
- void * /* args */,
- size_t /* cb_args */,
- cl_uint /* num_mem_objects */,
- const cl_mem * /* mem_list */,
- const void ** /* args_mem_loc */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueMarkerWithWaitList)
-(cl_command_queue /* command_queue */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-HB_OCL_API(cl_int, CL_API_CALL, clEnqueueBarrierWithWaitList)
-(cl_command_queue /* command_queue */,
- cl_uint /* num_events_in_wait_list */,
- const cl_event * /* event_wait_list */,
- cl_event * /* event */);
-
-
-/* Extension function access
- *
- * Returns the extension function address for the given function name,
- * or NULL if a valid function can not be found. The client must
- * check to make sure the address is not NULL, before using or
- * calling the returned function address.
- */
-HB_OCL_API(void *, CL_API_CALL, clGetExtensionFunctionAddressForPlatform)
-(cl_platform_id /* platform */,
- const char * /* func_name */);
-
-#ifdef __APPLE__
-#pragma mark -
-#endif // __APPLE__
-
-typedef struct hb_opencl_library_s
-{
- void *library;
-
- /* Pointers to select OpenCL API functions */
- HB_OCL_FUNC_DECL(clBuildProgram);
- HB_OCL_FUNC_DECL(clCreateBuffer);
- HB_OCL_FUNC_DECL(clCreateCommandQueue);
- HB_OCL_FUNC_DECL(clCreateContextFromType);
- HB_OCL_FUNC_DECL(clCreateKernel);
- HB_OCL_FUNC_DECL(clCreateProgramWithBinary);
- HB_OCL_FUNC_DECL(clCreateProgramWithSource);
- HB_OCL_FUNC_DECL(clEnqueueCopyBuffer);
- HB_OCL_FUNC_DECL(clEnqueueMapBuffer);
- HB_OCL_FUNC_DECL(clEnqueueNDRangeKernel);
- HB_OCL_FUNC_DECL(clEnqueueReadBuffer);
- HB_OCL_FUNC_DECL(clEnqueueUnmapMemObject);
- HB_OCL_FUNC_DECL(clEnqueueWriteBuffer);
- HB_OCL_FUNC_DECL(clFlush);
- HB_OCL_FUNC_DECL(clGetCommandQueueInfo);
- HB_OCL_FUNC_DECL(clGetContextInfo);
- HB_OCL_FUNC_DECL(clGetDeviceIDs);
- HB_OCL_FUNC_DECL(clGetDeviceInfo);
- HB_OCL_FUNC_DECL(clGetPlatformIDs);
- HB_OCL_FUNC_DECL(clGetPlatformInfo);
- HB_OCL_FUNC_DECL(clGetProgramBuildInfo);
- HB_OCL_FUNC_DECL(clGetProgramInfo);
- HB_OCL_FUNC_DECL(clReleaseCommandQueue);
- HB_OCL_FUNC_DECL(clReleaseContext);
- HB_OCL_FUNC_DECL(clReleaseEvent);
- HB_OCL_FUNC_DECL(clReleaseKernel);
- HB_OCL_FUNC_DECL(clReleaseMemObject);
- HB_OCL_FUNC_DECL(clReleaseProgram);
- HB_OCL_FUNC_DECL(clSetKernelArg);
- HB_OCL_FUNC_DECL(clWaitForEvents);
-} hb_opencl_library_t;
-
-hb_opencl_library_t* hb_opencl_library_init();
-void hb_opencl_library_close(hb_opencl_library_t **_opencl);
-
-/*
- * Convenience pointer to a single shared OpenCL library wrapper.
- *
- * It can be initialized and closed via hb_ocl_init/close().
- */
-extern hb_opencl_library_t *hb_ocl;
-int hb_ocl_init();
-void hb_ocl_close();
-
-typedef struct hb_opencl_device_s
-{
- cl_platform_id platform;
- cl_device_type type;
- cl_device_id id;
- char version[128];
- char driver[128];
- char vendor[128];
- char name[128];
- enum
- {
- HB_OCL_VENDOR_AMD,
- HB_OCL_VENDOR_NVIDIA,
- HB_OCL_VENDOR_INTEL,
- HB_OCL_VENDOR_OTHER,
- } ocl_vendor;
-} hb_opencl_device_t;
-
-int hb_opencl_available();
-void hb_opencl_info_print();
-
-/* OpenCL scaling */
-typedef struct hb_oclscale_s
-{
- int initialized;
- // bicubic scale weights
- cl_mem bicubic_x_weights;
- cl_mem bicubic_y_weights;
- cl_float xscale;
- cl_float yscale;
- int width;
- int height;
- // horizontal scaling and vertical scaling kernel handle
- cl_kernel m_kernel;
- int use_ocl_mem; // 0 use host memory. 1 use gpu oclmem
-} hb_oclscale_t;
-
-int hb_ocl_scale(hb_buffer_t *in, hb_buffer_t *out, int *crop,
- hb_oclscale_t *os);
-
-/* Utilities */
-#define HB_OCL_BUF_CREATE(ocl_lib, out, flags, size) \
-{ \
- out = ocl_lib->clCreateBuffer(kenv->context, flags, size, NULL, &status); \
- if (CL_SUCCESS != status) \
- { \
- return -1; \
- } \
-}
-
-#define HB_OCL_BUF_FREE(ocl_lib, buf) \
-{ \
- if (buf != NULL) \
- { \
- ocl_lib->clReleaseMemObject(buf); \
- buf = NULL; \
- } \
-}
-
-#define HB_OCL_CHECK(method, ...) \
-{ \
- status = method(__VA_ARGS__); \
- if (status != CL_SUCCESS) \
- { \
- hb_error("%s:%d (%s) error: %d\n",__FUNCTION__,__LINE__,#method,status);\
- return status; \
- } \
-}
-
-#endif//HB_OPENCL_H
+++ /dev/null
-/* openclkernels.h\r
-\r
- Copyright (c) 2003-2017 HandBrake Team\r
- This file is part of the HandBrake source code\r
- Homepage: <http://handbrake.fr/>.\r
- It may be used under the terms of the GNU General Public License v2.\r
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html\r
- \r
- Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>\r
- Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>\r
-\r
- */\r
- \r
-#ifndef USE_EXTERNAL_KERNEL\r
-\r
-#define KERNEL( ... )# __VA_ARGS__\r
-\r
-\r
-char *kernel_src_hscale = KERNEL (\r
-\r
- typedef unsigned char fixed8;\r
-\r
-/*******************************************************************************************************\r
-dst: Horizontal scale destination;\r
-src: YUV content in opencl buf;\r
-hf_Y: Horizontal filter coefficients for Y planes;\r
-hf_UV: Horizontal filter coefficients for UV planes;\r
-hi_Y: Horizontal filter index for Y planes;\r
-hi_UV: Horizontal filter index for UV planes;\r
-stride: Src width;\r
-filter_len: Length of filter;\r
-********************************************************************************************************/\r
- kernel void frame_h_scale (\r
- global fixed8 *src,\r
- global float *hf_Y,\r
- global float *hf_UV,\r
- global int *hi_Y,\r
- global int *hi_UV,\r
- global fixed8 *dst,\r
- int stride, //src_width\r
- int filter_len\r
- )\r
- {\r
- int x = get_global_id( 0 );\r
- int y = get_global_id( 1 );\r
- int width = get_global_size( 0 );\r
- int height = get_global_size( 1 );\r
- float result_Y = 0, result_U = 0, result_V = 0;\r
- int i = 0;\r
-\r
- global fixed8 *src_Y = src;\r
- global fixed8 *src_U = src_Y + stride * height;\r
- global fixed8 *src_V = src_U + (stride >> 1) * (height >> 1);\r
-\r
- global fixed8 *dst_Y = dst;\r
- global fixed8 *dst_U = dst_Y + width * height;\r
- global fixed8 *dst_V = dst_U + (width >> 1) * (height >> 1);\r
-\r
- int xy = y * width + x;\r
- global fixed8 *rowdata_Y = src_Y + (y * stride);\r
- for( int i = 0; i < filter_len; i++ )\r
- {\r
- result_Y += ( hf_Y[x + i * width] * rowdata_Y[hi_Y[x] + i]);\r
- }\r
- dst_Y[xy] = result_Y;\r
-\r
- if( y < (height >> 1) && x < (width >> 1) )\r
- {\r
- int xy = y * (width >> 1) + x;\r
- global fixed8 *rowdata_U = src_U + (y * (stride >> 1));\r
- global fixed8 *rowdata_V = src_V + (y * (stride >> 1));\r
- for( i = 0; i < filter_len; i++ )\r
- {\r
- result_U += ( hf_UV[x + i * (width >> 1)] * rowdata_U[hi_UV[x] + i]);\r
- result_V += ( hf_UV[x + i * (width >> 1)] * rowdata_V[hi_UV[x] + i]);\r
- }\r
- dst_U[xy] = result_U;\r
- dst_V[xy] = result_V;\r
- }\r
- }\r
- );\r
-\r
-/*******************************************************************************************************\r
-dst: Vertical scale destination;\r
-src: YUV content in opencl buf;\r
-hf_Y: Vertical filter coefficients for Y planes;\r
-hf_UV: Vertical filter coefficients for UV planes;\r
-hi_Y: Vertical filter index for Y planes;\r
-hi_UV: Vertical filter index for UV planes;\r
-stride: Src height;\r
-filter_len: Length of filter;\r
-********************************************************************************************************/\r
-char *kernel_src_vscale = KERNEL (\r
-\r
- kernel void frame_v_scale (\r
- global fixed8 *src,\r
- global float *vf_Y,\r
- global float *vf_UV,\r
- global int *vi_Y,\r
- global int *vi_UV,\r
- global fixed8 *dst,\r
- int src_height,\r
- int filter_len\r
- )\r
- {\r
- int x = get_global_id( 0 );\r
- int y = get_global_id( 1 );\r
- int width = get_global_size( 0 );\r
- int height = get_global_size( 1 );\r
- float result_Y = 0, result_U = 0, result_V = 0;\r
- int i = 0;\r
-\r
- global fixed8 *src_Y = src;\r
- global fixed8 *src_U = src_Y + src_height * width;\r
- global fixed8 *src_V = src_U + (src_height >> 1) * (width >> 1);\r
-\r
- global fixed8 *dst_Y = dst;\r
- global fixed8 *dst_U = dst_Y + height * width;\r
- global fixed8 *dst_V = dst_U + (height >> 1) * (width >> 1);\r
-\r
- int xy = y * width + x;\r
- for( i = 0; i < filter_len; i++ )\r
- {\r
- result_Y += vf_Y[y + i * height] * src_Y[(vi_Y[y] + i) * width + x];\r
- }\r
- dst_Y[xy] = result_Y;\r
-\r
- if( y < (height >> 1) && x < (width >> 1) )\r
- {\r
- int xy = y * (width >> 1) + x;\r
- for( i = 0; i < filter_len; i++ )\r
- {\r
- result_U += vf_UV[y + i * (height >> 1)] * src_U[(vi_UV[y] + i) * (width >> 1) + x];\r
- result_V += vf_UV[y + i * (height >> 1)] * src_V[(vi_UV[y] + i) * (width >> 1) + x];\r
- }\r
- dst_U[xy] = result_U;\r
- dst_V[xy] = result_V;\r
- }\r
- }\r
- );\r
-\r
-/*******************************************************************************************************\r
-input: Input buffer;\r
-output: Output buffer;\r
-w: Width of frame;\r
-h: Height of frame;\r
-********************************************************************************************************/\r
-char *kernel_src_nvtoyuv = KERNEL (\r
-\r
- kernel void nv12toyuv ( global char *input, global char* output, int w, int h )\r
- {\r
- int x = get_global_id( 0 );\r
- int y = get_global_id( 1 );\r
- int idx = y * (w >> 1) + x;\r
- vstore4((vload4( 0, input + (idx << 2))), 0, output + (idx << 2)); //Y\r
- char2 uv = vload2( 0, input + (idx << 1) + w * h );\r
- output[idx + w * h] = uv.s0;\r
- output[idx + w * h + ((w * h) >> 2)] = uv.s1;\r
- }\r
- );\r
-\r
-/*******************************************************************************************************\r
-dst: Horizontal scale destination;\r
-src: YUV content in opencl buf;\r
-yfilter: Opencl memory of horizontal filter coefficients for luma/alpha planes;\r
-yfilterPos: Opencl memory of horizontal filter starting positions for each dst[i] for luma/alpha planes;\r
-yfilterSize: Horizontal filter size for luma/alpha pixels;\r
-cfilter: Opencl memory of horizontal filter coefficients for chroma planes;\r
-cfilterPos: Opencl memory of horizontal filter starting positions for each dst[i] for chroma planes;\r
-cfilterSize: Horizontal filter size for chroma pixels;\r
-dstStride: Width of destination luma/alpha planes;\r
-dstChrStride: Width of destination chroma planes;\r
-********************************************************************************************************/\r
-\r
-char *kernel_src_hscaleall = KERNEL (\r
-\r
- kernel void hscale_all_opencl (\r
- global short *dst,\r
- const global unsigned char *src,\r
- const global short *yfilter,\r
- const global int *yfilterPos,\r
- int yfilterSize,\r
- const global short *cfilter,\r
- const global int *cfilterPos,\r
- int cfilterSize,\r
- int dstWidth,\r
- int dstHeight,\r
- int srcWidth,\r
- int srcHeight,\r
- int dstStride,\r
- int dstChrStride,\r
- int srcStride,\r
- int srcChrStride)\r
- {\r
- int w = get_global_id(0);\r
- int h = get_global_id(1);\r
-\r
- int chrWidth = get_global_size(0);\r
- int chrHeight = get_global_size(1);\r
-\r
- int srcPos1 = h * srcStride + yfilterPos[w];\r
- int srcPos2 = h * srcStride + yfilterPos[w + chrWidth];\r
- int srcPos3 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w];\r
- int srcPos4 = (h + (srcHeight >> 1)) * srcStride + yfilterPos[w + chrWidth];\r
- int srcc1Pos = srcStride * srcHeight + (h) * (srcChrStride) + cfilterPos[w];\r
- int srcc2Pos = srcc1Pos + ((srcChrStride)*(chrHeight));\r
-\r
- int val1 = 0;\r
- int val2 = 0;\r
- int val3 = 0;\r
- int val4 = 0;\r
- int val5 = 0;\r
- int val6 = 0;\r
-\r
- int filterPos1 = yfilterSize * w;\r
- int filterPos2 = yfilterSize * (w + chrWidth);\r
- int cfilterPos1 = cfilterSize * w;\r
-\r
- int j;\r
- for (j = 0; j < yfilterSize; j++)\r
- {\r
- val1 += src[srcPos1 + j] * yfilter[filterPos1+ j];\r
- val2 += src[srcPos2 + j] * yfilter[filterPos2 + j];\r
- val3 += src[srcPos3 + j] * yfilter[filterPos1 + j];\r
- val4 += src[srcPos4 + j] * yfilter[filterPos2 + j];\r
- val5 += src[srcc1Pos+j] * cfilter[cfilterPos1 + j];\r
- val6 += src[srcc2Pos+j] * cfilter[cfilterPos1 + j];\r
- }\r
- int dstPos1 = h *dstStride;\r
- int dstPos2 = (h + chrHeight) * dstStride;\r
-\r
- dst[dstPos1 + w] = ((val1 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val1 >> 7));\r
- dst[dstPos1 + w + chrWidth] = ((val2 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val2 >> 7));\r
- dst[dstPos2 + w] = ((val3 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val3 >> 7));\r
- dst[dstPos2 + w + chrWidth] = ((val4 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val4 >> 7));\r
-\r
- int dstPos3 = h * (dstChrStride) + w + dstStride * dstHeight;\r
- int dstPos4 = h * (dstChrStride) + w + dstStride * dstHeight + ((dstChrStride) * chrHeight);\r
- dst[dstPos3] = ((val5 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val5 >> 7));\r
- dst[dstPos4] = ((val6 >> 7) > ((1 << 15) - 1) ? ((1 << 15) - 1) : (val6 >> 7));\r
- }\r
- );\r
-\r
-char *kernel_src_hscalefast = KERNEL (\r
-\r
- kernel void hscale_fast_opencl (\r
- global short *dst,\r
- const global unsigned char *src,\r
- int xInc,\r
- int chrXInc,\r
- int dstWidth,\r
- int dstHeight,\r
- int srcWidth,\r
- int srcHeight,\r
- int dstStride,\r
- int dstChrStride,\r
- int srcStride,\r
- int srcChrStride)\r
- {\r
-\r
- int w = get_global_id(0);\r
- int h = get_global_id(1);\r
-\r
- int chrWidth = get_global_size(0);\r
- int chrHeight = get_global_size(1);\r
- int xpos1 = 0;\r
- int xpos2 = 0;\r
- int xx = xpos1 >> 16;\r
- int xalpha = (xpos1 & 0xFFFF) >> 9;\r
- dst[h * dstStride + w] = (src[h * srcStride + xx] << 7) + (src[h * srcStride + xx + 1] -src[h * srcStride + xx]) * xalpha;\r
- int lowpart = h + (chrHeight);\r
- dst[lowpart * dstStride + w] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha;\r
-\r
- int inv_i = w * xInc >> 16;\r
- if( inv_i >= srcWidth - 1)\r
- {\r
- dst[h*dstStride + w] = src[h*srcStride + srcWidth-1]*128;\r
- dst[lowpart*dstStride + w] = src[lowpart*srcStride + srcWidth - 1] * 128;\r
- }\r
-\r
- int rightpart = w + (chrWidth);\r
- xx = xpos2 >> 16;\r
- xalpha = (xpos2 & 0xFFFF) >> 9;\r
- dst[h * dstStride + rightpart] = (src[h *srcStride + xx] << 7) + (src[h * srcStride + xx + 1] - src[h * srcStride + xx]) * xalpha;\r
- dst[lowpart * dstStride + rightpart] = (src[lowpart * srcStride + xx] << 7) + (src[lowpart * srcStride + xx + 1] - src[lowpart * srcStride + xx]) * xalpha;\r
- inv_i = rightpart * xInc >> 16;\r
- if( inv_i >= srcWidth - 1)\r
- {\r
- dst[h * dstStride + rightpart] = src[h * srcStride + srcWidth - 1] * 128;\r
- dst[lowpart * dstStride + rightpart] = src[lowpart * srcStride + srcWidth - 1] * 128;\r
- }\r
-\r
- int xpos = 0;\r
- xpos = chrXInc * w;\r
- xx = xpos >> 16;\r
- xalpha = (xpos & 0xFFFF) >> 9;\r
- src += srcStride * srcHeight;\r
- dst += dstStride * dstHeight;\r
- dst[h * (dstChrStride) + w] = (src[h * (srcChrStride) + xx] * (xalpha^127) + src[h * (srcChrStride) + xx + 1] * xalpha);\r
- inv_i = w * xInc >> 16;\r
- if( inv_i >= (srcWidth >> 1) - 1)\r
- {\r
- dst[h * (dstChrStride) + w] = src[h * (srcChrStride) + (srcWidth >> 1) -1]*128;\r
- }\r
-\r
- xpos = chrXInc * (w);\r
- xx = xpos >> 16;\r
- src += srcChrStride * srcHeight >> 1;\r
- dst += (dstChrStride * chrHeight);\r
- dst[h * (dstChrStride) + w] = (src[h * (srcChrStride) + xx] * (xalpha^127) + src[h * (srcChrStride) + xx + 1 ] * xalpha);\r
-\r
- if( inv_i >= (srcWidth >> 1) - 1)\r
- {\r
- //v channel:\r
- dst[h * (dstChrStride) + w] = src[h * (srcChrStride) + (srcWidth >> 1) -1] * 128;\r
- }\r
- }\r
- );\r
-\r
-char *kernel_src_vscalealldither = KERNEL (\r
-\r
- kernel void vscale_all_dither_opencl (\r
- global unsigned char *dst,\r
- const global short *src,\r
- const global short *yfilter,\r
- int yfilterSize,\r
- const global short *cfilter,\r
- int cfilterSize,\r
- const global int *yfilterPos,\r
- const global int *cfilterPos,\r
- int dstWidth,\r
- int dstHeight,\r
- int srcWidth,\r
- int srcHeight,\r
- int dstStride,\r
- int dstChrStride,\r
- int srcStride,\r
- int srcChrStride)\r
- {\r
- const unsigned char hb_dither_8x8_128[8][8] = {\r
- { 36, 68, 60, 92, 34, 66, 58, 90, },\r
- { 100, 4, 124, 28, 98, 2, 122, 26, },\r
- { 52, 84, 44, 76, 50, 82, 42, 74, },\r
- { 116, 20, 108, 12, 114, 18, 106, 10, },\r
- { 32, 64, 56, 88, 38, 70, 62, 94, },\r
- { 96, 0, 120, 24, 102, 6, 126, 30, },\r
- { 48, 80, 40, 72, 54, 86, 46, 78, },\r
- { 112, 16, 104, 8, 118, 22, 110, 14, },\r
- };\r
-\r
-\r
- int w = get_global_id(0);\r
- int h = get_global_id(1);\r
-\r
- int chrWidth = get_global_size(0);\r
- int chrHeight = get_global_size(1);\r
- const unsigned char *local_up_dither;\r
- const unsigned char *local_down_dither;\r
-\r
- local_up_dither = hb_dither_8x8_128[h & 7];\r
- local_down_dither = hb_dither_8x8_128[(h + chrHeight) & 7];\r
-\r
- //yscale;\r
- int srcPos1 = (yfilterPos[h]) * srcStride + w;\r
- int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth);\r
- int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w;\r
- int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth;\r
- int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w);\r
- int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w;\r
-\r
- int val1 = (local_up_dither[w & 7] << 12); //y offset is 0;\r
- int val2 = (local_up_dither[(w + chrWidth) & 7] << 12);\r
- int val3 = (local_down_dither[w &7] << 12);\r
- int val4 = (local_down_dither[(w + chrWidth) & 7] << 12);\r
- int val5 = (local_up_dither[w & 7] << 12);\r
- int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chrome channel.\r
-\r
- int j;\r
- int filterPos1 = h * yfilterSize;\r
- int filterPos2 = ( h + chrHeight ) * yfilterSize;\r
- for(j = 0; j < yfilterSize; j++)\r
- {\r
- val1 += src[srcPos1] * yfilter[filterPos1 + j];\r
- srcPos1 += srcStride;\r
- val2 += src[srcPos2] * yfilter[filterPos1 + j];\r
- srcPos2 += srcStride;\r
- val3 += src[srcPos3] * yfilter[filterPos2 + j];\r
- srcPos3 += srcStride;\r
- val4 += src[srcPos4] * yfilter[filterPos2 + j];\r
- srcPos4 += srcStride;\r
- val5 += src[src1Pos] * cfilter[filterPos1 + j];\r
- val6 += src[src2Pos] * cfilter[filterPos1 + j];\r
- src1Pos += dstChrStride;\r
- src2Pos += dstChrStride;\r
- }\r
- dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19));\r
- dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19));\r
- dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19));\r
- dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));\r
-\r
- int dst1Pos = dstStride * dstHeight + h*(dstChrStride)+(w);\r
- int dst2Pos = (dstChrStride * chrHeight) + dst1Pos;\r
- dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19));\r
- dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19));\r
- }\r
- );\r
-\r
-char *kernel_src_vscaleallnodither = KERNEL (\r
-\r
- kernel void vscale_all_nodither_opencl (\r
- global unsigned char *dst,\r
- const global short *src,\r
- const global short *yfilter,\r
- int yfilterSize,\r
- const global short *cfilter,\r
- int cfilterSize,\r
- const global int *yfilterPos,\r
- const global int *cfilterPos,\r
- int dstWidth,\r
- int dstHeight,\r
- int srcWidth,\r
- int srcHeight,\r
- int dstStride,\r
- int dstChrStride,\r
- int srcStride,\r
- int srcChrStride)\r
- {\r
- const unsigned char hb_sws_pb_64[8] = {\r
- 64, 64, 64, 64, 64, 64, 64, 64\r
- };\r
-\r
- int w = get_global_id(0);\r
- int h = get_global_id(1);\r
-\r
- int chrWidth = get_global_size(0);\r
- int chrHeight = get_global_size(1);\r
- const unsigned char *local_up_dither;\r
- const unsigned char *local_down_dither;\r
-\r
- local_up_dither = hb_sws_pb_64;\r
- local_down_dither = hb_sws_pb_64;\r
-\r
-\r
- //yscale;\r
- int srcPos1 = (yfilterPos[h]) * srcStride + w;\r
- int srcPos2 = (yfilterPos[h]) * srcStride + w + (chrWidth);\r
- int srcPos3 = (yfilterPos[h + chrHeight]) * srcStride + w;\r
- int srcPos4 = (yfilterPos[h + chrHeight]) * srcStride + w + chrWidth;\r
- int src1Pos = dstStride * srcHeight + (cfilterPos[h]) * dstChrStride + (w);\r
- int src2Pos = dstStride * srcHeight + (dstChrStride*(srcHeight>>1)) + (cfilterPos[h]) * dstChrStride + w;\r
-\r
- int val1 = (local_up_dither[w & 7] << 12); //y offset is 0;\r
- int val2 = (local_up_dither[(w + chrWidth) & 7] << 12);\r
- int val3 = (local_down_dither[w &7] << 12);\r
- int val4 = (local_down_dither[(w + chrWidth) & 7] << 12);\r
- int val5 = (local_up_dither[w & 7] << 12);\r
- int val6 = (local_up_dither[(w + 3) & 7] << 12); // 3 is offset of the chrome channel.\r
-\r
-\r
- int j;\r
- int filterPos1 = h * yfilterSize;\r
- int filterPos2 = ( h + chrHeight ) * yfilterSize;\r
- for(j = 0; j < yfilterSize; j++)\r
- {\r
- val1 += src[srcPos1] * yfilter[filterPos1 + j];\r
- srcPos1 += srcStride;\r
- val2 += src[srcPos2] * yfilter[filterPos1 + j];\r
- srcPos2 += srcStride;\r
- val3 += src[srcPos3] * yfilter[filterPos2 + j];\r
- srcPos3 += srcStride;\r
- val4 += src[srcPos4] * yfilter[filterPos2 + j];\r
- srcPos4 += srcStride;\r
- val5 += src[src1Pos] * cfilter[filterPos1 + j];\r
- val6 += src[src2Pos] * cfilter[filterPos1 + j];\r
- src1Pos += dstChrStride;\r
- src2Pos += dstChrStride;\r
- }\r
- dst[h * dstStride + w] = (((val1 >> 19)&(~0xFF)) ? ((-(val1 >> 19)) >> 31) : (val1 >> 19));\r
- dst[h * dstStride + w + chrWidth] = (((val2 >> 19)&(~0xFF)) ? ((-(val2 >> 19)) >> 31) : (val2 >> 19));\r
- dst[(h + chrHeight) * dstStride + w] = (((val3 >> 19)&(~0xFF)) ? ((-(val3 >> 19)) >> 31) : (val3 >> 19));\r
- dst[(h + chrHeight) * dstStride + w + chrWidth] = (((val4 >> 19)&(~0xFF)) ? ((-(val4 >> 19)) >> 31) : (val4 >> 19));;\r
-\r
- int dst1Pos = dstStride * dstHeight + h * (dstChrStride) + (w);\r
- int dst2Pos = (dstChrStride * chrHeight) + dst1Pos;\r
- dst[dst1Pos] = (((val5 >> 19)&(~0xFF)) ? ((-(val5 >> 19)) >> 31) : (val5 >> 19));\r
- dst[dst2Pos] = (((val6 >> 19)&(~0xFF)) ? ((-(val6 >> 19)) >> 31) : (val6 >> 19));\r
- }\r
- );\r
-\r
-char *kernel_src_vscalefast = KERNEL (\r
-\r
- kernel void vscale_fast_opencl (\r
- global unsigned char *dst,\r
- const global short *src,\r
- const global int *yfilterPos,\r
- const global int *cfilterPos,\r
- int dstWidth,\r
- int dstHeight,\r
- int srcWidth,\r
- int srcHeight,\r
- int dstStride,\r
- int dstChrStride,\r
- int srcStride,\r
- int srcChrStride)\r
- {\r
- const unsigned char hb_sws_pb_64[8] = {\r
- 64, 64, 64, 64, 64, 64, 64, 64\r
- };\r
-\r
- int w = get_global_id(0);\r
- int h = get_global_id(1);\r
-\r
- int chrWidth = get_global_size(0);\r
- int chrHeight = get_global_size(1);\r
-\r
- const unsigned char *local_up_dither;\r
- const unsigned char *local_down_dither;\r
-\r
- local_up_dither = hb_sws_pb_64;\r
- local_down_dither = hb_sws_pb_64;\r
-\r
-\r
- int rightpart = w + chrWidth;\r
- int bh = h + chrHeight; // bottom part\r
- short val1 = (src[(yfilterPos[h]) * dstStride + w] + local_up_dither[(w + 0) & 7]) >> 7; //lum offset is 0;\r
- short val2 = (src[(yfilterPos[h]) * dstStride + rightpart] + local_up_dither[rightpart & 7]) >> 7;\r
- short val3 = (src[(yfilterPos[bh]) * dstStride + w] + local_down_dither[w & 7]) >> 7;\r
- short val4 = (src[(yfilterPos[bh]) * dstStride + rightpart] + local_down_dither[rightpart & 7]) >> 7;\r
- dst[h * dstStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));\r
- dst[h * dstStride + rightpart] = ((val2&(~0xFF)) ? ((-val2) >> 31) : (val2));\r
- dst[bh * dstStride + w] = ((val3&(~0xFF)) ? ((-val3) >> 31) : (val3));\r
- dst[bh * dstStride + rightpart] = ((val4&(~0xFF)) ? ((-val4) >> 31) : (val4));\r
-\r
- src += dstStride * srcHeight;\r
- dst += dstStride * dstHeight;\r
- val1 = (src[cfilterPos[h] * (dstChrStride) + w] + local_up_dither[ w & 7]) >> 7;\r
- dst[h * (dstChrStride) + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));\r
-\r
- src += dstChrStride * (srcHeight >> 1);\r
- dst += dstChrStride * chrHeight;\r
- val1 = (src[cfilterPos[h] * dstChrStride + w] + local_up_dither[ (w + 3) & 7] ) >> 7;\r
- dst[h * dstChrStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));\r
-\r
- }\r
- );\r
-\r
-char *kernel_src_scale = KERNEL (\r
-\r
-__kernel __attribute__((reqd_work_group_size(64, 1, 1))) void frame_scale(__global uchar *dst,\r
- __global const uchar *src,\r
- const float xscale,\r
- const float yscale,\r
- const int srcPlaneOffset0,\r
- const int srcPlaneOffset1,\r
- const int srcPlaneOffset2,\r
- const int dstPlaneOffset0,\r
- const int dstPlaneOffset1,\r
- const int dstPlaneOffset2,\r
- const int srcRowWords0,\r
- const int srcRowWords1,\r
- const int srcRowWords2,\r
- const int dstRowWords0,\r
- const int dstRowWords1,\r
- const int dstRowWords2,\r
- const int srcWidth,\r
- const int srcHeight,\r
- const int dstWidth,\r
- const int dstHeight,\r
- __global const float4* restrict xweights,\r
- __global const float4* restrict yweights\r
- )\r
-{\r
- const int x = get_global_id(0);\r
- const int y = get_global_id(1);\r
- const int z = get_global_id(2);\r
-\r
- // Abort work items outside the dst image bounds.\r
-\r
- if ((get_group_id(0) * 64 >= (dstWidth >> ((z == 0) ? 0 : 1))) || (get_group_id(1) * 16 >= (dstHeight >> ((z == 0) ? 0 : 1))))\r
- return;\r
-\r
- const int srcPlaneOffset = (z == 0) ? srcPlaneOffset0 : ((z == 1) ? srcPlaneOffset1 : srcPlaneOffset2);\r
- const int dstPlaneOffset = (z == 0) ? dstPlaneOffset0 : ((z == 1) ? dstPlaneOffset1 : dstPlaneOffset2);\r
- const int srcRowWords = (z == 0) ? srcRowWords0: ((z == 1) ? srcRowWords1 : srcRowWords2);\r
- const int dstRowWords = (z == 0) ? dstRowWords0: ((z == 1) ? dstRowWords1 : dstRowWords2);\r
-\r
- __local uchar pixels[64 * 36];\r
- const int localRowPixels = 64;\r
- const int groupHeight = 16; // src pixel height output by the workgroup\r
- const int ypad = 2;\r
- const int localx = get_local_id(0);\r
-\r
- const int globalStartRow = floor((get_group_id(1) * groupHeight) / yscale);\r
- const int globalRowCount = ceil(groupHeight / yscale) + 2 * ypad;\r
-\r
- float4 weights = xweights[x];\r
- int4 woffs = floor(x / xscale);\r
- woffs += (int4)(-1, 0, 1, 2);\r
- woffs = clamp(woffs, 0, (srcWidth >> ((z == 0) ? 0 : 1)) - 1);\r
- const int maxy = (srcHeight >> ((z == 0) ? 0 : 1)) - 1;\r
-\r
- // Scale x from global into LDS\r
-\r
- for (int i = 0; i <= globalRowCount; ++i) {\r
- int4 offs = srcPlaneOffset + clamp(globalStartRow - ypad + i, 0, maxy) * srcRowWords;\r
- offs += woffs;\r
- pixels[localx + i * localRowPixels] = convert_uchar(clamp(round(dot(weights,\r
- (float4)(src[offs.x], src[offs.y], src[offs.z], src[offs.w]))), 0.0f, 255.0f));\r
- }\r
- \r
- barrier(CLK_LOCAL_MEM_FENCE);\r
-\r
- // Scale y from LDS into global\r
-\r
- if (x >= dstWidth >> ((z == 0) ? 0 : 1))\r
- return;\r
-\r
- int off = dstPlaneOffset + x + (get_group_id(1) * groupHeight) * dstRowWords;\r
-\r
- for (int i = 0; i < groupHeight; ++i) {\r
- if (y >= dstHeight >> ((z == 0) ? 0 : 1))\r
- break;\r
- int localy = floor((get_group_id(1) * groupHeight + i) / yscale);\r
- localy = localy - globalStartRow + ypad;\r
- int loff = localx + localy * localRowPixels;\r
- dst[off] = convert_uchar(clamp(round(dot(yweights[get_group_id(1) * groupHeight + i],\r
- (float4)(pixels[loff - localRowPixels], pixels[loff], pixels[loff + localRowPixels]\r
- , pixels[loff + localRowPixels * 2]))), 0.0f, 255.0f));\r
- off += dstRowWords;\r
- }\r
-}\r
-);\r
-\r
-\r
-char *kernel_src_yadif_filter = KERNEL(\r
- void filter_v6(\r
- global unsigned char *dst,\r
- global unsigned char *prev, \r
- global unsigned char *cur, \r
- global unsigned char *next,\r
- int x,\r
- int y,\r
- int width,\r
- int height,\r
- int parity,\r
- int inlinesize,\r
- int outlinesize,\r
- int inmode,\r
- int uvflag\r
- )\r
- {\r
-\r
- int flag = uvflag * (y >=height) * height; \r
- int prefs = select(-(inlinesize), inlinesize,((y+1) - flag) <height);\r
- int mrefs = select(inlinesize, -(inlinesize),y - flag);\r
- int mode = select(inmode,2,(y - flag==1) || (y - flag + 2==height));\r
- int score;\r
- \r
- global unsigned char *prev2 = parity ? prev : cur ;\r
- global unsigned char *next2 = parity ? cur : next;\r
- int index = x + y * inlinesize;\r
- int outindex = x + y * outlinesize;\r
- int c = cur[index + mrefs]; \r
- int d = (prev2[index] + next2[index])>>1; \r
- int e = cur[index + prefs]; \r
- int temporal_diff0 = abs((prev2[index]) - (next2[index])); \r
- int temporal_diff1 =(abs(prev[index + mrefs] - c) + abs(prev[index + prefs] - e) )>>1; \r
- int temporal_diff2 =(abs(next[index + mrefs] - c) + abs(next[index + prefs] - e) )>>1; \r
- int diff = max(max(temporal_diff0>>1, temporal_diff1), temporal_diff2); \r
- int spatial_pred = (c+e)>>1; \r
- int spatial_score = abs(cur[index + mrefs-1] - cur[index + prefs-1]) + abs(c-e) + abs(cur[index + mrefs+1] - cur[index + prefs+1]) - 1; \r
- //check -1\r
- score = abs(cur[index + mrefs-2] - cur[index + prefs])\r
- + abs(cur[index + mrefs-1] - cur[index + prefs+1])\r
- + abs(cur[index + mrefs] - cur[index + prefs+2]);\r
- if (score < spatial_score)\r
- {\r
- spatial_score= score;\r
- spatial_pred= (cur[index + mrefs-1] + cur[index + prefs+1])>>1;\r
- }\r
- //check -2\r
- score = abs(cur[index + mrefs-3] - cur[index + prefs+1])\r
- + abs(cur[index + mrefs-2] - cur[index + prefs+2])\r
- + abs(cur[index + mrefs-1] - cur[index + prefs+3]);\r
- if (score < spatial_score)\r
- {\r
- spatial_score= score;\r
- spatial_pred= (cur[index + mrefs-2] + cur[index + prefs+2])>>1;\r
- }\r
- //check 1\r
- score = abs(cur[index + mrefs] - cur[index + prefs-2])\r
- + abs(cur[index + mrefs+1] - cur[index + prefs-1])\r
- + abs(cur[index + mrefs+2] - cur[index + prefs]);\r
- if (score < spatial_score)\r
- {\r
- spatial_score= score;\r
- spatial_pred= (cur[index + mrefs+1] + cur[index + prefs-1])>>1;\r
- }\r
- //check 2\r
- score = abs(cur[index + mrefs+1] - cur[index + prefs-3])\r
- + abs(cur[index + mrefs+2] - cur[index + prefs-2])\r
- + abs(cur[index + mrefs+3] - cur[index + prefs-1]);\r
- if (score < spatial_score)\r
- {\r
- spatial_score= score;\r
- spatial_pred= (cur[index + mrefs+2] + cur[index + prefs-2])>>1;\r
- }\r
- if (mode < 2)\r
- { \r
- int b = (prev2[index + (mrefs<<1)] + next2[index + (mrefs<<1)])>>1; \r
- int f = (prev2[index + (prefs<<1)] + next2[index + (prefs<<1)])>>1; \r
- int diffmax = max(max(d-e, d-c), min(b-c, f-e)); \r
- int diffmin = min(min(d-e, d-c), max(b-c, f-e)); \r
-\r
- diff = max(max(diff, diffmin), -diffmax); \r
- } \r
- if (spatial_pred > d + diff) \r
- {\r
- spatial_pred = d + diff; \r
- }\r
- else if (spatial_pred < d - diff) \r
- {\r
- spatial_pred = d - diff; \r
- }\r
-\r
- dst[outindex] = spatial_pred; \r
- }\r
-\r
- kernel void yadif_filter(\r
- global unsigned char *dst,\r
- global unsigned char *prev,\r
- global unsigned char *cur,\r
- global unsigned char *next,\r
- int parity,\r
- int inlinesizeY,\r
- int inlinesizeUV,\r
- int outlinesizeY,\r
- int outlinesizeUV,\r
- int mode)\r
- {\r
- int x=get_global_id(0);\r
- int y=(get_global_id(1)<<1) + (!parity);\r
- int width=(get_global_size(0)<<1)/3;\r
- int height=get_global_size(1)<<1;\r
- \r
-\r
- global unsigned char *dst_Y=dst;\r
- global unsigned char *dst_U=dst_Y+height*outlinesizeY;\r
-\r
- global unsigned char *prev_Y=prev;\r
- global unsigned char *prev_U=prev_Y+height*inlinesizeY;\r
-\r
- global unsigned char *cur_Y=cur;\r
- global unsigned char *cur_U=cur_Y+height*inlinesizeY;\r
-\r
- global unsigned char *next_Y=next;\r
- global unsigned char *next_U=next_Y+height*inlinesizeY;\r
-\r
- if(x < width)\r
- {\r
- filter_v6(dst_Y,prev_Y,cur_Y,next_Y,x,y,width,height,parity,inlinesizeY,outlinesizeY,mode,0);\r
- }\r
- else\r
- {\r
- x = x - width;\r
- filter_v6(dst_U,prev_U,cur_U,next_U,x,y,width>>1,height>>1,parity,inlinesizeUV,outlinesizeUV,mode,1);\r
- }\r
- }\r
- );\r
-\r
-#endif\r
+++ /dev/null
-/* openclwrapper.c
-
- Copyright (c) 2003-2017 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
- Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "extras/cl.h"
-#include "opencl.h"
-#include "openclwrapper.h"
-#include "openclkernels.h"
-
-//#define USE_EXTERNAL_KERNEL
-#ifdef SYS_MINGW
-#include <windows.h>
-#endif
-
-#if defined(_MSC_VER)
-#define strcasecmp strcmpi
-#endif
-
-#define MAX_KERNEL_STRING_LEN 64
-#define MAX_CLFILE_NUM 50
-#define MAX_CLKERNEL_NUM 200
-#define MAX_CLFILE_PATH 255
-#define MAX_KERNEL_NUM 50
-#define MAX_KERNEL_NAME_LEN 64
-
-#ifndef INVALID_HANDLE_VALUE
-#define INVALID_HANDLE_VALUE NULL
-#endif
-
-//#define THREAD_PRIORITY_TIME_CRITICAL 15
-
-enum VENDOR
-{
- AMD = 0,
- Intel,
- NVIDIA,
- others
-};
-typedef struct _GPUEnv
-{
- //share vb in all modules in hb library
- cl_platform_id platform;
- cl_device_type dType;
- cl_context context;
- cl_device_id * devices;
- cl_device_id dev;
- cl_command_queue command_queue;
- cl_kernel kernels[MAX_CLFILE_NUM];
- cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file
- char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256
- int file_count; // only one kernel file
-
- char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1];
- cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
- int kernel_count;
- int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
- enum VENDOR vendor;
-}GPUEnv;
-
-typedef struct
-{
- char kernelName[MAX_KERNEL_NAME_LEN+1];
- char * kernelStr;
-}hb_kernel_node;
-
-static GPUEnv gpu_env;
-static int isInited = 0;
-static int useBuffers = 0;
-static hb_kernel_node gKernels[MAX_KERNEL_NUM];
-
-#define HB_OCL_ADD_KERNEL_CFG(idx, s, p) \
-{ \
- strcpy(gKernels[idx].kernelName, s); \
- gKernels[idx].kernelStr = p; \
- strcpy(gpu_env.kernel_names[idx], s); \
- gpu_env.kernel_count++; \
-}
-
-/**
- * hb_regist_opencl_kernel
- */
-int hb_regist_opencl_kernel()
-{
- //if( !gpu_env.isUserCreated )
- // memset( &gpu_env, 0, sizeof(gpu_env) );
- //Comment for posterity: When in doubt just zero out a structure full of pointers to allocated resources.
-
- gpu_env.file_count = 0; //argc;
- gpu_env.kernel_count = 0UL;
-
- HB_OCL_ADD_KERNEL_CFG(0, "frame_scale", NULL);
- HB_OCL_ADD_KERNEL_CFG(1, "yadif_filter", NULL);
-
- return 0;
-}
-
-/**
- * hb_regist_opencl_kernel
- * @param filename -
- * @param source -
- * @param gpu_info -
- * @param int idx -
- */
-int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx )
-{
- int file_size;
- size_t result;
- FILE * file = NULL;
- file_size = 0;
- result = 0;
- file = fopen( filename, "rb+" );
-
- if( file!=NULL )
- {
- fseek( file, 0, SEEK_END );
-
- file_size = ftell( file );
- rewind( file );
- *source = (char*)malloc( sizeof(char) * file_size + 1 );
- if( *source == (char*)NULL )
- {
- fclose( file );
- return(0);
- }
- result = fread( *source, 1, file_size, file );
- if( result != file_size )
- {
- free( *source );
- fclose( file );
- return(0);
- }
- (*source)[file_size] = '\0';
- fclose( file );
-
- return(1);
- }
- return(0);
-}
-
-/**
- * hb_binary_generated
- * @param context -
- * @param cl_file_name -
- * @param fhandle -
- */
-int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle )
-{
- int i = 0;
- cl_int status;
- cl_uint numDevices;
- cl_device_id *devices;
- char * str = NULL;
- FILE * fd = NULL;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_binary_generated: OpenCL support not available");
- return 0;
- }
-
- status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
- sizeof(numDevices), &numDevices, NULL);
- if( status != CL_SUCCESS )
- {
- hb_log( "OpenCL: Get context info failed" );
- return 0;
- }
-
- devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
- if( devices == NULL )
- {
- hb_log( "OpenCL: No device found" );
- return 0;
- }
-
- /* grab the handles to all of the devices in the context. */
- status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_DEVICES,
- sizeof(cl_device_id) * numDevices,
- devices, NULL);
-
- status = 0;
- /* dump out each binary into its own separate file. */
- for (i = 0; i < numDevices; i++)
- {
- char fileName[256] = { 0 };
- char cl_name[128] = { 0 };
- if (devices[i])
- {
- char deviceName[1024];
- status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
- sizeof(deviceName), deviceName, NULL);
-
- str = (char*)strstr(cl_file_name, ".cl");
- memcpy(cl_name, cl_file_name, str - cl_file_name);
- cl_name[str - cl_file_name] = '\0';
- sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);
- fd = fopen(fileName, "rb");
- status = fd != NULL;
- }
- }
-
- if( devices != NULL )
- {
- free( devices );
- devices = NULL;
- }
-
- if( fd != NULL )
- *fhandle = fd;
-
- return status;
-}
-
-/**
- * hb_write_binary_to_file
- * @param fileName -
- * @param birary -
- * @param numBytes -
- */
-int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes )
-{
- FILE *output = NULL;
- output = fopen( fileName, "wb" );
- if( output == NULL )
- return 0;
-
- fwrite( birary, sizeof(char), numBytes, output );
- fclose( output );
-
- return 1;
-}
-
-/**
- * hb_generat_bin_from_kernel_source
- * @param program -
- * @param cl_file_name -
- */
-int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name )
-{
- int i = 0;
- cl_int status = CL_SUCCESS;
- cl_uint numDevices = 0;
- size_t *binarySizes = NULL;
- cl_device_id *devices = NULL;
- char **binaries = NULL;
- char *str = NULL;
- int ret_value = 1;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_generat_bin_from_kernel_source: OpenCL support not available");
- return 0;
- }
-
- status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
- sizeof(numDevices), &numDevices, NULL);
- if( status != CL_SUCCESS )
- {
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed");
- return 0;
- }
-
- devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
- if( devices == NULL )
- {
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found");
- ret_value = 0;
- goto to_exit;
- }
-
- /* grab the handles to all of the devices in the program. */
- status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_DEVICES,
- sizeof(cl_device_id) * numDevices,
- devices, NULL);
- if( status != CL_SUCCESS )
- {
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed");
- ret_value = 0;
- goto to_exit;
- }
-
- /* figure out the sizes of each of the binaries. */
- binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
-
- status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
- sizeof(size_t) * numDevices,
- binarySizes, NULL);
- if( status != CL_SUCCESS )
- {
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed");
- ret_value = 0;
- goto to_exit;
- }
-
- /* copy over all of the generated binaries. */
- binaries = (char**)malloc( sizeof(char *) * numDevices );
- if( binaries == NULL )
- {
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed");
- ret_value = 0;
- goto to_exit;
- }
-
- for( i = 0; i < numDevices; i++ )
- {
- if( binarySizes[i] != 0 )
- {
- binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] );
- if( binaries[i] == NULL )
- {
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i);
- ret_value = 0;
- goto to_exit;
- }
- }
- else
- {
- binaries[i] = NULL;
- }
- }
-
- status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARIES,
- sizeof(char *) * numDevices,
- binaries, NULL);
- if( status != CL_SUCCESS )
- {
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed");
- ret_value = 0;
- goto to_exit;
- }
-
- /* dump out each binary into its own separate file. */
- for (i = 0; i < numDevices; i++)
- {
- char fileName[256] = {0};
- char cl_name[128] = {0};
- if (binarySizes[i])
- {
- char deviceName[1024];
- status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
- sizeof(deviceName), deviceName,
- NULL);
-
- str = (char*)strstr( cl_file_name, (char*)".cl" );
- memcpy(cl_name, cl_file_name, str - cl_file_name);
- cl_name[str - cl_file_name] = '\0';
- sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);
-
- if (!hb_write_binary_to_file(fileName, binaries[i], binarySizes[i]))
- {
- hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead.");
- ret_value = 0;
- goto to_exit;
- }
- }
- }
-
-to_exit:
- // Release all resouces and memory
- for( i = 0; i < numDevices; i++ )
- {
- if( binaries[i] != NULL )
- {
- free( binaries[i] );
- binaries[i] = NULL;
- }
- }
-
- if( binaries != NULL )
- {
- free( binaries );
- binaries = NULL;
- }
-
- if( binarySizes != NULL )
- {
- free( binarySizes );
- binarySizes = NULL;
- }
-
- if( devices != NULL )
- {
- free( devices );
- devices = NULL;
- }
- return ret_value;
-}
-
-
-/**
- * hb_init_opencl_attr
- * @param env -
- */
-int hb_init_opencl_attr( OpenCLEnv * env )
-{
- if( gpu_env.isUserCreated )
- return 1;
-
- gpu_env.context = env->context;
- gpu_env.platform = env->platform;
- gpu_env.dev = env->devices;
- gpu_env.command_queue = env->command_queue;
-
- gpu_env.isUserCreated = 1;
-
- return 0;
-}
-
-/**
- * hb_create_kernel
- * @param kernelname -
- * @param env -
- */
-int hb_create_kernel( char * kernelname, KernelEnv * env )
-{
- int status;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_create_kernel: OpenCL support not available");
- return 0;
- }
-
- env->kernel = hb_ocl->clCreateKernel(gpu_env.programs[0], kernelname, &status);
- env->context = gpu_env.context;
- env->command_queue = gpu_env.command_queue;
- return status != CL_SUCCESS ? 1 : 0;
-}
-
-/**
- * hb_release_kernel
- * @param env -
- */
-int hb_release_kernel( KernelEnv * env )
-{
- if (hb_ocl == NULL)
- {
- hb_error("hb_release_kernel: OpenCL support not available");
- return 0;
- }
-
- int status = hb_ocl->clReleaseKernel(env->kernel);
- return status != CL_SUCCESS ? 1 : 0;
-}
-
-/**
- * hb_init_opencl_env
- * @param gpu_info -
- */
-
-static int init_once = 0;
-int hb_init_opencl_env( GPUEnv *gpu_info )
-{
- size_t length;
- cl_int status;
- cl_uint numPlatforms, numDevices;
- cl_platform_id *platforms;
- cl_context_properties cps[3];
- char platformName[100];
- unsigned int i;
- void *handle = INVALID_HANDLE_VALUE;
-
- if (init_once != 0)
- return 0;
- else
- init_once = 1;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_init_opencl_env: OpenCL support not available");
- return 1;
- }
-
- /*
- * Have a look at the available platforms.
- */
- if( !gpu_info->isUserCreated )
- {
- status = hb_ocl->clGetPlatformIDs(0, NULL, &numPlatforms);
- if( status != CL_SUCCESS )
- {
- hb_log( "OpenCL: OpenCL device platform not found." );
- return(1);
- }
-
- gpu_info->platform = NULL;
- if( 0 < numPlatforms )
- {
- platforms = (cl_platform_id*)malloc(
- numPlatforms * sizeof(cl_platform_id));
- if( platforms == (cl_platform_id*)NULL )
- {
- return(1);
- }
- status = hb_ocl->clGetPlatformIDs(numPlatforms, platforms, NULL);
-
- if( status != CL_SUCCESS )
- {
- hb_log( "OpenCL: Specific opencl platform not found." );
- return(1);
- }
-
- for( i = 0; i < numPlatforms; i++ )
- {
- status = hb_ocl->clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
- sizeof(platformName), platformName, NULL);
-
- if( status != CL_SUCCESS )
- {
- continue;
- }
- gpu_info->platform = platforms[i];
-
- if (!strcmp(platformName, "Advanced Micro Devices, Inc.") ||
- !strcmp(platformName, "AMD"))
- gpu_info->vendor = AMD;
- else
- gpu_info->vendor = others;
-
- gpu_info->platform = platforms[i];
-
- status = hb_ocl->clGetDeviceIDs(gpu_info->platform /* platform */,
- CL_DEVICE_TYPE_GPU /* device_type */,
- 0 /* num_entries */,
- NULL /* devices */, &numDevices);
-
- if( status != CL_SUCCESS )
- {
- continue;
- }
-
- if( numDevices )
- break;
-
- }
- free( platforms );
- }
-
- if( NULL == gpu_info->platform )
- {
- hb_log( "OpenCL: No OpenCL-compatible GPU found." );
- return(1);
- }
-
- if( status != CL_SUCCESS )
- {
- hb_log( "OpenCL: No OpenCL-compatible GPU found." );
- return(1);
- }
-
- /*
- * Use available platform.
- */
- cps[0] = CL_CONTEXT_PLATFORM;
- cps[1] = (cl_context_properties)gpu_info->platform;
- cps[2] = 0;
- /* Check for GPU. */
- gpu_info->dType = CL_DEVICE_TYPE_GPU;
- gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
- NULL, NULL, &status);
-
- if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
- {
- gpu_info->dType = CL_DEVICE_TYPE_CPU;
- gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
- NULL, NULL, &status);
- }
-
- if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
- {
- gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;
- gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
- NULL, NULL, &status);
- }
-
- if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
- {
- hb_log( "OpenCL: Unable to create opencl context." );
- return(1);
- }
-
- /* Detect OpenCL devices. */
- /* First, get the size of device list data */
- status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
- 0, NULL, &length);
- if((status != CL_SUCCESS) || (length == 0))
- {
- hb_log( "OpenCL: Unable to get the list of devices in context." );
- return(1);
- }
-
- /* Now allocate memory for device list based on the size we got earlier */
- gpu_info->devices = (cl_device_id*)malloc( length );
- if( gpu_info->devices == (cl_device_id*)NULL )
- {
- return(1);
- }
-
- /* Now, get the device list data */
- status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
- length, gpu_info->devices, NULL);
- if( status != CL_SUCCESS )
- {
- hb_log( "OpenCL: Unable to get the device list data in context." );
- return(1);
- }
-
- /* Create OpenCL command queue. */
- gpu_info->command_queue = hb_ocl->clCreateCommandQueue(gpu_info->context,
- gpu_info->devices[0],
- 0, &status);
- if( status != CL_SUCCESS )
- {
- hb_log( "OpenCL: Unable to create opencl command queue." );
- return(1);
- }
- }
-
- if ((CL_SUCCESS == hb_ocl->clGetCommandQueueInfo(gpu_info->command_queue,
- CL_QUEUE_THREAD_HANDLE_AMD,
- sizeof(handle), &handle, NULL)) &&
- (INVALID_HANDLE_VALUE != handle))
- {
-#ifdef SYS_MINGW
- SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL );
-#endif
- }
-
- return 0;
-}
-
-
-/**
- * hb_release_opencl_env
- * @param gpu_info -
- */
-int hb_release_opencl_env( GPUEnv *gpu_info )
-{
- if( !isInited )
- return 1;
- int i;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_release_opencl_env: OpenCL support not available");
- return 0;
- }
-
- for( i = 0; i<gpu_env.file_count; i++ )
- {
- if( gpu_env.programs[i] )
- {
- hb_ocl->clReleaseProgram(gpu_env.programs[i]);
- gpu_env.programs[i] = NULL;
- }
- }
-
- if( gpu_env.command_queue )
- {
- hb_ocl->clReleaseCommandQueue(gpu_env.command_queue);
- gpu_env.command_queue = NULL;
- }
-
- if( gpu_env.context )
- {
- hb_ocl->clReleaseContext(gpu_env.context);
- gpu_env.context = NULL;
- }
-
- isInited = 0;
- useBuffers = 0;
- gpu_info->isUserCreated = 0;
-
- return 1;
-}
-
-
-/**
- * hb_register_kernel_wrapper
- * @param kernel_name -
- * @param function -
- */
-int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function )
-{
- int i;
- for( i = 0; i < gpu_env.kernel_count; i++ )
- {
- if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )
- {
- gpu_env.kernel_functions[i] = function;
- return(1);
- }
- }
- return(0);
-}
-
-/**
- * hb_cached_of_kerner_prg
- * @param gpu_env -
- * @param cl_file_name -
- */
-int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name )
-{
- int i;
- for( i = 0; i < gpu_env->file_count; i++ )
- {
- if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name ) == 0 )
- {
- if( gpu_env->programs[i] != NULL )
- return(1);
- }
- }
-
- return(0);
-}
-
-/**
- * hb_compile_kernel_file
- * @param filename -
- * @param gpu_info -
- * @param indx -
- * @param build_option -
- */
-int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
- int indx, const char *build_option )
-{
- cl_int status;
- size_t length;
- char *source_str = NULL;
- const char *source;
- size_t source_size[1];
- char *buildLog = NULL;
- int b_error, binary_status, binaryExisted;
- char *binary;
- cl_uint numDevices;
- cl_device_id *devices = NULL;
- FILE *fd = NULL;
- FILE *fd1 = NULL;
- int idx;
- int ret_value = 1;
-
- if (hb_cached_of_kerner_prg(gpu_info, filename) == 1)
- return 1;
-
- idx = gpu_info->file_count;
-
-#ifdef USE_EXTERNAL_KERNEL
- status = hb_convert_to_string(filename, &source_str, gpu_info, idx);
- if (status == 0)
- return 0;
-#else
- int kernel_src_size = strlen(kernel_src_scale) + strlen(kernel_src_yadif_filter);
-
- source_str = (char*)malloc( kernel_src_size + 2 );
- strcpy( source_str, kernel_src_scale );
- strcat( source_str, kernel_src_yadif_filter );
-#endif
-
- source = source_str;
- source_size[0] = strlen( source );
-
- if (hb_ocl == NULL)
- {
- hb_error("OpenCL: Support is not available");
- ret_value = 0;
- goto to_exit;
- }
-
- if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1)
- {
- status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES,
- sizeof(numDevices), &numDevices, NULL);
- if (status != CL_SUCCESS)
- {
- hb_log("OpenCL: Unable to get the number of devices in context.");
- ret_value = 0;
- goto to_exit;
- }
-
- devices = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices);
- if (devices == NULL)
- {
- ret_value = 0;
- goto to_exit;
- }
-
- length = 0;
- b_error = 0;
- b_error |= fseek(fd, 0, SEEK_END) < 0;
- b_error |= (length = ftell(fd)) <= 0;
- b_error |= fseek(fd, 0, SEEK_SET) < 0;
- if (b_error)
- {
- ret_value = 0;
- goto to_exit;
- }
-
- binary = (char*)calloc(length + 2, sizeof(char));
- if (binary == NULL)
- {
- ret_value = 0;
- goto to_exit;
- }
-
- b_error |= fread(binary, 1, length, fd) != length;
-
- if (b_error)
- {
- ret_value = 0;
- goto to_exit;
- }
-
- /* grab the handles to all of the devices in the context. */
- status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
- sizeof(cl_device_id) * numDevices,
- devices, NULL);
-
- gpu_info->programs[idx] = hb_ocl->clCreateProgramWithBinary(gpu_info->context,
- numDevices,
- devices,
- &length,
- (const unsigned char**)&binary,
- &binary_status,
- &status);
-
- }
- else
- {
- /* create a CL program using the kernel source */
- gpu_info->programs[idx] = hb_ocl->clCreateProgramWithSource(gpu_info->context, 1,
- &source, source_size,
- &status);
- }
-
- if ((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS))
- {
- hb_log( "OpenCL: Unable to get list of devices in context." );
- ret_value = 0;
- goto to_exit;
- }
-
- /* create a cl program executable for all the devices specified */
- if (!gpu_info->isUserCreated)
- {
- status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices,
- build_option, NULL, NULL);
- }
- else
- {
- status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev),
- build_option, NULL, NULL);
- }
-
- if (status != CL_SUCCESS)
- {
- if (!gpu_info->isUserCreated)
- {
- status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->devices[0],
- CL_PROGRAM_BUILD_LOG,
- 0, NULL, &length);
- }
- else
- {
- status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->dev,
- CL_PROGRAM_BUILD_LOG,
- 0, NULL, &length);
- }
-
- if (status != CL_SUCCESS)
- {
- hb_log( "OpenCL: Unable to get GPU build information." );
- ret_value = 0;
- goto to_exit;
- }
-
- buildLog = (char*)malloc(length);
- if (buildLog == (char*)NULL)
- {
- ret_value = 0;
- goto to_exit;
- }
-
- if (!gpu_info->isUserCreated)
- {
- status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->devices[0],
- CL_PROGRAM_BUILD_LOG,
- length, buildLog, &length);
- }
- else
- {
- status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
- gpu_info->dev,
- CL_PROGRAM_BUILD_LOG,
- length, buildLog, &length);
- }
-
- fd1 = fopen("kernel-build.log", "w+");
- if (fd1 != NULL) {
- fwrite(buildLog, sizeof(char), length, fd1);
- fclose(fd1);
- }
-
- ret_value = 0;
- goto to_exit;
- }
-
- strcpy(gpu_env.kernelSrcFile[idx], filename);
-
- gpu_info->file_count += 1;
-
-to_exit:
- if (source_str != NULL)
- {
- free(source_str);
- source_str = NULL;
- // only used as pointer to source_str
- source = NULL;
- }
-
- if (devices != NULL)
- {
- free(devices);
- devices = NULL;
- }
-
- if (binary != NULL)
- {
- free(binary);
- binary = NULL;
- }
-
- if (buildLog != NULL)
- {
- free(buildLog);
- buildLog = NULL;
- }
-
- if (fd != NULL)
- {
- fclose(fd);
- fd = NULL;
- }
-
- return ret_value;
-}
-
-
-/**
- * hb_get_kernel_env_and_func
- * @param kernel_name -
- * @param env -
- * @param function -
- */
-int hb_get_kernel_env_and_func( const char *kernel_name,
- KernelEnv *env,
- cl_kernel_function *function )
-{
- int i;
- for( i = 0; i < gpu_env.kernel_count; i++ )
- {
- if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )
- {
- env->context = gpu_env.context;
- env->command_queue = gpu_env.command_queue;
- env->program = gpu_env.programs[0];
- env->kernel = gpu_env.kernels[i];
- env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0;
- *function = gpu_env.kernel_functions[i];
- return(1);
- }
- }
- return(0);
-}
-
-/**
- * hb_get_kernel_env_and_func
- * @param kernel_name -
- * @param userdata -
- */
-int hb_run_kernel( const char *kernel_name, void **userdata )
-{
- KernelEnv env;
- cl_kernel_function function;
- int status;
- memset( &env, 0, sizeof(KernelEnv));
- status = hb_get_kernel_env_and_func( kernel_name, &env, &function );
- strcpy( env.kernel_name, kernel_name );
- if( status == 1 )
- {
- return(function( userdata, &env ));
- }
-
- return(0);
-}
-
-/**
- * hb_init_opencl_run_env
- * @param argc -
- * @param argv -
- * @param build_option -
- */
-int hb_init_opencl_run_env( int argc, char **argv, const char *build_option )
-{
- int status = 0;
- if( MAX_CLKERNEL_NUM <= 0 )
- {
- return 1;
- }
-
- if((argc > MAX_CLFILE_NUM) || (argc<0))
- {
- return 1;
- }
-
- if( !isInited )
- {
- hb_regist_opencl_kernel();
-
- /*initialize devices, context, comand_queue*/
- status = hb_init_opencl_env( &gpu_env );
- if( status )
- return(1);
-
- /*initialize program, kernel_name, kernel_count*/
- status = hb_compile_kernel_file("hb-opencl-kernels.cl",
- &gpu_env, 0, build_option);
-
- if( status == 0 || gpu_env.kernel_count == 0 )
- {
- return(1);
-
- }
-
- useBuffers = 1;
- isInited = 1;
- }
-
- return(0);
-}
-
-/**
- * hb_release_opencl_run_env
- */
-int hb_release_opencl_run_env()
-{
- return hb_release_opencl_env( &gpu_env );
-}
-
-/**
- * hb_opencl_stats
- */
-int hb_opencl_stats()
-{
- return isInited;
-}
-
-/**
- * hb_get_opencl_env
- */
-int hb_get_opencl_env()
-{
- /* initialize devices, context, command_queue */
- return hb_init_opencl_env(&gpu_env);
-}
-
-/**
- * hb_create_buffer
- * @param cl_inBuf -
- * @param flags -
- * @param size -
- */
-int hb_create_buffer( cl_mem *cl_Buf, int flags, int size )
-{
- int status;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_create_buffer: OpenCL support not available");
- return 0;
- }
-
- *cl_Buf = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
-
- if( status != CL_SUCCESS )
- {
- hb_log( "OpenCL: clCreateBuffer error '%d'", status );
- return 0;
- }
-
- return 1;
-}
-
-
-/**
- * hb_read_opencl_buffer
- * @param cl_inBuf -
- * @param outbuf -
- * @param size -
- */
-int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )
-{
- int status;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_read_opencl_suffer: OpenCL support not available");
- return 0;
- }
-
- status = hb_ocl->clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf,
- CL_TRUE, 0, size, outbuf, 0, 0, 0);
- if( status != CL_SUCCESS )
- {
- hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status );
- return 0;
- }
-
- return 1;
-}
-
-int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size)
-{
- int status;
- int flags = CL_MEM_ALLOC_HOST_PTR;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_cl_create_mapped_buffer: OpenCL support not available");
- return 0;
- }
-
- //cl_event event;
- *mem = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
- *addr = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE,
- CL_MAP_READ|CL_MAP_WRITE, 0, size, 0,
- NULL, NULL/*&event*/, &status);
-
- //hb_log("\t **** context: %.8x cmdqueue: %.8x cl_mem: %.8x mapaddr: %.8x size: %d status: %d", gpu_env.context, gpu_env.command_queue, mem, addr, size, status);
-
- return (status == CL_SUCCESS) ? 1 : 0;
-}
-
-int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr)
-{
- cl_event event;
-
- if (hb_ocl == NULL)
- {
- hb_error("hb_cl_free_mapped_buffer: OpenCL support not available");
- return 0;
- }
-
- int status = hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, mem,
- addr, 0, NULL, &event);
- if (status == CL_SUCCESS)
- hb_ocl->clWaitForEvents(1, &event);
- else
- hb_log("hb_free_mapped_buffer: error %d", status);
-
- status = hb_ocl->clReleaseMemObject(mem);
- if (status != CL_SUCCESS)
- hb_log("hb_free_mapped_buffer: release error %d",status);
-
- return (status == CL_SUCCESS) ? 1 : 0;
-}
-
-void hb_opencl_init()
-{
- hb_get_opencl_env();
-}
-
-int hb_use_buffers()
-{
- return useBuffers;
-}
-
-int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb)
-{
- if (hb_ocl == NULL)
- {
- hb_error("hb_copy_buffer: OpenCL support not available");
- return 0;
- }
-
- int status = hb_ocl->clEnqueueCopyBuffer(gpu_env.command_queue,
- src_buffer, dst_buffer,
- src_offset, dst_offset,
- cb, 0, 0, 0);
- if( status != CL_SUCCESS )
- {
- av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status );
- return 0;
- }
- return 1;
-}
-
-int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height)
-{
-
- int chrH = -(-height >> 1);
- unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2));
- if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height))
- {
- memcpy(Ybuf,temp,linesize0 * height);
- memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH);
- memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH);
-
- }
- av_free(temp);
-
- return 1;
-}
-
-int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset)
-{
- if (hb_ocl == NULL)
- {
- hb_error("hb_write_opencl_frame_buffer: OpenCL support not available");
- return 0;
- }
-
- void *mapped = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, cl_inBuf,
- CL_TRUE,CL_MAP_WRITE, 0,
- sizeof(uint8_t) * (linesize0 + linesize1) * height + offset,
- 0, NULL, NULL, NULL);
- uint8_t *temp = (uint8_t *)mapped;
- temp += offset;
- memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height);
- memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2);
- memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2);
- hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL);
- return 1;
-}
-
-cl_command_queue hb_get_command_queue()
-{
- return gpu_env.command_queue;
-}
-
-cl_context hb_get_context()
-{
- return gpu_env.context;
-}
+++ /dev/null
-/* openclwrapper.h
-
- Copyright (c) 2003-2017 HandBrake Team
- This file is part of the HandBrake source code
- Homepage: <http://handbrake.fr/>.
- It may be used under the terms of the GNU General Public License v2.
- For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
-
- Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
- Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
-
-
- */
-#ifndef HB_OPENCL_WRAPPER_H
-#define HB_OPENCL_WRAPPER_H
-
-#include "common.h"
-#include "extras/cl.h"
-
-//support AMD opencl
-#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
-#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
-
-typedef struct _KernelEnv
-{
- cl_context context;
- cl_command_queue command_queue;
- cl_program program;
- cl_kernel kernel;
- char kernel_name[150];
- int isAMD;
-}KernelEnv;
-
-typedef struct _OpenCLEnv
-{
- cl_platform_id platform;
- cl_context context;
- cl_device_id devices;
- cl_command_queue command_queue;
-}OpenCLEnv;
-
-
-//user defined, this is function wrapper which is used to set the input parameters ,
-//luanch kernel and copy data from GPU to CPU or CPU to GPU.
-typedef int (*cl_kernel_function)( void **userdata, KernelEnv *kenv );
-
-// registe a wapper for running the kernel specified by the kernel name
-int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function );
-
-// run kernel , user call this function to luanch kernel.
-// kernel_name: this kernel name is used to find the kernel in opencl runtime environment
-// userdata: this userdata is the all parameters for running the kernel specified by kernel name
-int hb_run_kernel( const char *kernel_name, void **userdata );
-
-// init the run time environment , this function must be called befor calling any function related to opencl
-// the argc must be set zero , argv must be set NULL, build_option is the options for build the kernel.
-int hb_init_opencl_run_env( int argc, char **argv, const char *build_option );
-
-//relase all resource about the opencl , this function must be called after calling any functions related to opencl
-int hb_release_opencl_run_env();
-
-// get the opencl status , 0: not init ; 1, inited; this function is used the check whether or not the opencl run time has been created
-int hb_opencl_stats();
-
-// update opencl run time environments , such as commandqueue , platforme, context. program
-int hb_init_opencl_attr( OpenCLEnv * env );
-
-// create kernel object by a kernel name on the specified opencl run time indicated by env parameter
-int hb_create_kernel( char * kernelname, KernelEnv * env );
-
-// release kernel object which is generated by calling the hb_create_kernel api
-int hb_release_kernel( KernelEnv * env );
-
-void hb_opencl_init();
-
-int hb_get_opencl_env();
-
-int hb_create_buffer(cl_mem *cl_Buf,int flags,int size);
-
-int hb_read_opencl_buffer(cl_mem cl_inBuf,unsigned char *outbuf,int size);
-
-int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size);
-
-int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr);
-
-int hb_use_buffers();
-
-int hb_confirm_gpu_type();
-
-#endif // HB_OPENCL_WRAPPER_H
hb_value_xform(value, HB_VALUE_TYPE_INT));
}
- if ((value = hb_dict_get(preset, "VideoScaler")) != NULL)
- {
- const char *s = hb_value_get_string(value);
- if (!strcasecmp(s, "opencl"))
- {
- hb_dict_set(video_dict, "OpenCL", hb_value_bool(1));
- }
- }
-
return 0;
}
presets_clean(preset, hb_preset_template);
}
+static void import_video_scaler_25_0_0(hb_value_t *preset)
+{
+ hb_dict_set(preset, "VideoScaler", hb_value_string("swscale"));
+}
+
static void import_anamorphic_20_0_0(hb_value_t *preset)
{
hb_value_t *val = hb_dict_get(preset, "PicturePAR");
}
}
+static void import_25_0_0(hb_value_t *preset)
+{
+ import_video_scaler_25_0_0(preset);
+}
+
static void import_20_0_0(hb_value_t *preset)
{
import_anamorphic_20_0_0(preset);
+
+ import_25_0_0(preset);
}
static void import_12_0_0(hb_value_t *preset)
import_20_0_0(preset);
result = 1;
}
+ else if (cmpVersion(major, minor, micro, 25, 0, 0) <= 0)
+ {
+ import_25_0_0(preset);
+ result = 1;
+ }
preset_clean(preset, hb_preset_template);
}
return result;
*/
#include "hb.h"
-#include "opencl.h"
#include "hbffmpeg.h"
typedef struct
title->video_decode_support = vid_info.video_decode_support;
- // TODO: check video dimensions
- hb_handle_t *hb_handle = (hb_handle_t *)data->h;
- if (hb_get_opencl_enabled(hb_handle))
- {
- title->opencl_support = !!hb_opencl_available();
- }
-
// compute the aspect ratio based on the storage dimensions and PAR.
hb_reduce(&title->dar.num, &title->dar.den,
title->geometry.par.num * title->geometry.width,
#include "hb.h"
#include "libavformat/avformat.h"
-#include "openclwrapper.h"
-#include "opencl.h"
#include "decomb.h"
#ifdef USE_QSV
hb_dict_extract_int(&vpp_settings[5], filter->settings,
"crop-right");
- // VPP crop/scale takes precedence over OpenCL scale too
- if (job->use_opencl)
- {
- hb_release_opencl_run_env();
- job->use_opencl = 0;
- }
hb_list_rem(job->list_filter, filter);
hb_filter_close(&filter);
break;
w = hb_get_work(job->h, WORK_READER);
hb_list_add(job->list_work, w);
- /*
- * OpenCL
- *
- * Note: we delay hb_ocl_init until here, since they're no point it loading
- * the library if we aren't going to use it. But we only call hb_ocl_close
- * in hb_global_close, since un/reloading the library each run is wasteful.
- */
- if (job->use_opencl)
- {
- if (hb_ocl_init() || hb_init_opencl_run_env(0, NULL, "-I."))
- {
- hb_log("work: failed to initialize OpenCL environment, using fallback");
- hb_release_opencl_run_env();
- job->use_opencl = 0;
- }
- }
- else
- {
- // we're not (re-)using OpenCL here, we can release the environment
- hb_release_opencl_run_env();
- }
-
hb_log( "starting job" );
// This must be performed before initializing filters because
hb_buffer_pool_free();
- if (job->use_opencl)
- {
- hb_release_opencl_run_env();
- }
-
hb_job_close(&job);
}
<resources>
<section name="PresetTemplate">
- <integer name="VersionMajor" value="25" />
+ <integer name="VersionMajor" value="26" />
<integer name="VersionMinor" value="0" />
<integer name="VersionMicro" value="0" />
<json name="Preset" file="preset_template.json" />
#include "hb.h"
#include "lang.h"
#include "parsecsv.h"
-#include "openclwrapper.h"
#ifdef USE_QSV
#include "qsv_common.h"
static int64_t stop_at_pts = 0;
static int stop_at_frame = 0;
static uint64_t min_title_duration = 10;
-static int use_opencl = -1;
#ifdef USE_QSV
static int qsv_async_depth = -1;
static int qsv_decode = -1;
hb_dvd_set_dvdnav( dvdnav );
- if (use_opencl == 1)
- {
- hb_opencl_set_enable(h, use_opencl);
- }
-
/* Show version */
fprintf( stderr, "%s - %s - %s\n",
HB_PROJECT_TITLE, HB_PROJECT_BUILD_TITLE, HB_PROJECT_URL_WEBSITE );
fprintf( stderr, " + autocrop: %d/%d/%d/%d\n", title->crop[0],
title->crop[1], title->crop[2], title->crop[3] );
- fprintf(stderr, " + support opencl: %s\n", title->opencl_support ? "yes" : "no");
-
fprintf( stderr, " + chapters:\n" );
for( i = 0; i < hb_list_count( title->list_chapter ); i++ )
{
hb_dict_set(preset, "VideoQSVDecode", hb_value_int(qsv_decode));
}
#endif
- if (use_opencl != -1)
- {
- hb_dict_set(preset, "VideoScaler",
- hb_value_string(use_opencl ? "opencl" : "swscale"));
- }
if (maxWidth > 0)
{
hb_dict_set(preset, "PictureWidth", hb_value_int(maxWidth));