/*
  Accessor for the alpha value of a float4 pixel.  The pixel is passed by
  value and the helper returns its w component unchanged.
*/
inline float getAlphaF4(float4 p)
{
  const float alpha = p.w;
  return alpha;
}
/*
  Mutator for the alpha value of a float4 pixel.  Writes value into the w
  component of the vector p points to; the x, y and z components are not
  touched.  p is dereferenced unconditionally.
*/
inline void setAlphaF4(float4* p, float value)
{
  (*p).w = value;
}
- inline void ReadChannels(__global CLQuantum *p, const unsigned int number_channels,
+ inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels,
const ChannelType channel, float *red, float *green, float *blue, float *alpha)
{
if ((channel & RedChannel) != 0)
STRINGIFY(
- __kernel void UnsharpMask(__global CLPixelType *im,
- __global CLPixelType *filtered_im,__constant float *filter,
- const unsigned int width,const unsigned int imageColumns,
- const unsigned int imageRows,__local float4 *pixels,const float gain,
- const float threshold, const unsigned int justBlur)
- {
- const int x = get_global_id(0);
- const int y = get_global_id(1);
-
- const unsigned int radius = (width - 1) / 2;
+ __kernel void UnsharpMask(__global CLPixelType *im,
+ __global CLPixelType *filtered_im,__constant float *filter,
+ const unsigned int width,const unsigned int imageColumns,
+ const unsigned int imageRows,__local float4 *pixels,const float gain,
+ const float threshold, const unsigned int justBlur)
+ {
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
- int row = y - radius;
- int baseRow = get_group_id(1) * get_local_size(1) - radius;
- int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
+ const unsigned int radius = (width - 1) / 2;
- while (row < endRow) {
- int srcy = (row < 0) ? -row : row; // mirror pad
- srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
+ int row = y - radius;
+ int baseRow = get_group_id(1) * get_local_size(1) - radius;
+ int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
- float4 value = 0.0f;
+ while (row < endRow) {
+ int srcy = (row < 0) ? -row : row; // mirror pad
+ srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
- int ix = x - radius;
- int i = 0;
+ float4 value = 0.0f;
- while (i + 7 < width) {
- for (int j = 0; j < 8; ++j) { // unrolled
- int srcx = ix + j;
- srcx = (srcx < 0) ? -srcx : srcx;
- srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
- value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
- }
- ix += 8;
- i += 8;
- }
+ int ix = x - radius;
+ int i = 0;
- while (i < width) {
- int srcx = (ix < 0) ? -ix : ix; // mirror pad
+ while (i + 7 < width) {
+ for (int j = 0; j < 8; ++j) { // unrolled
+ int srcx = ix + j;
+ srcx = (srcx < 0) ? -srcx : srcx;
srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
- value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
- ++i;
- ++ix;
+ value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
}
- pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
- row += get_local_size(1);
+ ix += 8;
+ i += 8;
}
+ while (i < width) {
+ int srcx = (ix < 0) ? -ix : ix; // mirror pad
+ srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
+ value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
+ ++i;
+ ++ix;
+ }
+ pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
+ row += get_local_size(1);
+ }
+
barrier(CLK_LOCAL_MEM_FENCE);
const int px = get_local_id(0);
STRINGIFY(
- __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
- void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
- const unsigned int number_channels,const unsigned int max_channels,
- const float threshold,const int passes,const unsigned int imageWidth,
- const unsigned int imageHeight)
+ __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
+ void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
+ const unsigned int number_channels,const unsigned int max_channels,
+ const float threshold,const int passes,const unsigned int imageWidth,
+ const unsigned int imageHeight)
{
const int pad = (1 << (passes - 1));
const int tileSize = 64;