%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
*/
- STRINGIFY(
- /*
- Reduce image noise and reduce detail levels by line
- im: input pixels filtered_in filtered_im: output pixels
- filter : convolve kernel width: convolve kernel size
- channel : define which channel is blured\
- is_RGBA_BGRA : define the input is RGBA or BGRA
- */
- __kernel void BlurSectionColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
- const ChannelType channel, __constant float *filter,
- const unsigned int width,
- const unsigned int imageColumns, const unsigned int imageRows,
- __local float4 *temp,
- const unsigned int offsetRows, const unsigned int section)
+ STRINGIFY(
+ /*
+ Reduce image noise and reduce detail levels by column
+ blurRowData: input pixels, filtered_im: output pixels
+ filter : convolve kernel, width: convolve kernel size
+ channel : define which channel is blurred
+ is_RGBA_BGRA : define the input is RGBA or BGRA
+ */
+ __kernel void BlurSectionColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
+ const ChannelType channel, __constant float *filter,
+ const unsigned int width,
+ const unsigned int imageColumns, const unsigned int imageRows,
+ __local float4 *temp,
+ const unsigned int offsetRows, const unsigned int section)
+ {
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+
+ //const int columns = get_global_size(0);
+ //const int rows = get_global_size(1);
+ const int columns = imageColumns;
+ const int rows = imageRows;
+
+ unsigned int radius = (width-1)/2;
+ const int wsize = get_local_size(1);
+ const unsigned int loadSize = wsize+width;
+
+ //group coordinate
+ const int groupX=get_local_size(0)*get_group_id(0);
+ const int groupY=get_local_size(1)*get_group_id(1);
+ //notice that get_local_size(0) is 1, so
+ //groupX=get_group_id(0);
+
+ // offset the input data
+ blurRowData += imageColumns * radius * section;
+
+ //parallel load and clamp
+ for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
{
- const int x = get_global_id(0);
- const int y = get_global_id(1);
+ int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
+ temp[i] = *(blurRowData+pos);
+ }
+
+ // barrier
+ barrier(CLK_LOCAL_MEM_FENCE);
- //const int columns = get_global_size(0);
- //const int rows = get_global_size(1);
- const int columns = imageColumns;
- const int rows = imageRows;
-
- unsigned int radius = (width-1)/2;
- const int wsize = get_local_size(1);
- const unsigned int loadSize = wsize+width;
-
- //group coordinate
- const int groupX=get_local_size(0)*get_group_id(0);
- const int groupY=get_local_size(1)*get_group_id(1);
- //notice that get_local_size(0) is 1, so
- //groupX=get_group_id(0);
-
- // offset the input data
- blurRowData += imageColumns * radius * section;
-
- //parallel load and clamp
- for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
- {
- int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
- temp[i] = *(blurRowData+pos);
- }
-
- // barrier
- barrier(CLK_LOCAL_MEM_FENCE);
+ // only do the work if this is not a patched item
+ if (get_global_id(1) < rows)
+ {
+ // compute
+ float4 result = (float4) 0;
- // only do the work if this is not a patched item
- if (get_global_id(1) < rows)
+ int i = 0;
+
+ \n #ifndef UFACTOR \n
+ \n #define UFACTOR 8 \n
+ \n #endif \n
+
+ for ( ; i+UFACTOR < width; )
{
- // compute
- float4 result = (float4) 0;
-
- int i = 0;
-
- \n #ifndef UFACTOR \n
- \n #define UFACTOR 8 \n
- \n #endif \n
-
- for ( ; i+UFACTOR < width; )
- {
- \n #pragma unroll UFACTOR \n
- for (int j=0; j < UFACTOR; j++, i++)
- {
- result+=filter[i]*temp[i+get_local_id(1)];
- }
- }
- for ( ; i < width; i++)
+ \n #pragma unroll UFACTOR \n
+ for (int j=0; j < UFACTOR; j++, i++)
{
result+=filter[i]*temp[i+get_local_id(1)];
}
+ }
+ for ( ; i < width; i++)
+ {
+ result+=filter[i]*temp[i+get_local_id(1)];
+ }
- result.x = ClampToQuantum(result.x);
- result.y = ClampToQuantum(result.y);
- result.z = ClampToQuantum(result.z);
- result.w = ClampToQuantum(result.w);
-
- // offset the output data
- filtered_im += imageColumns * offsetRows;
+ result.x = ClampToQuantum(result.x);
+ result.y = ClampToQuantum(result.y);
+ result.z = ClampToQuantum(result.z);
+ result.w = ClampToQuantum(result.w);
- // write back to global
- filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
- }
+ // offset the output data
+ filtered_im += imageColumns * offsetRows;
+ // write back to global
+ filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
}
- )
-
- STRINGIFY(
- /*
- Reduce image noise and reduce detail levels by row
- im: input pixels filtered_in filtered_im: output pixels
- filter : convolve kernel width: convolve kernel size
- channel : define which channel is blured
- is_RGBA_BGRA : define the input is RGBA or BGRA
- */
- __kernel void BlurSectionRow(__global CLPixelType *im, __global float4 *filtered_im,
- const ChannelType channel, __constant float *filter,
- const unsigned int width,
- const unsigned int imageColumns, const unsigned int imageRows,
- __local CLPixelType *temp,
- const unsigned int offsetRows, const unsigned int section)
- {
- const int x = get_global_id(0);
- const int y = get_global_id(1);
- const int columns = imageColumns;
+ }
+ )
- const unsigned int radius = (width-1)/2;
- const int wsize = get_local_size(0);
- const unsigned int loadSize = wsize+width;
+ STRINGIFY(
+ /*
+ Reduce image noise and reduce detail levels by row
+ im: input pixels, filtered_im: output pixels
+ filter : convolve kernel, width: convolve kernel size
+ channel : define which channel is blurred
+ is_RGBA_BGRA : define the input is RGBA or BGRA
+ */
+ __kernel void BlurSectionRow(__global CLPixelType *im, __global float4 *filtered_im,
+ const ChannelType channel, __constant float *filter,
+ const unsigned int width,
+ const unsigned int imageColumns, const unsigned int imageRows,
+ __local CLPixelType *temp,
+ const unsigned int offsetRows, const unsigned int section)
+ {
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
- //group coordinate
- const int groupX=get_local_size(0)*get_group_id(0);
- const int groupY=get_local_size(1)*get_group_id(1);
+ const int columns = imageColumns;
- //offset the input data, assuming section is 0, 1
- im += imageColumns * (offsetRows - radius * section);
+ const unsigned int radius = (width-1)/2;
+ const int wsize = get_local_size(0);
+ const unsigned int loadSize = wsize+width;
- //parallel load and clamp
- for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
- {
- //int cx = ClampToCanvas(groupX+i, columns);
- temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
+ //group coordinate
+ const int groupX=get_local_size(0)*get_group_id(0);
+ const int groupY=get_local_size(1)*get_group_id(1);
- /*if (0 && y==0 && get_group_id(1) == 0)
- {
- printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
- }*/
- }
+ //offset the input data, assuming section is 0, 1
+ im += imageColumns * (offsetRows - radius * section);
- // barrier
- barrier(CLK_LOCAL_MEM_FENCE);
+ //parallel load and clamp
+ for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
+ {
+ //int cx = ClampToCanvas(groupX+i, columns);
+ temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
- // only do the work if this is not a patched item
- if (get_global_id(0) < columns)
+ /*if (0 && y==0 && get_group_id(1) == 0)
{
- // compute
- float4 result = (float4) 0;
+ printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
+ }*/
+ }
- int i = 0;
-
- \n #ifndef UFACTOR \n
- \n #define UFACTOR 8 \n
- \n #endif \n
+ // barrier
+ barrier(CLK_LOCAL_MEM_FENCE);
- for ( ; i+UFACTOR < width; )
- {
- \n #pragma unroll UFACTOR\n
- for (int j=0; j < UFACTOR; j++, i++)
- {
- result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
- }
- }
+ // only do the work if this is not a patched item
+ if (get_global_id(0) < columns)
+ {
+ // compute
+ float4 result = (float4) 0;
- for ( ; i < width; i++)
+ int i = 0;
+
+ \n #ifndef UFACTOR \n
+ \n #define UFACTOR 8 \n
+ \n #endif \n
+
+ for ( ; i+UFACTOR < width; )
+ {
+ \n #pragma unroll UFACTOR\n
+ for (int j=0; j < UFACTOR; j++, i++)
{
result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
}
+ }
- result.x = ClampToQuantum(result.x);
- result.y = ClampToQuantum(result.y);
- result.z = ClampToQuantum(result.z);
- result.w = ClampToQuantum(result.w);
-
- // write back to global
- filtered_im[y*columns+x] = result;
+ for ( ; i < width; i++)
+ {
+ result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
}
+ result.x = ClampToQuantum(result.x);
+ result.y = ClampToQuantum(result.y);
+ result.z = ClampToQuantum(result.z);
+ result.w = ClampToQuantum(result.w);
+
+ // write back to global
+ filtered_im[y*columns+x] = result;
}
- )
- STRINGIFY(
- /*
- Reduce image noise and reduce detail levels by line
- im: input pixels filtered_in filtered_im: output pixels
- filter : convolve kernel width: convolve kernel size
- channel : define which channel is blured\
- is_RGBA_BGRA : define the input is RGBA or BGRA
- */
- __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
- const ChannelType channel, __constant float *filter,
- const unsigned int width,
- const unsigned int imageColumns, const unsigned int imageRows,
- __local float4 *temp)
- {
- const int x = get_global_id(0);
- const int y = get_global_id(1);
+ }
+ )
- //const int columns = get_global_size(0);
- //const int rows = get_global_size(1);
- const int columns = imageColumns;
- const int rows = imageRows;
+ STRINGIFY(
+ /*
+ Reduce image noise and reduce detail levels by line
+ blurRowData: input pixels, filtered_im: output pixels
+ filter : convolve kernel, width: convolve kernel size
+ channel : define which channel is blurred
+ is_RGBA_BGRA : define the input is RGBA or BGRA
+ */
+ __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
+ const ChannelType channel, __constant float *filter,
+ const unsigned int width,
+ const unsigned int imageColumns, const unsigned int imageRows,
+ __local float4 *temp)
+ {
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+
+ //const int columns = get_global_size(0);
+ //const int rows = get_global_size(1);
+ const int columns = imageColumns;
+ const int rows = imageRows;
+
+ unsigned int radius = (width-1)/2;
+ const int wsize = get_local_size(1);
+ const unsigned int loadSize = wsize+width;
+
+ //group coordinate
+ const int groupX=get_local_size(0)*get_group_id(0);
+ const int groupY=get_local_size(1)*get_group_id(1);
+ //notice that get_local_size(0) is 1, so
+ //groupX=get_group_id(0);
+
+ //parallel load and clamp
+ for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
+ {
+ temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
+ }
+
+ // barrier
+ barrier(CLK_LOCAL_MEM_FENCE);
- unsigned int radius = (width-1)/2;
- const int wsize = get_local_size(1);
- const unsigned int loadSize = wsize+width;
+ // only do the work if this is not a patched item
+ if (get_global_id(1) < rows)
+ {
+ // compute
+ float4 result = (float4) 0;
- //group coordinate
- const int groupX=get_local_size(0)*get_group_id(0);
- const int groupY=get_local_size(1)*get_group_id(1);
- //notice that get_local_size(0) is 1, so
- //groupX=get_group_id(0);
+ int i = 0;
- //parallel load and clamp
- for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
- {
- temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
- }
+ \n #ifndef UFACTOR \n
+ \n #define UFACTOR 8 \n
+ \n #endif \n
- // barrier
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // only do the work if this is not a patched item
- if (get_global_id(1) < rows)
+ for ( ; i+UFACTOR < width; )
{
- // compute
- float4 result = (float4) 0;
-
- int i = 0;
-
- \n #ifndef UFACTOR \n
- \n #define UFACTOR 8 \n
- \n #endif \n
-
- for ( ; i+UFACTOR < width; )
- {
- \n #pragma unroll UFACTOR \n
- for (int j=0; j < UFACTOR; j++, i++)
- {
- result+=filter[i]*temp[i+get_local_id(1)];
- }
- }
-
- for ( ; i < width; i++)
+ \n #pragma unroll UFACTOR \n
+ for (int j=0; j < UFACTOR; j++, i++)
{
result+=filter[i]*temp[i+get_local_id(1)];
}
+ }
- result.x = ClampToQuantum(result.x);
- result.y = ClampToQuantum(result.y);
- result.z = ClampToQuantum(result.z);
- result.w = ClampToQuantum(result.w);
-
- // write back to global
- filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
+ for ( ; i < width; i++)
+ {
+ result+=filter[i]*temp[i+get_local_id(1)];
}
- }
- )
+ result.x = ClampToQuantum(result.x);
+ result.y = ClampToQuantum(result.y);
+ result.z = ClampToQuantum(result.z);
+ result.w = ClampToQuantum(result.w);
- STRINGIFY(
- /*
- Reduce image noise and reduce detail levels by row
- im: input pixels filtered_in filtered_im: output pixels
- filter : convolve kernel width: convolve kernel size
- channel : define which channel is blured
- is_RGBA_BGRA : define the input is RGBA or BGRA
- */
- __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
- const ChannelType channel, __constant float *filter,
- const unsigned int width,
- const unsigned int imageColumns, const unsigned int imageRows,
- __local CLPixelType *temp)
- {
- const int x = get_global_id(0);
- const int y = get_global_id(1);
+ // write back to global
+ filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
+ }
- const int columns = imageColumns;
+ }
+ )
- const unsigned int radius = (width-1)/2;
- const int wsize = get_local_size(0);
- const unsigned int loadSize = wsize+width;
+ STRINGIFY(
+ /*
+ Reduce image noise and reduce detail levels by row
+ im: input pixels, filtered_im: output pixels
+ filter : convolve kernel, width: convolve kernel size
+ channel : define which channel is blurred
+ is_RGBA_BGRA : define the input is RGBA or BGRA
+ */
+ __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
+ const ChannelType channel, __constant float *filter,
+ const unsigned int width,
+ const unsigned int imageColumns, const unsigned int imageRows,
+ __local CLPixelType *temp)
+ {
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
- //load chunk only for now
- //event_t e = async_work_group_copy(temp+radius, im+x+y*columns, wsize, 0);
- //wait_group_events(1,&e);
+ const int columns = imageColumns;
- //parallel load and clamp
- /*
- int count = 0;
- for (int i=0; i < loadSize; i=i+wsize)
- {
- int currentX = x + wsize*(count++);
+ const unsigned int radius = (width-1)/2;
+ const int wsize = get_local_size(0);
+ const unsigned int loadSize = wsize+width;
- int localId = get_local_id(0);
+ //load chunk only for now
+ //event_t e = async_work_group_copy(temp+radius, im+x+y*columns, wsize, 0);
+ //wait_group_events(1,&e);
- if ((localId+i) > loadSize)
- break;
+ //parallel load and clamp
+ /*
+ int count = 0;
+ for (int i=0; i < loadSize; i=i+wsize)
+ {
+ int currentX = x + wsize*(count++);
- temp[localId+i] = im[y*columns+ClampToCanvas(currentX-radius, columns)];
+ int localId = get_local_id(0);
- if (y==0 && get_group_id(0) == 0)
- {
- printf("(%d %d) temp %d load %d currentX %d\n", x, y, localId+i, ClampToCanvas(currentX-radius, columns), currentX);
- }
- }
- */
+ if ((localId+i) > loadSize)
+ break;
- //group coordinate
- const int groupX=get_local_size(0)*get_group_id(0);
- const int groupY=get_local_size(1)*get_group_id(1);
+ temp[localId+i] = im[y*columns+ClampToCanvas(currentX-radius, columns)];
- //parallel load and clamp
- for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
+ if (y==0 && get_group_id(0) == 0)
{
- //int cx = ClampToCanvas(groupX+i, columns);
- temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
-
- /*if (0 && y==0 && get_group_id(1) == 0)
- {
- printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
- }*/
+ printf("(%d %d) temp %d load %d currentX %d\n", x, y, localId+i, ClampToCanvas(currentX-radius, columns), currentX);
}
+ }
+ */
- // barrier
- barrier(CLK_LOCAL_MEM_FENCE);
+ //group coordinate
+ const int groupX=get_local_size(0)*get_group_id(0);
+ const int groupY=get_local_size(1)*get_group_id(1);
+
+ //parallel load and clamp
+ for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
+ {
+ //int cx = ClampToCanvas(groupX+i, columns);
+ temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
- // only do the work if this is not a patched item
- if (get_global_id(0) < columns)
+ /*if (0 && y==0 && get_group_id(1) == 0)
{
- // compute
- float4 result = (float4) 0;
+ printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
+ }*/
+ }
- int i = 0;
-
- \n #ifndef UFACTOR \n
- \n #define UFACTOR 8 \n
- \n #endif \n
+ // barrier
+ barrier(CLK_LOCAL_MEM_FENCE);
- for ( ; i+UFACTOR < width; )
- {
- \n #pragma unroll UFACTOR\n
- for (int j=0; j < UFACTOR; j++, i++)
- {
- result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
- }
- }
+ // only do the work if this is not a patched item
+ if (get_global_id(0) < columns)
+ {
+ // compute
+ float4 result = (float4) 0;
+
+ int i = 0;
+
+ \n #ifndef UFACTOR \n
+ \n #define UFACTOR 8 \n
+ \n #endif \n
- for ( ; i < width; i++)
+ for ( ; i+UFACTOR < width; )
+ {
+ \n #pragma unroll UFACTOR\n
+ for (int j=0; j < UFACTOR; j++, i++)
{
result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
}
+ }
- result.x = ClampToQuantum(result.x);
- result.y = ClampToQuantum(result.y);
- result.z = ClampToQuantum(result.z);
- result.w = ClampToQuantum(result.w);
-
- // write back to global
- filtered_im[y*columns+x] = result;
+ for ( ; i < width; i++)
+ {
+ result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
}
+
+ result.x = ClampToQuantum(result.x);
+ result.y = ClampToQuantum(result.y);
+ result.z = ClampToQuantum(result.z);
+ result.w = ClampToQuantum(result.w);
+
+ // write back to global
+ filtered_im[y*columns+x] = result;
}
- )
+ }
+ )
/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%