2 Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization
3 dedicated to making software imaging solutions freely available.
5 You may not use this file except in compliance with the License.
6 obtain a copy of the License at
8 http://www.imagemagick.org/script/license.php
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
16 MagickCore private methods for accelerated functions.
19 #ifndef _MAGICKCORE_ACCELERATE_PRIVATE_H
20 #define _MAGICKCORE_ACCELERATE_PRIVATE_H
22 #if defined(__cplusplus) || defined(c_plusplus)
27 #if defined(MAGICKCORE_OPENCL_SUPPORT)
// Helpers for writing OpenCL program text as C string literals.
// OPENCL_DEFINE/IF/ELIF/ELSE/ENDIF each expand to adjacent string literals that
// concatenate into one preprocessor directive line wrapped in "\n", with the
// macro arguments stringified. STRINGIFY stringifies its arguments verbatim and
// appends a newline.
29 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
30 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
31 #define OPENCL_ELSE() "\n #""else " " \n"
32 #define OPENCL_ENDIF() "\n #""endif " " \n"
33 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
34 #define STRINGIFY(...) #__VA_ARGS__ "\n"
36 typedef struct _FloatPixelPacket
38 #ifdef MAGICK_PIXEL_RGBA
45 #ifdef MAGICK_PIXEL_BGRA
54 const char* accelerateKernels =
62 GreenChannel = 0x0002,
63 MagentaChannel = 0x0002,
65 YellowChannel = 0x0004,
66 AlphaChannel = 0x0008,
67 OpacityChannel = 0x0008,
68 MatteChannel = 0x0008, /* deprecated */
69 BlackChannel = 0x0020,
70 IndexChannel = 0x0020,
71 CompositeChannels = 0x002F,
72 AllChannels = 0x7ffffff,
74 Special purpose channel types.
76 TrueAlphaChannel = 0x0040, /* extract actual alpha channel from opacity */
77 RGBChannels = 0x0080, /* set alpha from grayscale mask in RGB */
78 GrayChannels = 0x0080,
79 SyncChannels = 0x0100, /* channels should be modified equally */
80 DefaultChannels = ((AllChannels | SyncChannels) &~ OpacityChannel)
// ScaleCharToQuantum: widen an 8-bit value to the quantum type. One variant is
// emitted per MAGICKCORE_QUANTUM_DEPTH (8, 16, 32); the scale factors are
// 1, 257 and 16843009 respectively. Every variant returns CLQuantum.
84 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
87 inline CLQuantum ScaleCharToQuantum(const unsigned char value)
89 return((CLQuantum) value);
93 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
96 inline CLQuantum ScaleCharToQuantum(const unsigned char value)
98 return((CLQuantum) (257.0f*value));
102 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
105 inline CLQuantum ScaleCharToQuantum(const unsigned char value)
// Cast to CLQuantum, the declared return type, as the 8- and 16-bit variants do
// (this variant previously cast to Quantum, which no visible kernel code uses).
107 return((CLQuantum) (16843009.0*value));
// Clamp a coordinate into [0, range-1]. The convolution and blur kernels use
// this to replicate edge pixels when a filter tap falls outside the image.
115 inline int ClampToCanvas(const int offset,const int range)
117 return clamp(offset, (int)0, range-1);
// Clamp a coordinate like ClampToCanvas, but widen the range by `edge` on one side:
//   section == 0 : result lies in [0, range-1+edge]
//   section != 0 : result lies in [-edge, range-1]
// The *Section kernels call this with edge = filter radius.
122 inline int ClampToCanvasWithHalo(const int offset,const int range, const int edge, const int section)
124 return clamp(offset, section?(int)(0-edge):(int)0, section?(range-1):(range-1+edge));
// Clamp a float to [0, QuantumRange], add 0.5f and convert to CLQuantum.
// The +0.5f presumably gives round-to-nearest when CLQuantum is an integer
// type (its definition is not visible here).
129 inline CLQuantum ClampToQuantum(const float value)
131 return (CLQuantum) (clamp(value, 0.0f, (float) QuantumRange) + 0.5f);
// Convert a quantum value to a lookup-map index: values at or above MaxMap
// saturate to MaxMap, anything else is returned unchanged as uint.
136 inline uint ScaleQuantumToMap(CLQuantum value)
138 if (value >= (CLQuantum) MaxMap)
139 return ((uint)MaxMap);
141 return ((uint)value);
// Reciprocal that never divides by a magnitude smaller than MagickEpsilon:
// returns 1/x when |x| >= MagickEpsilon, otherwise sign(x)/MagickEpsilon
// (x == 0 is treated as positive).
146 inline float PerceptibleReciprocal(const float x)
148 float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;
149 return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon));
153 OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
158 UndefinedPixelIntensityMethod = 0,
159 AveragePixelIntensityMethod,
160 BrightnessPixelIntensityMethod,
161 LightnessPixelIntensityMethod,
162 Rec601LumaPixelIntensityMethod,
163 Rec601LuminancePixelIntensityMethod,
164 Rec709LumaPixelIntensityMethod,
165 Rec709LuminancePixelIntensityMethod,
166 RMSPixelIntensityMethod,
167 MSPixelIntensityMethod
168 } PixelIntensityMethod;
// Colorspace identifiers; GetPixelIntensity compares its colorspace argument
// against GRAYColorspace.
175 RGBColorspace, /* Linear RGB colorspace */
176 GRAYColorspace, /* greyscale (linear) image (faked 1 channel) */
177 TransparentColorspace,
186 CMYKColorspace, /* negated linear RGB with black separated */
187 sRGBColorspace, /* Default: non-linear sRGB colorspace */
191 Rec601LumaColorspace,
192 Rec601YCbCrColorspace,
193 Rec709LumaColorspace,
194 Rec709YCbCrColorspace,
196 CMYColorspace, /* negated linear RGB colorspace */
199 LCHColorspace, /* alias for LCHuv */
201 LCHabColorspace, /* Cylindrical (Polar) Lab */
202 LCHuvColorspace, /* Cylindrical (Polar) Luv */
205 HSVColorspace, /* alias for HSB */
// Clamp a float to the closed interval [0, 1].
212 inline float RoundToUnity(const float value)
214 return clamp(value,0.0f,1.0f);
// Component accessors for packed pixels (CLPixelType) and float4 working values.
// In the definitions visible here the layout is x = blue, y = green, z = red,
// w = opacity. Setters take a pointer and modify the pixel in place.
220 inline CLQuantum getBlue(CLPixelType p) { return p.x; }
221 inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
222 inline float getBlueF4(float4 p) { return p.x; }
223 inline void setBlueF4(float4* p, float value) { (*p).x = value; }
225 inline CLQuantum getGreen(CLPixelType p) { return p.y; }
226 inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
227 inline float getGreenF4(float4 p) { return p.y; }
228 inline void setGreenF4(float4* p, float value) { (*p).y = value; }
230 inline CLQuantum getRed(CLPixelType p) { return p.z; }
231 inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
232 inline float getRedF4(float4 p) { return p.z; }
233 inline void setRedF4(float4* p, float value) { (*p).z = value; }
235 inline CLQuantum getOpacity(CLPixelType p) { return p.w; }
236 inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
237 inline float getOpacityF4(float4 p) { return p.w; }
238 inline void setOpacityF4(float4* p, float value) { (*p).w = value; }
// setGray writes the same value to the red, green and blue components; opacity is untouched.
240 inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; }
// Compute a scalar intensity for pixel p from its red, green and blue components.
//   method     : one of the *PixelIntensityMethod values; selects the formula
//   colorspace : compared against GRAYColorspace before the method is evaluated
//   p          : the pixel; opacity is not read
// The Rec601 cases weight the components 0.298839/0.586811/0.114350 and the
// Rec709 cases 0.212656/0.715158/0.072186.
242 inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p)
244 float red = getRed(p);
245 float green = getGreen(p);
246 float blue = getBlue(p);
250 if (colorspace == GRAYColorspace)
255 case AveragePixelIntensityMethod:
257 intensity=(red+green+blue)/3.0;
260 case BrightnessPixelIntensityMethod:
262 intensity=max(max(red,green),blue);
265 case LightnessPixelIntensityMethod:
267 intensity=(min(min(red,green),blue)+
268 max(max(red,green),blue))/2.0;
271 case MSPixelIntensityMethod:
273 intensity=(float) (((float) red*red+green*green+blue*blue)/
277 case Rec601LumaPixelIntensityMethod:
// NOTE(review): `image` is not a parameter of this function, so this gamma
// conversion (and the three similar ones below) is presumably disabled code
// carried over from the CPU implementation - confirm before relying on it.
280 if (image->colorspace == RGBColorspace)
282 red=EncodePixelGamma(red);
283 green=EncodePixelGamma(green);
284 blue=EncodePixelGamma(blue);
287 intensity=0.298839*red+0.586811*green+0.114350*blue;
290 case Rec601LuminancePixelIntensityMethod:
293 if (image->colorspace == sRGBColorspace)
295 red=DecodePixelGamma(red);
296 green=DecodePixelGamma(green);
297 blue=DecodePixelGamma(blue);
300 intensity=0.298839*red+0.586811*green+0.114350*blue;
303 case Rec709LumaPixelIntensityMethod:
307 if (image->colorspace == RGBColorspace)
309 red=EncodePixelGamma(red);
310 green=EncodePixelGamma(green);
311 blue=EncodePixelGamma(blue);
314 intensity=0.212656*red+0.715158*green+0.072186*blue;
317 case Rec709LuminancePixelIntensityMethod:
320 if (image->colorspace == sRGBColorspace)
322 red=DecodePixelGamma(red);
323 green=DecodePixelGamma(green);
324 blue=DecodePixelGamma(blue);
327 intensity=0.212656*red+0.715158*green+0.072186*blue;
330 case RMSPixelIntensityMethod:
332 intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/
// 2-D convolution that stages data in local memory before filtering.
//   input / output             : source and destination images, imageWidth x imageHeight
//   filter                     : filterWidth x filterHeight coefficients, row-major
//   matte, channel             : choose between the plain sum and the alpha-weighted sum
//   pixelLocalCache            : local buffer, must hold
//                                (local_size(0)+filterWidth-1) * (local_size(1)+filterHeight-1) pixels
//   filterCache                : local buffer, must hold filterWidth*filterHeight floats
// Each work-group loads its tile plus a filter-sized border (coordinates clamped
// to the image), copies the filter, synchronises, and then every in-range
// work-item produces one output pixel. Opacity is copied from the input unless
// OpacityChannel is selected.
345 void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
346 const unsigned int imageWidth, const unsigned int imageHeight,
347 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
348 const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
351 blockID.x = get_group_id(0);
352 blockID.y = get_group_id(1);
354 // image area processed by this workgroup
356 imageAreaOrg.x = blockID.x * get_local_size(0);
357 imageAreaOrg.y = blockID.y * get_local_size(1);
360 midFilterDimen.x = (filterWidth-1)/2;
361 midFilterDimen.y = (filterHeight-1)/2;
// top-left corner of the cached area: the tile origin shifted back by the filter half-size
363 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
365 // dimension of the local cache
366 int2 cachedAreaDimen;
367 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
368 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
370 // cache the pixels accessed by this workgroup in local memory
371 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
372 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
373 int groupSize = get_local_size(0) * get_local_size(1);
// work-items stride through the cache by groupSize so the whole group shares the load
374 for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
376 int2 cachedAreaIndex;
377 cachedAreaIndex.x = i % cachedAreaDimen.x;
378 cachedAreaIndex.y = i / cachedAreaDimen.x;
380 int2 imagePixelIndex;
381 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
383 // only support EdgeVirtualPixelMethod through ClampToCanvas
384 // TODO: implement other virtual pixel method
385 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
386 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
388 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
// copy the filter coefficients into local memory with the same striding
392 for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
393 filterCache[i] = filter[i];
// all cache writes must be visible before any work-item starts reading
395 barrier(CLK_LOCAL_MEM_FENCE);
399 imageIndex.x = imageAreaOrg.x + get_local_id(0);
400 imageIndex.y = imageAreaOrg.y + get_local_id(1);
402 // if out-of-range, stops here and quit
403 if (imageIndex.x >= imageWidth
404 || imageIndex.y >= imageHeight) {
409 float4 sum = (float4)0.0f;
// plain weighted sum when opacity is not being filtered or the image has no matte
411 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
412 int cacheIndexY = get_local_id(1);
413 for (int j = 0; j < filterHeight; j++) {
414 int cacheIndexX = get_local_id(0);
415 for (int i = 0; i < filterWidth; i++) {
416 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
417 float f = filterCache[filterIndex];
// alpha-weighted variant: alpha is derived from the stored opacity (p.w)
432 int cacheIndexY = get_local_id(1);
433 for (int j = 0; j < filterHeight; j++) {
434 int cacheIndexX = get_local_id(0);
435 for (int i = 0; i < filterWidth; i++) {
437 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
438 float alpha = QuantumScale*(QuantumRange-p.w);
439 float f = filterCache[filterIndex];
// normalise the colour sums by the reciprocal of gamma (guarded against near-zero values)
453 gamma = PerceptibleReciprocal(gamma);
454 sum.xyz = gamma*sum.xyz;
456 CLPixelType outputPixel;
457 outputPixel.x = ClampToQuantum(sum.x);
458 outputPixel.y = ClampToQuantum(sum.y);
459 outputPixel.z = ClampToQuantum(sum.z);
460 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
462 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
// 2-D convolution reading every filter tap directly from global memory
// (no local-memory staging). One work-item produces one output pixel at
// (get_global_id(0), get_global_id(1)); taps outside the image are clamped to
// the nearest edge pixel. Opacity is copied from the input unless
// OpacityChannel is selected.
468 void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
469 const uint imageWidth, const uint imageHeight,
470 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
471 const uint matte, const ChannelType channel) {
474 imageIndex.x = get_global_id(0);
475 imageIndex.y = get_global_id(1);
// NOTE(review): these two declarations reuse the parameter names; presumably
// they are disabled in the full file (the size now comes from the arguments) - confirm.
478 unsigned int imageWidth = get_global_size(0);
479 unsigned int imageHeight = get_global_size(1);
481 if (imageIndex.x >= imageWidth
482 || imageIndex.y >= imageHeight)
486 midFilterDimen.x = (filterWidth-1)/2;
487 midFilterDimen.y = (filterHeight-1)/2;
490 float4 sum = (float4)0.0f;
// plain weighted sum when opacity is not being filtered or the image has no matte
492 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
493 for (int j = 0; j < filterHeight; j++) {
494 int2 inputPixelIndex;
495 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
496 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
497 for (int i = 0; i < filterWidth; i++) {
498 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
499 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
501 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
502 float f = filter[filterIndex];
// alpha-weighted variant: alpha is derived from the stored opacity (p.w)
517 for (int j = 0; j < filterHeight; j++) {
518 int2 inputPixelIndex;
519 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
520 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
521 for (int i = 0; i < filterWidth; i++) {
522 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
523 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
525 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
526 float alpha = QuantumScale*(QuantumRange-p.w);
527 float f = filter[filterIndex];
// normalise the colour sums by the reciprocal of gamma (guarded against near-zero values)
541 gamma = PerceptibleReciprocal(gamma);
542 sum.xyz = gamma*sum.xyz;
545 CLPixelType outputPixel;
546 outputPixel.x = ClampToQuantum(sum.x);
547 outputPixel.y = ClampToQuantum(sum.y);
548 outputPixel.z = ClampToQuantum(sum.z);
549 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
551 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
569 apply FunctionImageChannel(brightness-contrast)
// Apply a MagickFunction to all four components of one pixel.
//   pixel             : input pixel; x, y, z and w are all transformed the same way
//   function          : selects which formula is used (see the case labels)
//   number_parameters : number of valid entries in `parameters`
//   parameters        : function arguments; missing trailing entries fall back
//                       to the defaults coded in each branch
// Returns the transformed pixel with every component passed through ClampToQuantum.
571 CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function,
572 const unsigned int number_parameters,
573 __constant float *parameters)
575 float4 result = (float4) 0.0f;
578 case PolynomialFunction:
// Horner evaluation on the pixel scaled to [0,1]; parameters[0] is the highest-order coefficient
580 for (unsigned int i=0; i < number_parameters; i++)
581 result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
582 result *= (float4)QuantumRange;
585 case SinusoidFunction:
// defaults: freq 1, phase 0 (given in degrees, hence /360), amplitude 0.5, bias 0.5
587 float freq,phase,ampl,bias;
588 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
589 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
590 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
591 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
592 result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
593 (freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias);
594 result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
595 (freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias);
596 result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
597 (freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias);
598 result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
599 (freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias);
// asin-based branch; defaults: width 1, center 0.5, range 1, bias 0.5.
// NOTE(review): the <= -1 / >= 1 tests run on the value after asin and bias have
// been applied, not on the asin argument - confirm against the CPU implementation.
604 float width,range,center,bias;
605 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
606 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
607 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
608 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
610 result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
611 result.x = range/MagickPI*asin(result.x)+bias;
612 result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
613 result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
615 result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
616 result.y = range/MagickPI*asin(result.y)+bias;
617 result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
618 result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
620 result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
621 result.z = range/MagickPI*asin(result.z)+bias;
// z keeps its own value when in range (the fallback used to be result.x, a
// copy-paste slip that replaced the z component with the x component)
622 result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.z;
623 result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.z;
626 result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
627 result.w = range/MagickPI*asin(result.w)+bias;
628 result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
629 result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
631 result *= (float4)QuantumRange;
// atan-based branch; defaults: slope 1, center 0.5, range 1, bias 0.5
636 float slope,range,center,bias;
637 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
638 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
639 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
640 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
641 result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
642 result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
645 case UndefinedFunction:
648 return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
649 ClampToQuantum(result.z), ClampToQuantum(result.w));
655 Improve brightness / contrast of the image
656 channel : define which channel is improved
657 function : the function called to enhance the brightness / contrast
658 number_parameters : number of parameters
659 parameters : the parameters
// Kernel: replace each pixel of `im`, in place, with ApplyFunction(pixel, ...).
// One work-item per pixel; the row stride is get_global_size(0), so the global
// width is taken to be the image width. `channel` is not read in the lines
// visible here.
661 __kernel void FunctionImage(__global CLPixelType *im,
662 const ChannelType channel, const MagickFunction function,
663 const unsigned int number_parameters, __constant float *parameters)
665 const int x = get_global_id(0);
666 const int y = get_global_id(1);
667 const int columns = get_global_size(0);
668 const int c = x + y * columns;
669 im[c] = ApplyFunction(im[c], function, number_parameters, parameters);
// Kernel: remap one pixel of `im`, in place, through `stretch_map`.
// For each of red, green, blue and opacity that is selected in `channel` and
// whose white and black points differ, the component is used as an index into
// stretch_map (via ScaleQuantumToMap) and replaced by the same component of the
// map entry. One work-item per pixel; the row stride is get_global_size(0).
// (The lines that load oValue and give red/green/blue/opacity their initial
// values are not visible here.)
676 __kernel void Stretch(__global CLPixelType * restrict im,
677 const ChannelType channel,
678 __global CLPixelType * restrict stretch_map,
679 const float4 white, const float4 black)
681 const int x = get_global_id(0);
682 const int y = get_global_id(1);
683 const int columns = get_global_size(0);
684 const int c = x + y * columns;
687 CLPixelType oValue, eValue;
688 CLQuantum red, green, blue, opacity;
693 if ((channel & RedChannel) != 0)
695 if (getRedF4(white) != getRedF4(black))
697 ePos = ScaleQuantumToMap(getRed(oValue));
698 eValue = stretch_map[ePos];
699 red = getRed(eValue);
703 if ((channel & GreenChannel) != 0)
705 if (getGreenF4(white) != getGreenF4(black))
707 ePos = ScaleQuantumToMap(getGreen(oValue));
708 eValue = stretch_map[ePos];
709 green = getGreen(eValue);
713 if ((channel & BlueChannel) != 0)
715 if (getBlueF4(white) != getBlueF4(black))
717 ePos = ScaleQuantumToMap(getBlue(oValue));
718 eValue = stretch_map[ePos];
719 blue = getBlue(eValue);
723 if ((channel & OpacityChannel) != 0)
725 if (getOpacityF4(white) != getOpacityF4(black))
727 ePos = ScaleQuantumToMap(getOpacity(oValue));
728 eValue = stretch_map[ePos];
729 opacity = getOpacity(eValue);
// components are stored in x, y, z, w order, i.e. blue, green, red, opacity
734 im[c]=(CLPixelType)(blue, green, red, opacity);
// Kernel: remap one pixel of `im`, in place, through `equalize_map`.
// Only the SyncChannels case is handled in the visible code: when the red
// white and black points differ, every component (red, green, blue, opacity)
// is looked up in the map and replaced by the RED component of the map entry,
// so all channels share one transfer curve.
// One work-item per pixel; the row stride is get_global_size(0).
743 __kernel void Equalize(__global CLPixelType * restrict im,
744 const ChannelType channel,
745 __global CLPixelType * restrict equalize_map,
746 const float4 white, const float4 black)
748 const int x = get_global_id(0);
749 const int y = get_global_id(1);
750 const int columns = get_global_size(0);
751 const int c = x + y * columns;
754 CLPixelType oValue, eValue;
755 CLQuantum red, green, blue, opacity;
760 if ((channel & SyncChannels) != 0)
762 if (getRedF4(white) != getRedF4(black))
764 ePos = ScaleQuantumToMap(getRed(oValue));
765 eValue = equalize_map[ePos];
766 red = getRed(eValue);
767 ePos = ScaleQuantumToMap(getGreen(oValue));
768 eValue = equalize_map[ePos];
769 green = getRed(eValue);
770 ePos = ScaleQuantumToMap(getBlue(oValue));
771 eValue = equalize_map[ePos];
772 blue = getRed(eValue);
773 ePos = ScaleQuantumToMap(getOpacity(oValue));
774 eValue = equalize_map[ePos];
775 opacity = getRed(eValue);
// components are stored in x, y, z, w order, i.e. blue, green, red, opacity
778 im[c]=(CLPixelType)(blue, green, red, opacity);
783 // for equalizing, we always need all channels?
784 // otherwise something more
// Kernel: accumulate an intensity histogram of `im`.
// When SyncChannels is set, each work-item computes the intensity of its pixel,
// converts it to a map index, and atomically increments the third uint
// (offset +2) of histogram[index]; with the x=blue, y=green, z=red layout that
// is the red slot. One work-item per pixel; the row stride is get_global_size(0).
// (`method` is used below; its parameter declaration is not among the visible lines.)
792 __kernel void Histogram(__global CLPixelType * restrict im,
793 const ChannelType channel,
795 const int colorspace,
796 __global uint4 * restrict histogram)
798 const int x = get_global_id(0);
799 const int y = get_global_id(1);
800 const int columns = get_global_size(0);
801 const int c = x + y * columns;
802 if ((channel & SyncChannels) != 0)
804 float intensity = GetPixelIntensity(method, colorspace,im[c]);
805 uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
806 atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
810 // for equalizing, we always need all channels?
811 // otherwise something more
818 Reduce image noise and reduce detail levels by row
819 im: input pixels filtered_in filtered_im: output pixels
820 filter : convolve kernel width: convolve kernel size
821 channel : define which channel is blurred
822 is_RGBA_BGRA : define the input is RGBA or BGRA
// Kernel: horizontal pass of a separable blur.
//   im           : input pixels, imageColumns wide
//   filtered_im  : output, one float4 per pixel (each component already clamped
//                  with ClampToQuantum)
//   filter/width : 1-D filter coefficients and their count
//   temp         : local buffer; the load loop writes indices 0 .. wsize+width-1,
//                  so it must hold at least get_local_size(0)+width pixels
// Each work-group copies its span of the row plus the filter border into `temp`
// (columns clamped to the image), synchronises, and each work-item whose x is
// inside the image accumulates filter[i]*temp[i+local_id]. The inner loop is
// unrolled in chunks of UFACTOR (default 8) with a scalar tail loop.
// `channel` and `imageRows` are not read in the lines visible here.
824 __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
825 const ChannelType channel, __constant float *filter,
826 const unsigned int width,
827 const unsigned int imageColumns, const unsigned int imageRows,
828 __local CLPixelType *temp)
830 const int x = get_global_id(0);
831 const int y = get_global_id(1);
833 const int columns = imageColumns;
835 const unsigned int radius = (width-1)/2;
836 const int wsize = get_local_size(0);
837 const unsigned int loadSize = wsize+width;
839 //load chunk only for now
840 //event_t e = async_work_group_copy(temp+radius, im+x+y*columns, wsize, 0);
841 //wait_group_events(1,&e);
843 //parallel load and clamp
// NOTE(review): this first load loop looks like an earlier debug variant
// (`count` has no visible declaration and it prints every load); presumably it
// is disabled in the full file and the loop further down is the live one - confirm.
846 for (int i=0; i < loadSize; i=i+wsize)
848 int currentX = x + wsize*(count++);
850 int localId = get_local_id(0);
852 if ((localId+i) > loadSize)
855 temp[localId+i] = im[y*columns+ClampToCanvas(currentX-radius, columns)];
857 if (y==0 && get_group_id(0) == 0)
859 printf("(%d %d) temp %d load %d currentX %d\n", x, y, localId+i, ClampToCanvas(currentX-radius, columns), currentX);
// first global x / y coordinate covered by this work-group
865 const int groupX=get_local_size(0)*get_group_id(0);
866 const int groupY=get_local_size(1)*get_group_id(1);
868 //parallel load and clamp
869 for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
871 //int cx = ClampToCanvas(groupX+i, columns);
872 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
// debug print, permanently disabled by the leading 0
874 if (0 && y==0 && get_group_id(1) == 0)
876 printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
881 barrier(CLK_LOCAL_MEM_FENCE);
883 // only do the work if this is not a patched item
884 if (get_global_id(0) < columns)
887 float4 result = (float4) 0;
891 \n #ifndef UFACTOR \n
892 \n #define UFACTOR 8 \n
895 for ( ; i+UFACTOR < width; )
897 \n #pragma unroll UFACTOR\n
898 for (int j=0; j < UFACTOR; j++, i++)
900 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
// tail: remaining taps that did not fill a whole UFACTOR chunk
904 for ( ; i < width; i++)
906 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
909 result.x = ClampToQuantum(result.x);
910 result.y = ClampToQuantum(result.y);
911 result.z = ClampToQuantum(result.z);
912 result.w = ClampToQuantum(result.w);
914 // write back to global
915 filtered_im[y*columns+x] = result;
922 Reduce image noise and reduce detail levels by row
923 im: input pixels filtered_in filtered_im: output pixels
924 filter : convolve kernel width: convolve kernel size
925 channel : define which channel is blurred
926 is_RGBA_BGRA : define the input is RGBA or BGRA
// Kernel: horizontal blur pass over one section of the image.
// Same computation as BlurRow, except that the input pointer is first advanced
// by imageColumns * (offsetRows - radius*section) rows, so y is relative to the
// section start (shifted up by `radius` rows when section is 1).
//   temp : local buffer; the load loop writes indices 0 .. wsize+width-1
// `channel` and `imageRows` are not read in the lines visible here.
928 __kernel void BlurRowSection(__global CLPixelType *im, __global float4 *filtered_im,
929 const ChannelType channel, __constant float *filter,
930 const unsigned int width,
931 const unsigned int imageColumns, const unsigned int imageRows,
932 __local CLPixelType *temp,
933 const unsigned int offsetRows, const unsigned int section)
935 const int x = get_global_id(0);
936 const int y = get_global_id(1);
938 const int columns = imageColumns;
940 const unsigned int radius = (width-1)/2;
941 const int wsize = get_local_size(0);
942 const unsigned int loadSize = wsize+width;
// first global x / y coordinate covered by this work-group
945 const int groupX=get_local_size(0)*get_group_id(0);
946 const int groupY=get_local_size(1)*get_group_id(1);
948 //offset the input data, assuming section is 0, 1
949 im += imageColumns * (offsetRows - radius * section);
951 //parallel load and clamp
952 for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
954 //int cx = ClampToCanvas(groupX+i, columns);
955 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
// debug print, permanently disabled by the leading 0
957 if (0 && y==0 && get_group_id(1) == 0)
959 printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
964 barrier(CLK_LOCAL_MEM_FENCE);
966 // only do the work if this is not a patched item
967 if (get_global_id(0) < columns)
970 float4 result = (float4) 0;
974 \n #ifndef UFACTOR \n
975 \n #define UFACTOR 8 \n
978 for ( ; i+UFACTOR < width; )
980 \n #pragma unroll UFACTOR\n
981 for (int j=0; j < UFACTOR; j++, i++)
983 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
// tail: remaining taps that did not fill a whole UFACTOR chunk
987 for ( ; i < width; i++)
989 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
992 result.x = ClampToQuantum(result.x);
993 result.y = ClampToQuantum(result.y);
994 result.z = ClampToQuantum(result.z);
995 result.w = ClampToQuantum(result.w);
997 // write back to global
998 filtered_im[y*columns+x] = result;
1006 Reduce image noise and reduce detail levels by column
1007 im: input pixels filtered_in filtered_im: output pixels
1008 filter : convolve kernel width: convolve kernel size
1009 channel : define which channel is blurred
1010 is_RGBA_BGRA : define the input is RGBA or BGRA
// Kernel: vertical pass of a separable blur.
//   blurRowData  : float4 output of the row pass, imageColumns x imageRows
//   filtered_im  : final pixels
//   filter/width : 1-D filter coefficients and their count
//   temp         : local buffer; the load loop writes indices 0 .. wsize+width-1,
//                  so it must hold at least get_local_size(1)+width float4 values
// Each work-group copies its span of one column plus the filter border into
// `temp` (rows clamped to the image), synchronises, and each work-item whose y
// is inside the image accumulates filter[i]*temp[i+local_id(1)], clamps, and
// stores the pixel. `channel` is not read in the lines visible here.
1012 __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1013 const ChannelType channel, __constant float *filter,
1014 const unsigned int width,
1015 const unsigned int imageColumns, const unsigned int imageRows,
1016 __local float4 *temp)
1018 const int x = get_global_id(0);
1019 const int y = get_global_id(1);
1021 //const int columns = get_global_size(0);
1022 //const int rows = get_global_size(1);
1023 const int columns = imageColumns;
1024 const int rows = imageRows;
1026 unsigned int radius = (width-1)/2;
1027 const int wsize = get_local_size(1);
1028 const unsigned int loadSize = wsize+width;
1031 const int groupX=get_local_size(0)*get_group_id(0);
1032 const int groupY=get_local_size(1)*get_group_id(1);
1033 //notice that get_local_size(0) is 1, so
1034 //groupX=get_group_id(0);
1036 //parallel load and clamp
1037 for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
1039 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
1043 barrier(CLK_LOCAL_MEM_FENCE);
1045 // only do the work if this is not a patched item
1046 if (get_global_id(1) < rows)
1049 float4 result = (float4) 0;
1053 \n #ifndef UFACTOR \n
1054 \n #define UFACTOR 8 \n
1057 for ( ; i+UFACTOR < width; )
1059 \n #pragma unroll UFACTOR \n
1060 for (int j=0; j < UFACTOR; j++, i++)
1062 result+=filter[i]*temp[i+get_local_id(1)];
// tail: remaining taps that did not fill a whole UFACTOR chunk
1066 for ( ; i < width; i++)
1068 result+=filter[i]*temp[i+get_local_id(1)];
1071 result.x = ClampToQuantum(result.x);
1072 result.y = ClampToQuantum(result.y);
1073 result.z = ClampToQuantum(result.z);
1074 result.w = ClampToQuantum(result.w);
1076 // write back to global
1077 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
1086 Reduce image noise and reduce detail levels by column
1087 im: input pixels filtered_in filtered_im: output pixels
1088 filter : convolve kernel width: convolve kernel size
1089 channel : define which channel is blurred
1090 is_RGBA_BGRA : define the input is RGBA or BGRA
// Kernel: vertical blur pass over one section of the image.
// Same computation as BlurColumn, with two differences:
//   - blurRowData is advanced by imageColumns*radius*section before reading, and
//     row indices are clamped with ClampToCanvasWithHalo so the section may read
//     up to `radius` rows beyond its own range on one side;
//   - filtered_im is advanced by imageColumns*offsetRows before writing, so y is
//     relative to the section start.
//   temp : local buffer; the load loop writes indices 0 .. wsize+width-1
// `channel` is not read in the lines visible here.
1092 __kernel void BlurColumnSection(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1093 const ChannelType channel, __constant float *filter,
1094 const unsigned int width,
1095 const unsigned int imageColumns, const unsigned int imageRows,
1096 __local float4 *temp,
1097 const unsigned int offsetRows, const unsigned int section)
1099 const int x = get_global_id(0);
1100 const int y = get_global_id(1);
1102 //const int columns = get_global_size(0);
1103 //const int rows = get_global_size(1);
1104 const int columns = imageColumns;
1105 const int rows = imageRows;
1107 unsigned int radius = (width-1)/2;
1108 const int wsize = get_local_size(1);
1109 const unsigned int loadSize = wsize+width;
1112 const int groupX=get_local_size(0)*get_group_id(0);
1113 const int groupY=get_local_size(1)*get_group_id(1);
1114 //notice that get_local_size(0) is 1, so
1115 //groupX=get_group_id(0);
1117 // offset the input data
1118 blurRowData += imageColumns * radius * section;
1120 //parallel load and clamp
1121 for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
1123 int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
1124 temp[i] = *(blurRowData+pos);
1128 barrier(CLK_LOCAL_MEM_FENCE);
1130 // only do the work if this is not a patched item
1131 if (get_global_id(1) < rows)
1134 float4 result = (float4) 0;
1138 \n #ifndef UFACTOR \n
1139 \n #define UFACTOR 8 \n
1142 for ( ; i+UFACTOR < width; )
1144 \n #pragma unroll UFACTOR \n
1145 for (int j=0; j < UFACTOR; j++, i++)
1147 result+=filter[i]*temp[i+get_local_id(1)];
// tail: remaining taps that did not fill a whole UFACTOR chunk
1150 for ( ; i < width; i++)
1152 result+=filter[i]*temp[i+get_local_id(1)];
1155 result.x = ClampToQuantum(result.x);
1156 result.y = ClampToQuantum(result.y);
1157 result.z = ClampToQuantum(result.z);
1158 result.w = ClampToQuantum(result.w);
1160 // offset the output data
1161 filtered_im += imageColumns * offsetRows;
1163 // write back to global
1164 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
// Kernel: vertical blur pass fused with the unsharp-mask step.
//   inputImage   : original pixels
//   blurRowData  : float4 output of the row blur pass
//   filtered_im  : sharpened result
//   cachedData   : local buffer for get_local_size(1) + 2*radius float4 values
//   cachedFilter : local buffer for `width` floats
//   gain, threshold : unsharp-mask strength and the change threshold
//                     (threshold is scaled by QuantumRange)
// The work-group caches its column window in cachedData: a strided async copy
// when the window lies fully inside the image, and per-item loads with clamped
// row indices in the loop below (presumably the else branch - its line is not
// visible). The filter is copied to cachedFilter. Each in-range work-item then
// blurs its pixel, takes diff = input - blurred, and writes the input unchanged
// where |2*diff| < QuantumRange*threshold, else input + diff*gain.
// `channel` is not read in the lines visible here.
1172 __kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage,
1173 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1174 const unsigned int imageColumns, const unsigned int imageRows,
1175 __local float4* cachedData, __local float* cachedFilter,
1176 const ChannelType channel, const __global float *filter, const unsigned int width,
1177 const float gain, const float threshold)
1179 const unsigned int radius = (width-1)/2;
1181 // cache the pixel shared by the workgroup
1182 const int groupX = get_group_id(0);
1183 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
1184 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
1186 if (groupStartY >= 0
1187 && groupStopY < imageRows) {
// stride of imageColumns walks down one column of the row-major buffer
1188 event_t e = async_work_group_strided_copy(cachedData
1189 ,blurRowData+groupStartY*imageColumns+groupX
1190 ,groupStopY-groupStartY,imageColumns,0);
1191 wait_group_events(1,&e);
1194 for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
1195 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
1197 barrier(CLK_LOCAL_MEM_FENCE);
1199 // cache the filter as well
1200 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
1201 wait_group_events(1,&e);
1203 // only do the work if this is not a patched item
1204 //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
1205 const int cy = get_global_id(1);
1207 if (cy < imageRows) {
1208 float4 blurredPixel = (float4) 0.0f;
1212 \n #ifndef UFACTOR \n
1213 \n #define UFACTOR 8 \n
1216 for ( ; i+UFACTOR < width; )
1218 \n #pragma unroll UFACTOR \n
1219 for (int j=0; j < UFACTOR; j++, i++)
1221 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
// tail: remaining taps that did not fill a whole UFACTOR chunk
1225 for ( ; i < width; i++)
1227 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
1230 blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
1231 ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
1233 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
1234 float4 outputPixel = inputImagePixel - blurredPixel;
1236 float quantumThreshold = QuantumRange*threshold;
// select() takes its second argument where the mask is set, i.e. the unmodified
// input wherever the difference is below the threshold
1238 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
1239 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
1242 filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
1243 ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
// Kernel: vertical blur + unsharp mask over one section of the image.
// Same computation as UnsharpMaskBlurColumn, with these differences:
//   - blurRowData is advanced by imageColumns*radius*section before reading, and
//     the per-item load loop clamps rows with ClampToCanvasWithHalo;
//   - inputImage and filtered_im are advanced by imageColumns*offsetRows, so cy
//     is relative to the section start.
//   cachedData   : local buffer for get_local_size(1) + 2*radius float4 values
//   cachedFilter : local buffer for `width` floats
// `channel` is not read in the lines visible here.
1248 __kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage,
1249 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1250 const unsigned int imageColumns, const unsigned int imageRows,
1251 __local float4* cachedData, __local float* cachedFilter,
1252 const ChannelType channel, const __global float *filter, const unsigned int width,
1253 const float gain, const float threshold,
1254 const unsigned int offsetRows, const unsigned int section)
1256 const unsigned int radius = (width-1)/2;
1258 // cache the pixel shared by the workgroup
1259 const int groupX = get_group_id(0);
1260 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
1261 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
1263 // offset the input data
1264 blurRowData += imageColumns * radius * section;
1266 if (groupStartY >= 0
1267 && groupStopY < imageRows) {
// stride of imageColumns walks down one column of the row-major buffer
1268 event_t e = async_work_group_strided_copy(cachedData
1269 ,blurRowData+groupStartY*imageColumns+groupX
1270 ,groupStopY-groupStartY,imageColumns,0);
1271 wait_group_events(1,&e);
1274 for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
1275 int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX;
1276 cachedData[i] = *(blurRowData + pos);
1278 barrier(CLK_LOCAL_MEM_FENCE);
1280 // cache the filter as well
1281 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
1282 wait_group_events(1,&e);
1284 // only do the work if this is not a patched item
1285 //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
1286 const int cy = get_global_id(1);
1288 if (cy < imageRows) {
1289 float4 blurredPixel = (float4) 0.0f;
1293 \n #ifndef UFACTOR \n
1294 \n #define UFACTOR 8 \n
1297 for ( ; i+UFACTOR < width; )
1299 \n #pragma unroll UFACTOR \n
1300 for (int j=0; j < UFACTOR; j++, i++)
1302 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
// tail: remaining taps that did not fill a whole UFACTOR chunk
1306 for ( ; i < width; i++)
1308 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
1311 blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
1312 ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
1314 // offset the output data
1315 inputImage += imageColumns * offsetRows;
1316 filtered_im += imageColumns * offsetRows;
1318 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
1319 float4 outputPixel = inputImagePixel - blurredPixel;
1321 float quantumThreshold = QuantumRange*threshold;
// select() takes its second argument where the mask is set, i.e. the unmodified
// input wherever the difference is below the threshold
1323 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
1324 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
1327 filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
1328 ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
// HullPass1: one work-item per pixel (x, y). Compares the pixel with its
// neighbour at (x+offset.x, y+offset.y) and moves each of the four components
// one ScaleCharToQuantum(1) step towards the neighbour when they differ by at
// least ScaleCharToQuantum(2); the result goes to outputImage at the same
// position.
//   imageWidth, imageHeight : image dimensions used for indexing and clamping.
//   offset                  : displacement of the neighbour to compare with.
//   polarity, matte         : NOTE(review): the statements that consult these
//                             are not shown here; presumably polarity picks
//                             the raising or lowering loop and matte guards
//                             the write of v.w - confirm.
1339 __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1340 , const unsigned int imageWidth, const unsigned int imageHeight
1341 , const int2 offset, const int polarity, const int matte) {
1343 int x = get_global_id(0);
1344 int y = get_global_id(1);
1346 CLPixelType v = inputImage[y*imageWidth+x];
1349 neighbor.y = y + offset.y;
1350 neighbor.x = x + offset.x;
// The neighbour counts as inside the image only when clamping leaves both of
// its coordinates unchanged.
1352 int2 clampedNeighbor;
1353 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1354 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
// r is the neighbour pixel when it is inside the image (the alternative arm
// of this conditional is not shown here).
1356 CLPixelType r = (clampedNeighbor.x == neighbor.x
1357 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
// Raising loop: bump sv[i] by one step when sr[i] is at least two steps above.
1373 \n #pragma unroll 4\n
1374 for (unsigned int i = 0; i < 4; i++) {
1375 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
// Lowering loop: drop sv[i] by one step when sr[i] is at least two steps below.
1379 \n #pragma unroll 4\n
1380 for (unsigned int i = 0; i < 4; i++) {
1381 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
// Copy the adjusted components back into the pixel and store it.
1386 v.x = (CLQuantum)sv[0];
1387 v.y = (CLQuantum)sv[1];
1388 v.z = (CLQuantum)sv[2];
1391 v.w = (CLQuantum)sv[3];
1393 outputImage[y*imageWidth+x] = v;
// HullPass2: one work-item per pixel (x, y). Reads two neighbours, r at
// (x+offset, y+offset) and s at (x-offset, y-offset), and moves each of the
// four components of the pixel by one ScaleCharToQuantum(1) step only when
// both neighbour conditions hold; the result goes to outputImage at the same
// position.
//   imageWidth, imageHeight : image dimensions used for indexing and clamping.
//   offset                  : displacement of the two neighbours.
//   polarity, matte         : NOTE(review): the statements that consult these
//                             are not shown here; presumably polarity picks
//                             the raising or lowering loop and matte guards
//                             the write of v.w - confirm.
1404 __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1405 , const unsigned int imageWidth, const unsigned int imageHeight
1406 , const int2 offset, const int polarity, const int matte) {
1408 int x = get_global_id(0);
1409 int y = get_global_id(1);
1411 CLPixelType v = inputImage[y*imageWidth+x];
1413 int2 neighbor, clampedNeighbor;
// First neighbour: displaced by +offset.
1415 neighbor.y = y + offset.y;
1416 neighbor.x = x + offset.x;
1417 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1418 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
// r is that pixel when clamping left its coordinates unchanged (the
// alternative arm of the conditional is not shown here).
1420 CLPixelType r = (clampedNeighbor.x == neighbor.x
1421 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
// Second neighbour: displaced by -offset.
1425 neighbor.y = y - offset.y;
1426 neighbor.x = x - offset.x;
1427 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1428 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1430 CLPixelType s = (clampedNeighbor.x == neighbor.x
1431 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
// Raising loop. The active statement keeps sv[i] when ss[i] is less than two
// steps above it OR sr[i] is not above it; the OR is written as a sum of the
// two int-cast comparisons tested against zero. Otherwise sv[i] gains a step.
1454 \n #pragma unroll 4\n
1455 for (unsigned int i = 0; i < 4; i++) {
1456 //sv[i] = (ss[i] >= (sv[i]+ScaleCharToQuantum(2)) && sr[i] > sv[i] ) ? (sv[i]+ScaleCharToQuantum(1)):sv[i];
1458 //sv[i] =(!( (int)(ss[i] >= (sv[i]+ScaleCharToQuantum(2))) && (int) (sr[i] > sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1459 //sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) || (int) ( sr[i] <= sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1460 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
// Lowering loop, mirror image of the one above: sv[i] loses a step only when
// ss[i] is at least two steps below it and sr[i] is below it.
1464 \n #pragma unroll 4\n
1465 for (unsigned int i = 0; i < 4; i++) {
1466 //sv[i] = (ss[i] <= (sv[i]-ScaleCharToQuantum(2)) && sr[i] < sv[i] ) ? (sv[i]-ScaleCharToQuantum(1)):sv[i];
1468 //sv[i] = ( (int)(ss[i] <= (sv[i]-ScaleCharToQuantum(2)) ) + (int)( sr[i] < sv[i] ) ==0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1469 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
// Copy the adjusted components back into the pixel and store it.
1473 v.x = (CLQuantum)sv[0];
1474 v.y = (CLQuantum)sv[1];
1475 v.z = (CLQuantum)sv[2];
1478 v.w = (CLQuantum)sv[3];
1480 outputImage[y*imageWidth+x] = v;
// RotationalBlur: one work-item per output pixel. Samples the input image at
// positions obtained by rotating the pixel about blurCenter with the
// precomputed cos_theta/sin_theta tables, combines the samples and writes the
// result to filtered_im. The image size is taken from the global work size.
//   im, filtered_im        : source and destination pixel buffers.
//   channel, matte         : when the opacity bit of channel is clear or
//                            matte is 0 the samples are summed directly;
//                            the other loop weights colour by alpha.
//   blurCenter             : rotation centre.
//   cos_theta, sin_theta   : tables with cossin_theta_size entries each.
//   NOTE(review): `bias`, `result`, `gamma` and the updates of `normalize`
//   and `gamma` are used below but their declarations/updates are not shown
//   here - confirm against the full kernel.
1489 __kernel void RotationalBlur(const __global CLPixelType *im, __global CLPixelType *filtered_im,
1491 const unsigned int channel, const unsigned int matte,
1492 const float2 blurCenter,
1493 __constant float *cos_theta, __constant float *sin_theta,
1494 const unsigned int cossin_theta_size)
1496 const int x = get_global_id(0);
1497 const int y = get_global_id(1);
1498 const int columns = get_global_size(0);
1499 const int rows = get_global_size(1);
// step is the stride through the angle tables; radius is the distance of this
// pixel from the blur centre.
1500 unsigned int step = 1;
1501 float center_x = (float) x - blurCenter.x;
1502 float center_y = (float) y - blurCenter.y;
1503 float radius = hypot(center_x, center_y);
1505 //float blur_radius = hypot((float) columns/2.0f, (float) rows/2.0f);
1506 float blur_radius = hypot(blurCenter.x, blurCenter.y);
// Pixels closer to the centre get a larger stride (fewer samples).
1508 if (radius > MagickEpsilon)
1510 step = (unsigned int) (blur_radius / radius);
// Keep the stride below the table size.
1513 if (step >= cossin_theta_size)
1514 step = cossin_theta_size-1;
// The accumulator starts from bias.
1518 result.x = (float)bias.x;
1519 result.y = (float)bias.y;
1520 result.z = (float)bias.z;
1521 result.w = (float)bias.w;
1522 float normalize = 0.0f;
// Plain path: sum the rotated samples (rounded via +0.5f, clamped to the
// canvas) and scale by the reciprocal of normalize.
1524 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1525 for (unsigned int i=0; i<cossin_theta_size; i+=step)
1527 result += convert_float4(im[
1528 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
1529 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
1532 normalize = PerceptibleReciprocal(normalize);
1533 result = result * normalize;
// Alpha-weighted path: colour components are weighted by
// alpha = QuantumScale*(QuantumRange-p.w).
1537 for (unsigned int i=0; i<cossin_theta_size; i+=step)
1539 float4 p = convert_float4(im[
1540 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
1541 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
1543 float alpha = (float)(QuantumScale*(QuantumRange-p.w));
1544 result.x += alpha * p.x;
1545 result.y += alpha * p.y;
1546 result.z += alpha * p.z;
// Colour is scaled by the reciprocal of gamma, the w component by the
// reciprocal of normalize.
1551 gamma = PerceptibleReciprocal(gamma);
1552 normalize = PerceptibleReciprocal(normalize);
1553 result.x = gamma*result.x;
1554 result.y = gamma*result.y;
1555 result.z = gamma*result.z;
1556 result.w = normalize*result.w;
1558 filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
1559 ClampToQuantum(result.z), ClampToQuantum(result.w));
// ConvertRGBToHSB: converts the red/green/blue of `pixel` to a float3 holding
// hue (.x), saturation (.y) and brightness (.z). All three start at 0.
1565 inline float3 ConvertRGBToHSB(CLPixelType pixel) {
1566 float3 HueSaturationBrightness;
1567 HueSaturationBrightness.x = 0.0f; // Hue
1568 HueSaturationBrightness.y = 0.0f; // Saturation
1569 HueSaturationBrightness.z = 0.0f; // Brightness
1571 float r=(float) getRed(pixel);
1572 float g=(float) getGreen(pixel);
1573 float b=(float) getBlue(pixel);
1575 float tmin=min(min(r,g),b);
1576 float tmax=max(max(r,g),b);
// Saturation is the channel spread relative to the largest channel;
// brightness is the largest channel scaled by QuantumScale.
// NOTE(review): the division by tmax is presumably guarded by a check that is
// not shown here - confirm.
1579 float delta=tmax-tmin;
1580 HueSaturationBrightness.y=delta/tmax;
1581 HueSaturationBrightness.z=QuantumScale*tmax;
// Hue: a sector base of 0, 2 or 4 chosen by which channel is the maximum,
// plus the difference of the other two channels over delta, divided by 6.
1583 if (delta != 0.0f) {
1584 HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
1585 HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
1586 HueSaturationBrightness.x/=6.0f;
// NOTE(review): as written this adds 1 when hue is non-negative and 0 when it
// is negative. ConvertHSBToRGB in this file uses hue-floor(hue), which would
// make the offset harmless there - confirm the intent.
1587 HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
1590 return HueSaturationBrightness;
// ConvertHSBToRGB: builds a pixel from a float3 of hue (.x), saturation (.y)
// and brightness (.z).
// NOTE(review): `rgb`, `f` and `ih` are used below but their definitions, the
// final arm of each conditional chain and the return are not shown here;
// presumably ih is the integer part of h and f its fraction - confirm.
1593 inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
1595 float hue = HueSaturationBrightness.x;
1596 float brightness = HueSaturationBrightness.z;
1597 float saturation = HueSaturationBrightness.y;
// Zero saturation: all three channels get the same clamped brightness.
1601 if (saturation == 0.0f) {
1602 setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
1603 setGreen(&rgb,getRed(rgb));
1604 setBlue(&rgb,getRed(rgb));
// Otherwise take the fractional part of hue and scale it to [0, 6).
1608 float h=6.0f*(hue-floor(hue));
1610 float p=brightness*(1.0f-saturation);
1611 float q=brightness*(1.0f-saturation*f);
1612 float t=brightness*(1.0f-(saturation*(1.0f-f)));
// Scale the candidate channel values to the quantum range once, up front.
1614 float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
1615 float clamped_t = ClampToQuantum(QuantumRange*t);
1616 float clamped_p = ClampToQuantum(QuantumRange*p);
1617 float clamped_q = ClampToQuantum(QuantumRange*q);
// Each channel picks one of the four candidates according to the sector ih.
1619 setRed(&rgb, (ih == 1)?clamped_q:
1620 (ih == 2 || ih == 3)?clamped_p:
1621 (ih == 4)?clamped_t:
1624 setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
1625 (ih == 3)?clamped_q:
1626 (ih == 4 || ih == 5)?clamped_p:
1629 setBlue(&rgb, (ih == 2)?clamped_t:
1630 (ih == 3 || ih == 4)?clampedBrightness:
1631 (ih == 5)?clamped_q:
// Contrast: in-place contrast adjustment, one work-item per pixel. The pixel
// is converted to hue/saturation/brightness, its brightness is pushed along a
// sine curve, and the result is converted back; the w component is preserved.
//   im      : pixel buffer, read and written at x + y*get_global_size(0).
//   sharpen : non-zero applies the adjustment with sign +1, zero with sign -1.
1637 __kernel void Contrast(__global CLPixelType *im, const unsigned int sharpen)
1640 const int sign = sharpen!=0?1:-1;
1641 const int x = get_global_id(0);
1642 const int y = get_global_id(1);
1643 const int columns = get_global_size(0);
1644 const int c = x + y * columns;
1646 CLPixelType pixel = im[c];
1647 float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
1648 float brightness = HueSaturationBrightness.z;
// Move brightness half of the way (times sign) towards the curve value
// 0.5*(sinpi(brightness-0.5)+1), then clamp it to [0, 1].
1649 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1650 brightness = clamp(brightness,0.0f,1.0f);
1651 HueSaturationBrightness.z = brightness;
1653 CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
1654 filteredPixel.w = pixel.w;
1655 im[c] = filteredPixel;
// ConvertRGBToHSL: converts quantum red/green/blue to hue, saturation and
// lightness, written through the three output pointers. Every channel is
// scaled by QuantumScale before use.
// NOTE(review): `tmax`, `tmin` and `c` are used below but their declarations
// are not shown here; c is presumably tmax-tmin - confirm.
1663 inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
1664 float *hue, float *saturation, float *lightness)
1672 Convert RGB to HSL colorspace.
1674 tmax=max(QuantumScale*red,max(QuantumScale*green, QuantumScale*blue));
1675 tmin=min(QuantumScale*red,min(QuantumScale*green, QuantumScale*blue));
// Lightness is the midpoint of the largest and smallest channel.
1679 *lightness=(tmax+tmin)/2.0;
// Hue depends on which channel is the maximum: red gives (g-b)/c, green gives
// 2+(b-r)/c and the remaining case gives 4+(r-g)/c.
1687 if (tmax == (QuantumScale*red))
1689 *hue=(QuantumScale*green-QuantumScale*blue)/c;
1690 if ((QuantumScale*green) < (QuantumScale*blue))
1694 if (tmax == (QuantumScale*green))
1695 *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
1697 *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
// Saturation uses a different denominator on each side of lightness 0.5.
1700 if (*lightness <= 0.5)
1701 *saturation=c/(2.0*(*lightness));
1703 *saturation=c/(2.0-2.0*(*lightness));
// ConvertHSLToRGB: converts hue, saturation and lightness to quantum
// red/green/blue, written through the three output pointers after scaling by
// QuantumRange and clamping.
// NOTE(review): `c`, `h`, `x`, `tmin`, `r`, `g`, `b` are used below but their
// declarations, the initial value of h and the switch cases are not shown
// here - confirm against the full function.
1706 inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
1707 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
1719 Convert HSL to RGB colorspace.
// c is computed from lightness and saturation, using a different factor on
// each side of lightness 0.5.
1722 if (lightness <= 0.5)
1723 c=2.0*lightness*saturation;
1725 c=(2.0-2.0*lightness)*saturation;
1726 tmin=lightness-0.5*c;
// Wrap h into [0, 360).
1727 h-=360.0*floor(h/360.0);
// x = c*(1-|h mod 2 - 1|); the switch then dispatches on the integer part of h.
1729 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
1730 switch ((int) floor(h))
1781 *red=ClampToQuantum(QuantumRange*r);
1782 *green=ClampToQuantum(QuantumRange*g);
1783 *blue=ClampToQuantum(QuantumRange*b);
// ModulateHSL: adjusts a colour in place through the HSL colorspace. The
// three percent_* arguments are percentages: 100 leaves saturation and
// lightness unchanged (factor 0.01*percent) and shifts hue by
// 0.5*(0.01*percent_hue-1), which is 0 at 100.
//   red, green, blue : in/out quantum channels.
// NOTE(review): the declarations of hue/saturation/lightness and any hue
// wrap-around after the shift are not shown here - confirm.
1786 inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
1787 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
1795 Increase or decrease color lightness, saturation, or hue.
1797 ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
1798 hue+=0.5*(0.01*percent_hue-1.0);
1803 saturation*=0.01*percent_saturation;
1804 lightness*=0.01*percent_lightness;
1805 ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
// Modulate: in-place brightness/hue/saturation adjustment, one work-item per
// pixel at index x + y*get_global_size(0). The colour channels go through
// ModulateHSL, with percent_brightness passed as the lightness percentage;
// the w component of the pixel is preserved.
//   colorspace : NOTE(review): no statement shown here reads it; presumably
//                it selects the modulation routine - confirm.
1808 __kernel void Modulate(__global CLPixelType *im,
1809 const float percent_brightness,
1810 const float percent_hue,
1811 const float percent_saturation,
1812 const int colorspace)
1815 const int x = get_global_id(0);
1816 const int y = get_global_id(1);
1817 const int columns = get_global_size(0);
1818 const int c = x + y * columns;
1820 CLPixelType pixel = im[c];
1828 green=getGreen(pixel);
1829 blue=getBlue(pixel);
1836 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
1837 &red, &green, &blue);
// Assemble the output pixel from the modulated channels and the original w.
1842 CLPixelType filteredPixel;
1844 setRed(&filteredPixel, red);
1845 setGreen(&filteredPixel, green);
1846 setBlue(&filteredPixel, blue);
1847 filteredPixel.w = pixel.w;
1849 im[c] = filteredPixel;
// Negate: in-place negation, one work-item per pixel at index
// x + y*get_global_size(0). Each colour channel whose bit is set in `channel`
// is replaced by QuantumRange minus its value; channels that are not selected
// and the w component keep their original value.
//   im      : pixel buffer, read and written in place.
//   channel : bit mask tested against RedChannel, GreenChannel, BlueChannel.
1854 __kernel void Negate(__global CLPixelType *im,
1855 const ChannelType channel)
1858 const int x = get_global_id(0);
1859 const int y = get_global_id(1);
1860 const int columns = get_global_size(0);
1861 const int c = x + y * columns;
1863 CLPixelType pixel = im[c];
1871 green=getGreen(pixel);
1872 blue=getBlue(pixel);
// Start from the source pixel: the setters below only run for selected
// channels, so without this an unselected channel would be stored back
// without ever having been assigned.
1874 CLPixelType filteredPixel = pixel;
1876 if ((channel & RedChannel) !=0)
1877 setRed(&filteredPixel, QuantumRange-red);
1878 if ((channel & GreenChannel) !=0)
1879 setGreen(&filteredPixel, QuantumRange-green);
1880 if ((channel & BlueChannel) !=0)
1881 setBlue(&filteredPixel, QuantumRange-blue);
1883 filteredPixel.w = pixel.w;
1885 im[c] = filteredPixel;
// Grayscale: in-place conversion to gray, one work-item per pixel at index
// x + y*get_global_size(0). An intensity is computed from red/green/blue
// according to `method`, clamped, and stored with setGray; the w component of
// the pixel is preserved.
//   method     : one of the *PixelIntensityMethod cases listed below.
//   colorspace : appears only in the gamma-conversion checks.
// NOTE(review): the gamma-conversion checks below (some of which refer to
// image->colorspace, a name this kernel does not receive) look like disabled
// code whose comment delimiters are not shown here - confirm before editing.
1890 __kernel void Grayscale(__global CLPixelType *im,
1891 const int method, const int colorspace)
1894 const int x = get_global_id(0);
1895 const int y = get_global_id(1);
1896 const int columns = get_global_size(0);
1897 const int c = x + y * columns;
1899 CLPixelType pixel = im[c];
1907 red=(float)getRed(pixel);
1908 green=(float)getGreen(pixel);
1909 blue=(float)getBlue(pixel);
1913 CLPixelType filteredPixel;
// Average: arithmetic mean of the three channels.
1917 case AveragePixelIntensityMethod:
1919 intensity=(red+green+blue)/3.0;
// Brightness: largest channel.
1922 case BrightnessPixelIntensityMethod:
1924 intensity=max(max(red,green),blue);
// Lightness: midpoint of the smallest and largest channel.
1927 case LightnessPixelIntensityMethod:
1929 intensity=(min(min(red,green),blue)+
1930 max(max(red,green),blue))/2.0;
// MS: sum of squares divided by 3*QuantumRange.
1933 case MSPixelIntensityMethod:
1935 intensity=(float) (((float) red*red+green*green+
1936 blue*blue)/(3.0*QuantumRange));
// Rec601 luma and luminance share the weights 0.298839/0.586811/0.114350.
1939 case Rec601LumaPixelIntensityMethod:
1942 if (colorspace == RGBColorspace)
1944 red=EncodePixelGamma(red);
1945 green=EncodePixelGamma(green);
1946 blue=EncodePixelGamma(blue);
1949 intensity=0.298839*red+0.586811*green+0.114350*blue;
1952 case Rec601LuminancePixelIntensityMethod:
1955 if (image->colorspace == sRGBColorspace)
1957 red=DecodePixelGamma(red);
1958 green=DecodePixelGamma(green);
1959 blue=DecodePixelGamma(blue);
1962 intensity=0.298839*red+0.586811*green+0.114350*blue;
// Rec709 luma and luminance share the weights 0.212656/0.715158/0.072186.
1965 case Rec709LumaPixelIntensityMethod:
1969 if (image->colorspace == RGBColorspace)
1971 red=EncodePixelGamma(red);
1972 green=EncodePixelGamma(green);
1973 blue=EncodePixelGamma(blue);
1976 intensity=0.212656*red+0.715158*green+0.072186*blue;
1979 case Rec709LuminancePixelIntensityMethod:
1982 if (image->colorspace == sRGBColorspace)
1984 red=DecodePixelGamma(red);
1985 green=DecodePixelGamma(green);
1986 blue=DecodePixelGamma(blue);
1989 intensity=0.212656*red+0.715158*green+0.072186*blue;
// RMS: square root of the sum of squares, divided by sqrt(3).
1992 case RMSPixelIntensityMethod:
1994 intensity=(float) (sqrt((float) red*red+green*green+
1995 blue*blue)/sqrt(3.0));
2001 setGray(&filteredPixel, ClampToQuantum(intensity));
2003 filteredPixel.w = pixel.w;
2005 im[c] = filteredPixel;
2010 // Based on Box from resize.c
// Box weighting function; applyResizeFilter in this file calls it for
// BoxWeightingFunction. NOTE(review): the body is not shown here - presumably
// it returns a constant weight; confirm.
2011 float BoxResizeFilter(const float x)
2018 // Based on CubicBC from resize.c
// Evaluates a piecewise cubic in x from seven coefficients supplied in
// resizeFilterCoefficients. The first return uses coefficients 0..2 with no
// linear term; the second uses coefficients 3..6. NOTE(review): the range
// tests that choose between the two returns are not shown here; by the
// description below they are presumably x < 1 and x < 2 - confirm.
2019 float CubicBC(const float x,const __global float* resizeFilterCoefficients)
2022 Cubic Filters using B,C determined values:
2023 Mitchell-Netravali B = 1/3 C = 1/3 "Balanced" cubic spline filter
2024 Catmull-Rom B = 0 C = 1/2 Interpolatory and exact on linears
2025 Spline B = 1 C = 0 B-Spline Gaussian approximation
2026 Hermite B = 0 C = 0 B-Spline interpolator
2028 See paper by Mitchell and Netravali, Reconstruction Filters in Computer
2029 Graphics Computer Graphics, Volume 22, Number 4, August 1988
2030 http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/
2033 Coefficents are determined from B,C values:
2034 P0 = ( 6 - 2*B )/6 = coeff[0]
2036 P2 = (-18 +12*B + 6*C )/6 = coeff[1]
2037 P3 = ( 12 - 9*B - 6*C )/6 = coeff[2]
2038 Q0 = ( 8*B +24*C )/6 = coeff[3]
2039 Q1 = ( -12*B -48*C )/6 = coeff[4]
2040 Q2 = ( 6*B +30*C )/6 = coeff[5]
2041 Q3 = ( - 1*B - 6*C )/6 = coeff[6]
2043 which are used to define the filter:
2045 P0 + P1*x + P2*x^2 + P3*x^3 0 <= x < 1
2046 Q0 + Q1*x + Q2*x^2 + Q3*x^3 1 <= x < 2
2048 which ensures function is continuous in value and derivative (slope).
// Horner form of coeff[0] + coeff[1]*x^2 + coeff[2]*x^3.
2051 return(resizeFilterCoefficients[0]+x*(x*
2052 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
// Horner form of coeff[3] + coeff[4]*x + coeff[5]*x^2 + coeff[6]*x^3.
2054 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2055 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
// Sinc weighting function: sin(pi*x)/(pi*x), computed with sinpi().
// NOTE(review): the handling of x == 0, where alpha is zero, is not shown
// here - confirm that the full function guards the division.
2061 float Sinc(const float x)
2065 const float alpha=(float) (MagickPI*x);
2066 return sinpi(x)/alpha;
// Triangle weighting function: 1-x for x below 1, otherwise 0. The
// expression has no special case for negative x.
2073 float Triangle(const float x)
2076 1st order (linear) B-Spline, bilinear interpolation, Tent 1D filter, or
2077 a Bartlett 2D Cone filter. Also used as a Bartlett Windowing function
2080 return ((x<1.0f)?(1.0f-x):0.0f);
// Hanning window: 0.5 + 0.5*cos(pi*x).
2086 float Hanning(const float x)
2089 Cosine window function:
2092 const float cosine=cos((MagickPI*x));
2093 return(0.5f+0.5f*cosine);
// Hamming window: 0.54 + 0.46*cos(pi*x).
2098 float Hamming(const float x)
2101 Offset cosine window function:
2102 .54 + .46 cos(pi x).
2104 const float cosine=cos((MagickPI*x));
2105 return(0.54f+0.46f*cosine);
// Blackman window, evaluated with a single cosine call.
2110 float Blackman(const float x)
2113 Blackman: 2nd order cosine windowing function:
2114 0.42 + 0.5 cos(pi x) + 0.08 cos(2pi x)
2116 Refactored by Chantal Racette and Nicolas Robidoux to one trig call and
// With c = cos(pi x) and cos(2pi x) = 2c^2-1, the formula above becomes
// 0.34 + 0.5c + 0.16c^2, which is what the return computes in Horner form.
2119 const float cosine=cos((MagickPI*x));
2120 return(0.34f+cosine*(0.5f+cosine*0.16f));
// Identifiers of the resize weighting functions, numbered consecutively from
// 0. applyResizeFilter in this file has cases for Sinc, SincFast, CubicBC,
// Box, Triangle, Hanning, Hamming and Blackman.
2127 BoxWeightingFunction = 0,
2128 TriangleWeightingFunction,
2129 CubicBCWeightingFunction,
2130 HanningWeightingFunction,
2131 HammingWeightingFunction,
2132 BlackmanWeightingFunction,
2133 GaussianWeightingFunction,
2134 QuadraticWeightingFunction,
2135 JincWeightingFunction,
2136 SincWeightingFunction,
2137 SincFastWeightingFunction,
2138 KaiserWeightingFunction,
2139 WelshWeightingFunction,
2140 BohmanWeightingFunction,
2141 LagrangeWeightingFunction,
2142 CosineWeightingFunction,
2143 } ResizeWeightingFunctionType;
// applyResizeFilter: evaluates the weighting function named by filterType at
// x. filterCoefficients is forwarded only to CubicBC.
// NOTE(review): the return statements of several cases and the default case
// are not shown here; each presumably calls the function of the same name -
// confirm.
2147 inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
2151 /* Call Sinc even for SincFast to get better precision on GPU
2152 and to avoid thread divergence. Sinc is pretty fast on GPU anyway...*/
2153 case SincWeightingFunction:
2154 case SincFastWeightingFunction:
2156 case CubicBCWeightingFunction:
2157 return CubicBC(x,filterCoefficients);
2158 case BoxWeightingFunction:
2159 return BoxResizeFilter(x);
2160 case TriangleWeightingFunction:
2162 case HanningWeightingFunction:
2164 case HammingWeightingFunction:
2166 case BlackmanWeightingFunction:
// getResizeFilterWeight: weight of a sample at signed distance x, computed as
// window(|x/blur| * scale) * filter(|x/blur|).
//   resizeFilterCubicCoefficients : forwarded to applyResizeFilter.
//   resizeFilterType              : weighting function of the filter.
//   resizeWindowType              : weighting function of the window.
//   resizeFilterScale             : scale applied to the window argument.
//   resizeWindowSupport           : window support; see the test below.
//   resizeFilterBlur              : divisor applied to x.
// NOTE(review): the declaration of `scale`, the value it takes when the test
// below is true, and the return are not shown here - confirm.
2177 inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
2178 , const ResizeWeightingFunctionType resizeWindowType
2179 , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
2182 float xBlur = fabs(x/resizeFilterBlur);
// A near-zero window support or a Box window takes the first branch.
2183 if (resizeWindowSupport < MagickEpsilon
2184 || resizeWindowType == BoxWeightingFunction)
// Otherwise the window function is evaluated at the scaled distance.
2190 scale = resizeFilterScale;
2191 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
2193 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2200 const char* accelerateKernels2 =
// Number of work-items that share one output pixel: the integer quotient
// numWorkItems / pixelPerWorkgroup. There is no check for a zero divisor.
2204 inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2205 return (numWorkItems/pixelPerWorkgroup);
2208 // returns the index of the pixel for the current workitem to compute.
2209 // returns -1 if this workitem doesn't need to participate in any computation
// Consecutive runs of numWorkItemsPerPixel work-items map to the same pixel
// index; items whose index would reach pixelPerWorkgroup get -1.
2210 inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2211 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2212 int pixelIndex = itemID/numWorkItemsPerPixel;
2213 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
// ResizeHorizontalFilter: horizontal resize pass. The kernel requires a
// 256x1x1 work-group; each group produces up to pixelPerWorkgroup consecutive
// output pixels of row get_global_id(1), in chunks of pixelChunkSize. Within
// a chunk several work-items share one output pixel: each accumulates part of
// the filter taps, the partial sums are combined in local memory, and one
// work-item per pixel normalises and stores the result.
//   inputImage, inputColumns, inputRows : source image and its size.
//   matte                  : NOTE(review): presumably selects the
//                            alpha-weighted loops and store; the tests on it
//                            are not shown here - confirm.
//   xFactor                : horizontal scale factor (output over input).
//   filteredImage, filteredColumns, filteredRows : destination and its size.
//   resizeFilterType, resizeWindowType, resizeFilterCubicCoefficients,
//   resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur :
//                            forwarded to getResizeFilterWeight.
//   resizeFilterSupport    : filter support before scaling.
//   inputImageCache, numCachedPixels : local cache of input pixels and its
//                            capacity in pixels.
//   outputPixelCache, densityCache, gammaCache : local accumulators indexed
//                            by pixel within the chunk.
// NOTE(review): `gamma` and the updates of `density` and `gamma` inside the
// tap loops are not shown here - confirm against the full kernel.
2220 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2221 void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2222 , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2223 , const int resizeFilterType, const int resizeWindowType
2224 , const __global float* resizeFilterCubicCoefficients
2225 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2226 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2227 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2230 // calculate the range of resized image pixels computed by this workgroup
2231 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2232 const unsigned int stopX = min(startX + pixelPerWorkgroup,filteredColumns);
2233 const unsigned int actualNumPixelToCompute = stopX - startX;
2235 // calculate the range of input image pixels to cache
// scale is at least 1; support is the filter support stretched by it and at
// least 0.5. scale is then replaced by its reciprocal for use on distances.
2236 float scale = max(1.0f/xFactor+MagickEpsilon ,1.0f);
2237 const float support = max(scale*resizeFilterSupport,0.5f);
2238 scale = PerceptibleReciprocal(scale);
2240 const int cacheRangeStartX = max((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
2241 const int cacheRangeEndX = min((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
2243 // cache the input pixels into local memory
2244 const unsigned int y = get_global_id(1);
2245 event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
2246 wait_group_events(1,&e);
// Number of chunks, rounded up.
2248 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2249 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2252 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2253 const unsigned int chunkStopX = min(chunkStartX + pixelChunkSize, stopX);
2254 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
2256 // determine which resized pixel computed by this workitem
// numItems is the number of work-items cooperating on each output pixel.
2257 const unsigned int itemID = get_local_id(0);
2258 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2260 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2262 float4 filteredPixel = (float4)0.0f;
2263 float density = 0.0f;
2265 // -1 means this workitem doesn't participate in the computation
2266 if (pixelIndex != -1) {
2268 // x coordinate of the resized pixel computed by this workitem
2269 const int x = chunkStartX + pixelIndex;
2271 // calculate how many steps required for this pixel
// bisect is the centre of output pixel x in input coordinates; [start, stop)
// is the range of contributing input columns, clipped to the image.
2272 const float bisect = (x+0.5)/xFactor+MagickEpsilon;
2273 const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
2274 const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputColumns);
2275 const unsigned int n = stop - start;
2277 // calculate how many steps this workitem will contribute
// Ceiling of n/numItems.
2278 unsigned int numStepsPerWorkItem = n / numItems;
2279 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
// This work-item handles taps [startStep, stopStep) of the pixel.
2281 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2282 if (startStep < n) {
2283 const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
// Position of the first tap relative to the start of the local cache.
2285 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
// Tap loop without alpha weighting: every component is scaled by the weight.
2288 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2289 float4 cp = convert_float4(inputImageCache[cacheIndex]);
2291 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2292 , (ResizeWeightingFunctionType)resizeWindowType
2293 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2295 filteredPixel += ((float4)weight)*cp;
// Tap loop with alpha weighting: x, y, z are scaled by
// weight*QuantumScale*GetPixelAlpha(p), w by the weight alone.
2302 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2303 CLPixelType p = inputImageCache[cacheIndex];
2305 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2306 , (ResizeWeightingFunctionType)resizeWindowType
2307 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2309 float alpha = weight * QuantumScale * GetPixelAlpha(p);
2310 float4 cp = convert_float4(p);
2312 filteredPixel.x += alpha * cp.x;
2313 filteredPixel.y += alpha * cp.y;
2314 filteredPixel.z += alpha * cp.z;
2315 filteredPixel.w += weight * cp.w;
2324 // initialize the accumulators to zero
2325 if (itemID < actualNumPixelInThisChunk) {
2326 outputPixelCache[itemID] = (float4)0.0f;
2327 densityCache[itemID] = 0.0f;
2329 gammaCache[itemID] = 0.0f;
2331 barrier(CLK_LOCAL_MEM_FENCE);
2333 // accumulate the filtered pixel value and the density
// Round i lets only the work-items with itemID%numItems == i add their
// partial sums, so no two items update the same pixel slot in one round; a
// barrier separates the rounds.
2334 for (unsigned int i = 0; i < numItems; i++) {
2335 if (pixelIndex != -1) {
2336 if (itemID%numItems == i) {
2337 outputPixelCache[pixelIndex]+=filteredPixel;
2338 densityCache[pixelIndex]+=density;
2340 gammaCache[pixelIndex]+=gamma;
2344 barrier(CLK_LOCAL_MEM_FENCE);
// The first actualNumPixelInThisChunk work-items each finish one pixel.
2347 if (itemID < actualNumPixelInThisChunk) {
// Store without alpha weighting: divide by density unless it is 0 or 1.
2349 float density = densityCache[itemID];
2350 float4 filteredPixel = outputPixelCache[itemID];
2351 if (density!= 0.0f && density != 1.0)
2353 density = PerceptibleReciprocal(density);
2354 filteredPixel *= (float4)density;
2356 filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2357 , ClampToQuantum(filteredPixel.y)
2358 , ClampToQuantum(filteredPixel.z)
2359 , ClampToQuantum(filteredPixel.w));
// Store with alpha weighting: as above, then x, y, z are additionally scaled
// by the reciprocal of the accumulated gamma.
2362 float density = densityCache[itemID];
2363 float gamma = gammaCache[itemID];
2364 float4 filteredPixel = outputPixelCache[itemID];
2366 if (density!= 0.0f && density != 1.0) {
2367 density = PerceptibleReciprocal(density);
2368 filteredPixel *= (float4)density;
2371 gamma = PerceptibleReciprocal(gamma);
2374 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2375 , ClampToQuantum(gamma*filteredPixel.y)
2376 , ClampToQuantum(gamma*filteredPixel.z)
2377 , ClampToQuantum(filteredPixel.w));
2379 filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
2384 } // end of chunking loop
// ResizeHorizontalFilterSinc: same interface and 256x1x1 work-group
// requirement as ResizeHorizontalFilter, to which it forwards every argument
// except the two type selectors: SincWeightingFunction is passed for both the
// filter and the window, so the resizeFilterType and resizeWindowType
// parameters of this kernel are ignored.
2391 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2392 void ResizeHorizontalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2393 , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2394 , const int resizeFilterType, const int resizeWindowType
2395 , const __global float* resizeFilterCubicCoefficients
2396 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2397 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2398 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2400 ResizeHorizontalFilter(inputImage,inputColumns,inputRows,matte
2401 ,xFactor, filteredImage, filteredColumns, filteredRows
2402 ,SincWeightingFunction, SincWeightingFunction
2403 ,resizeFilterCubicCoefficients
2404 ,resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur
2405 ,inputImageCache, numCachedPixels, pixelPerWorkgroup, pixelChunkSize
2406 ,outputPixelCache, densityCache, gammaCache);
2413 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2414 void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2415 , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2416 , const int resizeFilterType, const int resizeWindowType
2417 , const __global float* resizeFilterCubicCoefficients
2418 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2419 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2420 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2423 // calculate the range of resized image pixels computed by this workgroup
2424 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2425 const unsigned int stopY = min(startY + pixelPerWorkgroup,filteredRows);
2426 const unsigned int actualNumPixelToCompute = stopY - startY;
2428 // calculate the range of input image pixels to cache
2429 float scale = max(1.0f/yFactor+MagickEpsilon ,1.0f);
2430 const float support = max(scale*resizeFilterSupport,0.5f);
2431 scale = PerceptibleReciprocal(scale);
2433 const int cacheRangeStartY = max((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
2434 const int cacheRangeEndY = min((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
2436 // cache the input pixels into local memory
2437 const unsigned int x = get_global_id(0);
2438 event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
2439 wait_group_events(1,&e);
2441 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2442 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2445 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2446 const unsigned int chunkStopY = min(chunkStartY + pixelChunkSize, stopY);
2447 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
2449 // determine which resized pixel computed by this workitem
2450 const unsigned int itemID = get_local_id(1);
2451 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2453 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2455 float4 filteredPixel = (float4)0.0f;
2456 float density = 0.0f;
2458 // -1 means this workitem doesn't participate in the computation
2459 if (pixelIndex != -1) {
2461 // y coordinate of the resized pixel computed by this workitem
2462 const int y = chunkStartY + pixelIndex;
2464 // calculate how many steps required for this pixel
2465 const float bisect = (y+0.5)/yFactor+MagickEpsilon;
2466 const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
2467 const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputRows);
2468 const unsigned int n = stop - start;
2470 // calculate how many steps this workitem will contribute
2471 unsigned int numStepsPerWorkItem = n / numItems;
2472 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2474 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2475 if (startStep < n) {
2476 const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
2478 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
2481 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2482 float4 cp = convert_float4(inputImageCache[cacheIndex]);
2484 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2485 , (ResizeWeightingFunctionType)resizeWindowType
2486 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2488 filteredPixel += ((float4)weight)*cp;
2495 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2496 CLPixelType p = inputImageCache[cacheIndex];
2498 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2499 , (ResizeWeightingFunctionType)resizeWindowType
2500 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2502 float alpha = weight * QuantumScale * GetPixelAlpha(p);
2503 float4 cp = convert_float4(p);
2505 filteredPixel.x += alpha * cp.x;
2506 filteredPixel.y += alpha * cp.y;
2507 filteredPixel.z += alpha * cp.z;
2508 filteredPixel.w += weight * cp.w;
2517 // initialize the accumulators to zero
2518 if (itemID < actualNumPixelInThisChunk) {
2519 outputPixelCache[itemID] = (float4)0.0f;
2520 densityCache[itemID] = 0.0f;
2522 gammaCache[itemID] = 0.0f;
2524 barrier(CLK_LOCAL_MEM_FENCE);
2526 // accumulate the filtered pixel value and the density
2527 for (unsigned int i = 0; i < numItems; i++) {
2528 if (pixelIndex != -1) {
2529 if (itemID%numItems == i) {
2530 outputPixelCache[pixelIndex]+=filteredPixel;
2531 densityCache[pixelIndex]+=density;
2533 gammaCache[pixelIndex]+=gamma;
2537 barrier(CLK_LOCAL_MEM_FENCE);
2540 if (itemID < actualNumPixelInThisChunk) {
2542 float density = densityCache[itemID];
2543 float4 filteredPixel = outputPixelCache[itemID];
2544 if (density!= 0.0f && density != 1.0)
2546 density = PerceptibleReciprocal(density);
2547 filteredPixel *= (float4)density;
2549 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2550 , ClampToQuantum(filteredPixel.y)
2551 , ClampToQuantum(filteredPixel.z)
2552 , ClampToQuantum(filteredPixel.w));
2555 float density = densityCache[itemID];
2556 float gamma = gammaCache[itemID];
2557 float4 filteredPixel = outputPixelCache[itemID];
2559 if (density!= 0.0f && density != 1.0) {
2560 density = PerceptibleReciprocal(density);
2561 filteredPixel *= (float4)density;
2564 gamma = PerceptibleReciprocal(gamma);
2567 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2568 , ClampToQuantum(gamma*filteredPixel.y)
2569 , ClampToQuantum(gamma*filteredPixel.z)
2570 , ClampToQuantum(filteredPixel.w));
2572 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
2577 } // end of chunking loop
// Kernel entry point for the vertical resize pass, specialised for the Sinc
// weighting function.  It requires a work-group size of exactly 1x256x1 and
// simply forwards its arguments to ResizeVerticalFilter.
//
// The image, geometry, coefficient, scale/support/blur and __local cache
// arguments are passed through unchanged.  resizeFilterType and
// resizeWindowType are accepted but NOT forwarded: SincWeightingFunction is
// passed in both of those positions instead.
2584 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2585 void ResizeVerticalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2586 , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2587 , const int resizeFilterType, const int resizeWindowType
2588 , const __global float* resizeFilterCubicCoefficients
2589 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2590 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2591 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2592 ResizeVerticalFilter(inputImage,inputColumns,inputRows,matte
2593 ,yFactor,filteredImage,filteredColumns,filteredRows
// filter type and window type are both fixed to Sinc here
2594 ,SincWeightingFunction, SincWeightingFunction
2595 ,resizeFilterCubicCoefficients
2596 ,resizeFilterScale,resizeFilterSupport,resizeFilterWindowSupport,resizeFilterBlur
2597 ,inputImageCache,numCachedPixels,pixelPerWorkgroup,pixelChunkSize
2598 ,outputPixelCache,densityCache,gammaCache);
// Advances the pseudo-random generator one step and returns the new state
// word scaled by normalizeRand.
//   seed          - generator state, passed by pointer (four 32-bit words).
//   normalizeRand - multiplier applied to the generated word.
// The visible update mixes two state words with xor and shifts; the loop
// repeats the step while the new word is the all-ones value.
// (s is presumably a local uint4 copy of *seed -- confirm.)
2604 inline float GetPseudoRandomValue(uint4* seed, const float normalizeRand) {
2607 unsigned int alpha = (unsigned int) (s.y ^ (s.y << 11));
2611 s.x = (s.x ^ (s.x >> 19)) ^ (alpha ^ (alpha >> 8));
// The state word is 32 bits wide.  In OpenCL C an unsigned long literal is
// 64 bits, so comparing against ~0UL could never be true and the all-ones
// word was never rejected; compare against the 32-bit all-ones value.
2612 } while (s.x == ~0U);
2614 return (normalizeRand*s.x);
// Kernel that fills randomNumbers with pseudo-random floats.
//   seeds            - four 32-bit state words per work-item, read on entry
//                      and written back on exit.
//   normalizeRand    - multiplier applied to each generated 32-bit word.
//   randomNumbers    - output buffer.
//   init             - presumably selects which of the two seed-loading
//                      sequences below runs (the condition is not visible
//                      here -- confirm).
//   numRandomNumbers - total number of values to produce; positions at or
//                      beyond it are tested for below.
2617 __kernel void randomNumberGeneratorKernel(__global uint* seeds, const float normalizeRand
2618 , __global float* randomNumbers, const uint init
2619 ,const uint numRandomNumbers) {
2621 unsigned int id = get_global_id(0);
2622 unsigned int seed[4];
// first sequence: only word 0 comes from the buffer, the rest are constants
2625 seed[0] = seeds[id*4];
2626 seed[1] = 0x50a7f451;
2627 seed[2] = 0x5365417e;
2628 seed[3] = 0xc3a4171a;
// second sequence: all four words are restored from the buffer
2631 seed[0] = seeds[id*4];
2632 seed[1] = seeds[id*4+1];
2633 seed[2] = seeds[id*4+2];
2634 seed[3] = seeds[id*4+3];
// each work-item produces ceil(numRandomNumbers / global size) values
2637 unsigned int numRandomNumbersPerItem = (numRandomNumbers+get_global_size(0)-1)/get_global_size(0);
2638 for (unsigned int i = 0; i < numRandomNumbersPerItem; i++) {
2641 unsigned int alpha=(unsigned int) (seed[1] ^ (seed[1] << 11));
2645 seed[0]=(seed[0] ^ (seed[0] >> 19)) ^ (alpha ^ (alpha >> 8));
// seed[0] is a 32-bit unsigned int.  In OpenCL C ~0UL is a 64-bit all-ones
// value, so the old comparison was always false and the all-ones word was
// never rejected; compare against the 32-bit all-ones value instead.
2646 } while (seed[0] == ~0U);
// work-items of a group write to adjacent positions for a given i
2647 unsigned int pos = (get_group_id(0)*get_local_size(0)*numRandomNumbersPerItem)
2648 + get_local_size(0) * i + get_local_id(0);
2650 if (pos >= numRandomNumbers)
2652 randomNumbers[pos] = normalizeRand*seed[0];
2655 /* save the seeds for the next time */
2656 seeds[id*4] = seed[0];
2657 seeds[id*4+1] = seed[1];
2658 seeds[id*4+2] = seed[2];
2659 seeds[id*4+3] = seed[3];
// Noise-type enumerator; GenerateDifferentialNoise has a switch case with
// this name.  (Presumably a member of NoiseType -- the rest of the
// enumeration is not visible here.)
2672 MultiplicativeGaussianNoise,
// Pointer into a global buffer of floats; AddNoiseImage sets it to the
// calling work-item's slice of its randomNumbers argument.
2680 const global float* rns;
// Returns one float obtained through r; GenerateDifferentialNoise calls it
// for every random draw.  The body is not visible here -- presumably it
// reads the next value from r->rns and advances the pointer (confirm).
2684 float ReadPseudoRandomValue(RandomNumbers* r) {
// Per-noise-type scale factors emitted as #define lines into the kernel
// source.  Each expands to an expression in `attenuate`, so a variable of
// that name must be in scope wherever one of these names is used (it is a
// parameter of GenerateDifferentialNoise).
2691 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
2692 OPENCL_DEFINE(SigmaGaussian,(attenuate*0.015625f))
2693 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
2694 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
2695 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
2696 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
2697 OPENCL_DEFINE(SigmaRandom, (attenuate))
2698 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
// Computes a noisy replacement value for one channel sample.
//   r          - source of random floats (read via ReadPseudoRandomValue).
//   pixel      - original channel value.
//   noise_type - selects the noise model in the switch below.
//   attenuate  - strength; used through the Sigma*/Tau* macros.
// The result is assigned to `noise`; callers clamp it with ClampToQuantum.
// Several case labels and guard lines are not visible here, so the branch
// notes below identify branches by the Sigma constant they use.
2701 float GenerateDifferentialNoise(RandomNumbers* r, CLQuantum pixel, NoiseType noise_type, float attenuate) {
// one random value is always drawn before dispatching
2710 alpha=ReadPseudoRandomValue(r);
2711 switch(noise_type) {
// uniform: offset centred on the pixel, width QuantumRange*SigmaUniform
2715 noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
// Gaussian: second draw, then sqrt(-2 ln alpha) combined with cos/sin of
// 2*pi*beta (cospi/sinpi take their argument in units of pi)
2726 beta=ReadPseudoRandomValue(r);
2727 gamma=sqrt(-2.0f*log(alpha));
2728 sigma=gamma*cospi((2.0f*beta));
2729 tau=gamma*sinpi((2.0f*beta));
2730 noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+
2731 QuantumRange*TauGaussian*tau);
// impulse: the two tails of alpha, each SigmaImpulse/2 wide, are tested;
// the upper tail yields QuantumRange
2738 if (alpha < (SigmaImpulse/2.0f))
2741 if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
2742 noise=(float)QuantumRange;
2747 case LaplacianNoise:
// values of alpha / beta at or below the epsilon thresholds are special-cased
// to pixel -/+ QuantumRange instead of taking the logarithm
2751 if (alpha <= MagickEpsilon)
2752 noise=(float) (pixel-QuantumRange);
2754 noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
2759 if (beta <= (0.5f*MagickEpsilon))
2760 noise=(float) (pixel+QuantumRange);
2762 noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
2765 case MultiplicativeGaussianNoise:
// sigma is only recomputed when alpha is large enough for the logarithm
2768 if (alpha > MagickEpsilon)
2769 sigma=sqrt(-2.0f*log(alpha));
2770 beta=ReadPseudoRandomValue(r);
// noise term is proportional to the pixel value itself
2771 noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
2772 cospi((float) (2.0f*beta))/2.0f);
// Poisson: loop counts iterations while alpha stays above the threshold
// exp(-SigmaPoisson*QuantumScale*pixel), drawing a new value each time
2780 poisson=exp(-SigmaPoisson*QuantumScale*pixel);
2781 for (i=0; alpha > poisson; i++)
2783 beta=ReadPseudoRandomValue(r);
2786 noise=(float) (QuantumRange*i/SigmaPoisson);
// random: result does not depend on the input pixel
2791 noise=(float) (QuantumRange*SigmaRandom*alpha);
// Adds noise to the selected channels of one pixel per work-item.
//   inputImage / filteredImage - source and destination pixel buffers, both
//                                indexed with inputColumns as the row stride.
//   channel                    - bit mask choosing which channels get noise.
//   noise_type, attenuate      - forwarded to GenerateDifferentialNoise.
//   randomNumbers              - pre-generated floats; each work-item uses a
//                                slice of numRandomNumbersPerPixel values.
//   rowOffset                  - added to the work-item's row id to obtain
//                                the image row.
// No bounds check on x / y is visible in this block.
2800 void AddNoiseImage(const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
2801 ,const unsigned int inputColumns, const unsigned int inputRows
2802 ,const ChannelType channel
2803 ,const NoiseType noise_type, const float attenuate
2804 ,const __global float* randomNumbers, const unsigned int numRandomNumbersPerPixel
2805 ,const unsigned int rowOffset) {
2807 unsigned int x = get_global_id(0);
2808 unsigned int y = get_global_id(1) + rowOffset;
// NOTE(review): the random-number slice is indexed by the row id WITHOUT
// rowOffset, while the pixel below is indexed WITH it -- presumably the
// random buffer only covers the rows of the current launch; confirm.
2810 r.rns = randomNumbers + (get_global_id(1) * inputColumns + get_global_id(0))*numRandomNumbersPerPixel;
2812 CLPixelType p = inputImage[y*inputColumns+x];
// q starts from the current destination pixel, so channels that are not
// selected keep whatever value filteredImage already holds
2813 CLPixelType q = filteredImage[y*inputColumns+x];
2815 if ((channel&RedChannel)!=0) {
2816 setRed(&q,ClampToQuantum(GenerateDifferentialNoise(&r,getRed(p),noise_type,attenuate)));
2819 if ((channel&GreenChannel)!=0) {
2820 setGreen(&q,ClampToQuantum(GenerateDifferentialNoise(&r,getGreen(p),noise_type,attenuate)));
2823 if ((channel&BlueChannel)!=0) {
2824 setBlue(&q,ClampToQuantum(GenerateDifferentialNoise(&r,getBlue(p),noise_type,attenuate)));
2827 if ((channel & OpacityChannel) != 0) {
2828 setOpacity(&q,ClampToQuantum(GenerateDifferentialNoise(&r,getOpacity(p),noise_type,attenuate)));
2831 filteredImage[y*inputColumns+x] = q;
// Generates random values for the pixels of an image.  Each work-item acts
// as one generator with its own four-word state.
//   inputImage                   - pixel buffer (the store into it is not
//                                  visible in this block).
//   imageColumns, imageRows      - image size; their product bounds the
//                                  pixel index.
//   seeds                        - four state words per work-item, loaded on
//                                  entry and written back on exit.
//   randNormNumerator/Denominator - their quotient is the scale passed to
//                                  GetPseudoRandomValue.
2838 void RandomImage(__global CLPixelType* inputImage,
2839 const uint imageColumns, const uint imageRows,
2840 __global uint* seeds,
2841 const float randNormNumerator,
2842 const uint randNormDenominator) {
2844 unsigned int numGenerators = get_global_size(0);
// numerator of a round-up division; the divisor is on a line not shown here
// (presumably numGenerators -- confirm)
2845 unsigned numRandPixelsPerWorkItem = ((imageColumns*imageRows) + (numGenerators-1))
// load this work-item's generator state
2849 s.x = seeds[get_global_id(0)*4];
2850 s.y = seeds[get_global_id(0)*4+1];
2851 s.z = seeds[get_global_id(0)*4+2];
2852 s.w = seeds[get_global_id(0)*4+3];
// each work-group owns a contiguous range; within it, work-items of a group
// handle adjacent pixel indices for a given n
2854 unsigned int offset = get_group_id(0) * get_local_size(0) * numRandPixelsPerWorkItem;
2855 for (unsigned int n = 0; n < numRandPixelsPerWorkItem; n++)
2857 int i = offset + n*get_local_size(0) + get_local_id(0);
// indices past the last pixel are detected here
2858 if (i >= imageColumns*imageRows)
2861 float rand = GetPseudoRandomValue(&s,randNormNumerator/randNormDenominator);
2862 CLQuantum v = ClampToQuantum(QuantumRange*rand);
// persist the advanced generator state
2873 seeds[get_global_id(0)*4] = s.x;
2874 seeds[get_global_id(0)*4+1] = s.y;
2875 seeds[get_global_id(0)*4+2] = s.z;
2876 seeds[get_global_id(0)*4+3] = s.w;
// Motion-blur kernel: one work-item per output pixel.  The result is a
// weighted sum of `width` input samples taken at currentPixel + offset[i],
// with weights filter[i], started from `bias`.
//   input / output         - source and destination pixel buffers.
//   imageWidth/imageHeight - image size; also used to clamp sample positions.
//   filter, width, offset  - per-tap weights, tap count and 2-D tap offsets.
//   channel, matte         - choose between the plain and the alpha-weighted
//                            accumulation below.
// `bias` is used below but its declaration is not visible in this block.
2882 void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
2883 const unsigned int imageWidth, const unsigned int imageHeight,
2884 const __global float *filter, const unsigned int width, const __global int2* offset,
2886 const ChannelType channel, const unsigned int matte) {
2889 currentPixel.x = get_global_id(0);
2890 currentPixel.y = get_global_id(1);
// work-items outside the image are detected here
2892 if (currentPixel.x >= imageWidth
2893 || currentPixel.y >= imageHeight)
// accumulator starts from the bias value
2897 pixel.x = (float)bias.x;
2898 pixel.y = (float)bias.y;
2899 pixel.z = (float)bias.z;
2900 pixel.w = (float)bias.w;
// plain path: opacity channel not selected, or image has no matte
2902 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2904 for (int i = 0; i < width; i++) {
2905 // only support EdgeVirtualPixelMethod through ClampToCanvas
2906 // TODO: implement other virtual pixel method
2907 int2 samplePixel = currentPixel + offset[i];
2908 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2909 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2910 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
// all four components are weighted by the filter tap alone
2912 pixel.x += (filter[i] * (float)samplePixelValue.x);
2913 pixel.y += (filter[i] * (float)samplePixelValue.y);
2914 pixel.z += (filter[i] * (float)samplePixelValue.z);
2915 pixel.w += (filter[i] * (float)samplePixelValue.w);
2918 CLPixelType outputPixel;
2919 outputPixel.x = ClampToQuantum(pixel.x);
2920 outputPixel.y = ClampToQuantum(pixel.y);
2921 outputPixel.z = ClampToQuantum(pixel.z);
2922 outputPixel.w = ClampToQuantum(pixel.w);
2923 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
// alpha-weighted path
2928 for (int i = 0; i < width; i++) {
2929 // only support EdgeVirtualPixelMethod through ClampToCanvas
2930 // TODO: implement other virtual pixel method
2931 int2 samplePixel = currentPixel + offset[i];
2932 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2933 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2935 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
// the w component is treated as opacity: alpha = 1 - w/QuantumRange
2937 float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
2938 float k = filter[i];
2939 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2940 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2941 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
// NOTE(review): w is also scaled by alpha here, whereas the plain path
// weights it by the filter tap only, and only xyz are renormalised by
// gamma below -- confirm this is intended.
2943 pixel.w += k * alpha * samplePixelValue.w;
// colour components are renormalised by the reciprocal of gamma (its
// accumulation is not visible here -- presumably the sum of k*alpha)
2947 gamma = PerceptibleReciprocal(gamma);
2948 pixel.xyz = gamma*pixel.xyz;
2950 CLPixelType outputPixel;
2951 outputPixel.x = ClampToQuantum(pixel.x);
2952 outputPixel.y = ClampToQuantum(pixel.y);
2953 outputPixel.z = ClampToQuantum(pixel.z);
2954 outputPixel.w = ClampToQuantum(pixel.w);
2955 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
// Enumerators of CompositeOperator.  The Composite kernel casts its integer
// `compose` argument to this type, so the position of each name in the list
// determines the numeric value it matches.  Only part of the enumerator list
// is visible here; presumably it mirrors the host-side enumeration -- confirm
// before reordering or inserting entries.
2963 UndefinedCompositeOp,
2965 ModulusAddCompositeOp,
2969 ChangeMaskCompositeOp,
2971 ColorBurnCompositeOp,
2972 ColorDodgeCompositeOp,
2973 ColorizeCompositeOp,
2974 CopyBlackCompositeOp,
2975 CopyBlueCompositeOp,
2977 CopyCyanCompositeOp,
2978 CopyGreenCompositeOp,
2979 CopyMagentaCompositeOp,
2980 CopyOpacityCompositeOp,
2982 CopyYellowCompositeOp,
2989 DifferenceCompositeOp,
2990 DisplaceCompositeOp,
2991 DissolveCompositeOp,
2992 ExclusionCompositeOp,
2993 HardLightCompositeOp,
2997 LinearLightCompositeOp,
2998 LuminizeCompositeOp,
2999 MinusDstCompositeOp,
3000 ModulateCompositeOp,
3001 MultiplyCompositeOp,
3007 SaturateCompositeOp,
3009 SoftLightCompositeOp,
3015 ModulusSubtractCompositeOp,
3016 ThresholdCompositeOp,
3018 /* These are new operators, added after the above was last sorted.
3019 * The list should be re-sorted only when a new library version is
3022 DivideDstCompositeOp,
3025 PegtopLightCompositeOp,
3026 VividLightCompositeOp,
3027 PinLightCompositeOp,
3028 LinearDodgeCompositeOp,
3029 LinearBurnCompositeOp,
3030 MathematicsCompositeOp,
3031 DivideSrcCompositeOp,
3032 MinusSrcCompositeOp,
3033 DarkenIntensityCompositeOp,
3034 LightenIntensityCompositeOp
3035 } CompositeOperator;
// Colour-dodge blend of a single channel.
//   Sca, Dca - source / destination channel values; CompositeColorDodge
//              passes them normalised and multiplied by the matching alpha.
//   Sa, Da   - source / destination alpha in the same normalised scale.
// Returns the blended, alpha-weighted channel value.
// Three alternative formulations follow, each introduced by a line of prose.
// The preprocessor conditionals that pick one of them are not visible in
// this block; presumably only the last formulation is compiled -- confirm.
3039 inline float ColorDodge(const float Sca,
3040 const float Sa,const float Dca,const float Da)
3043 Oct 2004 SVG specification.
// saturated case first, otherwise the general quotient form
3045 if ((Sca*Da+Dca*Sa) >= Sa*Da)
3046 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
3047 return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
3051 New specification, March 2009 SVG specification. This specification was
3052 also wrong of non-overlap cases.
// Sca == Sa (within MagickEpsilon) is special-cased to avoid dividing by
// Sa-Sca
3055 if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon))
3056 return(Sca*(1.0-Da));
3057 if (fabs(Sca-Sa) < MagickEpsilon)
3058 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
3059 return(Sa*MagickMin(Da,Dca*Sa/(Sa-Sca)));
3063 Working from first principles using the original formula:
3065 f(Sc,Dc) = Dc/(1-Sc)
3067 This works correctly! Looks like the 2004 model was right but just
3068 required a extra condition for correct handling.
// same quotient form as the first formulation, guarded by the Sca == Sa
// special cases so the division by Sa-Sca is only reached when they differ
3072 if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon))
3073 return(Sca*(1.0-Da)+Dca*(1.0-Sa));
3074 if (fabs(Sca-Sa) < MagickEpsilon)
3075 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
3076 return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
// Colour-dodge composition of source pixel *p over destination pixel *q,
// written to *composite.  Opacity is converted to alpha for the blend, the
// result opacity is set from the combined alpha, and each colour channel is
// computed by ColorDodge on alpha-weighted, normalised values and then
// rescaled to the quantum range.
3080 inline void CompositeColorDodge(const float4 *p,
3081 const float4 *q,float4 *composite) {
3088 Sa=1.0f-QuantumScale*getOpacityF4(*p); /* simplify and speed up equations */
3089 Da=1.0f-QuantumScale*getOpacityF4(*q);
3090 gamma=RoundToUnity(Sa+Da-Sa*Da); /* over blend, as per SVG doc */
3091 setOpacityF4(composite, QuantumRange*(1.0-gamma));
// gamma becomes the scale back to quantum range; values of gamma below
// MagickEpsilon in magnitude are replaced by MagickEpsilon before dividing
3092 gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
3093 setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
3094 getRedF4(*q)*Da,Da));
3095 setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
3096 getGreenF4(*q)*Da,Da));
3097 setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
3098 getBlueF4(*q)*Da,Da));
// 'Plus' composition of two pixels.
//   p, alpha  - first pixel and the opacity to use for it.
//   q, beta   - second pixel and the opacity to use for it.
//   composite - receives the result.
// alpha and beta are opacities in quantum units; they are converted to
// normalised alpha values Sa and Da before blending.
3103 inline void MagickPixelCompositePlus(const float4 *p,
3104 const float alpha,const float4 *q,
3105 const float beta,float4 *composite)
3114 Add two pixels with the given opacities.
3116 Sa=1.0-QuantumScale*alpha;
3117 Da=1.0-QuantumScale*beta;
3118 gamma=RoundToUnity(Sa+Da); /* 'Plus' blending -- not 'Over' blending */
3119 setOpacityF4(composite,(float) QuantumRange*(1.0-gamma));
// colour channels are the alpha-weighted sum, renormalised by 1/gamma
3120 gamma=PerceptibleReciprocal(gamma);
3121 setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
3122 setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
3123 setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
// Blend composition: delegates to MagickPixelCompositePlus after scaling
// each pixel's own alpha by a blend factor.
//   p, alpha  - first pixel and its blend factor.
//   q, beta   - second pixel and its blend factor.
//   composite - receives the result.
// For each pixel the opacity handed on is
//   QuantumRange - factor*(QuantumRange - opacity),
// i.e. the pixel's alpha multiplied by the factor, expressed as opacity.
3128 inline void MagickPixelCompositeBlend(const float4 *p,
3129 const float alpha,const float4 *q,
3130 const float beta,float4 *composite)
3132 MagickPixelCompositePlus(p,(float) (QuantumRange-alpha*
3133 (QuantumRange-getOpacityF4(*p))),q,(float) (QuantumRange-beta*
3134 (QuantumRange-getOpacityF4(*q))),composite);
// Composite kernel: one work-item per pixel of `image`, which is updated in
// place with the result of composing compositeImage onto it.
//   image, imageWidth, imageHeight - destination buffer and its size.
//   compositeImage                 - source buffer.
//   compositeWidth/Height          - not referenced in the visible lines.
//   compose                        - operator, cast to CompositeOperator.
//   channel                        - not referenced in the visible lines.
//   matte                          - presumably selects whether pixel opacity
//                                    or 0 is used (the condition is not
//                                    visible here -- confirm).
//   destination_dissolve,
//   source_dissolve                - blend factors for BlendCompositeOp.
3140 void Composite(__global CLPixelType *image,
3141 const unsigned int imageWidth,
3142 const unsigned int imageHeight,
3143 const __global CLPixelType *compositeImage,
3144 const unsigned int compositeWidth,
3145 const unsigned int compositeHeight,
3146 const unsigned int compose,
3147 const ChannelType channel,
3148 const unsigned int matte,
3149 const float destination_dissolve,
3150 const float source_dissolve) {
3153 index.x = get_global_id(0);
3154 index.y = get_global_id(1);
// work-items outside the destination image are detected here
3157 if (index.x >= imageWidth
3158 || index.y >= imageHeight) {
3161 const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
3163 setRedF4(&destination,getRed(inputPixel));
3164 setGreenF4(&destination,getGreen(inputPixel));
3165 setBlueF4(&destination,getBlue(inputPixel));
// NOTE(review): the source image is indexed with the DESTINATION row stride
// (imageWidth) and is not bounds-checked against compositeWidth/Height, so
// this is only correct when both images have identical dimensions --
// presumably the host guarantees that; confirm.
3168 const CLPixelType compositePixel
3169 = compositeImage[index.y*imageWidth+index.x];
3171 setRedF4(&source,getRed(compositePixel));
3172 setGreenF4(&source,getGreen(compositePixel));
3173 setBlueF4(&source,getBlue(compositePixel));
// two alternatives: take opacity from the pixels, or treat both as opaque
3176 setOpacityF4(&destination,getOpacity(inputPixel));
3177 setOpacityF4(&source,getOpacity(compositePixel));
3180 setOpacityF4(&destination,0.0f);
3181 setOpacityF4(&source,0.0f);
// default result is the unchanged destination
3184 float4 composite=destination;
3186 CompositeOperator op = (CompositeOperator)compose;
3188 case ColorDodgeCompositeOp:
3189 CompositeColorDodge(&source,&destination,&composite);
3191 case BlendCompositeOp:
3192 MagickPixelCompositeBlend(&source,source_dissolve,&destination,
3193 destination_dissolve,&composite);
3196 // unsupported operators
// clamp each component to the quantum range and store in place
3200 CLPixelType outputPixel;
3201 setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
3202 setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
3203 setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
3204 setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
3205 image[index.y*imageWidth+index.x] = outputPixel;
3211 #endif // MAGICKCORE_OPENCL_SUPPORT
3213 #if defined(__cplusplus) || defined(c_plusplus)
3217 #endif // _MAGICKCORE_ACCELERATE_PRIVATE_H