]> granicus.if.org Git - imagemagick/blob - MagickCore/accelerate.c
The -preview option is an image operator
[imagemagick] / MagickCore / accelerate.c
1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7 %    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8 %    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9 %    A   A   C       C      E      L      E      R R    A   A    T    E       %
10 %    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11 %                                                                             %
12 %                                                                             %
13 %                       MagickCore Acceleration Methods                       %
14 %                                                                             %
15 %                              Software Design                                %
16 %                                  Cristy                                     %
17 %                               SiuChi Chan                                   %
18 %                              Guansong Zhang                                 %
19 %                               January 2010                                  %
20 %                               Dirk Lemstra                                  %
21 %                                April 2016                                   %
22 %                                                                             %
23 %                                                                             %
24 %  Copyright 1999-2017 ImageMagick Studio LLC, a non-profit organization      %
25 %  dedicated to making software imaging solutions freely available.           %
26 %                                                                             %
27 %  You may not use this file except in compliance with the License.  You may  %
28 %  obtain a copy of the License at                                            %
29 %                                                                             %
30 %    https://www.imagemagick.org/script/license.php                           %
31 %                                                                             %
32 %  Unless required by applicable law or agreed to in writing, software        %
33 %  distributed under the License is distributed on an "AS IS" BASIS,          %
34 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
35 %  See the License for the specific language governing permissions and        %
36 %  limitations under the License.                                             %
37 %                                                                             %
38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 */
40  
41 /*
42 Include declarations.
43 */
44 #include "MagickCore/studio.h"
45 #include "MagickCore/accelerate-private.h"
46 #include "MagickCore/accelerate-kernels-private.h"
47 #include "MagickCore/artifact.h"
48 #include "MagickCore/cache.h"
49 #include "MagickCore/cache-private.h"
50 #include "MagickCore/cache-view.h"
51 #include "MagickCore/color-private.h"
52 #include "MagickCore/delegate-private.h"
53 #include "MagickCore/enhance.h"
54 #include "MagickCore/exception.h"
55 #include "MagickCore/exception-private.h"
56 #include "MagickCore/gem.h"
57 #include "MagickCore/image.h"
58 #include "MagickCore/image-private.h"
59 #include "MagickCore/linked-list.h"
60 #include "MagickCore/list.h"
61 #include "MagickCore/memory_.h"
62 #include "MagickCore/monitor-private.h"
63 #include "MagickCore/opencl.h"
64 #include "MagickCore/opencl-private.h"
65 #include "MagickCore/option.h"
66 #include "MagickCore/pixel-accessor.h"
67 #include "MagickCore/pixel-private.h"
68 #include "MagickCore/prepress.h"
69 #include "MagickCore/quantize.h"
70 #include "MagickCore/quantum-private.h"
71 #include "MagickCore/random_.h"
72 #include "MagickCore/random-private.h"
73 #include "MagickCore/registry.h"
74 #include "MagickCore/resize.h"
75 #include "MagickCore/resize-private.h"
76 #include "MagickCore/semaphore.h"
77 #include "MagickCore/splay-tree.h"
78 #include "MagickCore/statistic.h"
79 #include "MagickCore/string_.h"
80 #include "MagickCore/string-private.h"
81 #include "MagickCore/token.h"
82
83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85
86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
87
88 /*
89   Define declarations.
90 */
91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92
93 /*
94   Static declarations.
95 */
96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97 {
98   BoxWeightingFunction,
99   TriangleWeightingFunction,
100   HannWeightingFunction,
101   HammingWeightingFunction,
102   BlackmanWeightingFunction,
103   CubicBCWeightingFunction,
104   SincWeightingFunction,
105   SincFastWeightingFunction,
106   LastWeightingFunction
107 };
108
109 /*
110   Helper functions.
111 */
112 static MagickBooleanType checkAccelerateCondition(const Image* image)
113 {
114   /* check if the image's colorspace is supported */
115   if (image->colorspace != RGBColorspace &&
116       image->colorspace != sRGBColorspace &&
117       image->colorspace != GRAYColorspace)
118     return(MagickFalse);
119
120   /* check if the virtual pixel method is compatible with the OpenCL implementation */
121   if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
122       (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
123     return(MagickFalse);
124
125   /* check if the image has read / write mask */
126   if (image->read_mask != MagickFalse || image->write_mask != MagickFalse)
127     return(MagickFalse);
128
129   if (image->number_channels > 4)
130     return(MagickFalse);
131
132   /* check if pixel order is R */
133   if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
134     return(MagickFalse);
135
136   if (image->number_channels == 1)
137     return(MagickTrue);
138
139   /* check if pixel order is RA */
140   if ((image->number_channels == 2) &&
141       (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
142     return(MagickTrue);
143
144   if (image->number_channels == 2)
145     return(MagickFalse);
146
147   /* check if pixel order is RGB */
148   if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
149       (GetPixelChannelOffset(image,BluePixelChannel) != 2))
150     return(MagickFalse);
151
152   if (image->number_channels == 3)
153     return(MagickTrue);
154
155   /* check if pixel order is RGBA */
156   if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
157     return(MagickFalse);
158
159   return(MagickTrue);
160 }
161
162 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
163 {
164   if (checkAccelerateCondition(image) == MagickFalse)
165     return(MagickFalse);
166
167   /* the order will be RGBA if the image has 4 channels */
168   if (image->number_channels != 4)
169     return(MagickFalse);
170
171   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
172       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
173       (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
174       (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
175     return(MagickFalse);
176
177   return(MagickTrue);
178 }
179
180 static MagickBooleanType checkPixelIntensity(const Image *image,
181   const PixelIntensityMethod method)
182 {
183   /* EncodePixelGamma and DecodePixelGamma are not supported */
184   if ((method == Rec601LumaPixelIntensityMethod) ||
185       (method == Rec709LumaPixelIntensityMethod))
186     {
187       if (image->colorspace == RGBColorspace)
188         return(MagickFalse);
189     }
190
191   if ((method == Rec601LuminancePixelIntensityMethod) ||
192       (method == Rec709LuminancePixelIntensityMethod))
193     {
194       if (image->colorspace == sRGBColorspace)
195         return(MagickFalse);
196     }
197
198   return(MagickTrue);
199 }
200
201 static MagickBooleanType checkHistogramCondition(const Image *image,
202   const PixelIntensityMethod method)
203 {
204   /* ensure this is the only pass get in for now. */
205   if ((image->channel_mask & SyncChannels) == 0)
206     return MagickFalse;
207
208   return(checkPixelIntensity(image,method));
209 }
210
211 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
212 {
213   MagickCLEnv
214     clEnv;
215
216   clEnv=GetCurrentOpenCLEnv();
217   if (clEnv == (MagickCLEnv) NULL)
218     return((MagickCLEnv) NULL);
219
220   if (clEnv->enabled == MagickFalse)
221     return((MagickCLEnv) NULL);
222
223   if (InitializeOpenCL(clEnv,exception) == MagickFalse)
224     return((MagickCLEnv) NULL);
225
226   return(clEnv);
227 }
228
229 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
230 {
231   Image
232     *clone;
233
234   if (((image->channel_mask & RedChannel) != 0) &&
235       ((image->channel_mask & GreenChannel) != 0) &&
236       ((image->channel_mask & BlueChannel) != 0) &&
237       ((image->channel_mask & AlphaChannel) != 0))
238     clone=CloneImage(image,image->columns,image->rows,MagickTrue,exception);
239   else
240     {
241       clone=CloneImage(image,0,0,MagickTrue,exception);
242       if (clone != (Image *) NULL)
243         SyncImagePixelCache(clone,exception);
244     }
245   return(clone);
246 }
247
248 /* pad the global workgroup size to the next multiple of 
249    the local workgroup size */
250 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
251   const unsigned int orgGlobalSize,const unsigned int localGroupSize) 
252 {
253   return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
254 }
255
256 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
257   const double sigma,cl_uint *width,ExceptionInfo *exception)
258 {
259   char
260     geometry[MagickPathExtent];
261
262   cl_int
263     status;
264
265   cl_mem
266     imageKernelBuffer;
267
268   float
269     *kernelBufferPtr;
270
271   KernelInfo
272     *kernel;
273
274   ssize_t
275     i;
276
277   (void) FormatLocaleString(geometry,MagickPathExtent,
278     "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
279   kernel=AcquireKernelInfo(geometry,exception);
280   if (kernel == (KernelInfo *) NULL)
281   {
282     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
283       ResourceLimitWarning,"AcquireKernelInfo failed.",".");
284     return((cl_mem) NULL);
285   }
286   kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
287     sizeof(*kernelBufferPtr));
288   for (i = 0; i < (ssize_t) kernel->width; i++)
289     kernelBufferPtr[i] = (float)kernel->values[i];
290   imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
291     CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
292   *width=(cl_uint) kernel->width;
293   kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
294   kernel=DestroyKernelInfo(kernel);
295   if (imageKernelBuffer == (cl_mem) NULL)
296     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
297       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
298   return(imageKernelBuffer);
299 }
300
301 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
302   MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
303   cl_mem histogramBuffer,Image *image,const ChannelType channel,
304   ExceptionInfo *exception)
305 {
306   MagickBooleanType
307     outputReady;
308
309   cl_int
310     clStatus;
311
312   cl_kernel
313     histogramKernel;
314
315   cl_event
316     event;
317
318   cl_uint
319     colorspace,
320     method;
321
322   register ssize_t
323     i;
324
325   size_t
326     global_work_size[2];
327
328   histogramKernel = NULL; 
329
330   outputReady = MagickFalse;
331   colorspace = image->colorspace;
332   method = image->intensity;
333
334   /* get the OpenCL kernel */
335   histogramKernel = AcquireOpenCLKernel(device,"Histogram");
336   if (histogramKernel == NULL)
337   {
338     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
339     goto cleanup;
340   }
341
342   /* set the kernel arguments */
343   i = 0;
344   clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
345   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
346   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
347   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
348   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
349   if (clStatus != CL_SUCCESS)
350   {
351     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
352     goto cleanup;
353   }
354
355   /* launch the kernel */
356   global_work_size[0] = image->columns;
357   global_work_size[1] = image->rows;
358
359   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
360
361   if (clStatus != CL_SUCCESS)
362   {
363     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
364     goto cleanup;
365   }
366   RecordProfileData(device,histogramKernel,event);
367
368   outputReady = MagickTrue;
369
370 cleanup:
371  
372   if (histogramKernel!=NULL)
373     ReleaseOpenCLKernel(histogramKernel);
374
375   return(outputReady);
376 }
377
378 /*
379 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
380 %                                                                             %
381 %                                                                             %
382 %                                                                             %
383 %     A c c e l e r a t e A d d N o i s e I m a g e                           %
384 %                                                                             %
385 %                                                                             %
386 %                                                                             %
387 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
388 */
389
390 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
391   const NoiseType noise_type,ExceptionInfo *exception)
392 {
393   cl_command_queue
394     queue;
395
396   cl_float
397     attenuate;
398
399   cl_int
400     status;
401
402   cl_kernel
403     addNoiseKernel;
404
405   cl_mem
406     filteredImageBuffer,
407     imageBuffer;
408
409   cl_uint
410     bufferLength,
411     inputPixelCount,
412     number_channels,
413     numRandomNumberPerPixel,
414     pixelsPerWorkitem,
415     seed0,
416     seed1,
417     workItemCount;
418
419   const char
420     *option;
421
422   const unsigned long
423     *s;
424
425   MagickBooleanType
426     outputReady;
427
428   MagickCLDevice
429     device;
430
431   Image
432     *filteredImage;
433
434   RandomInfo
435     *randomInfo;
436
437   size_t
438     gsize[1],
439     i,
440     lsize[1],
441     numRandPerChannel;
442
443   filteredImage=NULL;
444   addNoiseKernel=NULL;
445   outputReady=MagickFalse;
446
447   device=RequestOpenCLDevice(clEnv);
448   queue=AcquireOpenCLCommandQueue(device);
449   if (queue == (cl_command_queue) NULL)
450     goto cleanup;
451   filteredImage=cloneImage(image,exception);
452   if (filteredImage == (Image *) NULL)
453     goto cleanup;
454   if (filteredImage->number_channels != image->number_channels)
455     goto cleanup;
456   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
457   if (imageBuffer == (cl_mem) NULL)
458     goto cleanup;
459   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
460   if (filteredImageBuffer == (cl_mem) NULL)
461     goto cleanup;
462
463   /* find out how many random numbers needed by pixel */
464   numRandPerChannel=0;
465   numRandomNumberPerPixel=0;
466   switch (noise_type)
467   {
468     case UniformNoise:
469     case ImpulseNoise:
470     case LaplacianNoise:
471     case RandomNoise:
472     default:
473       numRandPerChannel=1;
474       break;
475     case GaussianNoise:
476     case MultiplicativeGaussianNoise:
477     case PoissonNoise:
478       numRandPerChannel=2;
479       break;
480   };
481   if (GetPixelRedTraits(image) != UndefinedPixelTrait)
482     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
483   if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
484     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
485   if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
486     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
487   if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
488     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
489
490   addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
491   if (addNoiseKernel == (cl_kernel) NULL)
492   {
493     (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
494       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
495     goto cleanup;
496   }
497
498   /* 256 work items per group, 2 groups per CU */
499   workItemCount=device->max_compute_units*2*256;
500   inputPixelCount=(cl_int) (image->columns*image->rows);
501   pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
502   pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
503   lsize[0]=256;
504   gsize[0]=workItemCount;
505
506   randomInfo=AcquireRandomInfo();
507   s=GetRandomInfoSeed(randomInfo);
508   seed0=s[0];
509   (void) GetPseudoRandomValue(randomInfo);
510   seed1=s[0];
511   randomInfo=DestroyRandomInfo(randomInfo);
512
513   number_channels=(cl_uint) image->number_channels;
514   bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
515   attenuate=1.0f;
516   option=GetImageArtifact(image,"attenuate");
517   if (option != (char *) NULL)
518     attenuate=(float)StringToDouble(option,(char **) NULL);
519
520   i=0;
521   status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
522   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
523   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
524   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
525   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
526   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
527   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&attenuate);
528   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
529   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
530   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
531   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
532   if (status != CL_SUCCESS)
533   {
534     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
535       ResourceLimitWarning,"clSetKernelArg failed.",".");
536     goto cleanup;
537   }
538
539   outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
540     lsize,image,filteredImage,MagickFalse,exception);
541
542 cleanup:
543
544   if (addNoiseKernel != (cl_kernel) NULL)
545     ReleaseOpenCLKernel(addNoiseKernel);
546   if (queue != (cl_command_queue) NULL)
547     ReleaseOpenCLCommandQueue(device,queue);
548   if (device != (MagickCLDevice) NULL)
549     ReleaseOpenCLDevice(device);
550   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
551     filteredImage=DestroyImage(filteredImage);
552
553   return(filteredImage);
554 }
555
556 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
557   const NoiseType noise_type,ExceptionInfo *exception)
558 {
559   Image
560     *filteredImage;
561
562   MagickCLEnv
563     clEnv;
564
565   assert(image != NULL);
566   assert(exception != (ExceptionInfo *) NULL);
567
568   if (checkAccelerateCondition(image) == MagickFalse)
569     return((Image *) NULL);
570
571   clEnv=getOpenCLEnvironment(exception);
572   if (clEnv == (MagickCLEnv) NULL)
573     return((Image *) NULL);
574
575   filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,exception);
576   return(filteredImage);
577 }
578
579 /*
580 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
581 %                                                                             %
582 %                                                                             %
583 %                                                                             %
584 %     A c c e l e r a t e B l u r I m a g e                                   %
585 %                                                                             %
586 %                                                                             %
587 %                                                                             %
588 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
589 */
590
591 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
592   const double radius,const double sigma,ExceptionInfo *exception)
593 {
594   cl_command_queue
595     queue;
596
597   cl_int
598     status;
599
600   cl_kernel
601     blurColumnKernel,
602     blurRowKernel;
603
604   cl_mem
605     filteredImageBuffer,
606     imageBuffer,
607     imageKernelBuffer,
608     tempImageBuffer;
609
610   cl_uint
611     imageColumns,
612     imageRows,
613     kernelWidth,
614     number_channels;
615
616   Image
617     *filteredImage;
618
619   MagickBooleanType
620     outputReady;
621
622   MagickCLDevice
623     device;
624
625   MagickSizeType
626     length;
627
628   size_t
629     chunkSize=256,
630     gsize[2],
631     i,
632     lsize[2];
633
634   filteredImage=NULL;
635   tempImageBuffer=NULL;
636   imageKernelBuffer=NULL;
637   blurRowKernel=NULL;
638   blurColumnKernel=NULL;
639   outputReady=MagickFalse;
640
641   device=RequestOpenCLDevice(clEnv);
642   queue=AcquireOpenCLCommandQueue(device);
643   filteredImage=cloneImage(image,exception);
644   if (filteredImage == (Image *) NULL)
645     goto cleanup;
646   if (filteredImage->number_channels != image->number_channels)
647     goto cleanup;
648   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
649   if (imageBuffer == (cl_mem) NULL)
650     goto cleanup;
651   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
652   if (filteredImageBuffer == (cl_mem) NULL)
653     goto cleanup;
654
655   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
656     exception);
657   if (imageKernelBuffer == (cl_mem) NULL)
658     goto cleanup;
659
660   length=image->columns*image->rows;
661   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
662     sizeof(cl_float4),(void *) NULL);
663   if (tempImageBuffer == (cl_mem) NULL)
664     goto cleanup;
665
666   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
667   if (blurRowKernel == (cl_kernel) NULL)
668   {
669     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
670       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
671     goto cleanup;
672   }
673
674   number_channels=(cl_uint) image->number_channels;
675   imageColumns=(cl_uint) image->columns;
676   imageRows=(cl_uint) image->rows;
677
678   i=0;
679   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
680   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
681   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
682   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
683   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
684   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
685   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
686   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
687   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
688   if (status != CL_SUCCESS)
689   {
690     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
691       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
692     goto cleanup;
693   }
694
695   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
696   gsize[1]=image->rows;
697   lsize[0]=chunkSize;
698   lsize[1]=1;
699
700   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
701     lsize,image,filteredImage,MagickFalse,exception);
702   if (outputReady == MagickFalse)
703     goto cleanup;
704
705   blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
706   if (blurColumnKernel == (cl_kernel) NULL)
707   {
708     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
709       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
710     goto cleanup;
711   }
712
713   i=0;
714   status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
715   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
716   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
717   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
718   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
719   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
720   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
721   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
722   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
723   if (status != CL_SUCCESS)
724   {
725     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
726       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
727     goto cleanup;
728   }
729
730   gsize[0]=image->columns;
731   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
732   lsize[0]=1;
733   lsize[1]=chunkSize;
734
735   outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
736     lsize,image,filteredImage,MagickFalse,exception);
737
738 cleanup:
739
740   if (tempImageBuffer != (cl_mem) NULL)
741     ReleaseOpenCLMemObject(tempImageBuffer);
742   if (imageKernelBuffer != (cl_mem) NULL)
743     ReleaseOpenCLMemObject(imageKernelBuffer);
744   if (blurRowKernel != (cl_kernel) NULL)
745     ReleaseOpenCLKernel(blurRowKernel);
746   if (blurColumnKernel != (cl_kernel) NULL)
747     ReleaseOpenCLKernel(blurColumnKernel);
748   if (queue != (cl_command_queue) NULL)
749     ReleaseOpenCLCommandQueue(device,queue);
750   if (device != (MagickCLDevice) NULL)
751     ReleaseOpenCLDevice(device);
752   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
753     filteredImage=DestroyImage(filteredImage);
754
755   return(filteredImage);
756 }
757
758 MagickPrivate Image* AccelerateBlurImage(const Image *image,
759   const double radius,const double sigma,ExceptionInfo *exception)
760 {
761   Image
762     *filteredImage;
763
764   MagickCLEnv
765     clEnv;
766
767   assert(image != NULL);
768   assert(exception != (ExceptionInfo *) NULL);
769
770   if (checkAccelerateCondition(image) == MagickFalse)
771     return((Image *) NULL);
772
773   clEnv=getOpenCLEnvironment(exception);
774   if (clEnv == (MagickCLEnv) NULL)
775     return((Image *) NULL);
776
777   filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
778   return(filteredImage);
779 }
780
781 /*
782 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
783 %                                                                             %
784 %                                                                             %
785 %                                                                             %
786 %     A c c e l e r a t e C o n t r a s t I m a g e                           %
787 %                                                                             %
788 %                                                                             %
789 %                                                                             %
790 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
791 */
792
793 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
794   const MagickBooleanType sharpen,ExceptionInfo *exception)
795 {
796   cl_command_queue
797     queue;
798
799   cl_int
800     status,
801     sign;
802
803   cl_kernel
804     contrastKernel;
805
806   cl_event
807     event;
808
809   cl_mem
810     imageBuffer;
811
812   cl_uint
813     number_channels;
814
815   MagickBooleanType
816     outputReady;
817
818   MagickCLDevice
819     device;
820
821   size_t
822     gsize[2],
823     i;
824
825   contrastKernel=NULL;
826   outputReady=MagickFalse;
827
828   device=RequestOpenCLDevice(clEnv);
829   queue=AcquireOpenCLCommandQueue(device);
830   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
831   if (imageBuffer == (cl_mem) NULL)
832     goto cleanup;
833
834   contrastKernel=AcquireOpenCLKernel(device,"Contrast");
835   if (contrastKernel == (cl_kernel) NULL)
836   {
837     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
838       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
839     goto cleanup;
840   }
841
842   number_channels=(cl_uint) image->number_channels;
843   sign=sharpen != MagickFalse ? 1 : -1;
844
845   i=0;
846   status =SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
847   status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_uint),&number_channels);
848   status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_int),&sign);
849   if (status != CL_SUCCESS)
850   {
851     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
852       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
853     goto cleanup;
854   }
855
856   gsize[0]=image->columns;
857   gsize[1]=image->rows;
858
859   outputReady=EnqueueOpenCLKernel(queue,contrastKernel,2,(const size_t *) NULL,
860     gsize,(const size_t *) NULL,image,(Image *) NULL,MagickFalse,exception);
861
862 cleanup:
863
864   if (contrastKernel != (cl_kernel) NULL)
865     ReleaseOpenCLKernel(contrastKernel);
866   if (queue != (cl_command_queue) NULL)
867     ReleaseOpenCLCommandQueue(device,queue);
868   if (device != (MagickCLDevice) NULL)
869     ReleaseOpenCLDevice(device);
870
871   return(outputReady);
872 }
873
874 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
875   const MagickBooleanType sharpen,ExceptionInfo *exception)
876 {
877   MagickBooleanType
878     status;
879
880   MagickCLEnv
881     clEnv;
882
883   assert(image != NULL);
884   assert(exception != (ExceptionInfo *) NULL);
885
886   if (checkAccelerateCondition(image) == MagickFalse)
887     return(MagickFalse);
888
889   clEnv=getOpenCLEnvironment(exception);
890   if (clEnv == (MagickCLEnv) NULL)
891     return(MagickFalse);
892
893   status=ComputeContrastImage(image,clEnv,sharpen,exception);
894   return(status);
895 }
896
897 /*
898 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
899 %                                                                             %
900 %                                                                             %
901 %                                                                             %
902 %     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
903 %                                                                             %
904 %                                                                             %
905 %                                                                             %
906 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
907 */
908
909 static MagickBooleanType ComputeContrastStretchImage(Image *image,
910   MagickCLEnv clEnv,const double black_point,const double white_point,
911   ExceptionInfo *exception)
912 {
913 #define ContrastStretchImageTag  "ContrastStretch/Image"
914 #define MaxRange(color)  ((cl_float) ScaleQuantumToMap((Quantum) (color)))
915
916   CacheView
917     *image_view;
918
919   cl_command_queue
920     queue;
921
922   cl_int
923     clStatus;
924
925   cl_mem_flags
926     mem_flags;
927
928   cl_mem
929     histogramBuffer,
930     imageBuffer,
931     stretchMapBuffer;
932
933   cl_kernel
934     histogramKernel,
935     stretchKernel;
936
937   cl_event
938     event;
939
940   cl_uint4
941     *histogram;
942
943   double
944     intensity;
945
946   cl_float4
947     black,
948     white;
949
950   MagickBooleanType
951     outputReady,
952     status;
953
954   MagickCLDevice
955     device;
956
957   MagickSizeType
958     length;
959
960   PixelPacket
961     *stretch_map;
962
963   register ssize_t
964     i;
965
966   size_t
967     global_work_size[2];
968
969   void
970     *hostPtr,
971     *inputPixels;
972
973   histogram=NULL;
974   stretch_map=NULL;
975   inputPixels = NULL;
976   imageBuffer = NULL;
977   histogramBuffer = NULL;
978   stretchMapBuffer = NULL;
979   histogramKernel = NULL; 
980   stretchKernel = NULL; 
981   queue = NULL;
982   outputReady = MagickFalse;
983
984
985   assert(image != (Image *) NULL);
986   assert(image->signature == MagickCoreSignature);
987   if (image->debug != MagickFalse)
988     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
989
990   //exception=(&image->exception);
991
992   /*
993    * initialize opencl env
994    */
995   device = RequestOpenCLDevice(clEnv);
996   queue = AcquireOpenCLCommandQueue(device);
997
998   /*
999     Allocate and initialize histogram arrays.
1000   */
1001   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1002
1003   if (histogram == (cl_uint4 *) NULL)
1004     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1005  
1006   /* reset histogram */
1007   (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
1008
1009   /*
1010   if (IsGrayImage(image,exception) != MagickFalse)
1011     (void) SetImageColorspace(image,GRAYColorspace);
1012   */
1013
1014   status=MagickTrue;
1015
1016
1017   /*
1018     Form histogram.
1019   */
1020   /* Create and initialize OpenCL buffers. */
1021   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1022   /* assume this  will get a writable image */
1023   image_view=AcquireAuthenticCacheView(image,exception);
1024   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1025
1026   if (inputPixels == (void *) NULL)
1027   {
1028     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1029     goto cleanup;
1030   }
1031   /* If the host pointer is aligned to the size of CLPixelPacket, 
1032      then use the host buffer directly from the GPU; otherwise, 
1033      create a buffer on the GPU and copy the data over */
1034   if (ALIGNED(inputPixels,CLPixelPacket)) 
1035   {
1036     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1037   }
1038   else 
1039   {
1040     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1041   }
1042   /* create a CL buffer from image pixel buffer */
1043   length = image->columns * image->rows;
1044   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1045   if (clStatus != CL_SUCCESS)
1046   {
1047     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1048     goto cleanup;
1049   }
1050
1051   /* If the host pointer is aligned to the size of cl_uint, 
1052      then use the host buffer directly from the GPU; otherwise, 
1053      create a buffer on the GPU and copy the data over */
1054   if (ALIGNED(histogram,cl_uint4)) 
1055   {
1056     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1057     hostPtr = histogram;
1058   }
1059   else 
1060   {
1061     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1062     hostPtr = histogram;
1063   }
1064   /* create a CL buffer for histogram  */
1065   length = (MaxMap+1); 
1066   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1067   if (clStatus != CL_SUCCESS)
1068   {
1069     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1070     goto cleanup;
1071   }
1072
1073   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1074   if (status == MagickFalse)
1075     goto cleanup;
1076
1077   /* read from the kenel output */
1078   if (ALIGNED(histogram,cl_uint4)) 
1079   {
1080     length = (MaxMap+1); 
1081     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1082   }
1083   else 
1084   {
1085     length = (MaxMap+1); 
1086     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1087   }
1088   if (clStatus != CL_SUCCESS)
1089   {
1090     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1091     goto cleanup;
1092   }
1093
1094   /* unmap, don't block gpu to use this buffer again.  */
1095   if (ALIGNED(histogram,cl_uint4))
1096   {
1097     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1098     if (clStatus != CL_SUCCESS)
1099     {
1100       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1101       goto cleanup;
1102     }
1103   }
1104
1105   /* recreate input buffer later, in case image updated */
1106 #ifdef RECREATEBUFFER 
1107   if (imageBuffer!=NULL)                      
1108     clEnv->library->clReleaseMemObject(imageBuffer);
1109 #endif
1110
1111   /* CPU stuff */
1112   /*
1113      Find the histogram boundaries by locating the black/white levels.
1114   */
1115   black.x=0.0;
1116   white.x=MaxRange(QuantumRange);
1117   if ((image->channel_mask & RedChannel) != 0)
1118   {
1119     intensity=0.0;
1120     for (i=0; i <= (ssize_t) MaxMap; i++)
1121     {
1122       intensity+=histogram[i].s[2];
1123       if (intensity > black_point)
1124         break;
1125     }
1126     black.x=(cl_float) i;
1127     intensity=0.0;
1128     for (i=(ssize_t) MaxMap; i != 0; i--)
1129     {
1130       intensity+=histogram[i].s[2];
1131       if (intensity > ((double) image->columns*image->rows-white_point))
1132         break;
1133     }
1134     white.x=(cl_float) i;
1135   }
1136   black.y=0.0;
1137   white.y=MaxRange(QuantumRange);
1138   if ((image->channel_mask & GreenChannel) != 0)
1139   {
1140     intensity=0.0;
1141     for (i=0; i <= (ssize_t) MaxMap; i++)
1142     {
1143       intensity+=histogram[i].s[2];
1144       if (intensity > black_point)
1145         break;
1146     }
1147     black.y=(cl_float) i;
1148     intensity=0.0;
1149     for (i=(ssize_t) MaxMap; i != 0; i--)
1150     {
1151       intensity+=histogram[i].s[2];
1152       if (intensity > ((double) image->columns*image->rows-white_point))
1153         break;
1154     }
1155     white.y=(cl_float) i;
1156   }
1157   black.z=0.0;
1158   white.z=MaxRange(QuantumRange);
1159   if ((image->channel_mask & BlueChannel) != 0)
1160   {
1161     intensity=0.0;
1162     for (i=0; i <= (ssize_t) MaxMap; i++)
1163     {
1164       intensity+=histogram[i].s[2];
1165       if (intensity > black_point)
1166         break;
1167     }
1168     black.z=(cl_float) i;
1169     intensity=0.0;
1170     for (i=(ssize_t) MaxMap; i != 0; i--)
1171     {
1172       intensity+=histogram[i].s[2];
1173       if (intensity > ((double) image->columns*image->rows-white_point))
1174         break;
1175     }
1176     white.z=(cl_float) i;
1177   }
1178   black.w=0.0;
1179   white.w=MaxRange(QuantumRange);
1180   if ((image->channel_mask & AlphaChannel) != 0)
1181   {
1182     intensity=0.0;
1183     for (i=0; i <= (ssize_t) MaxMap; i++)
1184     {
1185       intensity+=histogram[i].s[2];
1186       if (intensity > black_point)
1187         break;
1188     }
1189     black.w=(cl_float) i;
1190     intensity=0.0;
1191     for (i=(ssize_t) MaxMap; i != 0; i--)
1192     {
1193       intensity+=histogram[i].s[2];
1194       if (intensity > ((double) image->columns*image->rows-white_point))
1195         break;
1196     }
1197     white.w=(cl_float) i;
1198   }
1199
1200   stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1201     sizeof(*stretch_map));
1202
1203   if (stretch_map == (PixelPacket *) NULL)
1204     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1205       image->filename);
1206  
1207   /*
1208     Stretch the histogram to create the stretched image mapping.
1209   */
1210   (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1211   for (i=0; i <= (ssize_t) MaxMap; i++)
1212   {
1213     if ((image->channel_mask & RedChannel) != 0)
1214     {
1215       if (i < (ssize_t) black.x)
1216         stretch_map[i].red=(Quantum) 0;
1217       else
1218         if (i > (ssize_t) white.x)
1219           stretch_map[i].red=QuantumRange;
1220         else
1221           if (black.x != white.x)
1222             stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1223                   (i-black.x)/(white.x-black.x)));
1224     }
1225     if ((image->channel_mask & GreenChannel) != 0)
1226     {
1227       if (i < (ssize_t) black.y)
1228         stretch_map[i].green=0;
1229       else
1230         if (i > (ssize_t) white.y)
1231           stretch_map[i].green=QuantumRange;
1232         else
1233           if (black.y != white.y)
1234             stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1235                   (i-black.y)/(white.y-black.y)));
1236     }
1237     if ((image->channel_mask & BlueChannel) != 0)
1238     {
1239       if (i < (ssize_t) black.z)
1240         stretch_map[i].blue=0;
1241       else
1242         if (i > (ssize_t) white.z)
1243           stretch_map[i].blue= QuantumRange;
1244         else
1245           if (black.z != white.z)
1246             stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1247                   (i-black.z)/(white.z-black.z)));
1248     }
1249     if ((image->channel_mask & AlphaChannel) != 0)
1250     {
1251       if (i < (ssize_t) black.w)
1252         stretch_map[i].alpha=0;
1253       else
1254         if (i > (ssize_t) white.w)
1255           stretch_map[i].alpha=QuantumRange;
1256         else
1257           if (black.w != white.w)
1258             stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1259                   (i-black.w)/(white.w-black.w)));
1260     }
1261   }
1262
1263   /*
1264     Stretch the image.
1265   */
1266   if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1267       (image->colorspace == CMYKColorspace)))
1268     image->storage_class=DirectClass;
1269   if (image->storage_class == PseudoClass)
1270   {
1271     /*
1272        Stretch colormap.
1273        */
1274     for (i=0; i < (ssize_t) image->colors; i++)
1275     {
1276       if ((image->channel_mask & RedChannel) != 0)
1277       {
1278         if (black.x != white.x)
1279           image->colormap[i].red=stretch_map[
1280             ScaleQuantumToMap(image->colormap[i].red)].red;
1281       }
1282       if ((image->channel_mask & GreenChannel) != 0)
1283       {
1284         if (black.y != white.y)
1285           image->colormap[i].green=stretch_map[
1286             ScaleQuantumToMap(image->colormap[i].green)].green;
1287       }
1288       if ((image->channel_mask & BlueChannel) != 0)
1289       {
1290         if (black.z != white.z)
1291           image->colormap[i].blue=stretch_map[
1292             ScaleQuantumToMap(image->colormap[i].blue)].blue;
1293       }
1294       if ((image->channel_mask & AlphaChannel) != 0)
1295       {
1296         if (black.w != white.w)
1297           image->colormap[i].alpha=stretch_map[
1298             ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1299       }
1300     }
1301   }
1302
1303   /*
1304     Stretch image.
1305   */
1306
1307
1308   /* GPU can work on this again, image and equalize map as input
1309     image:        uchar4 (CLPixelPacket)
1310     stretch_map:  uchar4 (PixelPacket)
1311     black, white: float4 (FloatPixelPacket) */
1312
1313 #ifdef RECREATEBUFFER 
1314   /* If the host pointer is aligned to the size of CLPixelPacket, 
1315      then use the host buffer directly from the GPU; otherwise, 
1316      create a buffer on the GPU and copy the data over */
1317   if (ALIGNED(inputPixels,CLPixelPacket)) 
1318   {
1319     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1320   }
1321   else 
1322   {
1323     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1324   }
1325   /* create a CL buffer from image pixel buffer */
1326   length = image->columns * image->rows;
1327   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1328   if (clStatus != CL_SUCCESS)
1329   {
1330     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1331     goto cleanup;
1332   }
1333 #endif
1334
1335   /* Create and initialize OpenCL buffers. */
1336   if (ALIGNED(stretch_map, PixelPacket)) 
1337   {
1338     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1339     hostPtr = stretch_map;
1340   }
1341   else 
1342   {
1343     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1344     hostPtr = stretch_map;
1345   }
1346   /* create a CL buffer for stretch_map  */
1347   length = (MaxMap+1); 
1348   stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1349   if (clStatus != CL_SUCCESS)
1350   {
1351     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1352     goto cleanup;
1353   }
1354
1355   /* get the OpenCL kernel */
1356   stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1357   if (stretchKernel == NULL)
1358   {
1359     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1360     goto cleanup;
1361   }
1362
1363   /* set the kernel arguments */
1364   i = 0;
1365   clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1366   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1367   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1368   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&white);
1369   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&black);
1370   if (clStatus != CL_SUCCESS)
1371   {
1372     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1373     goto cleanup;
1374   }
1375
1376   /* launch the kernel */
1377   global_work_size[0] = image->columns;
1378   global_work_size[1] = image->rows;
1379
1380   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1381
1382   if (clStatus != CL_SUCCESS)
1383   {
1384     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1385     goto cleanup;
1386   }
1387   RecordProfileData(device,stretchKernel,event);
1388
1389   /* read the data back */
1390   if (ALIGNED(inputPixels,CLPixelPacket)) 
1391   {
1392     length = image->columns * image->rows;
1393     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1394   }
1395   else 
1396   {
1397     length = image->columns * image->rows;
1398     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1399   }
1400   if (clStatus != CL_SUCCESS)
1401   {
1402     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1403     goto cleanup;
1404   }
1405
1406   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1407
1408 cleanup:
1409
1410   image_view=DestroyCacheView(image_view);
1411
1412   if (imageBuffer!=NULL)                      
1413     clEnv->library->clReleaseMemObject(imageBuffer);
1414
1415   if (stretchMapBuffer!=NULL)
1416     clEnv->library->clReleaseMemObject(stretchMapBuffer);
1417   if (stretch_map!=NULL)
1418     stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1419   if (histogramBuffer!=NULL)
1420     clEnv->library->clReleaseMemObject(histogramBuffer);
1421   if (histogram!=NULL)
1422     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1423   if (histogramKernel!=NULL)
1424     ReleaseOpenCLKernel(histogramKernel);
1425   if (stretchKernel!=NULL)
1426     ReleaseOpenCLKernel(stretchKernel);
1427   if (queue != NULL)
1428     ReleaseOpenCLCommandQueue(device,queue);
1429   if (device != NULL)
1430     ReleaseOpenCLDevice(device);
1431
1432   return(outputReady);
1433 }
1434
1435 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1436   Image *image,const double black_point,const double white_point,
1437   ExceptionInfo *exception)
1438 {
1439   MagickBooleanType
1440     status;
1441
1442   MagickCLEnv
1443     clEnv;
1444
1445   assert(image != NULL);
1446   assert(exception != (ExceptionInfo *) NULL);
1447
1448   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1449       (checkHistogramCondition(image,image->intensity) == MagickFalse))
1450     return(MagickFalse);
1451
1452   clEnv=getOpenCLEnvironment(exception);
1453   if (clEnv == (MagickCLEnv) NULL)
1454     return(MagickFalse);
1455
1456   status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1457     exception);
1458   return(status);
1459 }
1460
1461 /*
1462 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1463 %                                                                             %
1464 %                                                                             %
1465 %                                                                             %
1466 %     A c c e l e r a t e C o n v o l v e I m a g e                           %
1467 %                                                                             %
1468 %                                                                             %
1469 %                                                                             %
1470 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1471 */
1472
1473 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1474   const KernelInfo *kernel,ExceptionInfo *exception)
1475 {
1476   CacheView
1477     *filteredImage_view,
1478     *image_view;
1479
1480   cl_command_queue
1481     queue;
1482
1483   cl_event
1484     event;
1485
1486   cl_kernel
1487     clkernel;
1488
1489   cl_int
1490     clStatus;
1491
1492   cl_mem
1493     convolutionKernel,
1494     filteredImageBuffer,
1495     imageBuffer;
1496
1497   cl_mem_flags
1498     mem_flags;
1499
1500   const void
1501     *inputPixels;
1502
1503   float
1504     *kernelBufferPtr;
1505
1506   Image
1507     *filteredImage;
1508
1509   MagickBooleanType
1510     outputReady;
1511
1512   MagickCLDevice
1513     device;
1514
1515   MagickSizeType
1516     length;
1517
1518   size_t
1519     global_work_size[3],
1520     localGroupSize[3],
1521     localMemoryRequirement;
1522
1523   unsigned
1524     kernelSize;
1525
1526   unsigned int
1527     filterHeight,
1528     filterWidth,
1529     i,
1530     imageHeight,
1531     imageWidth,
1532     matte;
1533
1534   void
1535     *filteredPixels,
1536     *hostPtr;
1537
1538   /* intialize all CL objects to NULL */
1539   imageBuffer = NULL;
1540   filteredImageBuffer = NULL;
1541   convolutionKernel = NULL;
1542   clkernel = NULL;
1543   queue = NULL;
1544
1545   filteredImage = NULL;
1546   filteredImage_view = NULL;
1547   outputReady = MagickFalse;
1548
1549   device = RequestOpenCLDevice(clEnv);
1550
1551   image_view=AcquireAuthenticCacheView(image,exception);
1552   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1553   if (inputPixels == (const void *) NULL)
1554   {
1555     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1556     goto cleanup;
1557   }
1558
1559   /* Create and initialize OpenCL buffers. */
1560
1561   /* If the host pointer is aligned to the size of CLPixelPacket, 
1562      then use the host buffer directly from the GPU; otherwise, 
1563      create a buffer on the GPU and copy the data over */
1564   if (ALIGNED(inputPixels,CLPixelPacket)) 
1565   {
1566     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1567   }
1568   else 
1569   {
1570     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1571   }
1572   /* create a CL buffer from image pixel buffer */
1573   length = image->columns * image->rows;
1574   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1575   if (clStatus != CL_SUCCESS)
1576   {
1577     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1578     goto cleanup;
1579   }
1580
1581   filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1582   assert(filteredImage != NULL);
1583   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1584   {
1585     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1586     goto cleanup;
1587   }
1588   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1589   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1590   if (filteredPixels == (void *) NULL)
1591   {
1592     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1593     goto cleanup;
1594   }
1595
1596   if (ALIGNED(filteredPixels,CLPixelPacket)) 
1597   {
1598     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1599     hostPtr = filteredPixels;
1600   }
1601   else 
1602   {
1603     mem_flags = CL_MEM_WRITE_ONLY;
1604     hostPtr = NULL;
1605   }
1606   /* create a CL buffer from image pixel buffer */
1607   length = image->columns * image->rows;
1608   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1609   if (clStatus != CL_SUCCESS)
1610   {
1611     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1612     goto cleanup;
1613   }
1614
1615   kernelSize = (unsigned int) (kernel->width * kernel->height);
1616   convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1617   if (clStatus != CL_SUCCESS)
1618   {
1619     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1620     goto cleanup;
1621   }
1622
1623   queue = AcquireOpenCLCommandQueue(device);
1624
1625   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1626           , 0, NULL, NULL, &clStatus);
1627   if (clStatus != CL_SUCCESS)
1628   {
1629     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1630     goto cleanup;
1631   }
1632   for (i = 0; i < kernelSize; i++)
1633   {
1634     kernelBufferPtr[i] = (float) kernel->values[i];
1635   }
1636   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1637   if (clStatus != CL_SUCCESS)
1638   {
1639     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1640     goto cleanup;
1641   }
1642
1643   /* Compute the local memory requirement for a 16x16 workgroup.
1644      If it's larger than 16k, reduce the workgroup size to 8x8 */
1645   localGroupSize[0] = 16;
1646   localGroupSize[1] = 16;
1647   localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1648     + kernel->width*kernel->height*sizeof(float);
1649
1650   if (localMemoryRequirement > device->local_memory_size)
1651   {
1652     localGroupSize[0] = 8;
1653     localGroupSize[1] = 8;
1654     localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1655       + kernel->width*kernel->height*sizeof(float);
1656   }
1657   if (localMemoryRequirement <= device->local_memory_size)
1658   {
1659     /* get the OpenCL kernel */
1660     clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1661     if (clkernel == NULL)
1662     {
1663       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1664       goto cleanup;
1665     }
1666
1667     /* set the kernel arguments */
1668     i = 0;
1669     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1670     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1671     imageWidth = (unsigned int) image->columns;
1672     imageHeight = (unsigned int) image->rows;
1673     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1674     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1675     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1676     filterWidth = (unsigned int) kernel->width;
1677     filterHeight = (unsigned int) kernel->height;
1678     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1679     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1680     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1681     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1682     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1683     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1684     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1685     if (clStatus != CL_SUCCESS)
1686     {
1687       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1688       goto cleanup;
1689     }
1690
1691     /* pad the global size to a multiple of the local work size dimension */
1692     global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1693     global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1694
1695     /* launch the kernel */
1696     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1697     if (clStatus != CL_SUCCESS)
1698     {
1699       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1700       goto cleanup;
1701     }
1702     RecordProfileData(device,clkernel,event);
1703   }
1704   else
1705   {
1706     /* get the OpenCL kernel */
1707     clkernel = AcquireOpenCLKernel(device,"Convolve");
1708     if (clkernel == NULL)
1709     {
1710       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1711       goto cleanup;
1712     }
1713
1714     /* set the kernel arguments */
1715     i = 0;
1716     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1717     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1718     imageWidth = (unsigned int) image->columns;
1719     imageHeight = (unsigned int) image->rows;
1720     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1721     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1722     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1723     filterWidth = (unsigned int) kernel->width;
1724     filterHeight = (unsigned int) kernel->height;
1725     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1726     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1727     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1728     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1729     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1730     if (clStatus != CL_SUCCESS)
1731     {
1732       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1733       goto cleanup;
1734     }
1735
1736     localGroupSize[0] = 8;
1737     localGroupSize[1] = 8;
1738     global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1739     global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1740         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1741     
1742     if (clStatus != CL_SUCCESS)
1743     {
1744       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1745       goto cleanup;
1746     }
1747   }
1748   RecordProfileData(device,clkernel,event);
1749
1750   if (ALIGNED(filteredPixels,CLPixelPacket)) 
1751   {
1752     length = image->columns * image->rows;
1753     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1754   }
1755   else 
1756   {
1757     length = image->columns * image->rows;
1758     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1759   }
1760   if (clStatus != CL_SUCCESS)
1761   {
1762     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1763     goto cleanup;
1764   }
1765
1766   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1767
1768 cleanup:
1769
1770   image_view=DestroyCacheView(image_view);
1771   if (filteredImage_view != NULL)
1772     filteredImage_view=DestroyCacheView(filteredImage_view);
1773   if (imageBuffer != NULL)
1774     clEnv->library->clReleaseMemObject(imageBuffer);
1775   if (filteredImageBuffer != NULL)
1776     clEnv->library->clReleaseMemObject(filteredImageBuffer);
1777   if (convolutionKernel != NULL)
1778     clEnv->library->clReleaseMemObject(convolutionKernel);
1779   if (clkernel != NULL)
1780     ReleaseOpenCLKernel(clkernel);
1781   if (queue != NULL)
1782     ReleaseOpenCLCommandQueue(device,queue);
1783   if (device != NULL)
1784     ReleaseOpenCLDevice(device);
1785   if (outputReady == MagickFalse)
1786   {
1787     if (filteredImage != NULL)
1788     {
1789       DestroyImage(filteredImage);
1790       filteredImage = NULL;
1791     }
1792   }
1793
1794   return(filteredImage);
1795 }
1796
1797 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1798   const KernelInfo *kernel,ExceptionInfo *exception)
1799 {
1800   /* Temporary disabled due to access violation
1801
1802   Image
1803     *filteredImage;
1804
1805   assert(image != NULL);
1806   assert(kernel != (KernelInfo *) NULL);
1807   assert(exception != (ExceptionInfo *) NULL);
1808   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1809       (checkOpenCLEnvironment(exception) == MagickFalse))
1810     return((Image *) NULL);
1811
1812   filteredImage=ComputeConvolveImage(image,kernel,exception);
1813   return(filteredImage);
1814   */
1815   magick_unreferenced(image);
1816   magick_unreferenced(kernel);
1817   magick_unreferenced(exception);
1818   return((Image *)NULL);
1819 }
1820
1821 /*
1822 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1823 %                                                                             %
1824 %                                                                             %
1825 %                                                                             %
1826 %     A c c e l e r a t e D e s p e c k l e I m a g e                         %
1827 %                                                                             %
1828 %                                                                             %
1829 %                                                                             %
1830 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1831 */
1832
1833 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1834   ExceptionInfo*exception)
1835 {
1836   static const int 
1837     X[4] = {0, 1, 1,-1},
1838     Y[4] = {1, 0, 1, 1};
1839
1840   CacheView
1841     *filteredImage_view,
1842     *image_view;
1843
1844   cl_command_queue
1845     queue;
1846
1847   cl_int
1848     clStatus;
1849
1850   cl_kernel
1851     hullPass1,
1852     hullPass2;
1853
1854   cl_event
1855     event;
1856
1857   cl_mem_flags
1858     mem_flags;
1859
1860   cl_mem
1861     filteredImageBuffer,
1862     imageBuffer,
1863     tempImageBuffer[2];
1864
1865   const void
1866     *inputPixels;
1867
1868   Image
1869     *filteredImage;
1870
1871   int
1872     k,
1873     matte;
1874
1875   MagickBooleanType
1876     outputReady;
1877
1878   MagickCLDevice
1879     device;
1880
1881   MagickSizeType
1882     length;
1883
1884   size_t
1885     global_work_size[2];
1886
1887   unsigned int
1888     imageHeight,
1889     imageWidth;
1890
1891   void
1892     *filteredPixels,
1893     *hostPtr;
1894
1895   outputReady = MagickFalse;
1896   inputPixels = NULL;
1897   filteredImage = NULL;
1898   filteredImage_view = NULL;
1899   filteredPixels = NULL;
1900   imageBuffer = NULL;
1901   filteredImageBuffer = NULL;
1902   hullPass1 = NULL;
1903   hullPass2 = NULL;
1904   queue = NULL;
1905   tempImageBuffer[0] = tempImageBuffer[1] = NULL;
1906
1907   device = RequestOpenCLDevice(clEnv);
1908   queue = AcquireOpenCLCommandQueue(device);
1909  
1910   image_view=AcquireAuthenticCacheView(image,exception);
1911   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1912   if (inputPixels == (void *) NULL)
1913   {
1914     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1915     goto cleanup;
1916   }
1917
1918   if (ALIGNED(inputPixels,CLPixelPacket)) 
1919   {
1920     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1921   }
1922   else 
1923   {
1924     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1925   }
1926   /* create a CL buffer from image pixel buffer */
1927   length = image->columns * image->rows;
1928   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1929   if (clStatus != CL_SUCCESS)
1930   {
1931     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1932     goto cleanup;
1933   }
1934
1935   mem_flags = CL_MEM_READ_WRITE;
1936   length = image->columns * image->rows;
1937   for (k = 0; k < 2; k++)
1938   {
1939     tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
1940     if (clStatus != CL_SUCCESS)
1941     {
1942       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1943       goto cleanup;
1944     }
1945   }
1946
1947   filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1948   assert(filteredImage != NULL);
1949   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1950   {
1951     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1952     goto cleanup;
1953   }
1954   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1955   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1956   if (filteredPixels == (void *) NULL)
1957   {
1958     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1959     goto cleanup;
1960   }
1961
1962   if (ALIGNED(filteredPixels,CLPixelPacket)) 
1963   {
1964     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1965     hostPtr = filteredPixels;
1966   }
1967   else 
1968   {
1969     mem_flags = CL_MEM_WRITE_ONLY;
1970     hostPtr = NULL;
1971   }
1972   /* create a CL buffer from image pixel buffer */
1973   length = image->columns * image->rows;
1974   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1975   if (clStatus != CL_SUCCESS)
1976   {
1977     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1978     goto cleanup;
1979   }
1980
1981   hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
1982   hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
1983
1984   clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
1985   clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
1986   imageWidth = (unsigned int) image->columns;
1987   clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
1988   imageHeight = (unsigned int) image->rows;
1989   clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
1990   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1991   clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
1992   if (clStatus != CL_SUCCESS)
1993   {
1994     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1995     goto cleanup;
1996   }
1997
1998   clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
1999   clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2000   imageWidth = (unsigned int) image->columns;
2001   clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2002   imageHeight = (unsigned int) image->rows;
2003   clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2004   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2005   clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2006   if (clStatus != CL_SUCCESS)
2007   {
2008     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2009     goto cleanup;
2010   }
2011
2012
2013   global_work_size[0] = image->columns;
2014   global_work_size[1] = image->rows;
2015
2016   
2017   for (k = 0; k < 4; k++)
2018   {
2019     cl_int2 offset;
2020     int polarity;
2021
2022     
2023     offset.s[0] = X[k];
2024     offset.s[1] = Y[k];
2025     polarity = 1;
2026     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2027     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2028     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2029     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2030     if (clStatus != CL_SUCCESS)
2031     {
2032       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2033       goto cleanup;
2034     }
2035     /* launch the kernel */
2036         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2037     if (clStatus != CL_SUCCESS)
2038     {
2039       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2040       goto cleanup;
2041     }
2042     RecordProfileData(device,hullPass1,event);
2043
2044     /* launch the kernel */
2045         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2046     if (clStatus != CL_SUCCESS)
2047     {
2048       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2049       goto cleanup;
2050     }
2051     RecordProfileData(device,hullPass2,event);
2052
2053     if (k == 0)
2054       clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2055     offset.s[0] = -X[k];
2056     offset.s[1] = -Y[k];
2057     polarity = 1;
2058     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2059     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2060     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2061     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2062     if (clStatus != CL_SUCCESS)
2063     {
2064       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2065       goto cleanup;
2066     }
2067     /* launch the kernel */
2068         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2069     if (clStatus != CL_SUCCESS)
2070     {
2071       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2072       goto cleanup;
2073     }
2074     RecordProfileData(device,hullPass1,event);
2075
2076     /* launch the kernel */
2077         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2078     if (clStatus != CL_SUCCESS)
2079     {
2080       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2081       goto cleanup;
2082     }
2083     RecordProfileData(device,hullPass2,event);
2084
2085     offset.s[0] = -X[k];
2086     offset.s[1] = -Y[k];
2087     polarity = -1;
2088     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2089     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2090     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2091     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2092     if (clStatus != CL_SUCCESS)
2093     {
2094       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2095       goto cleanup;
2096     }
2097     /* launch the kernel */
2098         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2099     if (clStatus != CL_SUCCESS)
2100     {
2101       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2102       goto cleanup;
2103     }
2104     RecordProfileData(device,hullPass1,event);
2105
2106     /* launch the kernel */
2107         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2108     if (clStatus != CL_SUCCESS)
2109     {
2110       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2111       goto cleanup;
2112     }
2113     RecordProfileData(device,hullPass2,event);
2114
2115     offset.s[0] = X[k];
2116     offset.s[1] = Y[k];
2117     polarity = -1;
2118     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2119     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2120     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2121     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2122
2123     if (k == 3)
2124       clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2125
2126     if (clStatus != CL_SUCCESS)
2127     {
2128       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2129       goto cleanup;
2130     }
2131     /* launch the kernel */
2132         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2133     if (clStatus != CL_SUCCESS)
2134     {
2135       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2136       goto cleanup;
2137     }
2138     RecordProfileData(device,hullPass1,event);
2139
2140     /* launch the kernel */
2141         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2142     if (clStatus != CL_SUCCESS)
2143     {
2144       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2145       goto cleanup;
2146     }
2147     RecordProfileData(device,hullPass2,event);
2148   }
2149
2150   if (ALIGNED(filteredPixels,CLPixelPacket)) 
2151   {
2152     length = image->columns * image->rows;
2153     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2154   }
2155   else 
2156   {
2157     length = image->columns * image->rows;
2158     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2159   }
2160   if (clStatus != CL_SUCCESS)
2161   {
2162     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2163     goto cleanup;
2164   }
2165
2166   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2167
2168 cleanup:
2169
2170   image_view=DestroyCacheView(image_view);
2171   if (filteredImage_view != NULL)
2172     filteredImage_view=DestroyCacheView(filteredImage_view);
2173
2174   if (queue != NULL)
2175     ReleaseOpenCLCommandQueue(device,queue);
2176   if (device != NULL)
2177     ReleaseOpenCLDevice(device);
2178   if (imageBuffer!=NULL)
2179     clEnv->library->clReleaseMemObject(imageBuffer);
2180   for (k = 0; k < 2; k++)
2181   {
2182     if (tempImageBuffer[k]!=NULL)
2183       clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2184   }
2185   if (filteredImageBuffer!=NULL)
2186     clEnv->library->clReleaseMemObject(filteredImageBuffer);
2187   if (hullPass1!=NULL)
2188     ReleaseOpenCLKernel(hullPass1);
2189   if (hullPass2!=NULL)
2190     ReleaseOpenCLKernel(hullPass2);
2191   if (outputReady == MagickFalse && filteredImage != NULL)
2192     filteredImage=DestroyImage(filteredImage);
2193
2194   return(filteredImage);
2195 }
2196
2197 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2198   ExceptionInfo* exception)
2199 {
2200   Image
2201     *filteredImage;
2202
2203   MagickCLEnv
2204     clEnv;
2205
2206   assert(image != NULL);
2207   assert(exception != (ExceptionInfo *) NULL);
2208
2209   if (checkAccelerateConditionRGBA(image) == MagickFalse)
2210     return((Image *) NULL);
2211
2212   clEnv=getOpenCLEnvironment(exception);
2213   if (clEnv == (MagickCLEnv) NULL)
2214     return((Image *) NULL);
2215
2216   filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2217   return(filteredImage);
2218 }
2219
2220 /*
2221 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2222 %                                                                             %
2223 %                                                                             %
2224 %                                                                             %
2225 %     A c c e l e r a t e E q u a l i z e I m a g e                           %
2226 %                                                                             %
2227 %                                                                             %
2228 %                                                                             %
2229 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2230 */
2231
2232 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2233   ExceptionInfo *exception)
2234 {
2235 #define EqualizeImageTag  "Equalize/Image"
2236
2237   CacheView
2238     *image_view;
2239
2240   cl_command_queue
2241     queue;
2242
2243   cl_int
2244     clStatus;
2245
2246   cl_mem_flags
2247     mem_flags;
2248
2249   cl_mem
2250     equalizeMapBuffer,
2251     histogramBuffer,
2252     imageBuffer;
2253
2254   cl_kernel
2255     equalizeKernel,
2256     histogramKernel;
2257
2258   cl_event
2259     event;
2260
2261   cl_uint4
2262     *histogram;
2263
2264   cl_float4
2265     white,
2266     black,
2267     intensity,
2268     *map;
2269
2270   MagickBooleanType
2271     outputReady,
2272     status;
2273
2274   MagickCLDevice
2275     device;
2276
2277   MagickSizeType
2278     length;
2279
2280   PixelPacket
2281     *equalize_map;
2282
2283   register ssize_t
2284     i;
2285
2286   size_t
2287     global_work_size[2];
2288
2289   void
2290     *hostPtr,
2291     *inputPixels;
2292
2293   map=NULL;
2294   histogram=NULL;
2295   equalize_map=NULL;
2296   inputPixels = NULL;
2297   imageBuffer = NULL;
2298   histogramBuffer = NULL;
2299   equalizeMapBuffer = NULL;
2300   histogramKernel = NULL; 
2301   equalizeKernel = NULL; 
2302   queue = NULL;
2303   outputReady = MagickFalse;
2304
2305   assert(image != (Image *) NULL);
2306   assert(image->signature == MagickCoreSignature);
2307   if (image->debug != MagickFalse)
2308     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2309
2310   /*
2311    * initialize opencl env
2312    */
2313   device = RequestOpenCLDevice(clEnv);
2314   queue = AcquireOpenCLCommandQueue(device);
2315
2316   /*
2317     Allocate and initialize histogram arrays.
2318   */
2319   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2320   if (histogram == (cl_uint4 *) NULL)
2321       ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2322
2323   /* reset histogram */
2324   (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
2325
2326   /* Create and initialize OpenCL buffers. */
2327   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2328   /* assume this  will get a writable image */
2329   image_view=AcquireAuthenticCacheView(image,exception);
2330   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2331
2332   if (inputPixels == (void *) NULL)
2333   {
2334     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2335     goto cleanup;
2336   }
2337   /* If the host pointer is aligned to the size of CLPixelPacket, 
2338      then use the host buffer directly from the GPU; otherwise, 
2339      create a buffer on the GPU and copy the data over */
2340   if (ALIGNED(inputPixels,CLPixelPacket)) 
2341   {
2342     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2343   }
2344   else 
2345   {
2346     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2347   }
2348   /* create a CL buffer from image pixel buffer */
2349   length = image->columns * image->rows;
2350   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2351   if (clStatus != CL_SUCCESS)
2352   {
2353     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2354     goto cleanup;
2355   }
2356
2357   /* If the host pointer is aligned to the size of cl_uint, 
2358      then use the host buffer directly from the GPU; otherwise, 
2359      create a buffer on the GPU and copy the data over */
2360   if (ALIGNED(histogram,cl_uint4)) 
2361   {
2362     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2363     hostPtr = histogram;
2364   }
2365   else 
2366   {
2367     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2368     hostPtr = histogram;
2369   }
2370   /* create a CL buffer for histogram  */
2371   length = (MaxMap+1); 
2372   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2373   if (clStatus != CL_SUCCESS)
2374   {
2375     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2376     goto cleanup;
2377   }
2378
2379   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2380   if (status == MagickFalse)
2381     goto cleanup;
2382
2383   /* read from the kenel output */
2384   if (ALIGNED(histogram,cl_uint4)) 
2385   {
2386     length = (MaxMap+1); 
2387     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2388   }
2389   else 
2390   {
2391     length = (MaxMap+1); 
2392     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2393   }
2394   if (clStatus != CL_SUCCESS)
2395   {
2396     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2397     goto cleanup;
2398   }
2399
2400   /* unmap, don't block gpu to use this buffer again.  */
2401   if (ALIGNED(histogram,cl_uint4))
2402   {
2403     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2404     if (clStatus != CL_SUCCESS)
2405     {
2406       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2407       goto cleanup;
2408     }
2409   }
2410
2411   /* recreate input buffer later, in case image updated */
2412 #ifdef RECREATEBUFFER 
2413   if (imageBuffer!=NULL)                      
2414     clEnv->library->clReleaseMemObject(imageBuffer);
2415 #endif
2416  
2417   /* CPU stuff */
2418   equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2419   if (equalize_map == (PixelPacket *) NULL)
2420     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2421
2422   map=(cl_float4 *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2423   if (map == (cl_float4 *) NULL)
2424     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2425
2426   /*
2427     Integrate the histogram to get the equalization map.
2428   */
2429   (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
2430   for (i=0; i <= (ssize_t) MaxMap; i++)
2431   {
2432     if ((image->channel_mask & SyncChannels) != 0)
2433     {
2434       intensity.x+=histogram[i].s[2];
2435       map[i]=intensity;
2436       continue;
2437     }
2438     if ((image->channel_mask & RedChannel) != 0)
2439       intensity.x+=histogram[i].s[2];
2440     if ((image->channel_mask & GreenChannel) != 0)
2441       intensity.y+=histogram[i].s[1];
2442     if ((image->channel_mask & BlueChannel) != 0)
2443       intensity.z+=histogram[i].s[0];
2444     if ((image->channel_mask & AlphaChannel) != 0)
2445       intensity.w+=histogram[i].s[3];
2446     map[i]=intensity;
2447   }
2448   black=map[0];
2449   white=map[(int) MaxMap];
2450   (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2451   for (i=0; i <= (ssize_t) MaxMap; i++)
2452   {
2453     if ((image->channel_mask & SyncChannels) != 0)
2454     {
2455       if (white.x != black.x)
2456         equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2457                 (map[i].x-black.x))/(white.x-black.x)));
2458       continue;
2459     }
2460     if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2461       equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2462               (map[i].x-black.x))/(white.x-black.x)));
2463     if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2464       equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2465               (map[i].y-black.y))/(white.y-black.y)));
2466     if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2467       equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2468               (map[i].z-black.z))/(white.z-black.z)));
2469     if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2470       equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2471               (map[i].w-black.w))/(white.w-black.w)));
2472   }
2473
2474   if (image->storage_class == PseudoClass)
2475   {
2476     /*
2477        Equalize colormap.
2478        */
2479     for (i=0; i < (ssize_t) image->colors; i++)
2480     {
2481       if ((image->channel_mask & SyncChannels) != 0)
2482       {
2483         if (white.x != black.x)
2484         {
2485           image->colormap[i].red=equalize_map[
2486             ScaleQuantumToMap(image->colormap[i].red)].red;
2487           image->colormap[i].green=equalize_map[
2488             ScaleQuantumToMap(image->colormap[i].green)].red;
2489           image->colormap[i].blue=equalize_map[
2490             ScaleQuantumToMap(image->colormap[i].blue)].red;
2491           image->colormap[i].alpha=equalize_map[
2492             ScaleQuantumToMap(image->colormap[i].alpha)].red;
2493         }
2494         continue;
2495       }
2496       if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2497         image->colormap[i].red=equalize_map[
2498           ScaleQuantumToMap(image->colormap[i].red)].red;
2499       if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2500         image->colormap[i].green=equalize_map[
2501           ScaleQuantumToMap(image->colormap[i].green)].green;
2502       if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2503         image->colormap[i].blue=equalize_map[
2504           ScaleQuantumToMap(image->colormap[i].blue)].blue;
2505       if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2506         image->colormap[i].alpha=equalize_map[
2507           ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2508     }
2509   }
2510
2511   /*
2512     Equalize image.
2513   */
2514
2515   /* GPU can work on this again, image and equalize map as input
2516     image:        uchar4 (CLPixelPacket)
2517     equalize_map: uchar4 (PixelPacket)
2518     black, white: float4 (FloatPixelPacket) */
2519
2520 #ifdef RECREATEBUFFER 
2521   /* If the host pointer is aligned to the size of CLPixelPacket, 
2522      then use the host buffer directly from the GPU; otherwise, 
2523      create a buffer on the GPU and copy the data over */
2524   if (ALIGNED(inputPixels,CLPixelPacket)) 
2525   {
2526     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2527   }
2528   else 
2529   {
2530     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2531   }
2532   /* create a CL buffer from image pixel buffer */
2533   length = image->columns * image->rows;
2534   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2535   if (clStatus != CL_SUCCESS)
2536   {
2537     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2538     goto cleanup;
2539   }
2540 #endif
2541
2542   /* Create and initialize OpenCL buffers. */
2543   if (ALIGNED(equalize_map, PixelPacket)) 
2544   {
2545     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2546     hostPtr = equalize_map;
2547   }
2548   else 
2549   {
2550     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2551     hostPtr = equalize_map;
2552   }
2553   /* create a CL buffer for eqaulize_map  */
2554   length = (MaxMap+1); 
2555   equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2556   if (clStatus != CL_SUCCESS)
2557   {
2558     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2559     goto cleanup;
2560   }
2561
2562   /* get the OpenCL kernel */
2563   equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2564   if (equalizeKernel == NULL)
2565   {
2566     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2567     goto cleanup;
2568   }
2569
2570   /* set the kernel arguments */
2571   i = 0;
2572   clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2573   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2574   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2575   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&white);
2576   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&black);
2577   if (clStatus != CL_SUCCESS)
2578   {
2579     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2580     goto cleanup;
2581   }
2582
2583   /* launch the kernel */
2584   global_work_size[0] = image->columns;
2585   global_work_size[1] = image->rows;
2586
2587   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2588
2589   if (clStatus != CL_SUCCESS)
2590   {
2591     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2592     goto cleanup;
2593   }
2594   RecordProfileData(device,equalizeKernel,event);
2595
2596   /* read the data back */
2597   if (ALIGNED(inputPixels,CLPixelPacket)) 
2598   {
2599     length = image->columns * image->rows;
2600     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2601   }
2602   else 
2603   {
2604     length = image->columns * image->rows;
2605     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2606   }
2607   if (clStatus != CL_SUCCESS)
2608   {
2609     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2610     goto cleanup;
2611   }
2612
2613   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2614
2615 cleanup:
2616
2617   image_view=DestroyCacheView(image_view);
2618
2619   if (imageBuffer!=NULL)
2620     clEnv->library->clReleaseMemObject(imageBuffer);
2621   if (map!=NULL)
2622     map=(cl_float4 *) RelinquishMagickMemory(map);
2623   if (equalizeMapBuffer!=NULL)
2624     clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2625   if (equalize_map!=NULL)
2626     equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2627   if (histogramBuffer!=NULL)
2628     clEnv->library->clReleaseMemObject(histogramBuffer);
2629   if (histogram!=NULL)
2630     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2631   if (histogramKernel!=NULL)
2632     ReleaseOpenCLKernel(histogramKernel);
2633   if (equalizeKernel!=NULL)
2634     ReleaseOpenCLKernel(equalizeKernel);
2635   if (queue != NULL)
2636     ReleaseOpenCLCommandQueue(device, queue);
2637   if (device != NULL)
2638     ReleaseOpenCLDevice(device);
2639
2640   return(outputReady);
2641 }
2642
2643 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2644   ExceptionInfo *exception)
2645 {
2646   MagickBooleanType
2647     status;
2648
2649   MagickCLEnv
2650     clEnv;
2651
2652   assert(image != NULL);
2653   assert(exception != (ExceptionInfo *) NULL);
2654
2655   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2656       (checkHistogramCondition(image,image->intensity) == MagickFalse))
2657     return(MagickFalse);
2658
2659   clEnv=getOpenCLEnvironment(exception);
2660   if (clEnv == (MagickCLEnv) NULL)
2661     return(MagickFalse);
2662
2663   status=ComputeEqualizeImage(image,clEnv,exception);
2664   return(status);
2665 }
2666
2667 /*
2668 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2669 %                                                                             %
2670 %                                                                             %
2671 %                                                                             %
2672 %     A c c e l e r a t e F u n c t i o n I m a g e                           %
2673 %                                                                             %
2674 %                                                                             %
2675 %                                                                             %
2676 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2677 */
2678
2679 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2680   const MagickFunction function,const size_t number_parameters,
2681   const double *parameters,ExceptionInfo *exception)
2682 {
2683   cl_command_queue
2684     queue;
2685
2686   cl_int
2687     status;
2688
2689   cl_kernel
2690     functionKernel;
2691
2692   cl_mem
2693     imageBuffer,
2694     parametersBuffer;
2695
2696   cl_uint
2697     number_params,
2698     number_channels;
2699
2700   float
2701     *parametersBufferPtr;
2702
2703   MagickBooleanType
2704     outputReady;
2705
2706   MagickCLDevice
2707     device;
2708
2709   size_t
2710     gsize[2],
2711     i;
2712
2713   outputReady=MagickFalse;
2714
2715   functionKernel=NULL;
2716   parametersBuffer=NULL;
2717
2718   device=RequestOpenCLDevice(clEnv);
2719   queue=AcquireOpenCLCommandQueue(device);
2720   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2721   if (imageBuffer == (cl_mem) NULL)
2722     goto cleanup;
2723
2724   parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2725     sizeof(float));
2726   if (parametersBufferPtr == (float *) NULL)
2727     goto cleanup;
2728   for (i=0; i<number_parameters; i++)
2729     parametersBufferPtr[i]=(float) parameters[i];
2730   parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2731     CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2732     parametersBufferPtr);
2733   parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2734   if (parametersBuffer == (cl_mem) NULL)
2735   {
2736     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2737       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2738     goto cleanup;
2739   }
2740
2741   functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2742   if (functionKernel == (cl_kernel) NULL)
2743   {
2744     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2745       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2746     goto cleanup;
2747   }
2748
2749   number_channels=(cl_uint) image->number_channels;
2750   number_params=(cl_uint) number_parameters;
2751
2752   i=0;
2753   status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2754   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2755   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2756   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2757   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2758   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
2759   if (status != CL_SUCCESS)
2760   {
2761     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2762       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2763     goto cleanup;
2764   }
2765
2766   gsize[0]=image->columns;
2767   gsize[1]=image->rows;
2768   outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2769     gsize,(const size_t *) NULL,image,(const Image *) NULL,MagickFalse,
2770     exception);
2771
2772 cleanup:
2773
2774   if (parametersBuffer != (cl_mem) NULL)
2775     ReleaseOpenCLMemObject(parametersBuffer);
2776   if (functionKernel != (cl_kernel) NULL)
2777     ReleaseOpenCLKernel(functionKernel);
2778   if (queue != (cl_command_queue) NULL)
2779     ReleaseOpenCLCommandQueue(device,queue);
2780   if (device != (MagickCLDevice) NULL)
2781     ReleaseOpenCLDevice(device);
2782   return(outputReady);
2783 }
2784
2785 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2786   const MagickFunction function,const size_t number_parameters,
2787   const double *parameters,ExceptionInfo *exception)
2788 {
2789   MagickBooleanType
2790     status;
2791
2792   MagickCLEnv
2793     clEnv;
2794
2795   assert(image != NULL);
2796   assert(exception != (ExceptionInfo *) NULL);
2797
2798   if (checkAccelerateCondition(image) == MagickFalse)
2799     return(MagickFalse);
2800
2801   clEnv=getOpenCLEnvironment(exception);
2802   if (clEnv == (MagickCLEnv) NULL)
2803     return(MagickFalse);
2804
2805   status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2806     parameters,exception);
2807   return(status);
2808 }
2809
2810 /*
2811 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2812 %                                                                             %
2813 %                                                                             %
2814 %                                                                             %
2815 %     A c c e l e r a t e G r a y s c a l e I m a g e                         %
2816 %                                                                             %
2817 %                                                                             %
2818 %                                                                             %
2819 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2820 */
2821
2822 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2823   const PixelIntensityMethod method,ExceptionInfo *exception)
2824 {
2825   cl_command_queue
2826     queue;
2827
2828   cl_int
2829     status;
2830
2831   cl_kernel
2832     grayscaleKernel;
2833
2834   cl_mem
2835     imageBuffer;
2836
2837   cl_uint
2838     number_channels,
2839     colorspace,
2840     intensityMethod;
2841
2842   MagickBooleanType
2843     outputReady;
2844
2845   MagickCLDevice
2846     device;
2847
2848   size_t
2849     gsize[2],
2850     i;
2851
2852   outputReady=MagickFalse;
2853   grayscaleKernel=NULL;
2854
2855   assert(image != (Image *) NULL);
2856   assert(image->signature == MagickCoreSignature);
2857   device=RequestOpenCLDevice(clEnv);
2858   queue=AcquireOpenCLCommandQueue(device);
2859   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2860   if (imageBuffer == (cl_mem) NULL)
2861     goto cleanup;
2862
2863   grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2864   if (grayscaleKernel == (cl_kernel) NULL)
2865   {
2866     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2867       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2868     goto cleanup;
2869   }
2870
2871   number_channels=(cl_uint) image->number_channels;
2872   intensityMethod=(cl_uint) method;
2873   colorspace=(cl_uint) image->colorspace;
2874
2875   i=0;
2876   status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2877   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
2878   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
2879   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
2880   if (status != CL_SUCCESS)
2881   {
2882     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2883       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2884     goto cleanup;
2885   }
2886
2887   gsize[0]=image->columns;
2888   gsize[1]=image->rows;
2889   outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
2890     (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
2891     MagickFalse,exception);
2892
2893 cleanup:
2894
2895   if (grayscaleKernel != (cl_kernel) NULL)
2896     ReleaseOpenCLKernel(grayscaleKernel);
2897   if (queue != (cl_command_queue) NULL)
2898     ReleaseOpenCLCommandQueue(device,queue);
2899   if (device != (MagickCLDevice) NULL)
2900     ReleaseOpenCLDevice(device);
2901
2902   return(outputReady);
2903 }
2904
2905 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
2906   const PixelIntensityMethod method,ExceptionInfo *exception)
2907 {
2908   MagickBooleanType
2909     status;
2910
2911   MagickCLEnv
2912     clEnv;
2913
2914   assert(image != NULL);
2915   assert(exception != (ExceptionInfo *) NULL);
2916
2917   if ((checkAccelerateCondition(image) == MagickFalse) ||
2918       (checkPixelIntensity(image,method) == MagickFalse))
2919     return(MagickFalse);
2920
2921   if (image->number_channels < 3)
2922     return(MagickFalse);
2923
2924   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
2925       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
2926       (GetPixelBlueTraits(image) == UndefinedPixelTrait))
2927     return(MagickFalse);
2928
2929   clEnv=getOpenCLEnvironment(exception);
2930   if (clEnv == (MagickCLEnv) NULL)
2931     return(MagickFalse);
2932
2933   status=ComputeGrayscaleImage(image,clEnv,method,exception);
2934   return(status);
2935 }
2936
2937 /*
2938 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2939 %                                                                             %
2940 %                                                                             %
2941 %                                                                             %
2942 %     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
2943 %                                                                             %
2944 %                                                                             %
2945 %                                                                             %
2946 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2947 */
2948
2949 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
2950   const double radius,const double strength,ExceptionInfo *exception)
2951 {
2952   CacheView
2953     *filteredImage_view,
2954     *image_view;
2955
2956   cl_command_queue
2957     queue;
2958
2959   cl_int
2960     clStatus,
2961     iRadius;
2962
2963   cl_kernel
2964     blurRowKernel,
2965     blurColumnKernel;
2966
2967   cl_event
2968     event;
2969
2970   cl_mem
2971     filteredImageBuffer,
2972     imageBuffer,
2973     imageKernelBuffer,
2974     tempImageBuffer;
2975
2976   cl_mem_flags
2977     mem_flags;
2978
2979   const void
2980     *inputPixels;
2981
2982   Image
2983     *filteredImage;
2984
2985   MagickBooleanType
2986     outputReady;
2987
2988   MagickCLDevice
2989     device;
2990
2991   MagickSizeType
2992     length;
2993
2994   void
2995     *filteredPixels,
2996     *hostPtr;
2997
2998   unsigned int
2999     i,
3000     imageColumns,
3001     imageRows,
3002     passes;
3003
3004   filteredImage = NULL;
3005   filteredImage_view = NULL;
3006   imageBuffer = NULL;
3007   filteredImageBuffer = NULL;
3008   tempImageBuffer = NULL;
3009   imageKernelBuffer = NULL;
3010   blurRowKernel = NULL;
3011   blurColumnKernel = NULL;
3012   queue = NULL;
3013   outputReady = MagickFalse;
3014
3015   device = RequestOpenCLDevice(clEnv);
3016   queue = AcquireOpenCLCommandQueue(device);
3017
3018   /* Create and initialize OpenCL buffers. */
3019   {
3020     image_view=AcquireAuthenticCacheView(image,exception);
3021     inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3022     if (inputPixels == (const void *) NULL)
3023     {
3024       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3025       goto cleanup;
3026     }
3027
3028     /* If the host pointer is aligned to the size of CLPixelPacket, 
3029      then use the host buffer directly from the GPU; otherwise, 
3030      create a buffer on the GPU and copy the data over */
3031     if (ALIGNED(inputPixels,CLPixelPacket)) 
3032     {
3033       mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3034     }
3035     else 
3036     {
3037       mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3038     }
3039     /* create a CL buffer from image pixel buffer */
3040     length = image->columns * image->rows;
3041     imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3042     if (clStatus != CL_SUCCESS)
3043     {
3044       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3045       goto cleanup;
3046     }
3047   }
3048
3049   /* create output */
3050   {
3051     filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
3052     assert(filteredImage != NULL);
3053     if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3054     {
3055       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3056       goto cleanup;
3057     }
3058     filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3059     filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3060     if (filteredPixels == (void *) NULL)
3061     {
3062       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3063       goto cleanup;
3064     }
3065
3066     if (ALIGNED(filteredPixels,CLPixelPacket)) 
3067     {
3068       mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3069       hostPtr = filteredPixels;
3070     }
3071     else 
3072     {
3073       mem_flags = CL_MEM_WRITE_ONLY;
3074       hostPtr = NULL;
3075     }
3076
3077     /* create a CL buffer from image pixel buffer */
3078     length = image->columns * image->rows;
3079     filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3080     if (clStatus != CL_SUCCESS)
3081     {
3082       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3083       goto cleanup;
3084     }
3085   }
3086
3087   {
3088     /* create temp buffer */
3089     {
3090       length = image->columns * image->rows;
3091       tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3092       if (clStatus != CL_SUCCESS)
3093       {
3094         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3095         goto cleanup;
3096       }
3097     }
3098
3099     /* get the opencl kernel */
3100     {
3101       blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3102       if (blurRowKernel == NULL)
3103       {
3104         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3105         goto cleanup;
3106       };
3107
3108       blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3109       if (blurColumnKernel == NULL)
3110       {
3111         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3112         goto cleanup;
3113       };
3114     }
3115
3116     {
3117       imageColumns = (unsigned int) image->columns;
3118       imageRows = (unsigned int) image->rows;
3119       iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); // Normalized radius, 100% gives blur radius of 20% of the largest dimension
3120
3121       passes = (((1.0f * imageRows) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3122       passes = (passes < 1) ? 1: passes;
3123
3124       /* set the kernel arguments */
3125       i = 0;
3126       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3127       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3128       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3129       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3130       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3131       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3132       
3133       if (clStatus != CL_SUCCESS)
3134       {
3135         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3136         goto cleanup;
3137       }
3138     }
3139
3140     /* launch the kernel */
3141     {
3142       int x;
3143       for (x = 0; x < passes; ++x) {
3144         size_t gsize[2];
3145         size_t wsize[2];
3146         size_t goffset[2];
3147
3148         gsize[0] = 256;
3149         gsize[1] = (image->rows + passes - 1) / passes;
3150         wsize[0] = 256;
3151         wsize[1] = 1;
3152         goffset[0] = 0;
3153         goffset[1] = x * gsize[1];
3154
3155         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3156         if (clStatus != CL_SUCCESS)
3157         {
3158           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3159           goto cleanup;
3160         }
3161         clEnv->library->clFlush(queue);
3162         RecordProfileData(device,blurRowKernel,event);
3163       }
3164     }
3165
3166     {
3167       cl_float FStrength = strength;
3168       i = 0;
3169       clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3170       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3171       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3172       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3173       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3174       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3175       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3176
3177       if (clStatus != CL_SUCCESS)
3178       {
3179         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3180         goto cleanup;
3181       }
3182     }
3183
3184     /* launch the kernel */
3185     {
3186       int x;
3187       for (x = 0; x < passes; ++x) {
3188         size_t gsize[2];
3189         size_t wsize[2];
3190         size_t goffset[2];
3191
3192         gsize[0] = ((image->columns + 3) / 4) * 4;
3193         gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3194         wsize[0] = 4;
3195         wsize[1] = 64;
3196         goffset[0] = 0;
3197         goffset[1] = x * gsize[1];
3198
3199         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3200         if (clStatus != CL_SUCCESS)
3201         {
3202           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3203           goto cleanup;
3204         }
3205         clEnv->library->clFlush(queue);
3206         RecordProfileData(device,blurColumnKernel,event);
3207       }
3208     }
3209   }
3210
3211   /* get result */
3212   if (ALIGNED(filteredPixels,CLPixelPacket)) 
3213   {
3214     length = image->columns * image->rows;
3215     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3216   }
3217   else 
3218   {
3219     length = image->columns * image->rows;
3220     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3221   }
3222   if (clStatus != CL_SUCCESS)
3223   {
3224     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3225     goto cleanup;
3226   }
3227
3228   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3229
3230 cleanup:
3231
3232   image_view=DestroyCacheView(image_view);
3233   if (filteredImage_view != NULL)
3234     filteredImage_view=DestroyCacheView(filteredImage_view);
3235
3236   if (imageBuffer!=NULL)
3237     clEnv->library->clReleaseMemObject(imageBuffer);
3238   if (filteredImageBuffer!=NULL)
3239     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3240   if (tempImageBuffer!=NULL)
3241     clEnv->library->clReleaseMemObject(tempImageBuffer);
3242   if (imageKernelBuffer!=NULL)
3243     clEnv->library->clReleaseMemObject(imageKernelBuffer);
3244   if (blurRowKernel!=NULL)
3245     ReleaseOpenCLKernel(blurRowKernel);
3246   if (blurColumnKernel!=NULL)
3247     ReleaseOpenCLKernel(blurColumnKernel);
3248   if (queue != NULL)
3249     ReleaseOpenCLCommandQueue(device, queue);
3250   if (device != NULL)
3251     ReleaseOpenCLDevice(device);
3252   if (outputReady == MagickFalse)
3253   {
3254     if (filteredImage != NULL)
3255     {
3256       DestroyImage(filteredImage);
3257       filteredImage = NULL;
3258     }
3259   }
3260
3261   return(filteredImage);
3262 }
3263
3264 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3265   const double radius,const double strength,ExceptionInfo *exception)
3266 {
3267   Image
3268     *filteredImage;
3269
3270   MagickCLEnv
3271     clEnv;
3272
3273   assert(image != NULL);
3274   assert(exception != (ExceptionInfo *) NULL);
3275
3276   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3277     return((Image *) NULL);
3278
3279   clEnv=getOpenCLEnvironment(exception);
3280   if (clEnv == (MagickCLEnv) NULL)
3281     return((Image *) NULL);
3282
3283   filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3284     exception);
3285   return(filteredImage);
3286 }
3287
3288 /*
3289 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3290 %                                                                             %
3291 %                                                                             %
3292 %                                                                             %
3293 %     A c c e l e r a t e M o d u l a t e I m a g e                           %
3294 %                                                                             %
3295 %                                                                             %
3296 %                                                                             %
3297 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3298 */
3299
3300 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3301   const double percent_brightness,const double percent_hue,
3302   const double percent_saturation,const ColorspaceType colorspace,
3303   ExceptionInfo *exception)
3304 {
3305   CacheView
3306     *image_view;
3307
3308   cl_float
3309     bright,
3310     hue,
3311     saturation;
3312
3313   cl_command_queue
3314     queue;
3315
3316   cl_int
3317     color,
3318     clStatus;
3319
3320   cl_kernel
3321     modulateKernel;
3322
3323   cl_event
3324     event;
3325
3326   cl_mem
3327     imageBuffer;
3328
3329   cl_mem_flags
3330     mem_flags;
3331
3332   MagickBooleanType
3333     outputReady;
3334
3335   MagickCLDevice
3336     device;
3337
3338   MagickSizeType
3339     length;
3340
3341   register ssize_t
3342     i;
3343
3344   void
3345     *inputPixels;
3346
3347   inputPixels = NULL;
3348   imageBuffer = NULL;
3349   modulateKernel = NULL; 
3350
3351   assert(image != (Image *) NULL);
3352   assert(image->signature == MagickCoreSignature);
3353   if (image->debug != MagickFalse)
3354     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3355
3356   /*
3357    * initialize opencl env
3358    */
3359   device = RequestOpenCLDevice(clEnv);
3360   queue = AcquireOpenCLCommandQueue(device);
3361
3362   outputReady = MagickFalse;
3363
3364   /* Create and initialize OpenCL buffers.
3365    inputPixels = AcquirePixelCachePixels(image, &length, exception);
3366    assume this  will get a writable image
3367    */
3368   image_view=AcquireAuthenticCacheView(image,exception);
3369   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3370   if (inputPixels == (void *) NULL)
3371   {
3372     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3373     goto cleanup;
3374   }
3375
3376   /* If the host pointer is aligned to the size of CLPixelPacket, 
3377    then use the host buffer directly from the GPU; otherwise, 
3378    create a buffer on the GPU and copy the data over
3379    */
3380   if (ALIGNED(inputPixels,CLPixelPacket)) 
3381   {
3382     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3383   }
3384   else 
3385   {
3386     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3387   }
3388   /* create a CL buffer from image pixel buffer */
3389   length = image->columns * image->rows;
3390   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3391   if (clStatus != CL_SUCCESS)
3392   {
3393     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3394     goto cleanup;
3395   }
3396
3397   modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3398   if (modulateKernel == NULL)
3399   {
3400     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3401     goto cleanup;
3402   }
3403
3404   bright=percent_brightness;
3405   hue=percent_hue;
3406   saturation=percent_saturation;
3407   color=colorspace;
3408
3409   i = 0;
3410   clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3411   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3412   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3413   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3414   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3415   if (clStatus != CL_SUCCESS)
3416   {
3417     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3418     goto cleanup;
3419   }
3420
3421   {
3422     size_t global_work_size[2];
3423     global_work_size[0] = image->columns;
3424     global_work_size[1] = image->rows;
3425     /* launch the kernel */
3426         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3427     if (clStatus != CL_SUCCESS)
3428     {
3429       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3430       goto cleanup;
3431     }
3432     RecordProfileData(device,modulateKernel,event);
3433   }
3434
3435   if (ALIGNED(inputPixels,CLPixelPacket)) 
3436   {
3437     length = image->columns * image->rows;
3438     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3439   }
3440   else 
3441   {
3442     length = image->columns * image->rows;
3443     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3444   }
3445   if (clStatus != CL_SUCCESS)
3446   {
3447     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3448     goto cleanup;
3449   }
3450
3451   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3452
3453 cleanup:
3454
3455   image_view=DestroyCacheView(image_view);
3456
3457   if (imageBuffer!=NULL)
3458     clEnv->library->clReleaseMemObject(imageBuffer);
3459   if (modulateKernel!=NULL)
3460     ReleaseOpenCLKernel(modulateKernel);
3461   if (queue != NULL)
3462     ReleaseOpenCLCommandQueue(device,queue);
3463   if (device != NULL)
3464     ReleaseOpenCLDevice(device);
3465
3466   return outputReady;
3467
3468 }
3469
3470 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3471   const double percent_brightness,const double percent_hue,
3472   const double percent_saturation,const ColorspaceType colorspace,
3473   ExceptionInfo *exception)
3474 {
3475   MagickBooleanType
3476     status;
3477
3478   MagickCLEnv
3479     clEnv;
3480
3481   assert(image != NULL);
3482   assert(exception != (ExceptionInfo *) NULL);
3483
3484   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3485     return(MagickFalse);
3486
3487   if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3488     return(MagickFalse);
3489
3490   clEnv=getOpenCLEnvironment(exception);
3491   if (clEnv == (MagickCLEnv) NULL)
3492     return(MagickFalse);
3493
3494   status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3495     percent_saturation,colorspace,exception);
3496   return(status);
3497 }
3498
3499 /*
3500 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3501 %                                                                             %
3502 %                                                                             %
3503 %                                                                             %
3504 %     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
3505 %                                                                             %
3506 %                                                                             %
3507 %                                                                             %
3508 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3509 */
3510
3511 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3512   const double *kernel,const size_t width,const OffsetInfo *offset,
3513   ExceptionInfo *exception)
3514 {
3515   CacheView
3516     *filteredImage_view,
3517     *image_view;
3518
3519   cl_command_queue
3520     queue;
3521
3522   cl_float4
3523     biasPixel;
3524
3525   cl_int
3526     clStatus;
3527
3528   cl_kernel
3529     motionBlurKernel;
3530
3531   cl_event
3532     event;
3533
3534   cl_mem
3535     filteredImageBuffer,
3536     imageBuffer,
3537     imageKernelBuffer, 
3538     offsetBuffer;
3539
3540   cl_mem_flags
3541     mem_flags;
3542
3543   const void
3544     *inputPixels;
3545
3546   float
3547     *kernelBufferPtr;
3548
3549   Image
3550     *filteredImage;
3551
3552   int
3553     *offsetBufferPtr;
3554
3555   MagickBooleanType
3556     outputReady;
3557
3558   MagickCLDevice
3559     device;
3560
3561   PixelInfo
3562     bias;
3563
3564   MagickSizeType
3565     length;
3566
3567   size_t
3568     global_work_size[2],
3569     local_work_size[2];
3570
3571   unsigned int
3572     i,
3573     imageHeight,
3574     imageWidth,
3575     matte;
3576
3577   void
3578     *filteredPixels,
3579     *hostPtr;
3580
3581   outputReady = MagickFalse;
3582   filteredImage = NULL;
3583   filteredImage_view = NULL;
3584   imageBuffer = NULL;
3585   filteredImageBuffer = NULL;
3586   imageKernelBuffer = NULL;
3587   motionBlurKernel = NULL;
3588   queue = NULL;
3589
3590   device = RequestOpenCLDevice(clEnv);
3591
3592   /* Create and initialize OpenCL buffers. */
3593
3594   image_view=AcquireAuthenticCacheView(image,exception);
3595   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3596   if (inputPixels == (const void *) NULL)
3597   {
3598     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3599       "UnableToReadPixelCache.","`%s'",image->filename);
3600     goto cleanup;
3601   }
3602
3603   // If the host pointer is aligned to the size of CLPixelPacket, 
3604   // then use the host buffer directly from the GPU; otherwise, 
3605   // create a buffer on the GPU and copy the data over
3606   if (ALIGNED(inputPixels,CLPixelPacket)) 
3607   {
3608     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3609   }
3610   else 
3611   {
3612     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3613   }
3614   // create a CL buffer from image pixel buffer
3615   length = image->columns * image->rows;
3616   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, 
3617     length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3618   if (clStatus != CL_SUCCESS)
3619   {
3620     (void) ThrowMagickException(exception, GetMagickModule(),
3621       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3622     goto cleanup;
3623   }
3624
3625
3626   filteredImage = CloneImage(image,image->columns,image->rows,
3627     MagickTrue,exception);
3628   assert(filteredImage != NULL);
3629   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3630   {
3631     (void) ThrowMagickException(exception, GetMagickModule(), 
3632       ResourceLimitError, "CloneImage failed.", ".");
3633     goto cleanup;
3634   }
3635   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3636   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3637   if (filteredPixels == (void *) NULL)
3638   {
3639     (void) ThrowMagickException(exception,GetMagickModule(),CacheError, 
3640       "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3641     goto cleanup;
3642   }
3643
3644   if (ALIGNED(filteredPixels,CLPixelPacket)) 
3645   {
3646     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3647     hostPtr = filteredPixels;
3648   }
3649   else 
3650   {
3651     mem_flags = CL_MEM_WRITE_ONLY;
3652     hostPtr = NULL;
3653   }
3654   // create a CL buffer from image pixel buffer
3655   length = image->columns * image->rows;
3656   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, 
3657     length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3658   if (clStatus != CL_SUCCESS)
3659   {
3660     (void) ThrowMagickException(exception, GetMagickModule(), 
3661       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3662     goto cleanup;
3663   }
3664
3665
3666   imageKernelBuffer = clEnv->library->clCreateBuffer(device->context, 
3667     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3668     &clStatus);
3669   if (clStatus != CL_SUCCESS)
3670   {
3671     (void) ThrowMagickException(exception, GetMagickModule(), 
3672       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3673     goto cleanup;
3674   }
3675
3676   queue = AcquireOpenCLCommandQueue(device);
3677   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, 
3678     CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3679   if (clStatus != CL_SUCCESS)
3680   {
3681     (void) ThrowMagickException(exception, GetMagickModule(), 
3682       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3683     goto cleanup;
3684   }
3685   for (i = 0; i < width; i++)
3686   {
3687     kernelBufferPtr[i] = (float) kernel[i];
3688   }
3689   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3690     0, NULL, NULL);
3691  if (clStatus != CL_SUCCESS)
3692   {
3693     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, 
3694       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3695     goto cleanup;
3696   }
3697
3698   offsetBuffer = clEnv->library->clCreateBuffer(device->context, 
3699     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3700     &clStatus);
3701   if (clStatus != CL_SUCCESS)
3702   {
3703     (void) ThrowMagickException(exception, GetMagickModule(), 
3704       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3705     goto cleanup;
3706   }
3707
3708   offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE, 
3709     CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3710   if (clStatus != CL_SUCCESS)
3711   {
3712     (void) ThrowMagickException(exception, GetMagickModule(), 
3713       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3714     goto cleanup;
3715   }
3716   for (i = 0; i < width; i++)
3717   {
3718     offsetBufferPtr[2*i] = (int)offset[i].x;
3719     offsetBufferPtr[2*i+1] = (int)offset[i].y;
3720   }
3721   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0, 
3722     NULL, NULL);
3723  if (clStatus != CL_SUCCESS)
3724   {
3725     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3726       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3727     goto cleanup;
3728   }
3729
3730
3731  // get the OpenCL kernel
3732   motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3733   if (motionBlurKernel == NULL)
3734   {
3735     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3736       "AcquireOpenCLKernel failed.", ".");
3737     goto cleanup;
3738   }
3739   
3740   // set the kernel arguments
3741   i = 0;
3742   clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3743     (void *)&imageBuffer);
3744   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3745     (void *)&filteredImageBuffer);
3746   imageWidth = (unsigned int) image->columns;
3747   imageHeight = (unsigned int) image->rows;
3748   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3749     &imageWidth);
3750   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3751     &imageHeight);
3752   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3753     (void *)&imageKernelBuffer);
3754   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3755     &width);
3756   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3757     (void *)&offsetBuffer);
3758
3759   GetPixelInfo(image,&bias);
3760   biasPixel.s[0] = bias.red;
3761   biasPixel.s[1] = bias.green;
3762   biasPixel.s[2] = bias.blue;
3763   biasPixel.s[3] = bias.alpha;
3764   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3765
3766   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3767   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3768   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3769   if (clStatus != CL_SUCCESS)
3770   {
3771     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3772       "clEnv->library->clSetKernelArg failed.", ".");
3773     goto cleanup;
3774   }
3775
3776   // launch the kernel
3777   local_work_size[0] = 16;
3778   local_work_size[1] = 16;
3779   global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3780                                 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3781   global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3782                                 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3783   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL, 
3784           global_work_size, local_work_size, 0, NULL, &event);
3785
3786   if (clStatus != CL_SUCCESS)
3787   {
3788     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3789       "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3790     goto cleanup;
3791   }
3792   RecordProfileData(device,motionBlurKernel,event);
3793
3794   if (ALIGNED(filteredPixels,CLPixelPacket)) 
3795   {
3796     length = image->columns * image->rows;
3797     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, 
3798       CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, 
3799       NULL, &clStatus);
3800   }
3801   else 
3802   {
3803     length = image->columns * image->rows;
3804     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, 
3805       length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3806   }
3807   if (clStatus != CL_SUCCESS)
3808   {
3809     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3810       "Reading output image from CL buffer failed.", ".");
3811     goto cleanup;
3812   }
3813   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3814
3815 cleanup:
3816
3817   image_view=DestroyCacheView(image_view);
3818   if (filteredImage_view != NULL)
3819     filteredImage_view=DestroyCacheView(filteredImage_view);
3820
3821   if (filteredImageBuffer!=NULL)
3822     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3823   if (imageBuffer!=NULL)
3824     clEnv->library->clReleaseMemObject(imageBuffer);
3825   if (imageKernelBuffer!=NULL)
3826     clEnv->library->clReleaseMemObject(imageKernelBuffer);
3827   if (motionBlurKernel!=NULL)
3828     ReleaseOpenCLKernel(motionBlurKernel);
3829   if (queue != NULL)
3830     ReleaseOpenCLCommandQueue(device,queue);
3831   if (device != NULL)
3832     ReleaseOpenCLDevice(device);
3833   if (outputReady == MagickFalse && filteredImage != NULL)
3834     filteredImage=DestroyImage(filteredImage);
3835
3836   return(filteredImage);
3837 }
3838
3839 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3840   const double* kernel,const size_t width,const OffsetInfo *offset,
3841   ExceptionInfo *exception)
3842 {
3843   Image
3844     *filteredImage;
3845
3846   MagickCLEnv
3847     clEnv;
3848
3849   assert(image != NULL);
3850   assert(kernel != (double *) NULL);
3851   assert(offset != (OffsetInfo *) NULL);
3852   assert(exception != (ExceptionInfo *) NULL);
3853
3854   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3855     return((Image *) NULL);
3856
3857   clEnv=getOpenCLEnvironment(exception);
3858   if (clEnv == (MagickCLEnv) NULL)
3859     return((Image *) NULL);
3860
3861   filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3862     exception);
3863   return(filteredImage);
3864 }
3865
3866 /*
3867 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3868 %                                                                             %
3869 %                                                                             %
3870 %                                                                             %
3871 %     A c c e l e r a t e R e s i z e I m a g e                               %
3872 %                                                                             %
3873 %                                                                             %
3874 %                                                                             %
3875 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3876 */
3877
3878 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
3879   cl_command_queue queue,const Image *image,Image *filteredImage,
3880   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
3881   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
3882   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3883   const float xFactor,ExceptionInfo *exception)
3884 {
3885   cl_kernel
3886     horizontalKernel;
3887
3888   cl_int
3889     status;
3890
3891   const unsigned int
3892     workgroupSize = 256;
3893
3894   float
3895     resizeFilterScale,
3896     resizeFilterSupport,
3897     resizeFilterWindowSupport,
3898     resizeFilterBlur,
3899     scale,
3900     support;
3901
3902   int
3903     cacheRangeStart,
3904     cacheRangeEnd,
3905     numCachedPixels,
3906     resizeFilterType,
3907     resizeWindowType;
3908
3909   MagickBooleanType
3910     outputReady;
3911
3912   size_t
3913     gammaAccumulatorLocalMemorySize,
3914     gsize[2],
3915     i,
3916     imageCacheLocalMemorySize,
3917     pixelAccumulatorLocalMemorySize,
3918     lsize[2],
3919     totalLocalMemorySize,
3920     weightAccumulatorLocalMemorySize;
3921
3922   unsigned int
3923     chunkSize,
3924     pixelPerWorkgroup;
3925
3926   horizontalKernel=NULL;
3927   outputReady=MagickFalse;
3928
3929   /*
3930   Apply filter to resize vertically from image to resize image.
3931   */
3932   scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
3933   support=scale*GetResizeFilterSupport(resizeFilter);
3934   if (support < 0.5)
3935   {
3936     /*
3937     Support too small even for nearest neighbour: Reduce to point
3938     sampling.
3939     */
3940     support=(float) 0.5;
3941     scale=1.0;
3942   }
3943   scale=PerceptibleReciprocal(scale);
3944
3945   if (resizedColumns < workgroupSize) 
3946   {
3947     chunkSize=32;
3948     pixelPerWorkgroup=32;
3949   }
3950   else
3951   {
3952     chunkSize=workgroupSize;
3953     pixelPerWorkgroup=workgroupSize;
3954   }
3955
3956 DisableMSCWarning(4127)
3957   while(1)
3958 RestoreMSCWarning
3959   {
3960     /* calculate the local memory size needed per workgroup */
3961     cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
3962     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
3963       MagickEpsilon)+support+0.5);
3964     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
3965     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
3966       number_channels;
3967     totalLocalMemorySize=imageCacheLocalMemorySize;
3968
3969     /* local size for the pixel accumulator */
3970     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
3971     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
3972
3973     /* local memory size for the weight accumulator */
3974     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
3975     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
3976
3977     /* local memory size for the gamma accumulator */
3978     if ((number_channels == 4) || (number_channels == 2))
3979       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
3980     else
3981       gammaAccumulatorLocalMemorySize=sizeof(float);
3982     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
3983
3984     if (totalLocalMemorySize <= device->local_memory_size)
3985       break;
3986     else
3987     {
3988       pixelPerWorkgroup=pixelPerWorkgroup/2;
3989       chunkSize=chunkSize/2;
3990       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
3991       {
3992         /* quit, fallback to CPU */
3993         goto cleanup;
3994       }
3995     }
3996   }
3997
3998   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
3999   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4000
4001   horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4002   if (horizontalKernel == (cl_kernel) NULL)
4003   {
4004     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4005       ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4006     goto cleanup;
4007   }
4008
4009   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4010   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4011   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4012   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4013
4014   i=0;
4015   status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4016   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4017   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4018   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4019   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4020   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4021   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4022   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4023   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4024   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4025   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4026   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4027   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4028   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4029   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4030   status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4031   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4032   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4033   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4034   status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4035   status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4036   status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4037
4038   if (status != CL_SUCCESS)
4039   {
4040     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4041       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4042     goto cleanup;
4043   }
4044
4045   gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4046     workgroupSize;
4047   gsize[1]=resizedRows;
4048   lsize[0]=workgroupSize;
4049   lsize[1]=1;
4050   outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4051     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4052     exception);
4053
4054 cleanup:
4055
4056   if (horizontalKernel != (cl_kernel) NULL)
4057     ReleaseOpenCLKernel(horizontalKernel);
4058
4059   return(outputReady);
4060 }
4061
4062 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4063   cl_command_queue queue,const Image *image,Image * filteredImage,
4064   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4065   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4066   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4067   const float yFactor,ExceptionInfo *exception)
4068 {
4069   cl_kernel
4070     verticalKernel;
4071
4072   cl_int
4073     status;
4074
4075   const unsigned int
4076     workgroupSize = 256;
4077
4078   float
4079     resizeFilterScale,
4080     resizeFilterSupport,
4081     resizeFilterWindowSupport,
4082     resizeFilterBlur,
4083     scale,
4084     support;
4085
4086   int
4087     cacheRangeStart,
4088     cacheRangeEnd,
4089     numCachedPixels,
4090     resizeFilterType,
4091     resizeWindowType;
4092
4093   MagickBooleanType
4094     outputReady;
4095
4096   size_t
4097     gammaAccumulatorLocalMemorySize,
4098     gsize[2],
4099     i,
4100     imageCacheLocalMemorySize,
4101     pixelAccumulatorLocalMemorySize,
4102     lsize[2],
4103     totalLocalMemorySize,
4104     weightAccumulatorLocalMemorySize;
4105
4106   unsigned int
4107     chunkSize,
4108     pixelPerWorkgroup;
4109
4110   verticalKernel=NULL;
4111   outputReady=MagickFalse;
4112
4113   /*
4114   Apply filter to resize vertically from image to resize image.
4115   */
4116   scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4117   support=scale*GetResizeFilterSupport(resizeFilter);
4118   if (support < 0.5)
4119   {
4120     /*
4121     Support too small even for nearest neighbour: Reduce to point
4122     sampling.
4123     */
4124     support=(float) 0.5;
4125     scale=1.0;
4126   }
4127   scale=PerceptibleReciprocal(scale);
4128
4129   if (resizedRows < workgroupSize) 
4130   {
4131     chunkSize=32;
4132     pixelPerWorkgroup=32;
4133   }
4134   else
4135   {
4136     chunkSize=workgroupSize;
4137     pixelPerWorkgroup=workgroupSize;
4138   }
4139
4140 DisableMSCWarning(4127)
4141   while(1)
4142 RestoreMSCWarning
4143   {
4144     /* calculate the local memory size needed per workgroup */
4145     cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4146     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4147       MagickEpsilon)+support+0.5);
4148     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4149     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4150       number_channels;
4151     totalLocalMemorySize=imageCacheLocalMemorySize;
4152
4153     /* local size for the pixel accumulator */
4154     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4155     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4156
4157     /* local memory size for the weight accumulator */
4158     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4159     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4160
4161     /* local memory size for the gamma accumulator */
4162     if ((number_channels == 4) || (number_channels == 2))
4163       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4164     else
4165       gammaAccumulatorLocalMemorySize=sizeof(float);
4166     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4167
4168     if (totalLocalMemorySize <= device->local_memory_size)
4169       break;
4170     else
4171     {
4172       pixelPerWorkgroup=pixelPerWorkgroup/2;
4173       chunkSize=chunkSize/2;
4174       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4175       {
4176         /* quit, fallback to CPU */
4177         goto cleanup;
4178       }
4179     }
4180   }
4181
4182   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4183   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4184
4185   verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4186   if (verticalKernel == (cl_kernel) NULL)
4187   {
4188     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4189       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4190     goto cleanup;
4191   }
4192
4193   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4194   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4195   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4196   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4197
4198   i=0;
4199   status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4200   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4201   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4202   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4203   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4204   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4205   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4206   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4207   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4208   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4209   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4210   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4211   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4212   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4213   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4214   status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4215   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4216   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4217   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4218   status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4219   status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4220   status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4221
4222   if (status != CL_SUCCESS)
4223   {
4224     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4225       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4226     goto cleanup;
4227   }
4228
4229   gsize[0]=resizedColumns;
4230   gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4231     workgroupSize;
4232   lsize[0]=1;
4233   lsize[1]=workgroupSize;
4234   outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4235     gsize,lsize,image,filteredImage,MagickFalse,exception);
4236
4237 cleanup:
4238
4239   if (verticalKernel != (cl_kernel) NULL)
4240     ReleaseOpenCLKernel(verticalKernel);
4241
4242   return(outputReady);
4243 }
4244
4245 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4246   const size_t resizedColumns,const size_t resizedRows,
4247   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4248 {
4249   cl_command_queue
4250     queue;
4251
4252   cl_mem
4253     cubicCoefficientsBuffer,
4254     filteredImageBuffer,
4255     imageBuffer,
4256     tempImageBuffer;
4257
4258   cl_uint
4259     number_channels;
4260
4261   const double
4262     *resizeFilterCoefficient;
4263
4264   float
4265     coefficientBuffer[7],
4266     xFactor,
4267     yFactor;
4268
4269   MagickBooleanType
4270     outputReady;
4271
4272   MagickCLDevice
4273     device;
4274
4275   MagickSizeType
4276     length;
4277
4278   Image
4279     *filteredImage;
4280
4281   size_t
4282     i;
4283
4284   filteredImage=NULL;
4285   tempImageBuffer=NULL;
4286   cubicCoefficientsBuffer=NULL;
4287   outputReady=MagickFalse;
4288
4289   device=RequestOpenCLDevice(clEnv);
4290   queue=AcquireOpenCLCommandQueue(device);
4291   filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4292     exception);
4293   if (filteredImage == (Image *) NULL)
4294     goto cleanup;
4295   if (filteredImage->number_channels != image->number_channels)
4296     goto cleanup;
4297   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4298   if (imageBuffer == (cl_mem) NULL)
4299     goto cleanup;
4300   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4301   if (filteredImageBuffer == (cl_mem) NULL)
4302     goto cleanup;
4303
4304   resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4305   for (i = 0; i < 7; i++)
4306     coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4307   cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4308     CL_MEM_READ_ONLY,7*sizeof(*resizeFilterCoefficient),&coefficientBuffer);
4309   if (cubicCoefficientsBuffer == (cl_mem) NULL)
4310   {
4311     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4312       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4313     goto cleanup;
4314   }
4315
4316   number_channels=(cl_uint) image->number_channels;
4317   xFactor=(float) resizedColumns/(float) image->columns;
4318   yFactor=(float) resizedRows/(float) image->rows;
4319   if (xFactor > yFactor)
4320   {
4321     length=resizedColumns*image->rows*number_channels;
4322     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4323       sizeof(CLQuantum),(void *) NULL);
4324     if (tempImageBuffer == (cl_mem) NULL)
4325     {
4326       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4327         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4328       goto cleanup;
4329     }
4330
4331     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4332       imageBuffer,number_channels,(cl_uint) image->columns,
4333       (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4334       (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4335       exception);
4336     if (outputReady == MagickFalse)
4337       goto cleanup;
4338
4339     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4340       tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4341       (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4342       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4343       exception);
4344     if (outputReady == MagickFalse)
4345       goto cleanup;
4346   }
4347   else
4348   {
4349     length=image->columns*resizedRows*number_channels;
4350     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4351       sizeof(CLQuantum),(void *) NULL);
4352     if (tempImageBuffer == (cl_mem) NULL)
4353     {
4354       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4355         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4356       goto cleanup;
4357     }
4358
4359     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4360       imageBuffer,number_channels,(cl_uint) image->columns,
4361       (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4362       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4363       exception);
4364     if (outputReady == MagickFalse)
4365       goto cleanup;
4366
4367     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4368       tempImageBuffer,number_channels,(cl_uint) image->columns,
4369       (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4370       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4371       exception);
4372     if (outputReady == MagickFalse)
4373       goto cleanup;
4374   }
4375
4376 cleanup:
4377
4378   if (tempImageBuffer != (cl_mem) NULL)
4379     ReleaseOpenCLMemObject(tempImageBuffer);
4380   if (cubicCoefficientsBuffer != (cl_mem) NULL)
4381     ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4382   if (queue != (cl_command_queue) NULL)
4383     ReleaseOpenCLCommandQueue(device,queue);
4384   if (device != (MagickCLDevice) NULL)
4385     ReleaseOpenCLDevice(device);
4386   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4387     filteredImage=DestroyImage(filteredImage);
4388
4389   return(filteredImage);
4390 }
4391
4392 static MagickBooleanType gpuSupportedResizeWeighting(
4393   ResizeWeightingFunctionType f)
4394 {
4395   unsigned int
4396     i;
4397
4398   for (i = 0; ;i++)
4399   {
4400     if (supportedResizeWeighting[i] == LastWeightingFunction)
4401       break;
4402     if (supportedResizeWeighting[i] == f)
4403       return(MagickTrue);
4404   }
4405   return(MagickFalse);
4406 }
4407
4408 MagickPrivate Image *AccelerateResizeImage(const Image *image,
4409   const size_t resizedColumns,const size_t resizedRows,
4410   const ResizeFilter *resizeFilter,ExceptionInfo *exception) 
4411 {
4412   Image
4413     *filteredImage;
4414
4415   MagickCLEnv
4416     clEnv;
4417
4418   assert(image != NULL);
4419   assert(exception != (ExceptionInfo *) NULL);
4420
4421   if (checkAccelerateCondition(image) == MagickFalse)
4422     return((Image *) NULL);
4423
4424   if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4425          resizeFilter)) == MagickFalse) ||
4426       (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4427          resizeFilter)) == MagickFalse))
4428     return((Image *) NULL);
4429
4430   clEnv=getOpenCLEnvironment(exception);
4431   if (clEnv == (MagickCLEnv) NULL)
4432     return((Image *) NULL);
4433
4434   filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4435     resizeFilter,exception);
4436   return(filteredImage);
4437 }
4438
4439 /*
4440 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4441 %                                                                             %
4442 %                                                                             %
4443 %                                                                             %
4444 %     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
4445 %                                                                             %
4446 %                                                                             %
4447 %                                                                             %
4448 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4449 */
4450
4451 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4452   const double angle,ExceptionInfo *exception)
4453 {
4454   cl_command_queue
4455     queue;
4456
4457   cl_float2
4458     blurCenter;
4459
4460   cl_int
4461     status;
4462
4463   cl_mem
4464     cosThetaBuffer,
4465     filteredImageBuffer,
4466     imageBuffer,
4467     sinThetaBuffer;
4468
4469   cl_kernel
4470     rotationalBlurKernel;
4471
4472   cl_uint
4473     cossin_theta_size,
4474     number_channels;
4475
4476   float
4477     blurRadius,
4478     *cosThetaPtr,
4479     offset,
4480     *sinThetaPtr,
4481     theta;
4482
4483   Image
4484     *filteredImage;
4485
4486   MagickBooleanType
4487     outputReady;
4488
4489   MagickCLDevice
4490     device;
4491
4492   size_t
4493     gsize[2],
4494     i;
4495
4496   filteredImage=NULL;
4497   sinThetaBuffer=NULL;
4498   cosThetaBuffer=NULL;
4499   rotationalBlurKernel=NULL;
4500   outputReady=MagickFalse;
4501
4502   device=RequestOpenCLDevice(clEnv);
4503   queue=AcquireOpenCLCommandQueue(device);
4504   filteredImage=cloneImage(image,exception);
4505   if (filteredImage == (Image *) NULL)
4506     goto cleanup;
4507   if (filteredImage->number_channels != image->number_channels)
4508     goto cleanup;
4509   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4510   if (imageBuffer == (cl_mem) NULL)
4511     goto cleanup;
4512   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4513   if (filteredImageBuffer == (cl_mem) NULL)
4514     goto cleanup;
4515
4516   blurCenter.x=(float) (image->columns-1)/2.0;
4517   blurCenter.y=(float) (image->rows-1)/2.0;
4518   blurRadius=hypot(blurCenter.x,blurCenter.y);
4519   cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
4520     (double) blurRadius)+2UL);
4521
4522   cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4523   if (cosThetaPtr == (float *) NULL)
4524     goto cleanup;
4525   sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4526   if (sinThetaPtr == (float *) NULL)
4527   {
4528     cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4529     goto cleanup;
4530   }
4531
4532   theta=DegreesToRadians(angle)/(double) (cossin_theta_size-1);
4533   offset=theta*(float) (cossin_theta_size-1)/2.0;
4534   for (i=0; i < (ssize_t) cossin_theta_size; i++)
4535   {
4536     cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4537     sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4538   }
4539
4540   sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4541     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4542   sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4543   cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4544     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4545   cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4546   if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
4547   {
4548     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4549       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4550     goto cleanup;
4551   }
4552
4553   rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4554   if (rotationalBlurKernel == (cl_kernel) NULL)
4555   {
4556     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4557       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4558     goto cleanup;
4559   }
4560
4561   number_channels=(cl_uint) image->number_channels;
4562
4563   i=0;
4564   status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4565   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4566   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4567   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4568   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4569   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4570   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4571   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4572   if (status != CL_SUCCESS)
4573   {
4574     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4575       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4576     goto cleanup;
4577   }
4578
4579   gsize[0]=image->columns;
4580   gsize[1]=image->rows;
4581   outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4582     (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4583     MagickFalse,exception);
4584
4585 cleanup:
4586
4587   if (sinThetaBuffer != (cl_mem) NULL)
4588     ReleaseOpenCLMemObject(sinThetaBuffer);
4589   if (cosThetaBuffer != (cl_mem) NULL)
4590     ReleaseOpenCLMemObject(cosThetaBuffer);
4591   if (rotationalBlurKernel != (cl_kernel) NULL)
4592     ReleaseOpenCLKernel(rotationalBlurKernel);
4593   if (queue != (cl_command_queue) NULL)
4594     ReleaseOpenCLCommandQueue(device,queue);
4595   if (device != (MagickCLDevice) NULL)
4596     ReleaseOpenCLDevice(device);
4597   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4598     filteredImage=DestroyImage(filteredImage);
4599
4600   return(filteredImage);
4601 }
4602
4603 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4604   const double angle,ExceptionInfo *exception)
4605 {
4606   Image
4607     *filteredImage;
4608
4609   MagickCLEnv
4610     clEnv;
4611
4612   assert(image != NULL);
4613   assert(exception != (ExceptionInfo *) NULL);
4614
4615   if (checkAccelerateCondition(image) == MagickFalse)
4616     return((Image *) NULL);
4617
4618   clEnv=getOpenCLEnvironment(exception);
4619   if (clEnv == (MagickCLEnv) NULL)
4620     return((Image *) NULL);
4621
4622   filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4623   return filteredImage;
4624 }
4625
4626 /*
4627 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4628 %                                                                             %
4629 %                                                                             %
4630 %                                                                             %
4631 %     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
4632 %                                                                             %
4633 %                                                                             %
4634 %                                                                             %
4635 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4636 */
4637
4638 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4639   const double radius,const double sigma,const double gain,
4640   const double threshold,ExceptionInfo *exception)
4641 {
4642   cl_command_queue
4643     queue;
4644
4645   cl_int
4646     status;
4647
4648   cl_kernel
4649     blurRowKernel,
4650     unsharpMaskBlurColumnKernel;
4651
4652   cl_mem
4653     filteredImageBuffer,
4654     imageBuffer,
4655     imageKernelBuffer,
4656     tempImageBuffer;
4657
4658   cl_uint
4659     imageColumns,
4660     imageRows,
4661     kernelWidth,
4662     number_channels;
4663
4664   float
4665     fGain,
4666     fThreshold;
4667
4668   Image
4669     *filteredImage;
4670
4671   int
4672     chunkSize;
4673
4674   MagickBooleanType
4675     outputReady;
4676
4677   MagickCLDevice
4678     device;
4679
4680   MagickSizeType
4681     length;
4682
4683   size_t
4684     gsize[2],
4685     i,
4686     lsize[2];
4687
4688   filteredImage=NULL;
4689   tempImageBuffer=NULL;
4690   imageKernelBuffer=NULL;
4691   blurRowKernel=NULL;
4692   unsharpMaskBlurColumnKernel=NULL;
4693   outputReady=MagickFalse;
4694
4695   device=RequestOpenCLDevice(clEnv);
4696   queue=AcquireOpenCLCommandQueue(device);
4697   filteredImage=cloneImage(image,exception);
4698   if (filteredImage == (Image *) NULL)
4699     goto cleanup;
4700   if (filteredImage->number_channels != image->number_channels)
4701     goto cleanup;
4702   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4703   if (imageBuffer == (cl_mem) NULL)
4704     goto cleanup;
4705   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4706   if (filteredImageBuffer == (cl_mem) NULL)
4707     goto cleanup;
4708
4709   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4710     exception);
4711
4712   length=image->columns*image->rows;
4713   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4714     sizeof(cl_float4),NULL);
4715   if (tempImageBuffer == (cl_mem) NULL)
4716   {
4717     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4718       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4719     goto cleanup;
4720   }
4721
4722   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4723   if (blurRowKernel == (cl_kernel) NULL)
4724   {
4725     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4726       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4727     goto cleanup;
4728   }
4729
4730   unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4731     "UnsharpMaskBlurColumn");
4732   if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4733   {
4734     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4735       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4736     goto cleanup;
4737   }
4738
4739   number_channels=(cl_uint) image->number_channels;
4740   imageColumns=(cl_uint) image->columns;
4741   imageRows=(cl_uint) image->rows;
4742
4743   chunkSize = 256;
4744
4745   i=0;
4746   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4747   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4748   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4749   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4750   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4751   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4752   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4753   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4754   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4755   if (status != CL_SUCCESS)
4756   {
4757     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4758       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4759     goto cleanup;
4760   }
4761
4762   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4763   gsize[1]=image->rows;
4764   lsize[0]=chunkSize;
4765   lsize[1]=1;
4766   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4767     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4768     exception);
4769
4770   chunkSize=256;
4771   fGain=(float) gain;
4772   fThreshold=(float) threshold;
4773
4774   i=0;
4775   status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4776   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4777   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4778   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4779   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4780   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4781   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4782   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4783   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4784   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4785   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4786   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4787   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4788   if (status != CL_SUCCESS)
4789   {
4790     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4791       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4792     goto cleanup;
4793   }
4794
4795   gsize[0]=image->columns;
4796   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4797   lsize[0]=1;
4798   lsize[1]=chunkSize;
4799   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4800     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4801     exception);
4802
4803 cleanup:
4804
4805   if (tempImageBuffer != (cl_mem) NULL)
4806     ReleaseOpenCLMemObject(tempImageBuffer);
4807   if (imageKernelBuffer != (cl_mem) NULL)
4808     ReleaseOpenCLMemObject(imageKernelBuffer);
4809   if (blurRowKernel != (cl_kernel) NULL)
4810     ReleaseOpenCLKernel(blurRowKernel);
4811   if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4812     ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4813   if (queue != (cl_command_queue) NULL)
4814     ReleaseOpenCLCommandQueue(device,queue);
4815   if (device != (MagickCLDevice) NULL)
4816     ReleaseOpenCLDevice(device);
4817   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4818     filteredImage=DestroyImage(filteredImage);
4819
4820   return(filteredImage);
4821 }
4822
4823 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4824   MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4825   const double threshold,ExceptionInfo *exception)
4826 {
4827   cl_command_queue
4828     queue;
4829
4830   cl_int
4831     status;
4832
4833   cl_kernel
4834     unsharpMaskKernel;
4835
4836   cl_mem
4837     filteredImageBuffer,
4838     imageBuffer,
4839     imageKernelBuffer;
4840
4841   cl_uint
4842     imageColumns,
4843     imageRows,
4844     kernelWidth,
4845     number_channels;
4846
4847   float
4848     fGain,
4849     fThreshold;
4850
4851   Image
4852     *filteredImage;
4853
4854   MagickBooleanType
4855     outputReady;
4856
4857   MagickCLDevice
4858     device;
4859
4860   size_t
4861     gsize[2],
4862     i,
4863     lsize[2];
4864
4865   filteredImage=NULL;
4866   imageKernelBuffer=NULL;
4867   unsharpMaskKernel=NULL;
4868   outputReady=MagickFalse;
4869
4870   device=RequestOpenCLDevice(clEnv);
4871   queue=AcquireOpenCLCommandQueue(device);
4872   filteredImage=cloneImage(image,exception);
4873   if (filteredImage == (Image *) NULL)
4874     goto cleanup;
4875   if (filteredImage->number_channels != image->number_channels)
4876     goto cleanup;
4877   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4878   if (imageBuffer == (cl_mem) NULL)
4879     goto cleanup;
4880   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4881   if (filteredImageBuffer == (cl_mem) NULL)
4882     goto cleanup;
4883
4884   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4885     exception);
4886
4887   unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
4888   if (unsharpMaskKernel == NULL)
4889   {
4890     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4891       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4892     goto cleanup;
4893   }
4894
4895   imageColumns=(cl_uint) image->columns;
4896   imageRows=(cl_uint) image->rows;
4897   number_channels=(cl_uint) image->number_channels;
4898   fGain=(float) gain;
4899   fThreshold=(float) threshold;
4900
4901   i=0;
4902   status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4903   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
4904   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
4905   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4906   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4907   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4908   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4909   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
4910   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
4911   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
4912   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4913   if (status != CL_SUCCESS)
4914   {
4915     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4916       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4917     goto cleanup;
4918   }
4919
4920   gsize[0]=((image->columns + 7) / 8)*8;
4921   gsize[1]=((image->rows + 31) / 32)*32;
4922   lsize[0]=8;
4923   lsize[1]=32;
4924   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
4925     gsize,lsize,image,filteredImage,MagickFalse,exception);
4926
4927 cleanup:
4928
4929   if (imageKernelBuffer != (cl_mem) NULL)
4930     ReleaseOpenCLMemObject(imageKernelBuffer);
4931   if (unsharpMaskKernel != (cl_kernel) NULL)
4932     ReleaseOpenCLKernel(unsharpMaskKernel);
4933   if (queue != (cl_command_queue) NULL)
4934     ReleaseOpenCLCommandQueue(device,queue);
4935   if (device != (MagickCLDevice) NULL)
4936     ReleaseOpenCLDevice(device);
4937   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4938     filteredImage=DestroyImage(filteredImage);
4939
4940   return(filteredImage);
4941 }
4942
4943 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
4944   const double radius,const double sigma,const double gain,
4945   const double threshold,ExceptionInfo *exception)
4946 {
4947   Image
4948     *filteredImage;
4949
4950   MagickCLEnv
4951     clEnv;
4952
4953   assert(image != NULL);
4954   assert(exception != (ExceptionInfo *) NULL);
4955
4956   if (checkAccelerateCondition(image) == MagickFalse)
4957     return((Image *) NULL);
4958
4959   clEnv=getOpenCLEnvironment(exception);
4960   if (clEnv == (MagickCLEnv) NULL)
4961     return((Image *) NULL);
4962
4963   if (radius < 12.1)
4964     filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
4965       threshold,exception);
4966   else
4967     filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
4968       threshold,exception);
4969   return(filteredImage);
4970 }
4971
4972 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
4973   const double threshold,ExceptionInfo *exception)
4974 {
4975   cl_command_queue
4976     queue;
4977
4978   const cl_int
4979     PASSES=5;
4980
4981   const int
4982     TILESIZE=64,
4983     PAD=1<<(PASSES-1),
4984     SIZE=TILESIZE-2*PAD;
4985
4986   cl_float
4987     thresh;
4988
4989   cl_int
4990     status;
4991
4992   cl_kernel
4993     denoiseKernel;
4994
4995   cl_mem
4996     filteredImageBuffer,
4997     imageBuffer;
4998
4999   cl_uint
5000     number_channels,
5001     width,
5002     height,
5003     max_channels;
5004
5005   Image
5006     *filteredImage;
5007
5008   MagickBooleanType
5009     outputReady;
5010
5011   MagickCLDevice
5012     device;
5013
5014   size_t
5015     goffset[2],
5016     gsize[2],
5017     i,
5018     lsize[2],
5019     passes,
5020     x;
5021
5022   filteredImage=NULL;
5023   denoiseKernel=NULL;
5024   queue=NULL;
5025   outputReady=MagickFalse;
5026
5027   device=RequestOpenCLDevice(clEnv);
5028   /* Work around an issue on low end Intel devices */
5029   if (strcmp("Intel(R) HD Graphics",device->name) == 0)
5030     goto cleanup;
5031   queue=AcquireOpenCLCommandQueue(device);
5032   filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
5033     exception);
5034   if (filteredImage == (Image *) NULL)
5035     goto cleanup;
5036   if (filteredImage->number_channels != image->number_channels)
5037     goto cleanup;
5038   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5039   if (imageBuffer == (cl_mem) NULL)
5040     goto cleanup;
5041   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5042   if (filteredImageBuffer == (cl_mem) NULL)
5043     goto cleanup;
5044
5045   denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5046   if (denoiseKernel == (cl_kernel) NULL)
5047   {
5048     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5049       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5050     goto cleanup;
5051   }
5052
5053   number_channels=(cl_uint)image->number_channels;
5054   width=(cl_uint)image->columns;
5055   height=(cl_uint)image->rows;
5056   max_channels=number_channels;
5057   if ((max_channels == 4) || (max_channels == 2))
5058     max_channels=max_channels-1;
5059   thresh=threshold;
5060   passes=(((1.0f*image->columns)*image->rows)+1999999.0f)/2000000.0f;
5061   passes=(passes < 1) ? 1 : passes;
5062
5063   i=0;
5064   status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5065   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5066   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5067   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5068   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5069   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5070   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5071   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5072   if (status != CL_SUCCESS)
5073     {
5074       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5075         ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5076       goto cleanup;
5077     }
5078
5079   for (x = 0; x < passes; ++x)
5080   {
5081     gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5082     gsize[1]=((((height+(SIZE-1))/SIZE)+passes-1)/passes)*4;
5083     lsize[0]=TILESIZE;
5084     lsize[1]=4;
5085     goffset[0]=0;
5086     goffset[1]=x*gsize[1];
5087
5088     outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,goffset,gsize,lsize,
5089       image,filteredImage,MagickTrue,exception);
5090     if (outputReady == MagickFalse)
5091       break;
5092   }
5093
5094 cleanup:
5095
5096   if (denoiseKernel != (cl_kernel) NULL)
5097     ReleaseOpenCLKernel(denoiseKernel);
5098   if (queue != (cl_command_queue) NULL)
5099     ReleaseOpenCLCommandQueue(device,queue);
5100   if (device != (MagickCLDevice) NULL)
5101     ReleaseOpenCLDevice(device);
5102   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5103     filteredImage=DestroyImage(filteredImage);
5104
5105   return(filteredImage);
5106 }
5107
5108 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5109   const double threshold,ExceptionInfo *exception)
5110 {
5111   Image
5112     *filteredImage;
5113
5114   MagickCLEnv
5115     clEnv;
5116
5117   assert(image != NULL);
5118   assert(exception != (ExceptionInfo *)NULL);
5119
5120   if (checkAccelerateCondition(image) == MagickFalse)
5121     return((Image *) NULL);
5122
5123   clEnv=getOpenCLEnvironment(exception);
5124   if (clEnv == (MagickCLEnv) NULL)
5125     return((Image *) NULL);
5126
5127   filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5128
5129   return(filteredImage);
5130 }
5131 #endif /* MAGICKCORE_OPENCL_SUPPORT */