2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6 % AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7 % A A C C E L E R R A A T E %
8 % AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9 % A A C C E L E R R A A T E %
10 % A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
13 % MagickCore Acceleration Methods %
20 % Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization %
21 % dedicated to making software imaging solutions freely available. %
23 % You may not use this file except in compliance with the License. You may %
24 % obtain a copy of the License at %
26 % http://www.imagemagick.org/script/license.php %
28 % Unless required by applicable law or agreed to in writing, software %
29 % distributed under the License is distributed on an "AS IS" BASIS, %
30 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31 % See the License for the specific language governing permissions and %
32 % limitations under the License. %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
36 % Morphology is the the application of various kernals, of any size and even
37 % shape, to a image in various ways (typically binary, but not always).
39 % Convolution (weighted sum or average) is just one specific type of
40 % accelerate. Just one that is very common for image bluring and sharpening
41 % effects. Not only 2D Gaussian blurring, but also 2-pass 1D Blurring.
43 % This module provides not only a general accelerate function, and the ability
44 % to apply more advanced or iterative morphologies, but also functions for the
45 % generation of many different types of kernel arrays from user supplied
46 % arguments. Prehaps even the generation of a kernel from a small image.
52 #include "MagickCore/studio.h"
53 #include "MagickCore/accelerate.h"
54 #include "MagickCore/artifact.h"
55 #include "MagickCore/cache.h"
56 #include "MagickCore/cache-private.h"
57 #include "MagickCore/cache-view.h"
58 #include "MagickCore/color-private.h"
59 #include "MagickCore/enhance.h"
60 #include "MagickCore/exception.h"
61 #include "MagickCore/exception-private.h"
62 #include "MagickCore/gem.h"
63 #include "MagickCore/hashmap.h"
64 #include "MagickCore/image.h"
65 #include "MagickCore/image-private.h"
66 #include "MagickCore/list.h"
67 #include "MagickCore/memory_.h"
68 #include "MagickCore/monitor-private.h"
69 #include "MagickCore/accelerate.h"
70 #include "MagickCore/option.h"
71 #include "MagickCore/pixel-accessor.h"
72 #include "MagickCore/prepress.h"
73 #include "MagickCore/quantize.h"
74 #include "MagickCore/registry.h"
75 #include "MagickCore/semaphore.h"
76 #include "MagickCore/splay-tree.h"
77 #include "MagickCore/statistic.h"
78 #include "MagickCore/string_.h"
79 #include "MagickCore/string-private.h"
80 #include "MagickCore/token.h"
83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
87 % A c c e l e r a t e C o n v o l v e I m a g e %
91 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
93 % AccelerateConvolveImage() applies a custom convolution kernel to the image.
94 % It is accelerated by taking advantage of speed-ups offered by executing in
95 % concert across heterogeneous platforms consisting of CPUs, GPUs, and other
98 % The format of the AccelerateConvolveImage method is:
100 % Image *AccelerateConvolveImage(const Image *image,
101 % const KernelInfo *kernel,Image *convolve_image,
102 % ExceptionInfo *exception)
104 % A description of each parameter follows:
106 % o image: the image.
108 % o kernel: the convolution kernel.
110 % o convole_image: the convoleed image.
112 % o exception: return any errors or warnings in this structure.
116 #if defined(MAGICKCORE_OPENCL_SUPPORT)
118 #if defined(MAGICKCORE_HDRI_SUPPORT)
119 #define CLOptions "-DMAGICKCORE_HDRI_SUPPORT=1 -DCLQuantum=float " \
120 "-DCLPixelType=float4 -DQuantumRange=%g -DMagickEpsilon=%g"
121 #define CLPixelPacket cl_float4
123 #if (MAGICKCORE_QUANTUM_DEPTH == 8)
124 #define CLOptions "-DCLQuantum=uchar -DCLPixelType=uchar4 " \
125 "-DQuantumRange=%g -DMagickEpsilon=%g"
126 #define CLPixelPacket cl_uchar4
127 #elif (MAGICKCORE_QUANTUM_DEPTH == 16)
128 #define CLOptions "-DCLQuantum=ushort -DCLPixelType=ushort4 " \
129 "-DQuantumRange=%g -DMagickEpsilon=%g"
130 #define CLPixelPacket cl_ushort4
131 #elif (MAGICKCORE_QUANTUM_DEPTH == 32)
132 #define CLOptions "-DCLQuantum=uint -DCLPixelType=uint4 " \
133 "-DQuantumRange=%g -DMagickEpsilon=%g"
134 #define CLPixelPacket cl_uint4
135 #elif (MAGICKCORE_QUANTUM_DEPTH == 64)
136 #define CLOptions "-DCLQuantum=ussize_t -DCLPixelType=ussize_t4 " \
137 "-DQuantumRange=%g -DMagickEpsilon=%g"
138 #define CLPixelPacket cl_ulong4
142 typedef struct _ConvolveInfo
176 "static inline long ClampToCanvas(const long offset,const unsigned long range)\n"
178 " if (offset < 0L)\n"
180 " if (offset >= range)\n"
181 " return((long) (range-1L));\n"
185 "static inline CLQuantum ClampToQuantum(const float value)\n"
187 "#if defined(MAGICKCORE_HDRI_SUPPORT)\n"
188 " return((CLQuantum) value)\n"
190 " if (value < 0.0)\n"
191 " return((CLQuantum) 0);\n"
192 " if (value >= (float) QuantumRange)\n"
193 " return((CLQuantum) QuantumRange);\n"
194 " return((CLQuantum) (value+0.5));\n"
198 "__kernel void Convolve(const __global CLPixelType *input,\n"
199 " __constant float *filter,const unsigned long width,const unsigned long height,\n"
200 " const unsigned int matte,__global CLPixelType *output)\n"
202 " const unsigned long columns = get_global_size(0);\n"
203 " const unsigned long rows = get_global_size(1);\n"
205 " const long x = get_global_id(0);\n"
206 " const long y = get_global_id(1);\n"
208 " const float scale = (1.0/QuantumRange);\n"
209 " const long mid_width = (width-1)/2;\n"
210 " const long mid_height = (height-1)/2;\n"
211 " float4 sum = { 0.0, 0.0, 0.0, 0.0 };\n"
212 " float gamma = 0.0;\n"
213 " register unsigned long i = 0;\n"
216 " if (matte != false)\n"
218 " if ((x >= width) && (x < (columns-width-1)) &&\n"
219 " (y >= height) && (y < (rows-height-1)))\n"
222 " if (matte != false)\n"
229 " for (long v=(-mid_height); v <= mid_height; v++)\n"
231 " for (long u=(-mid_width); u <= mid_width; u++)\n"
233 " const long index=ClampToCanvas(y+v,rows)*columns+\n"
234 " ClampToCanvas(x+u,columns);\n"
235 " sum.x+=filter[i]*input[index].x;\n"
236 " sum.y+=filter[i]*input[index].y;\n"
237 " sum.z+=filter[i]*input[index].z;\n"
238 " gamma+=filter[i];\n"
246 " for (long v=(-mid_height); v <= mid_height; v++)\n"
248 " for (long u=(-mid_width); u <= mid_width; u++)\n"
250 " const unsigned long index=ClampToCanvas(y+v,rows)*columns+\n"
251 " ClampToCanvas(x+u,columns);\n"
252 " const float alpha=scale*input[index].w;\n"
253 " sum.x+=alpha*filter[i]*input[index].x;\n"
254 " sum.y+=alpha*filter[i]*input[index].y;\n"
255 " sum.z+=alpha*filter[i]*input[index].z;\n"
256 " sum.w+=filter[i]*input[index].w;\n"
257 " gamma+=alpha*filter[i];\n"
265 " for (long v=(-mid_height); v <= mid_height; v++)\n"
267 " for (long u=(-mid_width); u <= mid_width; u++)\n"
269 " const unsigned long index=(y+v)*columns+(x+u);\n"
270 " sum.x+=filter[i]*input[index].x;\n"
271 " sum.y+=filter[i]*input[index].y;\n"
272 " sum.z+=filter[i]*input[index].z;\n"
273 " gamma+=filter[i];\n"
281 " for (long v=(-mid_height); v <= mid_height; v++)\n"
283 " for (long u=(-mid_width); u <= mid_width; u++)\n"
285 " const unsigned long index=(y+v)*columns+(x+u);\n"
286 " const float alpha=scale*input[index].w;\n"
287 " sum.x+=alpha*filter[i]*input[index].x;\n"
288 " sum.y+=alpha*filter[i]*input[index].y;\n"
289 " sum.z+=alpha*filter[i]*input[index].z;\n"
290 " sum.w+=filter[i]*input[index].w;\n"
291 " gamma+=alpha*filter[i];\n"
298 " gamma=1.0/(fabs(gamma) <= MagickEpsilon ? 1.0 : gamma);\n"
299 " const unsigned long index = y*columns+x;\n"
300 " output[index].x=ClampToQuantum(gamma*sum.x);\n"
301 " output[index].y=ClampToQuantum(gamma*sum.y);\n"
302 " output[index].z=ClampToQuantum(gamma*sum.z);\n"
303 " if (matte == false)\n"
304 " output[index].w=input[index].w;\n"
306 " output[index].w=ClampToQuantum(sum.w);\n"
309 static void ConvolveNotify(const char *message,const void *data,size_t length,
317 exception=(ExceptionInfo *) user_context;
318 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
319 "DelegateFailed","`%s'",message);
322 static MagickBooleanType BindConvolveParameters(ConvolveInfo *convolve_info,
323 const Image *image,const void *pixels,float *filter,const size_t width,
324 const size_t height,void *convolve_pixels)
336 Allocate OpenCL buffers.
338 length=image->columns*image->rows;
339 convolve_info->pixels=clCreateBuffer(convolve_info->context,(cl_mem_flags)
340 (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(CLPixelPacket),
341 (void *) pixels,&status);
342 if ((convolve_info->pixels == (cl_mem) NULL) || (status != CL_SUCCESS))
345 convolve_info->filter=clCreateBuffer(convolve_info->context,(cl_mem_flags)
346 (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(cl_float),filter,
348 if ((convolve_info->filter == (cl_mem) NULL) || (status != CL_SUCCESS))
350 length=image->columns*image->rows;
351 convolve_info->convolve_pixels=clCreateBuffer(convolve_info->context,
352 (cl_mem_flags) (CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR),length*
353 sizeof(CLPixelPacket),convolve_pixels,&status);
354 if ((convolve_info->convolve_pixels == (cl_mem) NULL) ||
355 (status != CL_SUCCESS))
361 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
362 &convolve_info->pixels);
363 if (status != CL_SUCCESS)
365 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
366 &convolve_info->filter);
367 if (status != CL_SUCCESS)
369 convolve_info->width=(cl_ulong) width;
370 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *)
371 &convolve_info->width);
372 if (status != CL_SUCCESS)
374 convolve_info->height=(cl_ulong) height;
375 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *)
376 &convolve_info->height);
377 if (status != CL_SUCCESS)
379 convolve_info->matte=(cl_uint) image->matte;
380 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_uint),(void *)
381 &convolve_info->matte);
382 if (status != CL_SUCCESS)
384 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
385 &convolve_info->convolve_pixels);
386 if (status != CL_SUCCESS)
388 status=clFinish(convolve_info->command_queue);
389 if (status != CL_SUCCESS)
394 static void DestroyConvolveBuffers(ConvolveInfo *convolve_info)
400 if (convolve_info->convolve_pixels != (cl_mem) NULL)
401 status=clReleaseMemObject(convolve_info->convolve_pixels);
402 if (convolve_info->pixels != (cl_mem) NULL)
403 status=clReleaseMemObject(convolve_info->pixels);
404 if (convolve_info->filter != (cl_mem) NULL)
405 status=clReleaseMemObject(convolve_info->filter);
408 static ConvolveInfo *DestroyConvolveInfo(ConvolveInfo *convolve_info)
414 if (convolve_info->kernel != (cl_kernel) NULL)
415 status=clReleaseKernel(convolve_info->kernel);
416 if (convolve_info->program != (cl_program) NULL)
417 status=clReleaseProgram(convolve_info->program);
418 if (convolve_info->command_queue != (cl_command_queue) NULL)
419 status=clReleaseCommandQueue(convolve_info->command_queue);
420 if (convolve_info->context != (cl_context) NULL)
421 status=clReleaseContext(convolve_info->context);
422 convolve_info=(ConvolveInfo *) RelinquishMagickMemory(convolve_info);
423 return(convolve_info);
426 static MagickBooleanType EnqueueConvolveKernel(ConvolveInfo *convolve_info,
427 const Image *image,const void *pixels,float *filter,const size_t width,
428 const size_t height,void *convolve_pixels)
437 length=image->columns*image->rows;
438 status=clEnqueueWriteBuffer(convolve_info->command_queue,
439 convolve_info->pixels,CL_TRUE,0,length*sizeof(CLPixelPacket),pixels,0,NULL,
442 status=clEnqueueWriteBuffer(convolve_info->command_queue,
443 convolve_info->filter,CL_TRUE,0,length*sizeof(cl_float),filter,0,NULL,
445 if (status != CL_SUCCESS)
447 global_work_size[0]=image->columns;
448 global_work_size[1]=image->rows;
449 status=clEnqueueNDRangeKernel(convolve_info->command_queue,
450 convolve_info->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
451 if (status != CL_SUCCESS)
453 length=image->columns*image->rows;
454 status=clEnqueueReadBuffer(convolve_info->command_queue,
455 convolve_info->convolve_pixels,CL_TRUE,0,length*sizeof(CLPixelPacket),
456 convolve_pixels,0,NULL,NULL);
457 if (status != CL_SUCCESS)
459 status=clFinish(convolve_info->command_queue);
460 if (status != CL_SUCCESS)
465 static ConvolveInfo *GetConvolveInfo(const Image *image,const char *name,
466 const char *source,ExceptionInfo *exception)
469 options[MaxTextExtent];
471 cl_context_properties
472 context_properties[3];
488 lengths[] = { strlen(source) };
493 convolve_info=(ConvolveInfo *) AcquireMagickMemory(sizeof(*convolve_info));
494 if (convolve_info == (ConvolveInfo *) NULL)
496 (void) ThrowMagickException(exception,GetMagickModule(),
497 ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
498 return((ConvolveInfo *) NULL);
500 (void) ResetMagickMemory(convolve_info,0,sizeof(*convolve_info));
502 Create OpenCL context.
504 status=clGetPlatformIDs(0,(cl_platform_id *) NULL,&number_platforms);
505 if ((status == CL_SUCCESS) && (number_platforms > 0))
506 status=clGetPlatformIDs(1,platforms,NULL);
507 if (status != CL_SUCCESS)
509 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
510 "failed to create OpenCL context","`%s' (%d)",image->filename,status);
511 convolve_info=DestroyConvolveInfo(convolve_info);
512 return((ConvolveInfo *) NULL);
514 context_properties[0]=CL_CONTEXT_PLATFORM;
515 context_properties[1]=(cl_context_properties) platforms[0];
516 context_properties[2]=0;
517 convolve_info->context=clCreateContextFromType(context_properties,
518 (cl_device_type) CL_DEVICE_TYPE_GPU,ConvolveNotify,exception,&status);
519 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
520 convolve_info->context=clCreateContextFromType(context_properties,
521 (cl_device_type) CL_DEVICE_TYPE_CPU,ConvolveNotify,exception,&status);
522 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
523 convolve_info->context=clCreateContextFromType(context_properties,
524 (cl_device_type) CL_DEVICE_TYPE_DEFAULT,ConvolveNotify,exception,&status);
525 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
527 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
528 "failed to create OpenCL context","`%s' (%d)",image->filename,status);
529 convolve_info=DestroyConvolveInfo(convolve_info);
530 return((ConvolveInfo *) NULL);
533 Detect OpenCL devices.
535 status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,0,NULL,
537 if ((status != CL_SUCCESS) || (length == 0))
539 convolve_info=DestroyConvolveInfo(convolve_info);
540 return((ConvolveInfo *) NULL);
542 convolve_info->devices=(cl_device_id *) AcquireMagickMemory(length);
543 if (convolve_info->devices == (cl_device_id *) NULL)
545 (void) ThrowMagickException(exception,GetMagickModule(),
546 ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
547 convolve_info=DestroyConvolveInfo(convolve_info);
548 return((ConvolveInfo *) NULL);
550 status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,length,
551 convolve_info->devices,NULL);
552 if (status != CL_SUCCESS)
554 convolve_info=DestroyConvolveInfo(convolve_info);
555 return((ConvolveInfo *) NULL);
558 Create OpenCL command queue.
560 convolve_info->command_queue=clCreateCommandQueue(convolve_info->context,
561 convolve_info->devices[0],0,&status);
562 if ((convolve_info->command_queue == (cl_command_queue) NULL) ||
563 (status != CL_SUCCESS))
565 convolve_info=DestroyConvolveInfo(convolve_info);
566 return((ConvolveInfo *) NULL);
569 Build OpenCL program.
571 convolve_info->program=clCreateProgramWithSource(convolve_info->context,1,
572 &source,lengths,&status);
573 if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS))
575 convolve_info=DestroyConvolveInfo(convolve_info);
576 return((ConvolveInfo *) NULL);
578 (void) FormatLocaleString(options,MaxTextExtent,CLOptions,(float)
579 QuantumRange,MagickEpsilon);
580 status=clBuildProgram(convolve_info->program,1,convolve_info->devices,options,
582 if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS))
587 status=clGetProgramBuildInfo(convolve_info->program,
588 convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,0,NULL,&length);
589 log=(char *) AcquireMagickMemory(length);
590 if (log == (char *) NULL)
592 convolve_info=DestroyConvolveInfo(convolve_info);
593 return((ConvolveInfo *) NULL);
595 status=clGetProgramBuildInfo(convolve_info->program,
596 convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,length,log,&length);
597 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
598 "failed to build OpenCL program","`%s' (%s)",image->filename,log);
599 log=DestroyString(log);
600 convolve_info=DestroyConvolveInfo(convolve_info);
601 return((ConvolveInfo *) NULL);
606 convolve_info->kernel=clCreateKernel(convolve_info->program,name,&status);
607 if ((convolve_info->kernel == (cl_kernel) NULL) || (status != CL_SUCCESS))
609 convolve_info=DestroyConvolveInfo(convolve_info);
610 return((ConvolveInfo *) NULL);
612 return(convolve_info);
617 MagickExport MagickBooleanType AccelerateConvolveImage(const Image *image,
618 const KernelInfo *kernel,Image *convolve_image,ExceptionInfo *exception)
620 assert(image != (Image *) NULL);
621 assert(image->signature == MagickSignature);
622 if (image->debug != MagickFalse)
623 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
624 assert(kernel != (KernelInfo *) NULL);
625 assert(kernel->signature == MagickSignature);
626 assert(convolve_image != (Image *) NULL);
627 assert(convolve_image->signature == MagickSignature);
628 assert(exception != (ExceptionInfo *) NULL);
629 assert(exception->signature == MagickSignature);
630 if ((image->storage_class != DirectClass) ||
631 (image->colorspace == CMYKColorspace))
633 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
634 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
636 if (GetPixelChannels(image) != 4)
638 #if !defined(MAGICKCORE_OPENCL_SUPPORT)
663 convolve_info=GetConvolveInfo(image,"Convolve",ConvolveKernel,exception);
664 if (convolve_info == (ConvolveInfo *) NULL)
666 pixels=AcquirePixelCachePixels(image,&length,exception);
667 if (pixels == (const void *) NULL)
669 convolve_info=DestroyConvolveInfo(convolve_info);
670 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
671 "UnableToReadPixelCache","`%s'",image->filename);
674 convolve_pixels=GetPixelCachePixels(convolve_image,&length,exception);
675 if (convolve_pixels == (void *) NULL)
677 convolve_info=DestroyConvolveInfo(convolve_info);
678 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
679 "UnableToReadPixelCache","`%s'",image->filename);
682 filter=(float *) AcquireQuantumMemory(kernel->width,kernel->height*
684 if (filter == (float *) NULL)
686 DestroyConvolveBuffers(convolve_info);
687 convolve_info=DestroyConvolveInfo(convolve_info);
688 (void) ThrowMagickException(exception,GetMagickModule(),
689 ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
692 for (i=0; i < (ssize_t) (kernel->width*kernel->height); i++)
693 filter[i]=(float) kernel->values[i];
694 status=BindConvolveParameters(convolve_info,image,pixels,filter,
695 kernel->width,kernel->height,convolve_pixels);
696 if (status == MagickFalse)
698 filter=(float *) RelinquishMagickMemory(filter);
699 DestroyConvolveBuffers(convolve_info);
700 convolve_info=DestroyConvolveInfo(convolve_info);
703 status=EnqueueConvolveKernel(convolve_info,image,pixels,filter,
704 kernel->width,kernel->height,convolve_pixels);
705 filter=(float *) RelinquishMagickMemory(filter);
706 if (status == MagickFalse)
708 DestroyConvolveBuffers(convolve_info);
709 convolve_info=DestroyConvolveInfo(convolve_info);
712 DestroyConvolveBuffers(convolve_info);
713 convolve_info=DestroyConvolveInfo(convolve_info);