]> granicus.if.org Git - imagemagick/commitdiff
AccelerateUnsharpMaskImage now supports R/RA/RGB images.
authordirk <dirk@git.imagemagick.org>
Sat, 2 Apr 2016 20:28:46 +0000 (22:28 +0200)
committerdirk <dirk@git.imagemagick.org>
Sat, 2 Apr 2016 20:49:22 +0000 (22:49 +0200)
MagickCore/accelerate-private.h
MagickCore/accelerate.c

index 5b8faa7963b10da647c84828e2813f5f85acb5d0..d2a6b1e492883997b01497023cb3858d5c7f1ae3 100644 (file)
@@ -358,10 +358,10 @@ OPENCL_ENDIF()
 
   STRINGIFY(
 
-  inline __global CLQuantum *getPixel(__global CLQuantum *image, const unsigned int number_channels,
+  inline unsigned int getPixelIndex(const unsigned int number_channels,
     const unsigned int columns, const unsigned int x, const unsigned int y)
   {
-    return image + (x * number_channels) + (y * columns * number_channels);
+    return (x * number_channels) + (y * columns * number_channels);
   }
 
   inline float getPixelRed(const __global CLQuantum *p)   { return (float)*p; }
@@ -414,10 +414,10 @@ OPENCL_ENDIF()
       *alpha=getPixelAlpha(p);
   }
 
-  inline float4 ReadFloat4(__global CLQuantum *image, const unsigned int number_channels,
+  inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels,
     const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel)
   {
-    const __global CLQuantum *p = getPixel(image, number_channels, columns, x, y);
+    const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
 
     float red = 0.0f;
     float green = 0.0f;
@@ -452,7 +452,7 @@ OPENCL_ENDIF()
     const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel,
     float4 pixel)
   {
-    __global CLQuantum *p = getPixel(image, number_channels, columns, x, y);
+    __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
     WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w);
   }
 
@@ -866,7 +866,7 @@ OPENCL_ENDIF()
   /*
   Reduce image noise and reduce detail levels by row
   */
-  __kernel void BlurRow(__global CLQuantum *image,
+  __kernel void BlurRow(const __global CLQuantum *image,
     const unsigned int number_channels,const ChannelType channel,
     __constant float *filter,const unsigned int width,
     const unsigned int imageColumns,const unsigned int imageRows,
@@ -1900,15 +1900,14 @@ OPENCL_ENDIF()
   number_parameters : numbers of parameters 
   parameters : the parameter
   */
-  __kernel void ComputeFunction(__global CLQuantum *image,
-    const unsigned int number_channels,const ChannelType channel,
-    const MagickFunction function,const unsigned int number_parameters,
+  __kernel void ComputeFunction(__global CLQuantum *image,const unsigned int number_channels,
+    const ChannelType channel,const MagickFunction function,const unsigned int number_parameters,
     __constant float *parameters)
   {
     const unsigned int x = get_global_id(0);
     const unsigned int y = get_global_id(1);
     const unsigned int columns = get_global_size(0);
-    __global CLQuantum *p = getPixel(image, number_channels, columns, x, y);
+    __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
 
     float red;
     float green;
@@ -1956,7 +1955,7 @@ OPENCL_ENDIF()
     const unsigned int x = get_global_id(0);
     const unsigned int y = get_global_id(1);
     const unsigned int columns = get_global_size(0);
-    __global CLQuantum *p = getPixel(image, number_channels, columns, x, y);
+    __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
 
     float
       blue,
@@ -3101,12 +3100,13 @@ STRINGIFY(
 */
 
   STRINGIFY(
-  __kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage, 
-        const __global float4 *blurRowData, __global CLPixelType *filtered_im,
-        const unsigned int imageColumns, const unsigned int imageRows, 
-        __local float4* cachedData, __local float* cachedFilter,
-        const ChannelType channel, const __global float *filter, const unsigned int width, 
-        const float gain, const float threshold)
+  __kernel void UnsharpMaskBlurColumn(const __global CLQuantum* image,
+    const __global float4 *blurRowData,const unsigned int number_channels,
+    const ChannelType channel,const unsigned int columns,
+    const unsigned int rows,__local float4* cachedData,
+    __local float* cachedFilter,const __global float *filter,
+    const unsigned int width,const float gain, const float threshold,
+    __global CLQuantum *filteredImage)
   {
     const unsigned int radius = (width-1)/2;
 
@@ -3115,17 +3115,17 @@ STRINGIFY(
     const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
     const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
 
-    if (groupStartY >= 0
-        && groupStopY < imageRows) {
-      event_t e = async_work_group_strided_copy(cachedData
-                                              ,blurRowData+groupStartY*imageColumns+groupX
-                                              ,groupStopY-groupStartY,imageColumns,0);
+    if ((groupStartY >= 0) && (groupStopY < rows))
+    {
+      event_t e = async_work_group_strided_copy(cachedData,
+        blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0);
       wait_group_events(1,&e);
     }
-    else {
-      for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
-        cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
-      }
+    else
+    {
+      for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1))
+        cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX];
+
       barrier(CLK_LOCAL_MEM_FENCE);
     }
     // cache the filter as well
@@ -3133,36 +3133,24 @@ STRINGIFY(
     wait_group_events(1,&e);
 
     // only do the work if this is not a patched item
-    //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
     const int cy = get_global_id(1);
 
-    if (cy < imageRows) {
+    if (cy < rows)
+    {
       float4 blurredPixel = (float4) 0.0f;
 
       int i = 0;
 
-      \n #ifndef UFACTOR   \n 
-        \n #define UFACTOR 8 \n 
-        \n #endif                  \n 
-
-        for ( ; i+UFACTOR < width; ) 
-        {
-          \n #pragma unroll UFACTOR \n
-            for (int j=0; j < UFACTOR; j++, i++)
-            {
-              blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
-            }
-        }
-
-      for ( ; i < width; i++)
+      for ( ; i+7 < width; )
       {
-        blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
+        for (int j=0; j < 8; j++, i++)
+          blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)];
       }
 
-      blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
-                                    ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
+      for ( ; i < width; i++)
+        blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
 
-      float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
+      float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel);
       float4 outputPixel = inputImagePixel - blurredPixel;
 
       float quantumThreshold = QuantumRange*threshold;
@@ -3171,15 +3159,13 @@ STRINGIFY(
       outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
 
       //write back
-      filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
-                                                          ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
-
+      WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel);
     }
   }
   )
 
   STRINGIFY(
-  __kernel void UnsharpMask(__global CLQuantum *image,const unsigned int number_channels,
+  __kernel void UnsharpMask(const __global CLQuantum *image,const unsigned int number_channels,
     const ChannelType channel,__constant float *filter,const unsigned int width,
     const unsigned int columns,const unsigned int rows,__local float4 *pixels,
     const float gain,const float threshold, const unsigned int justBlur,
index 5e813adc3fd083a9d8327ddfa22734e8707acb0e..88938169297be727fb4fc80837a299304985ee98 100644 (file)
@@ -5850,9 +5850,6 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
     *filteredImage_view,
     *image_view;
 
-  char
-    geometry[MagickPathExtent];
-
   cl_command_queue
     queue;
 
@@ -5875,16 +5872,15 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
     imageKernelBuffer,
     tempImageBuffer;
 
-  cl_mem_flags
-    mem_flags;
-
-  const void
-    *inputPixels;
+  cl_uint
+    imageColumns,
+    imageRows,
+    kernelWidth,
+    number_channels;
 
   float
     fGain,
-    fThreshold,
-    *kernelBufferPtr;
+    fThreshold;
 
   Image
     *filteredImage;
@@ -5892,9 +5888,6 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
   int
     chunkSize;
 
-  KernelInfo
-    *kernel;
-
   MagickBooleanType
     outputReady;
 
@@ -5905,22 +5898,18 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
     length;
 
   void
-    *filteredPixels,
-    *hostPtr;
+    *filteredPixels;
 
   unsigned int
-    i,
-    imageColumns,
-    imageRows,
-    kernelWidth;
+    i;
 
   clEnv = NULL;
   filteredImage = NULL;
   filteredImage_view = NULL;
-  kernel = NULL;
   context = NULL;
   imageBuffer = NULL;
   filteredImageBuffer = NULL;
+  filteredPixels = NULL;
   tempImageBuffer = NULL;
   imageKernelBuffer = NULL;
   blurRowKernel = NULL;
@@ -5932,116 +5921,33 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
   context = GetOpenCLContext(clEnv);
   queue = AcquireOpenCLCommandQueue(clEnv);
 
-  /* Create and initialize OpenCL buffers. */
-  {
-    image_view=AcquireVirtualCacheView(image,exception);
-    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
-    if (inputPixels == (const void *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
-      goto cleanup;
-    }
-
-    /* If the host pointer is aligned to the size of CLPixelPacket, 
-     then use the host buffer directly from the GPU; otherwise, 
-     create a buffer on the GPU and copy the data over */
-    if (ALIGNED(inputPixels,CLPixelPacket)) 
-    {
-      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
-    }
-    else 
-    {
-      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
-    }
-    /* create a CL buffer from image pixel buffer */
-    length = image->columns * image->rows;
-    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
-  }
+  image_view = AcquireVirtualCacheView(image, exception);
+  imageBuffer=createReadBuffer(image,image_view,clEnv,context,exception);
+  if (imageBuffer == (cl_mem) NULL)
+    goto cleanup;
 
-  /* create output */
+  filteredImage=CloneImage(image,0,0,MagickTrue,exception);
+  if (filteredImage == (Image *) NULL)
+    goto cleanup;
+  if (SetImageStorageClass(filteredImage, DirectClass, exception) != MagickTrue)
   {
-    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
-    assert(filteredImage != NULL);
-    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
-      goto cleanup;
-    }
-    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
-    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
-    if (filteredPixels == (void *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
-      goto cleanup;
-    }
-
-    if (ALIGNED(filteredPixels,CLPixelPacket)) 
-    {
-      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
-      hostPtr = filteredPixels;
-    }
-    else 
-    {
-      mem_flags = CL_MEM_WRITE_ONLY;
-      hostPtr = NULL;
-    }
-
-    /* create a CL buffer from image pixel buffer */
-    length = image->columns * image->rows;
-    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
+    (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
+    goto cleanup;
   }
+  filteredImage_view = AcquireAuthenticCacheView(filteredImage, exception);
+  filteredImageBuffer=createWriteBuffer(filteredImage,filteredImage_view,clEnv,
+    context,filteredPixels,exception);
+  if (filteredImageBuffer == (cl_mem) NULL)
+    goto cleanup;
 
-  /* create the blur kernel */
-  {
-    (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
-    kernel=AcquireKernelInfo(geometry,exception);
-    if (kernel == (KernelInfo *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
-      goto cleanup;
-    }
-
-    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
-
-
-    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
-      goto cleanup;
-    }
-    for (i = 0; i < kernel->width; i++)
-    {
-      kernelBufferPtr[i] = (float) kernel->values[i];
-    }
-    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
-      goto cleanup;
-    }
-  }
+  imageKernelBuffer=createKernelInfo(clEnv,context,queue,radius,sigma,
+    &kernelWidth,exception);
 
   {
     /* create temp buffer */
     {
       length = image->columns * image->rows;
-      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
+      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * sizeof(cl_float4), NULL, &clStatus);
       if (clStatus != CL_SUCCESS)
       {
         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
@@ -6066,24 +5972,24 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
       };
     }
 
+    number_channels = (cl_uint) image->number_channels;
+    imageColumns = (cl_uint) image->columns;
+    imageRows = (cl_uint) image->rows;
+
     {
       chunkSize = 256;
 
-      imageColumns = (unsigned int) image->columns;
-      imageRows = (unsigned int) image->rows;
-
-      kernelWidth = (unsigned int) kernel->width;
-
       /* set the kernel arguments */
       i = 0;
       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
       if (clStatus != CL_SUCCESS)
       {
         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
@@ -6101,7 +6007,7 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
       wsize[0] = chunkSize;
       wsize[1] = 1;
 
-         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
+      clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
       if (clStatus != CL_SUCCESS)
       {
         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
@@ -6115,25 +6021,23 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
 
     {
       chunkSize = 256;
-      imageColumns = (unsigned int) image->columns;
-      imageRows = (unsigned int) image->rows;
-      kernelWidth = (unsigned int) kernel->width;
       fGain = (float) gain;
       fThreshold = (float) threshold;
 
       i = 0;
       clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
       clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
 
       if (clStatus != CL_SUCCESS)
       {
@@ -6152,7 +6056,7 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
       wsize[0] = 1;
       wsize[1] = chunkSize;
 
-         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
+      clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
       if (clStatus != CL_SUCCESS)
       {
         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
@@ -6166,19 +6070,9 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
   }
 
   /* get result */
-  if (ALIGNED(filteredPixels,CLPixelPacket)) 
-  {
-    length = image->columns * image->rows;
-    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
-  }
-  else 
-  {
-    length = image->columns * image->rows;
-    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
-  }
-  if (clStatus != CL_SUCCESS)
+  if (copyWriteBuffer(image,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -6191,7 +6085,6 @@ cleanup:
   if (filteredImage_view != NULL)
     filteredImage_view=DestroyCacheView(filteredImage_view);
 
-  if (kernel != NULL)                        kernel=DestroyKernelInfo(kernel);
   if (imageBuffer!=NULL)                     clEnv->library->clReleaseMemObject(imageBuffer);
   if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
   if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
@@ -6396,7 +6289,7 @@ MagickExport Image *AccelerateUnsharpMaskImage(const Image *image,
   assert(image != NULL);
   assert(exception != (ExceptionInfo *) NULL);
 
-  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
+  if ((checkAccelerateCondition(image) == MagickFalse) ||
       (checkOpenCLEnvironment(exception) == MagickFalse))
     return NULL;