]> granicus.if.org Git - imagemagick/commitdiff
Removed section kernels.
authordirk <dirk@git.imagemagick.org>
Sat, 2 Apr 2016 19:07:09 +0000 (21:07 +0200)
committerdirk <dirk@git.imagemagick.org>
Sat, 2 Apr 2016 20:49:21 +0000 (22:49 +0200)
MagickCore/accelerate-private.h
MagickCore/accelerate.c

index 7e2c9593da92019d1da41410da389e5abf4abaae..bb375f80dbed8d714370c374e78e1aec4756308b 100644 (file)
@@ -324,13 +324,6 @@ OPENCL_ENDIF()
       }
   )
 
-  STRINGIFY(
-    inline int ClampToCanvasWithHalo(const int offset,const int range, const int edge, const int section)
-      {
-        return clamp(offset, section?(int)(0-edge):(int)0, section?(range-1):(range-1+edge));
-      }
-  )
-
   STRINGIFY(
     inline CLQuantum ClampToQuantum(const float value)
       {
@@ -869,176 +862,6 @@ OPENCL_ENDIF()
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 */
 
-  STRINGIFY(
-  /*
-  Reduce image noise and reduce detail levels by row
-  im: input pixels filtered_in  filtered_im: output pixels
-  filter : convolve kernel  width: convolve kernel size
-  channel : define which channel is blured
-  is_RGBA_BGRA : define the input is RGBA or BGRA
-  */
-  __kernel void BlurSectionRow(__global CLPixelType *im, __global float4 *filtered_im,
-                      const ChannelType channel, __constant float *filter,
-                      const unsigned int width, 
-                      const unsigned int imageColumns, const unsigned int imageRows,
-                      __local CLPixelType *temp, 
-                      const unsigned int offsetRows, const unsigned int section)
-  {
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    const int columns = imageColumns;
-
-    const unsigned int radius = (width-1)/2;
-    const int wsize = get_local_size(0);
-    const unsigned int loadSize = wsize+width;
-
-    //group coordinate
-    const int groupX=get_local_size(0)*get_group_id(0);
-    const int groupY=get_local_size(1)*get_group_id(1);
-
-    //offset the input data, assuming section is 0, 1 
-    im += imageColumns * (offsetRows - radius * section);
-
-    //parallel load and clamp
-    for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
-    {
-      //int cx = ClampToCanvas(groupX+i, columns);
-      temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
-
-      /*if (0 && y==0 && get_group_id(1) == 0)
-      {
-        printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
-      }*/
-    }
-
-    // barrier
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // only do the work if this is not a patched item
-    if (get_global_id(0) < columns)
-    {
-      // compute
-      float4 result = (float4) 0;
-
-      int i = 0;
-
-      \n #ifndef UFACTOR   \n
-      \n #define UFACTOR 8 \n
-      \n #endif                  \n
-
-      for ( ; i+UFACTOR < width; )
-      {
-        \n #pragma unroll UFACTOR\n
-        for (int j=0; j < UFACTOR; j++, i++)
-        {
-          result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
-        }
-      }
-
-      for ( ; i < width; i++)
-      {
-        result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
-      }
-
-      result.x = ClampToQuantum(result.x);
-      result.y = ClampToQuantum(result.y);
-      result.z = ClampToQuantum(result.z);
-      result.w = ClampToQuantum(result.w);
-
-      // write back to global
-      filtered_im[y*columns+x] = result;
-    }
-
-  }
-  )
-
-  STRINGIFY(
-  /*
-  Reduce image noise and reduce detail levels by line
-  im: input pixels filtered_in  filtered_im: output pixels
-  filter : convolve kernel  width: convolve kernel size
-  channel : define which channel is blured\
-  is_RGBA_BGRA : define the input is RGBA or BGRA
-  */
-  __kernel void BlurSectionColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
-                            const ChannelType channel, __constant float *filter,
-                            const unsigned int width, 
-                            const unsigned int imageColumns, const unsigned int imageRows,
-                            __local float4 *temp, 
-                            const unsigned int offsetRows, const unsigned int section)
-  {
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-
-    //const int columns = get_global_size(0);
-    //const int rows = get_global_size(1);
-    const int columns = imageColumns;
-    const int rows = imageRows;
-
-    unsigned int radius = (width-1)/2;
-    const int wsize = get_local_size(1);
-    const unsigned int loadSize = wsize+width;
-
-    //group coordinate
-    const int groupX=get_local_size(0)*get_group_id(0);
-    const int groupY=get_local_size(1)*get_group_id(1);
-    //notice that get_local_size(0) is 1, so
-    //groupX=get_group_id(0);
-
-    // offset the input data
-    blurRowData += imageColumns * radius * section;
-
-    //parallel load and clamp
-    for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
-    {
-      int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
-      temp[i] = *(blurRowData+pos);
-    }
-
-    // barrier
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    // only do the work if this is not a patched item
-    if (get_global_id(1) < rows)
-    {
-      // compute
-      float4 result = (float4) 0;
-
-      int i = 0;
-
-      \n #ifndef UFACTOR   \n
-      \n #define UFACTOR 8 \n
-      \n #endif                  \n
-
-      for ( ; i+UFACTOR < width; )
-      {
-        \n #pragma unroll UFACTOR \n
-        for (int j=0; j < UFACTOR; j++, i++)
-        {
-          result+=filter[i]*temp[i+get_local_id(1)];
-        }
-      }
-      for ( ; i < width; i++)
-      {
-        result+=filter[i]*temp[i+get_local_id(1)];
-      }
-
-      result.x = ClampToQuantum(result.x);
-      result.y = ClampToQuantum(result.y);
-      result.z = ClampToQuantum(result.z);
-      result.w = ClampToQuantum(result.w);
-
-      // offset the output data
-      filtered_im += imageColumns * offsetRows;
-
-      // write back to global
-      filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
-    }
-
-  }
-  )
-
   STRINGIFY(
   /*
   Reduce image noise and reduce detail levels by row
@@ -3390,94 +3213,7 @@ STRINGIFY(
 
       }
     }
-
-    __kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage, 
-          const __global float4 *blurRowData, __global CLPixelType *filtered_im,
-          const unsigned int imageColumns, const unsigned int imageRows, 
-          __local float4* cachedData, __local float* cachedFilter,
-          const ChannelType channel, const __global float *filter, const unsigned int width, 
-          const float gain, const float threshold, 
-          const unsigned int offsetRows, const unsigned int section)
-    {
-      const unsigned int radius = (width-1)/2;
-
-      // cache the pixel shared by the workgroup
-      const int groupX = get_group_id(0);
-      const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
-      const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
-
-      // offset the input data
-      blurRowData += imageColumns * radius * section;
-
-      if (groupStartY >= 0
-          && groupStopY < imageRows) {
-        event_t e = async_work_group_strided_copy(cachedData
-                                                ,blurRowData+groupStartY*imageColumns+groupX
-                                                ,groupStopY-groupStartY,imageColumns,0);
-        wait_group_events(1,&e);
-      }
-      else {
-        for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
-          int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX;
-          cachedData[i] = *(blurRowData + pos);
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-      }
-      // cache the filter as well
-      event_t e = async_work_group_copy(cachedFilter,filter,width,0);
-      wait_group_events(1,&e);
-
-      // only do the work if this is not a patched item
-      //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
-      const int cy = get_global_id(1);
-
-      if (cy < imageRows) {
-        float4 blurredPixel = (float4) 0.0f;
-
-        int i = 0;
-
-        \n #ifndef UFACTOR   \n 
-          \n #define UFACTOR 8 \n 
-          \n #endif                  \n 
-
-          for ( ; i+UFACTOR < width; ) 
-          {
-            \n #pragma unroll UFACTOR \n
-              for (int j=0; j < UFACTOR; j++, i++)
-              {
-                blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
-              }
-          }
-
-        for ( ; i < width; i++)
-        {
-          blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
-        }
-
-        blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
-                                      ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
-
-        // offset the output data
-        inputImage += imageColumns * offsetRows; 
-        filtered_im += imageColumns * offsetRows;
-
-        float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
-        float4 outputPixel = inputImagePixel - blurredPixel;
-
-        float quantumThreshold = QuantumRange*threshold;
-
-        int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
-        outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
-
-        //write back
-        filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
-                                                            ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
-
-      }
-     
-    }
-    )
-
+  )
 
   STRINGIFY(
   __kernel void UnsharpMask(__global CLQuantum *image,const unsigned int number_channels,
index 4ce20b61b14982c2d1bf4e8f6937fbe3fb738264..eec096b53c921ac3fe504c9e2c842394106253e7 100644 (file)
@@ -257,27 +257,6 @@ inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
   return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
 }
 
-static MagickBooleanType splitImage(const Image* image)
-{
-  MagickBooleanType
-    split;
-
-  MagickCLEnv
-    clEnv;
-
-  unsigned long
-    allocSize,
-    tempSize;
-
-  clEnv=GetDefaultOpenCLEnv();
-
-  allocSize=GetOpenCLDeviceMaxMemAllocSize(clEnv);
-  tempSize=(unsigned long) (image->columns * image->rows * 4 * 4);
-
-  split = ((tempSize > allocSize) ? MagickTrue : MagickFalse);
-  return(split);
-}
-
 static cl_mem createBuffer(const Image *image,CacheView *image_view,
   MagickCLEnv clEnv,cl_context context,cl_mem_flags flags,void *pixels,
   ExceptionInfo *exception)
@@ -1054,55 +1033,139 @@ cleanup:
   return(filteredImage);
 }
 
-static Image* ComputeBlurImageSection(const Image* image,
+static Image* ComputeBlurImageSingle(const Image* image,
   const double radius,const double sigma,ExceptionInfo *exception)
 {
-  CacheView
-    *filteredImage_view,
-    *image_view;
+  return ComputeUnsharpMaskImageSingle(image,radius,sigma,0.0,0.0,1,exception);
+}
 
-  char
-    geometry[MagickPathExtent];
+MagickExport Image* AccelerateBlurImage(const Image *image,
+  const double radius,const double sigma,ExceptionInfo *exception)
+{
+  Image
+    *filteredImage;
 
-  cl_command_queue
-    queue;
+  assert(image != NULL);
+  assert(exception != (ExceptionInfo *) NULL);
+
+  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
+      (checkOpenCLEnvironment(exception) == MagickFalse))
+    return NULL;
+
+  if (radius < 12.1)
+    filteredImage=ComputeBlurImageSingle(image,radius,sigma,exception);
+  else
+    filteredImage=ComputeBlurImage(image,radius,sigma,exception);
+  return(filteredImage);
+}
+
+/*
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%     A c c e l e r a t e C o m p o s i t e I m a g e                         %
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+*/
 
+static MagickBooleanType LaunchCompositeKernel(MagickCLEnv clEnv,
+  cl_command_queue queue,cl_mem imageBuffer,const unsigned int inputWidth,
+  const unsigned int inputHeight,const unsigned int matte,
+  const ChannelType channel,const CompositeOperator compose,
+  const cl_mem compositeImageBuffer,const unsigned int compositeWidth,
+  const unsigned int compositeHeight,const float destination_dissolve,
+  const float source_dissolve,ExceptionInfo *magick_unused(exception))
+{
   cl_int
     clStatus;
 
   cl_kernel
-    blurColumnKernel,
-    blurRowKernel;
+    compositeKernel;
 
   cl_event
     event;
 
-  cl_mem
-    imageBuffer,
-    tempImageBuffer,
-    filteredImageBuffer,
-    imageKernelBuffer;
+  int
+    k;
 
-  cl_mem_flags
-    mem_flags;
+  size_t
+    global_work_size[2],
+    local_work_size[2];
+
+  unsigned int
+    composeOp;
+
+  magick_unreferenced(exception);
+
+  compositeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
+    "Composite");
+
+  k = 0;
+  clStatus=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&imageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputWidth);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputHeight);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&compositeImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeWidth);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeHeight);
+  composeOp = (unsigned int)compose;
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&composeOp);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(ChannelType),(void*)&channel);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&matte);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&destination_dissolve);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&source_dissolve);
+
+  if (clStatus!=CL_SUCCESS)
+    return MagickFalse;
+
+  local_work_size[0] = 64;
+  local_work_size[1] = 1;
+
+  global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth,
+    (unsigned int) local_work_size[0]);
+  global_work_size[1] = inputHeight;
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL, 
+         global_work_size, local_work_size, 0, NULL, &event);
+
+  RecordProfileData(clEnv,CompositeKernel,event);
+  clEnv->library->clReleaseEvent(event);
+
+  RelinquishOpenCLKernel(clEnv, compositeKernel);
+
+  return((clStatus==CL_SUCCESS) ? MagickTrue : MagickFalse);
+}
+
+static MagickBooleanType ComputeCompositeImage(Image *image,
+  const CompositeOperator compose,const Image *compositeImage,
+  const float destination_dissolve,const float source_dissolve,ExceptionInfo *exception)
+{
+  CacheView
+    *image_view;
+
+  cl_command_queue
+    queue;
 
   cl_context
     context;
-  
-  const void
-    *inputPixels;
 
-  float
-    *kernelBufferPtr;
+  cl_int
+    clStatus;
 
-  Image
-    *filteredImage;
+  cl_mem_flags
+    mem_flags;
 
-  KernelInfo
-    *kernel;
+  cl_mem
+    compositeImageBuffer,
+    imageBuffer;
+
+  const void
+    *composePixels;
 
   MagickBooleanType
-    outputReady;
+    outputReady,
+    status;
 
   MagickCLEnv
     clEnv;
@@ -1110,357 +1173,154 @@ static Image* ComputeBlurImageSection(const Image* image,
   MagickSizeType
     length;
 
-  unsigned int
-    i,
-    imageColumns,
-    imageRows,
-    kernelWidth;
-
   void
-    *filteredPixels,
-    *hostPtr;
-
-  context = NULL;
-  filteredImage = NULL;
-  filteredImage_view = NULL;
-  imageBuffer = NULL;
-  tempImageBuffer = NULL;
-  filteredImageBuffer = NULL;
-  imageKernelBuffer = NULL;
-  blurRowKernel = NULL;
-  blurColumnKernel = NULL;
-  queue = NULL;
-  kernel = NULL;
+    *inputPixels;
 
+  status = MagickFalse;
   outputReady = MagickFalse;
+  composePixels = NULL;
+  imageBuffer = NULL;
+  compositeImageBuffer = NULL;
 
   clEnv = GetDefaultOpenCLEnv();
   context = GetOpenCLContext(clEnv);
   queue = AcquireOpenCLCommandQueue(clEnv);
 
   /* Create and initialize OpenCL buffers. */
+  image_view=AcquireAuthenticCacheView(image,exception);
+  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
+  if (inputPixels == (void *) NULL)
   {
-    image_view=AcquireVirtualCacheView(image,exception);
-    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
-    if (inputPixels == (const void *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
-      goto cleanup;
-    }
-    /* If the host pointer is aligned to the size of CLPixelPacket, 
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
+      "UnableToReadPixelCache.","`%s'",image->filename);
+    goto cleanup;
+  }
+
+  /* If the host pointer is aligned to the size of CLPixelPacket, 
      then use the host buffer directly from the GPU; otherwise, 
      create a buffer on the GPU and copy the data over */
-    if (ALIGNED(inputPixels,CLPixelPacket)) 
-    {
-      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
-    }
-    else 
-    {
-      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
-    }
-    /* create a CL buffer from image pixel buffer */
-    length = image->columns * image->rows;
-    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
+  if (ALIGNED(inputPixels,CLPixelPacket)) 
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
   }
-
-  /* create output */
+  else 
   {
-    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
-    assert(filteredImage != NULL);
-    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
-      goto cleanup;
-    }
-    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
-    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
-    if (filteredPixels == (void *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
-      goto cleanup;
-    }
-
-    if (ALIGNED(filteredPixels,CLPixelPacket)) 
-    {
-      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
-      hostPtr = filteredPixels;
-    }
-    else 
-    {
-      mem_flags = CL_MEM_WRITE_ONLY;
-      hostPtr = NULL;
-    }
-    /* create a CL buffer from image pixel buffer */
-    length = image->columns * image->rows;
-    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
   }
-
-  /* create processing kernel */
+  /* create a CL buffer from image pixel buffer */
+  length = image->columns * image->rows;
+  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, 
+    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
   {
-    (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
-    kernel=AcquireKernelInfo(geometry,exception);
-    if (kernel == (KernelInfo *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
-      goto cleanup;
-    }
-
-    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
-    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
-      goto cleanup;
-    }
-
-    for (i = 0; i < kernel->width; i++)
-    {
-      kernelBufferPtr[i] = (float) kernel->values[i];
-    }
-
-    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
-      goto cleanup;
-    }
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
   }
 
-  {
-    unsigned int offsetRows;
-    unsigned int sec;
-
-    /* create temp buffer */
-    {
-      length = image->columns * (image->rows / 2 + 1 + (kernel->width-1) / 2);
-      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
-      if (clStatus != CL_SUCCESS)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-        goto cleanup;
-      }
-    }
-
-    /* get the OpenCL kernels */
-    {
-      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurSectionRow");
-      if (blurRowKernel == NULL)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
-        goto cleanup;
-      };
-
-      blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurSectionColumn");
-      if (blurColumnKernel == NULL)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
-        goto cleanup;
-      };
-    }
-
-    for (sec = 0; sec < 2; sec++)
-    {
-      {
-        /* need logic to decide this value */
-        int chunkSize = 256;
-
-        {
-          imageColumns = (unsigned int) image->columns;
-          if (sec == 0)
-            imageRows = (unsigned int) (image->rows / 2 + (kernel->width-1) / 2);
-          else
-            imageRows = (unsigned int) ((image->rows - image->rows / 2) + (kernel->width-1) / 2);
-
-          offsetRows = (unsigned int) (sec * image->rows / 2);
-
-          kernelWidth = (unsigned int) kernel->width;
-
-          /* set the kernel arguments */
-          i = 0;
-          clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
-          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
-          if (clStatus != CL_SUCCESS)
-          {
-            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
-            goto cleanup;
-          }
-        }
-
-        /* launch the kernel */
-        {
-          size_t gsize[2];
-          size_t wsize[2];
-
-          gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
-          gsize[1] = imageRows;
-          wsize[0] = chunkSize;
-          wsize[1] = 1;
-
-                 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
-          if (clStatus != CL_SUCCESS)
-          {
-            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
-            goto cleanup;
-          }
-          clEnv->library->clFlush(queue);
-          RecordProfileData(clEnv,BlurRowKernel,event);
-          clEnv->library->clReleaseEvent(event);
-        }
-      }
-
-      {
-        /* need logic to decide this value */
-        int chunkSize = 256;
-
-        {
-          imageColumns = (unsigned int) image->columns;
-          if (sec == 0)
-            imageRows = (unsigned int) (image->rows / 2);
-          else
-            imageRows = (unsigned int) ((image->rows - image->rows / 2));
-
-          offsetRows = (unsigned int) (sec * image->rows / 2);
-
-          kernelWidth = (unsigned int) kernel->width;
-
-          /* set the kernel arguments */
-          i = 0;
-          clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *) NULL);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
-          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
-          if (clStatus != CL_SUCCESS)
-          {
-            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
-            goto cleanup;
-          }
-        }
-
-        /* launch the kernel */
-        {
-          size_t gsize[2];
-          size_t wsize[2];
-
-          gsize[0] = imageColumns;
-          gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
-          wsize[0] = 1;
-          wsize[1] = chunkSize;
-
-                 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
-          if (clStatus != CL_SUCCESS)
-          {
-            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
-            goto cleanup;
-          }
-          clEnv->library->clFlush(queue);
-          RecordProfileData(clEnv,BlurColumnKernel,event);
-          clEnv->library->clReleaseEvent(event);
-        }
-      }
-    }
 
+  /* Create and initialize OpenCL buffers. */
+  composePixels = AcquirePixelCachePixels(compositeImage, &length, exception); 
+  if (composePixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
+      "UnableToReadPixelCache.","`%s'",compositeImage->filename);
+    goto cleanup;
   }
 
-  /* get result */
-  if (ALIGNED(filteredPixels,CLPixelPacket)) 
+  /* If the host pointer is aligned to the size of CLPixelPacket, 
+     then use the host buffer directly from the GPU; otherwise, 
+     create a buffer on the GPU and copy the data over */
+  if (ALIGNED(composePixels,CLPixelPacket)) 
   {
-    length = image->columns * image->rows;
-    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
   }
   else 
   {
-    length = image->columns * image->rows;
-    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
   }
+  /* create a CL buffer from image pixel buffer */
+  length = compositeImage->columns * compositeImage->rows;
+  compositeImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, 
+    length * sizeof(CLPixelPacket), (void*)composePixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
+  
+  status = LaunchCompositeKernel(clEnv,queue,imageBuffer,
+           (unsigned int) image->columns,
+           (unsigned int) image->rows,
+           (unsigned int) (image->alpha_trait > CopyPixelTrait) ? 1 : 0,
+           image->channel_mask, compose, compositeImageBuffer,
+           (unsigned int) compositeImage->columns,
+           (unsigned int) compositeImage->rows,
+           destination_dissolve,source_dissolve,
+           exception);
 
-  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
+  if (status==MagickFalse)
+    goto cleanup;
+
+  length = image->columns * image->rows;
+  if (ALIGNED(inputPixels,CLPixelPacket)) 
+  {
+    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, 
+      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, 
+      NULL, &clStatus);
+  }
+  else
+  {
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, 
+      length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+  }
+  if (clStatus==CL_SUCCESS)
+    outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
 
 cleanup:
-  OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
   image_view=DestroyCacheView(image_view);
-  if (filteredImage_view != NULL)
-    filteredImage_view=DestroyCacheView(filteredImage_view);
-
-  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
-  if (tempImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(tempImageBuffer);
-  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
-  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
-  if (blurRowKernel!=NULL)        RelinquishOpenCLKernel(clEnv, blurRowKernel);
-  if (blurColumnKernel!=NULL)     RelinquishOpenCLKernel(clEnv, blurColumnKernel);
-  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
-  if (kernel!=NULL)               DestroyKernelInfo(kernel);
-  if (outputReady == MagickFalse)
-  {
-    if (filteredImage != NULL)
-    {
-      DestroyImage(filteredImage);
-      filteredImage = NULL;
-    }
-  }
-  return filteredImage;
-}
+  if (imageBuffer!=NULL)      clEnv->library->clReleaseMemObject(imageBuffer);
+  if (compositeImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(compositeImageBuffer);
+  if (queue != NULL)               RelinquishOpenCLCommandQueue(clEnv, queue);
 
-static Image* ComputeBlurImageSingle(const Image* image,
-  const double radius,const double sigma,ExceptionInfo *exception)
-{
-  return ComputeUnsharpMaskImageSingle(image,radius,sigma,0.0,0.0,1,exception);
+  return(outputReady);
 }
 
-MagickExport Image* AccelerateBlurImage(const Image *image,
-  const double radius,const double sigma,ExceptionInfo *exception)
+MagickExport MagickBooleanType AccelerateCompositeImage(Image *image,
+  const CompositeOperator compose,const Image *composite,
+  const float destination_dissolve,const float source_dissolve,
+  ExceptionInfo *exception)
 {
-  Image
-    *filteredImage;
+  MagickBooleanType
+    status;
 
   assert(image != NULL);
   assert(exception != (ExceptionInfo *) NULL);
 
   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
       (checkOpenCLEnvironment(exception) == MagickFalse))
-    return NULL;
+    return(MagickFalse);
 
-  if (radius < 12.1)
-    filteredImage=ComputeBlurImageSingle(image,radius,sigma,exception);
-  else if (splitImage(image) && (image->rows / 2 > radius)) 
-    filteredImage=ComputeBlurImageSection(image,radius,sigma,exception);
-  else
-    filteredImage=ComputeBlurImage(image,radius,sigma,exception);
-  return(filteredImage);
+  /* only support images with the size for now */
+  if ((image->columns != composite->columns) ||
+      (image->rows != composite->rows))
+    return MagickFalse;
+
+  switch(compose)
+  {
+    case ColorDodgeCompositeOp: 
+    case BlendCompositeOp:
+      break;
+    default:
+      // unsupported compose operator, quit
+      return MagickFalse;
+  };
+
+  status=ComputeCompositeImage(image,compose,composite,destination_dissolve,
+    source_dissolve,exception);
+  return(status);
 }
 
 /*
@@ -1468,135 +1328,76 @@ MagickExport Image* AccelerateBlurImage(const Image *image,
 %                                                                             %
 %                                                                             %
 %                                                                             %
-%     A c c e l e r a t e C o m p o s i t e I m a g e                         %
+%     A c c e l e r a t e C o n t r a s t I m a g e                           %
 %                                                                             %
 %                                                                             %
 %                                                                             %
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 */
 
-static MagickBooleanType LaunchCompositeKernel(MagickCLEnv clEnv,
-  cl_command_queue queue,cl_mem imageBuffer,const unsigned int inputWidth,
-  const unsigned int inputHeight,const unsigned int matte,
-  const ChannelType channel,const CompositeOperator compose,
-  const cl_mem compositeImageBuffer,const unsigned int compositeWidth,
-  const unsigned int compositeHeight,const float destination_dissolve,
-  const float source_dissolve,ExceptionInfo *magick_unused(exception))
+static MagickBooleanType ComputeContrastImage(Image *image,
+  const MagickBooleanType sharpen,ExceptionInfo *exception)
 {
+  CacheView
+    *image_view;
+
+  cl_command_queue
+    queue;
+
+  cl_context
+    context;
+
   cl_int
     clStatus;
 
   cl_kernel
-    compositeKernel;
+    filterKernel;
 
   cl_event
     event;
 
-  int
-    k;
+  cl_mem
+    imageBuffer;
+
+  cl_mem_flags
+    mem_flags;
+
+  MagickBooleanType
+    outputReady;
+
+  MagickCLEnv
+    clEnv;
+
+  MagickSizeType
+    length;
 
   size_t
-    global_work_size[2],
-    local_work_size[2];
+    global_work_size[2];
 
   unsigned int
-    composeOp;
+    i,
+    uSharpen;
 
-  magick_unreferenced(exception);
+  void
+    *inputPixels;
 
-  compositeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
-    "Composite");
-
-  k = 0;
-  clStatus=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&imageBuffer);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputWidth);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputHeight);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&compositeImageBuffer);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeWidth);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeHeight);
-  composeOp = (unsigned int)compose;
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&composeOp);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(ChannelType),(void*)&channel);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&matte);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&destination_dissolve);
-  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&source_dissolve);
-
-  if (clStatus!=CL_SUCCESS)
-    return MagickFalse;
-
-  local_work_size[0] = 64;
-  local_work_size[1] = 1;
-
-  global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth,
-    (unsigned int) local_work_size[0]);
-  global_work_size[1] = inputHeight;
-  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL, 
-         global_work_size, local_work_size, 0, NULL, &event);
-
-  RecordProfileData(clEnv,CompositeKernel,event);
-  clEnv->library->clReleaseEvent(event);
-
-  RelinquishOpenCLKernel(clEnv, compositeKernel);
-
-  return((clStatus==CL_SUCCESS) ? MagickTrue : MagickFalse);
-}
-
-static MagickBooleanType ComputeCompositeImage(Image *image,
-  const CompositeOperator compose,const Image *compositeImage,
-  const float destination_dissolve,const float source_dissolve,ExceptionInfo *exception)
-{
-  CacheView
-    *image_view;
-
-  cl_command_queue
-    queue;
-
-  cl_context
-    context;
-
-  cl_int
-    clStatus;
-
-  cl_mem_flags
-    mem_flags;
-
-  cl_mem
-    compositeImageBuffer,
-    imageBuffer;
-
-  const void
-    *composePixels;
-
-  MagickBooleanType
-    outputReady,
-    status;
-
-  MagickCLEnv
-    clEnv;
-
-  MagickSizeType
-    length;
-
-  void
-    *inputPixels;
-
-  status = MagickFalse;
-  outputReady = MagickFalse;
-  composePixels = NULL;
-  imageBuffer = NULL;
-  compositeImageBuffer = NULL;
+  outputReady = MagickFalse;
+  clEnv = NULL;
+  inputPixels = NULL;
+  context = NULL;
+  imageBuffer = NULL;
+  filterKernel = NULL;
+  queue = NULL;
 
   clEnv = GetDefaultOpenCLEnv();
   context = GetOpenCLContext(clEnv);
-  queue = AcquireOpenCLCommandQueue(clEnv);
 
   /* Create and initialize OpenCL buffers. */
   image_view=AcquireAuthenticCacheView(image,exception);
   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
   if (inputPixels == (void *) NULL)
   {
-    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
-      "UnableToReadPixelCache.","`%s'",image->filename);
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
     goto cleanup;
   }
 
@@ -1613,89 +1414,75 @@ static MagickBooleanType ComputeCompositeImage(Image *image,
   }
   /* create a CL buffer from image pixel buffer */
   length = image->columns * image->rows;
-  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, 
-    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), 
-      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
-
-
-  /* Create and initialize OpenCL buffers. */
-  composePixels = AcquirePixelCachePixels(compositeImage, &length, exception); 
-  if (composePixels == (void *) NULL)
+  
+  filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
+  if (filterKernel == NULL)
   {
-    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
-      "UnableToReadPixelCache.","`%s'",compositeImage->filename);
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
     goto cleanup;
   }
 
-  /* If the host pointer is aligned to the size of CLPixelPacket, 
-     then use the host buffer directly from the GPU; otherwise, 
-     create a buffer on the GPU and copy the data over */
-  if (ALIGNED(composePixels,CLPixelPacket)) 
-  {
-    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
-  }
-  else 
-  {
-    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
-  }
-  /* create a CL buffer from image pixel buffer */
-  length = compositeImage->columns * compositeImage->rows;
-  compositeImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, 
-    length * sizeof(CLPixelPacket), (void*)composePixels, &clStatus);
+  i = 0;
+  clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
+
+  uSharpen = (sharpen == MagickFalse)?0:1;
+  clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), 
-      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
-  
-  status = LaunchCompositeKernel(clEnv,queue,imageBuffer,
-           (unsigned int) image->columns,
-           (unsigned int) image->rows,
-           (unsigned int) (image->alpha_trait > CopyPixelTrait) ? 1 : 0,
-           image->channel_mask, compose, compositeImageBuffer,
-           (unsigned int) compositeImage->columns,
-           (unsigned int) compositeImage->rows,
-           destination_dissolve,source_dissolve,
-           exception);
 
-  if (status==MagickFalse)
+  global_work_size[0] = image->columns;
+  global_work_size[1] = image->rows;
+  /* launch the kernel */
+  queue = AcquireOpenCLCommandQueue(clEnv);
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
     goto cleanup;
+  }
+  clEnv->library->clFlush(queue);
+  RecordProfileData(clEnv,ContrastKernel,event);
+  clEnv->library->clReleaseEvent(event);
 
-  length = image->columns * image->rows;
   if (ALIGNED(inputPixels,CLPixelPacket)) 
   {
-    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, 
-      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, 
-      NULL, &clStatus);
+    length = image->columns * image->rows;
+    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
-  else
+  else 
   {
-    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, 
-      length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+    length = image->columns * image->rows;
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   }
-  if (clStatus==CL_SUCCESS)
-    outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
 
 cleanup:
+  OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
   image_view=DestroyCacheView(image_view);
-  if (imageBuffer!=NULL)      clEnv->library->clReleaseMemObject(imageBuffer);
-  if (compositeImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(compositeImageBuffer);
-  if (queue != NULL)               RelinquishOpenCLCommandQueue(clEnv, queue);
 
+  if (imageBuffer!=NULL)                     clEnv->library->clReleaseMemObject(imageBuffer);
+  if (filterKernel!=NULL)                     RelinquishOpenCLKernel(clEnv, filterKernel);
+  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
   return(outputReady);
 }
 
-MagickExport MagickBooleanType AccelerateCompositeImage(Image *image,
-  const CompositeOperator compose,const Image *composite,
-  const float destination_dissolve,const float source_dissolve,
-  ExceptionInfo *exception)
+MagickExport MagickBooleanType AccelerateContrastImage(Image *image,
+  const MagickBooleanType sharpen,ExceptionInfo *exception)
 {
   MagickBooleanType
     status;
@@ -1707,23 +1494,7 @@ MagickExport MagickBooleanType AccelerateCompositeImage(Image *image,
       (checkOpenCLEnvironment(exception) == MagickFalse))
     return(MagickFalse);
 
-  /* only support images with the size for now */
-  if ((image->columns != composite->columns) ||
-      (image->rows != composite->rows))
-    return MagickFalse;
-
-  switch(compose)
-  {
-    case ColorDodgeCompositeOp: 
-    case BlendCompositeOp:
-      break;
-    default:
-      // unsupported compose operator, quit
-      return MagickFalse;
-  };
-
-  status=ComputeCompositeImage(image,compose,composite,destination_dissolve,
-    source_dissolve,exception);
+  status=ComputeContrastImage(image,sharpen,exception);
   return(status);
 }
 
@@ -1732,223 +1503,48 @@ MagickExport MagickBooleanType AccelerateCompositeImage(Image *image,
 %                                                                             %
 %                                                                             %
 %                                                                             %
-%     A c c e l e r a t e C o n t r a s t I m a g e                           %
+%     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
 %                                                                             %
 %                                                                             %
 %                                                                             %
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 */
 
-static MagickBooleanType ComputeContrastImage(Image *image,
-  const MagickBooleanType sharpen,ExceptionInfo *exception)
+static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
+  cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,
+  Image *image,const ChannelType channel,ExceptionInfo *exception)
 {
-  CacheView
-    *image_view;
-
-  cl_command_queue
-    queue;
-
-  cl_context
-    context;
+  MagickBooleanType
+    outputReady;
 
   cl_int
     clStatus;
 
   cl_kernel
-    filterKernel;
+    histogramKernel; 
 
   cl_event
     event;
 
-  cl_mem
-    imageBuffer;
-
-  cl_mem_flags
-    mem_flags;
-
-  MagickBooleanType
-    outputReady;
-
-  MagickCLEnv
-    clEnv;
+  cl_uint
+    colorspace,
+    method;
 
-  MagickSizeType
-    length;
+  register ssize_t
+    i;
 
   size_t
     global_work_size[2];
 
-  unsigned int
-    i,
-    uSharpen;
-
-  void
-    *inputPixels;
+  histogramKernel = NULL; 
 
   outputReady = MagickFalse;
-  clEnv = NULL;
-  inputPixels = NULL;
-  context = NULL;
-  imageBuffer = NULL;
-  filterKernel = NULL;
-  queue = NULL;
-
-  clEnv = GetDefaultOpenCLEnv();
-  context = GetOpenCLContext(clEnv);
+  colorspace = image->colorspace;
+  method = image->intensity;
 
-  /* Create and initialize OpenCL buffers. */
-  image_view=AcquireAuthenticCacheView(image,exception);
-  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
-  if (inputPixels == (void *) NULL)
-  {
-    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
-    goto cleanup;
-  }
-
-  /* If the host pointer is aligned to the size of CLPixelPacket, 
-     then use the host buffer directly from the GPU; otherwise, 
-     create a buffer on the GPU and copy the data over */
-  if (ALIGNED(inputPixels,CLPixelPacket)) 
-  {
-    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
-  }
-  else 
-  {
-    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
-  }
-  /* create a CL buffer from image pixel buffer */
-  length = image->columns * image->rows;
-  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-    goto cleanup;
-  }
-  
-  filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
-  if (filterKernel == NULL)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
-    goto cleanup;
-  }
-
-  i = 0;
-  clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-
-  uSharpen = (sharpen == MagickFalse)?0:1;
-  clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
-    goto cleanup;
-  }
-
-  global_work_size[0] = image->columns;
-  global_work_size[1] = image->rows;
-  /* launch the kernel */
-  queue = AcquireOpenCLCommandQueue(clEnv);
-  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
-    goto cleanup;
-  }
-  clEnv->library->clFlush(queue);
-  RecordProfileData(clEnv,ContrastKernel,event);
-  clEnv->library->clReleaseEvent(event);
-
-  if (ALIGNED(inputPixels,CLPixelPacket)) 
-  {
-    length = image->columns * image->rows;
-    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
-  }
-  else 
-  {
-    length = image->columns * image->rows;
-    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
-  }
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
-    goto cleanup;
-  }
-  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
-
-cleanup:
-  OpenCLLogException(__FUNCTION__,__LINE__,exception);
-
-  image_view=DestroyCacheView(image_view);
-
-  if (imageBuffer!=NULL)                     clEnv->library->clReleaseMemObject(imageBuffer);
-  if (filterKernel!=NULL)                     RelinquishOpenCLKernel(clEnv, filterKernel);
-  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
-  return(outputReady);
-}
-
-MagickExport MagickBooleanType AccelerateContrastImage(Image *image,
-  const MagickBooleanType sharpen,ExceptionInfo *exception)
-{
-  MagickBooleanType
-    status;
-
-  assert(image != NULL);
-  assert(exception != (ExceptionInfo *) NULL);
-
-  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
-      (checkOpenCLEnvironment(exception) == MagickFalse))
-    return(MagickFalse);
-
-  status=ComputeContrastImage(image,sharpen,exception);
-  return(status);
-}
-
-/*
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%                                                                             %
-%                                                                             %
-%                                                                             %
-%     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
-%                                                                             %
-%                                                                             %
-%                                                                             %
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-*/
-
-static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
-  cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,
-  Image *image,const ChannelType channel,ExceptionInfo *exception)
-{
-  MagickBooleanType
-    outputReady;
-
-  cl_int
-    clStatus;
-
-  cl_kernel
-    histogramKernel; 
-
-  cl_event
-    event;
-
-  cl_uint
-    colorspace,
-    method;
-
-  register ssize_t
-    i;
-
-  size_t
-    global_work_size[2];
-
-  histogramKernel = NULL; 
-
-  outputReady = MagickFalse;
-  colorspace = image->colorspace;
-  method = image->intensity;
-
-  /* get the OpenCL kernel */
-  histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
-  if (histogramKernel == NULL)
+  /* get the OpenCL kernel */
+  histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
+  if (histogramKernel == NULL)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
     goto cleanup;
@@ -6244,442 +5840,48 @@ static Image* ComputeRotationalBlurImage(const Image *image,const double angle,
     goto cleanup;
   }
 
-  
-  /* set the kernel arguments */
-  i = 0;
-  clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-
-  GetPixelInfo(image,&bias);
-  biasPixel.s[0] = bias.red;
-  biasPixel.s[1] = bias.green;
-  biasPixel.s[2] = bias.blue;
-  biasPixel.s[3] = bias.alpha;
-  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float4), &biasPixel);
-  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
-
-  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
-  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &matte);
-
-  clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
-
-  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
-  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
-  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
-    goto cleanup;
-  }
-
-
-  global_work_size[0] = image->columns;
-  global_work_size[1] = image->rows;
-  /* launch the kernel */
-  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, rotationalBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
-    goto cleanup;
-  }
-  clEnv->library->clFlush(queue);
-  RecordProfileData(clEnv,RotationalBlurKernel,event);
-  clEnv->library->clReleaseEvent(event);
-
-  if (ALIGNED(filteredPixels,CLPixelPacket)) 
-  {
-    length = image->columns * image->rows;
-    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
-  }
-  else 
-  {
-    length = image->columns * image->rows;
-    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
-  }
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
-    goto cleanup;
-  }
-  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
-
-cleanup:
-  OpenCLLogException(__FUNCTION__,__LINE__,exception);
-
-  image_view=DestroyCacheView(image_view);
-  if (filteredImage_view != NULL)
-    filteredImage_view=DestroyCacheView(filteredImage_view);
-
-  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
-  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
-  if (sinThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(sinThetaBuffer);
-  if (cosThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(cosThetaBuffer);
-  if (rotationalBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, rotationalBlurKernel);
-  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
-  if (outputReady == MagickFalse)
-  {
-    if (filteredImage != NULL)
-    {
-      DestroyImage(filteredImage);
-      filteredImage = NULL;
-    }
-  }
-  return filteredImage;
-}
-
-MagickExport Image* AccelerateRotationalBlurImage(const Image *image,
-  const double angle,ExceptionInfo *exception)
-{
-  Image
-    *filteredImage;
-
-  assert(image != NULL);
-  assert(exception != (ExceptionInfo *) NULL);
-
-  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
-      (checkOpenCLEnvironment(exception) == MagickFalse))
-    return NULL;
-
-  filteredImage=ComputeRotationalBlurImage(image,angle,exception);
-  return filteredImage;
-}
-
-/*
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%                                                                             %
-%                                                                             %
-%                                                                             %
-%     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
-%                                                                             %
-%                                                                             %
-%                                                                             %
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-*/
-
-static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
-  const double sigma,const double gain,const double threshold,
-  ExceptionInfo *exception)
-{
-  CacheView
-    *filteredImage_view,
-    *image_view;
-
-  char
-    geometry[MagickPathExtent];
-
-  cl_command_queue
-    queue;
-
-  cl_context
-    context;
-
-  cl_int
-    clStatus;
-
-  cl_kernel
-    blurRowKernel,
-    unsharpMaskBlurColumnKernel;
-
-  cl_event
-    event;
-
-  cl_mem
-    filteredImageBuffer,
-    imageBuffer,
-    imageKernelBuffer,
-    tempImageBuffer;
-
-  cl_mem_flags
-    mem_flags;
-
-  const void
-    *inputPixels;
-
-  float
-    fGain,
-    fThreshold,
-    *kernelBufferPtr;
-
-  Image
-    *filteredImage;
-
-  int
-    chunkSize;
-
-  KernelInfo
-    *kernel;
-
-  MagickBooleanType
-    outputReady;
-
-  MagickCLEnv
-    clEnv;
-
-  MagickSizeType
-    length;
-
-  void
-    *filteredPixels,
-    *hostPtr;
-
-  unsigned int
-    i,
-    imageColumns,
-    imageRows,
-    kernelWidth;
-
-  clEnv = NULL;
-  filteredImage = NULL;
-  filteredImage_view = NULL;
-  kernel = NULL;
-  context = NULL;
-  imageBuffer = NULL;
-  filteredImageBuffer = NULL;
-  tempImageBuffer = NULL;
-  imageKernelBuffer = NULL;
-  blurRowKernel = NULL;
-  unsharpMaskBlurColumnKernel = NULL;
-  queue = NULL;
-  outputReady = MagickFalse;
-
-  clEnv = GetDefaultOpenCLEnv();
-  context = GetOpenCLContext(clEnv);
-  queue = AcquireOpenCLCommandQueue(clEnv);
-
-  /* Create and initialize OpenCL buffers. */
-  {
-    image_view=AcquireVirtualCacheView(image,exception);
-    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
-    if (inputPixels == (const void *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
-      goto cleanup;
-    }
-
-    /* If the host pointer is aligned to the size of CLPixelPacket, 
-     then use the host buffer directly from the GPU; otherwise, 
-     create a buffer on the GPU and copy the data over */
-    if (ALIGNED(inputPixels,CLPixelPacket)) 
-    {
-      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
-    }
-    else 
-    {
-      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
-    }
-    /* create a CL buffer from image pixel buffer */
-    length = image->columns * image->rows;
-    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
-  }
-
-  /* create output */
-  {
-    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
-    assert(filteredImage != NULL);
-    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
-      goto cleanup;
-    }
-    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
-    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
-    if (filteredPixels == (void *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
-      goto cleanup;
-    }
-
-    if (ALIGNED(filteredPixels,CLPixelPacket)) 
-    {
-      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
-      hostPtr = filteredPixels;
-    }
-    else 
-    {
-      mem_flags = CL_MEM_WRITE_ONLY;
-      hostPtr = NULL;
-    }
-
-    /* create a CL buffer from image pixel buffer */
-    length = image->columns * image->rows;
-    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
-  }
-
-  /* create the blur kernel */
-  {
-    (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
-    kernel=AcquireKernelInfo(geometry,exception);
-    if (kernel == (KernelInfo *) NULL)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
-      goto cleanup;
-    }
-
-    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-      goto cleanup;
-    }
-
-
-    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
-      goto cleanup;
-    }
-    for (i = 0; i < kernel->width; i++)
-    {
-      kernelBufferPtr[i] = (float) kernel->values[i];
-    }
-    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
-    if (clStatus != CL_SUCCESS)
-    {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
-      goto cleanup;
-    }
-  }
-
-  {
-    /* create temp buffer */
-    {
-      length = image->columns * image->rows;
-      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
-      if (clStatus != CL_SUCCESS)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-        goto cleanup;
-      }
-    }
-
-    /* get the opencl kernel */
-    {
-      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
-      if (blurRowKernel == NULL)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
-        goto cleanup;
-      };
-
-      unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
-      if (unsharpMaskBlurColumnKernel == NULL)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
-        goto cleanup;
-      };
-    }
-
-    {
-      chunkSize = 256;
-
-      imageColumns = (unsigned int) image->columns;
-      imageRows = (unsigned int) image->rows;
-
-      kernelWidth = (unsigned int) kernel->width;
-
-      /* set the kernel arguments */
-      i = 0;
-      clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
-      if (clStatus != CL_SUCCESS)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
-        goto cleanup;
-      }
-    }
-
-    /* launch the kernel */
-    {
-      size_t gsize[2];
-      size_t wsize[2];
-
-      gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize);
-      gsize[1] = image->rows;
-      wsize[0] = chunkSize;
-      wsize[1] = 1;
-
-         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
-      if (clStatus != CL_SUCCESS)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
-        goto cleanup;
-      }
-      clEnv->library->clFlush(queue);
-      RecordProfileData(clEnv,BlurRowKernel,event);
-      clEnv->library->clReleaseEvent(event);
-    }
-
-
-    {
-      chunkSize = 256;
-      imageColumns = (unsigned int) image->columns;
-      imageRows = (unsigned int) image->rows;
-      kernelWidth = (unsigned int) kernel->width;
-      fGain = (float) gain;
-      fThreshold = (float) threshold;
-
-      i = 0;
-      clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
-      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
+  
+  /* set the kernel arguments */
+  i = 0;
+  clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
 
-      if (clStatus != CL_SUCCESS)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
-        goto cleanup;
-      }
-    }
+  GetPixelInfo(image,&bias);
+  biasPixel.s[0] = bias.red;
+  biasPixel.s[1] = bias.green;
+  biasPixel.s[2] = bias.blue;
+  biasPixel.s[3] = bias.alpha;
+  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float4), &biasPixel);
+  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
 
-    /* launch the kernel */
-    {
-      size_t gsize[2];
-      size_t wsize[2];
+  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
+  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &matte);
 
-      gsize[0] = image->columns;
-      gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize);
-      wsize[0] = 1;
-      wsize[1] = chunkSize;
+  clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
+
+  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
 
-         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
-      if (clStatus != CL_SUCCESS)
-      {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
-        goto cleanup;
-      }
-      clEnv->library->clFlush(queue);
-      RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event);
-      clEnv->library->clReleaseEvent(event);
-    }
 
+  global_work_size[0] = image->columns;
+  global_work_size[1] = image->rows;
+  /* launch the kernel */
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, rotationalBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    goto cleanup;
   }
+  clEnv->library->clFlush(queue);
+  RecordProfileData(clEnv,RotationalBlurKernel,event);
+  clEnv->library->clReleaseEvent(event);
 
-  /* get result */
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = image->columns * image->rows;
@@ -6695,7 +5897,6 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
     goto cleanup;
   }
-
   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
 
 cleanup:
@@ -6705,14 +5906,12 @@ cleanup:
   if (filteredImage_view != NULL)
     filteredImage_view=DestroyCacheView(filteredImage_view);
 
-  if (kernel != NULL)                        kernel=DestroyKernelInfo(kernel);
-  if (imageBuffer!=NULL)                     clEnv->library->clReleaseMemObject(imageBuffer);
-  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
-  if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
-  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
-  if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
-  if (unsharpMaskBlurColumnKernel!=NULL)      RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
-  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
+  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
+  if (sinThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(sinThetaBuffer);
+  if (cosThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(cosThetaBuffer);
+  if (rotationalBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, rotationalBlurKernel);
+  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
   if (outputReady == MagickFalse)
   {
     if (filteredImage != NULL)
@@ -6721,12 +5920,41 @@ cleanup:
       filteredImage = NULL;
     }
   }
-  return(filteredImage);
+  return filteredImage;
 }
 
-static Image *ComputeUnsharpMaskImageSection(const Image *image,
-  const double radius,const double sigma,const double gain,
-  const double threshold,ExceptionInfo *exception)
+MagickExport Image* AccelerateRotationalBlurImage(const Image *image,
+  const double angle,ExceptionInfo *exception)
+{
+  Image
+    *filteredImage;
+
+  assert(image != NULL);
+  assert(exception != (ExceptionInfo *) NULL);
+
+  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
+      (checkOpenCLEnvironment(exception) == MagickFalse))
+    return NULL;
+
+  filteredImage=ComputeRotationalBlurImage(image,angle,exception);
+  return filteredImage;
+}
+
+/*
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+*/
+
+static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
+  const double sigma,const double gain,const double threshold,
+  ExceptionInfo *exception)
 {
   CacheView
     *filteredImage_view,
@@ -6920,12 +6148,9 @@ static Image *ComputeUnsharpMaskImageSection(const Image *image,
   }
 
   {
-    unsigned int offsetRows;
-    unsigned int sec;
-
     /* create temp buffer */
     {
-      length = image->columns * (image->rows / 2 + 1 + (kernel->width-1) / 2);
+      length = image->columns * image->rows;
       tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
       if (clStatus != CL_SUCCESS)
       {
@@ -6936,14 +6161,14 @@ static Image *ComputeUnsharpMaskImageSection(const Image *image,
 
     /* get the opencl kernel */
     {
-      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
+      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
       if (blurRowKernel == NULL)
       {
         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
         goto cleanup;
       };
 
-      unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection");
+      unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
       if (unsharpMaskBlurColumnKernel == NULL)
       {
         (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
@@ -6951,121 +6176,103 @@ static Image *ComputeUnsharpMaskImageSection(const Image *image,
       };
     }
 
-    for (sec = 0; sec < 2; sec++)
     {
-      {
-        chunkSize = 256;
-
-        imageColumns = (unsigned int) image->columns;
-        if (sec == 0)
-          imageRows = (unsigned int) (image->rows / 2 + (kernel->width-1) / 2);
-        else
-          imageRows = (unsigned int) ((image->rows - image->rows / 2) + (kernel->width-1) / 2);
+      chunkSize = 256;
 
-        offsetRows = (unsigned int) (sec * image->rows / 2);
+      imageColumns = (unsigned int) image->columns;
+      imageRows = (unsigned int) image->rows;
 
-        kernelWidth = (unsigned int) kernel->width;
+      kernelWidth = (unsigned int) kernel->width;
 
-        /* set the kernel arguments */
-        i = 0;
-        clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
-        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
-        if (clStatus != CL_SUCCESS)
-        {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
-          goto cleanup;
-        }
-      }
-      /* launch the kernel */
+      /* set the kernel arguments */
+      i = 0;
+      clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
+      if (clStatus != CL_SUCCESS)
       {
-        size_t gsize[2];
-        size_t wsize[2];
-
-        gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
-        gsize[1] = imageRows;
-        wsize[0] = chunkSize;
-        wsize[1] = 1;
-
-               clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
-        if (clStatus != CL_SUCCESS)
-        {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
-          goto cleanup;
-        }
-        clEnv->library->clFlush(queue);
-        RecordProfileData(clEnv,BlurRowKernel,event);
-        clEnv->library->clReleaseEvent(event);
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+        goto cleanup;
       }
+    }
 
+    /* launch the kernel */
+    {
+      size_t gsize[2];
+      size_t wsize[2];
 
-      {
-        chunkSize = 256;
-
-        imageColumns = (unsigned int) image->columns;
-        if (sec == 0)
-          imageRows = (unsigned int) (image->rows / 2);
-        else
-          imageRows = (unsigned int) (image->rows - image->rows / 2);
+      gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize);
+      gsize[1] = image->rows;
+      wsize[0] = chunkSize;
+      wsize[1] = 1;
 
-        offsetRows = (unsigned int) (sec * image->rows / 2);
+         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
+      if (clStatus != CL_SUCCESS)
+      {
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+        goto cleanup;
+      }
+      clEnv->library->clFlush(queue);
+      RecordProfileData(clEnv,BlurRowKernel,event);
+      clEnv->library->clReleaseEvent(event);
+    }
 
-        kernelWidth = (unsigned int) kernel->width;
 
-        fGain = (float) gain;
-        fThreshold = (float) threshold;
+    {
+      chunkSize = 256;
+      imageColumns = (unsigned int) image->columns;
+      imageRows = (unsigned int) image->rows;
+      kernelWidth = (unsigned int) kernel->width;
+      fGain = (float) gain;
+      fThreshold = (float) threshold;
 
-        i = 0;
-        clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
-        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
+      i = 0;
+      clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
 
-        if (clStatus != CL_SUCCESS)
-        {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
-          goto cleanup;
-        }
+      if (clStatus != CL_SUCCESS)
+      {
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+        goto cleanup;
       }
+    }
 
-      /* launch the kernel */
-      {
-        size_t gsize[2];
-        size_t wsize[2];
+    /* launch the kernel */
+    {
+      size_t gsize[2];
+      size_t wsize[2];
 
-        gsize[0] = imageColumns;
-        gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
-        wsize[0] = 1;
-        wsize[1] = chunkSize;
+      gsize[0] = image->columns;
+      gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize);
+      wsize[0] = 1;
+      wsize[1] = chunkSize;
 
-               clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
-        if (clStatus != CL_SUCCESS)
-        {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
-          goto cleanup;
-        }
-        clEnv->library->clFlush(queue);
-        RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event);
-        clEnv->library->clReleaseEvent(event);
+         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
+      if (clStatus != CL_SUCCESS)
+      {
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+        goto cleanup;
       }
+      clEnv->library->clFlush(queue);
+      RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event);
+      clEnv->library->clReleaseEvent(event);
     }
+
   }
 
   /* get result */
@@ -7110,7 +6317,7 @@ cleanup:
       filteredImage = NULL;
     }
   }
-  return filteredImage;
+  return(filteredImage);
 }
 
 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
@@ -7306,9 +6513,6 @@ MagickExport Image *AccelerateUnsharpMaskImage(const Image *image,
   if (radius < 12.1)
     filteredImage=ComputeUnsharpMaskImageSingle(image,radius,sigma,gain,
       threshold,0,exception);
-  else if (splitImage(image) && (image->rows / 2 > radius))
-    filteredImage=ComputeUnsharpMaskImageSection(image,radius,sigma,gain,
-      threshold,exception);
   else
     filteredImage=ComputeUnsharpMaskImage(image,radius,sigma,gain,threshold,
       exception);