]> granicus.if.org Git - imagemagick/commitdiff
AccelerateResizeImage now supports R/RA/RGB images.
authordirk <dirk@git.imagemagick.org>
Thu, 7 Apr 2016 20:40:20 +0000 (22:40 +0200)
committerdirk <dirk@git.imagemagick.org>
Thu, 7 Apr 2016 20:42:37 +0000 (22:42 +0200)
MagickCore/accelerate-private.h
MagickCore/accelerate.c

index b227b239da16cb224e28d11d517686c038cf7ace..66fe4ecb2bea74dee958bc1544a634191eceec4d 100644 (file)
@@ -54,7 +54,6 @@ const char* accelerateKernels =
 /*
   Define declarations.
 */
-  OPENCL_DEFINE(GetPixelAlpha(pixel), pixel.w)
   OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
   OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
   OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
@@ -367,12 +366,12 @@ OPENCL_ENDIF()
   inline float getPixelRed(const __global CLQuantum *p)   { return (float)*p; }
   inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); }
   inline float getPixelBlue(const __global CLQuantum *p)  { return (float)*(p+2); }
-  inline float getPixelAlpha(const __global CLQuantum *p) { return (float)*(p+3); }
+  inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); }
 
   inline void setPixelRed(__global CLQuantum *p,const CLQuantum value)   { *p=value; }
   inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; }
   inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value)  { *(p+2)=value; }
-  inline void setPixelAlpha(__global CLQuantum *p,const CLQuantum value) { *(p+3)=value; }
+  inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; }
 
   inline CLQuantum getBlue(CLPixelType p)               { return p.x; }
   inline void setBlue(CLPixelType* p, CLQuantum value)  { (*p).x = value; }
@@ -411,7 +410,7 @@ OPENCL_ENDIF()
 
     if (((number_channels == 4) || (number_channels == 2)) &&
         ((channel & AlphaChannel) != 0))
-      *alpha=getPixelAlpha(p);
+      *alpha=getPixelAlpha(p,number_channels);
   }
 
   inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels,
@@ -445,7 +444,7 @@ OPENCL_ENDIF()
 
     if (((number_channels == 4) || (number_channels == 2)) &&
         ((channel & AlphaChannel) != 0))
-      setPixelAlpha(p,ClampToQuantum(alpha));
+      setPixelAlpha(p,number_channels,ClampToQuantum(alpha));
   }
 
   inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels,
@@ -2662,16 +2661,16 @@ STRINGIFY(
   )
 
   STRINGIFY(
- __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
- void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
-  , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
-  , const int resizeFilterType, const int resizeWindowType
-  , const __global float* resizeFilterCubicCoefficients
-  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
-  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
-  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
-
-
 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
+    void ResizeHorizontalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
+      const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
+      const unsigned int filteredColumns, const unsigned int filteredRows, const float xFactor,
+      const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
+      const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
+      const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
+      const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
+      __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
+  {
     // calculate the range of resized image pixels computed by this workgroup
     const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
     const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
@@ -2687,13 +2686,14 @@ STRINGIFY(
 
     // cache the input pixels into local memory
     const unsigned int y = get_global_id(1);
-    event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
-    wait_group_events(1,&e);
+    const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
+    const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
+    event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
+    wait_group_events(1, &e);
 
     unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
     for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
     {
-
       const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
       const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
       const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
@@ -2701,15 +2701,15 @@ STRINGIFY(
       // determine which resized pixel computed by this workitem
       const unsigned int itemID = get_local_id(0);
       const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
-      
+
       const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
 
       float4 filteredPixel = (float4)0.0f;
       float density = 0.0f;
       float gamma = 0.0f;
       // -1 means this workitem doesn't participate in the computation
-      if (pixelIndex != -1) {
-
+      if (pixelIndex != -1)
+      {
         // x coordinated of the resized pixel computed by this workitem
         const int x = chunkStartX + pixelIndex;
 
@@ -2724,44 +2724,46 @@ STRINGIFY(
         numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
 
         const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
-        if (startStep < n) {
+        if (startStep < n)
+        {
           const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
 
           unsigned int cacheIndex = start+startStep-cacheRangeStartX;
-          if (matte == 0) {
 
-            for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
-              float4 cp = convert_float4(inputImageCache[cacheIndex]);
-
-              float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
-                , (ResizeWeightingFunctionType)resizeWindowType
-                , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
-
-              filteredPixel += ((float4)weight)*cp;
-              density+=weight;
+          for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
+          {
+            float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
+              (ResizeWeightingFunctionType) resizeFilterType,
+              (ResizeWeightingFunctionType) resizeWindowType,
+              resizeFilterScale, resizeFilterWindowSupport,
+              resizeFilterBlur, scale*(start + i - bisect + 0.5));
+
+            float4 cp = (float4) 0;
+
+            __local float *p = inputImageCache + (cacheIndex*number_channels);
+            cp.x = *(p);
+            if (number_channels > 2)
+            {
+              cp.y = *(p + 1);
+              cp.z = *(p + 2);
             }
 
+            if ((number_channels == 4) || (number_channels == 2))
+            {
+              cp.w = *(p + number_channels - 1);
 
-          }
-          else {
-            for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
-              CLPixelType p = inputImageCache[cacheIndex];
-
-              float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
-                , (ResizeWeightingFunctionType)resizeWindowType
-                , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
-
-              float alpha = weight * QuantumScale * GetPixelAlpha(p);
-              float4 cp = convert_float4(p);
+              float alpha = weight * QuantumScale * cp.w;
 
               filteredPixel.x += alpha * cp.x;
               filteredPixel.y += alpha * cp.y;
               filteredPixel.z += alpha * cp.z;
               filteredPixel.w += weight * cp.w;
-
-              density+=weight;
-              gamma+=alpha;
+              gamma += alpha;
             }
+            else
+              filteredPixel += ((float4) weight)*cp;
+
+            density += weight;
           }
         }
       }
@@ -2770,7 +2772,7 @@ STRINGIFY(
       if (itemID < actualNumPixelInThisChunk) {
         outputPixelCache[itemID] = (float4)0.0f;
         densityCache[itemID] = 0.0f;
-        if (matte != 0)
+        if ((number_channels == 4) || (number_channels == 2))
           gammaCache[itemID] = 0.0f;
       }
       barrier(CLK_LOCAL_MEM_FENCE);
@@ -2781,67 +2783,50 @@ STRINGIFY(
           if (itemID%numItems == i) {
             outputPixelCache[pixelIndex]+=filteredPixel;
             densityCache[pixelIndex]+=density;
-            if (matte!=0) {
+            if ((number_channels == 4) || (number_channels == 2))
               gammaCache[pixelIndex]+=gamma;
-            }
           }
         }
         barrier(CLK_LOCAL_MEM_FENCE);
       }
 
-      if (itemID < actualNumPixelInThisChunk) {
-        if (matte==0) {
-          float density = densityCache[itemID];
-          float4 filteredPixel = outputPixelCache[itemID];
-          if (density!= 0.0f && density != 1.0)
-          {
-            density = PerceptibleReciprocal(density);
-            filteredPixel *= (float4)density;
-          }
-          filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
-                                                                         , ClampToQuantum(filteredPixel.y)
-                                                                         , ClampToQuantum(filteredPixel.z)
-                                                                         , ClampToQuantum(filteredPixel.w));
-        }
-        else {
-          float density = densityCache[itemID];
-          float gamma = gammaCache[itemID];
-          float4 filteredPixel = outputPixelCache[itemID];
-
-          if (density!= 0.0f && density != 1.0) {
-            density = PerceptibleReciprocal(density);
-            filteredPixel *= (float4)density;
-            gamma *= density;
-          }
-          gamma = PerceptibleReciprocal(gamma);
-
-          CLPixelType fp;
-          fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
-            , ClampToQuantum(gamma*filteredPixel.y)
-            , ClampToQuantum(gamma*filteredPixel.z)
-            , ClampToQuantum(filteredPixel.w));
+      if (itemID < actualNumPixelInThisChunk)
+      {
+        float density = densityCache[itemID];
+        float4 filteredPixel = outputPixelCache[itemID];
 
-          filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
+        if ((density != 0.0f) && (density != 1.0f))
+        {
+          density = PerceptibleReciprocal(density);
+          filteredPixel *= (float4) density;
+        }
 
+        if ((number_channels == 4) || (number_channels == 2))
+        {
+          float gamma = PerceptibleReciprocal(gammaCache[itemID]*density);
+          filteredPixel.x *= gamma;
+          filteredPixel.y *= gamma;
+          filteredPixel.z *= gamma;
         }
-      }
 
-    } // end of chunking loop
+        WriteFloat4(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, AllChannels, filteredPixel);
+      }
+    }
   }
   )
 
 
   STRINGIFY(
  __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
- void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
-  , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
-  , const int resizeFilterType, const int resizeWindowType
-  , const __global float* resizeFilterCubicCoefficients
-  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
-  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
-  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
-
-
+    void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
+      const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
+      const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor,
+      const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
+      const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
+      const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
+      const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
+      __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
+  {
     // calculate the range of resized image pixels computed by this workgroup
     const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
     const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
@@ -2857,13 +2842,18 @@ STRINGIFY(
 
     // cache the input pixels into local memory
     const unsigned int x = get_global_id(0);
-    event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
-    wait_group_events(1,&e);
+    unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
+    unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
+    unsigned int stride = inputColumns * number_channels;
+    for (unsigned int i = 0; i < number_channels; i++)
+    {
+      event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
+      wait_group_events(1,&e);
+    }
 
     unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
     for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
     {
-
       const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
       const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
       const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
@@ -2871,15 +2861,15 @@ STRINGIFY(
       // determine which resized pixel computed by this workitem
       const unsigned int itemID = get_local_id(1);
       const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
-      
+
       const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
 
       float4 filteredPixel = (float4)0.0f;
       float density = 0.0f;
       float gamma = 0.0f;
       // -1 means this workitem doesn't participate in the computation
-      if (pixelIndex != -1) {
-
+      if (pixelIndex != -1)
+      {
         // x coordinated of the resized pixel computed by this workitem
         const int y = chunkStartY + pixelIndex;
 
@@ -2894,44 +2884,45 @@ STRINGIFY(
         numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
 
         const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
-        if (startStep < n) {
+        if (startStep < n)
+        {
           const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
 
           unsigned int cacheIndex = start+startStep-cacheRangeStartY;
-          if (matte == 0) {
-
-            for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
-              float4 cp = convert_float4(inputImageCache[cacheIndex]);
-
-              float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
-                , (ResizeWeightingFunctionType)resizeWindowType
-                , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
-
-              filteredPixel += ((float4)weight)*cp;
-              density+=weight;
+          for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
+          {
+            float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
+              (ResizeWeightingFunctionType) resizeFilterType,
+              (ResizeWeightingFunctionType) resizeWindowType,
+              resizeFilterScale, resizeFilterWindowSupport,
+              resizeFilterBlur, scale*(start + i - bisect + 0.5));
+
+            float4 cp = (float4)0.0f;
+
+            __local float *p = inputImageCache + cacheIndex;
+            cp.x = *(p);
+            if (number_channels > 2)
+            {
+              cp.y = *(p + rangeLength);
+              cp.z = *(p + (rangeLength * 2));
             }
 
+            if ((number_channels == 4) || (number_channels == 2))
+            {
+              cp.w = *(p + (rangeLength * (number_channels - 1)));
 
-          }
-          else {
-            for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
-              CLPixelType p = inputImageCache[cacheIndex];
-
-              float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
-                , (ResizeWeightingFunctionType)resizeWindowType
-                , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
-
-              float alpha = weight * QuantumScale * GetPixelAlpha(p);
-              float4 cp = convert_float4(p);
+              float alpha = weight * QuantumScale * cp.w;
 
               filteredPixel.x += alpha * cp.x;
               filteredPixel.y += alpha * cp.y;
               filteredPixel.z += alpha * cp.z;
               filteredPixel.w += weight * cp.w;
-
-              density+=weight;
-              gamma+=alpha;
+              gamma += alpha;
             }
+            else
+              filteredPixel += ((float4) weight)*cp;
+
+            density += weight;
           }
         }
       }
@@ -2940,7 +2931,7 @@ STRINGIFY(
       if (itemID < actualNumPixelInThisChunk) {
         outputPixelCache[itemID] = (float4)0.0f;
         densityCache[itemID] = 0.0f;
-        if (matte != 0)
+        if ((number_channels == 4) || (number_channels == 2))
           gammaCache[itemID] = 0.0f;
       }
       barrier(CLK_LOCAL_MEM_FENCE);
@@ -2951,52 +2942,35 @@ STRINGIFY(
           if (itemID%numItems == i) {
             outputPixelCache[pixelIndex]+=filteredPixel;
             densityCache[pixelIndex]+=density;
-            if (matte!=0) {
+            if ((number_channels == 4) || (number_channels == 2))
               gammaCache[pixelIndex]+=gamma;
-            }
           }
         }
         barrier(CLK_LOCAL_MEM_FENCE);
       }
 
-      if (itemID < actualNumPixelInThisChunk) {
-        if (matte==0) {
-          float density = densityCache[itemID];
-          float4 filteredPixel = outputPixelCache[itemID];
-          if (density!= 0.0f && density != 1.0)
-          {
-            density = PerceptibleReciprocal(density);
-            filteredPixel *= (float4)density;
-          }
-          filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
-                                                                         , ClampToQuantum(filteredPixel.y)
-                                                                         , ClampToQuantum(filteredPixel.z)
-                                                                         , ClampToQuantum(filteredPixel.w));
-        }
-        else {
-          float density = densityCache[itemID];
-          float gamma = gammaCache[itemID];
-          float4 filteredPixel = outputPixelCache[itemID];
-
-          if (density!= 0.0f && density != 1.0) {
-            density = PerceptibleReciprocal(density);
-            filteredPixel *= (float4)density;
-            gamma *= density;
-          }
-          gamma = PerceptibleReciprocal(gamma);
-
-          CLPixelType fp;
-          fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
-            , ClampToQuantum(gamma*filteredPixel.y)
-            , ClampToQuantum(gamma*filteredPixel.z)
-            , ClampToQuantum(filteredPixel.w));
+      if (itemID < actualNumPixelInThisChunk)
+      {
+        float density = densityCache[itemID];
+        float4 filteredPixel = outputPixelCache[itemID];
 
-          filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
+        if ((density != 0.0f) && (density != 1.0f))
+        {
+          density = PerceptibleReciprocal(density);
+          filteredPixel *= (float4) density;
+        }
 
+        if ((number_channels == 4) || (number_channels == 2))
+        {
+          float gamma = PerceptibleReciprocal(gammaCache[itemID]*density);
+          filteredPixel.x *= gamma;
+          filteredPixel.y *= gamma;
+          filteredPixel.z *= gamma;
         }
-      }
 
-    } // end of chunking loop
+        WriteFloat4(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, AllChannels, filteredPixel);
+      }
+    }
   }
   )
 
index 88938169297be727fb4fc80837a299304985ee98..f25095083fe20b7ccc9881ff02d7f908b3290c9b 100644 (file)
@@ -4803,13 +4803,11 @@ MagickExport MagickBooleanType AccelerateRandomImage(Image *image,
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 */
 
-static MagickBooleanType resizeHorizontalFilter(cl_mem image,
-  const unsigned int imageColumns,const unsigned int imageRows,
-  const unsigned int matte,cl_mem resizedImage,
-  const unsigned int resizedColumns,const unsigned int resizedRows,
+static MagickBooleanType resizeHorizontalFilter(MagickCLEnv clEnv,
+  cl_command_queue queue,cl_mem image,cl_uint number_channels,cl_uint columns,
+  cl_uint rows,cl_mem resizedImage,cl_uint resizedColumns,cl_uint resizedRows,
   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
-  const float xFactor,MagickCLEnv clEnv,cl_command_queue queue,
-  ExceptionInfo *exception)
+  const float xFactor,ExceptionInfo *exception)
 {
   cl_kernel
     horizontalKernel;
@@ -4896,7 +4894,7 @@ RestoreMSCWarning
     cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
     cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
     numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
-    imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
+    imageCacheLocalMemorySize = numCachedPixels * sizeof(CLQuantum) * number_channels;
     totalLocalMemorySize = imageCacheLocalMemorySize;
 
     /* local size for the pixel accumulator */
@@ -4908,10 +4906,10 @@ RestoreMSCWarning
     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
 
     /* local memory size for the gamma accumulator */
-    if (matte == 0)
-      gammaAccumulatorLocalMemorySize = sizeof(float);
-    else
+    if ((number_channels == 4) || (number_channels == 2))
       gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
+    else
+      gammaAccumulatorLocalMemorySize = sizeof(float);
     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
 
     if (totalLocalMemorySize <= deviceLocalMemorySize)
@@ -4941,14 +4939,13 @@ RestoreMSCWarning
 
   i = 0;
   clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&image);
-  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
-  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
-  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
-  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&number_channels);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&columns);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&rows);
   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
-
-  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
-  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&resizedColumns);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&resizedRows);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
 
   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
   clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
@@ -5009,13 +5006,11 @@ cleanup:
   return(status);
 }
 
-static MagickBooleanType resizeVerticalFilter(cl_mem image,
-  const unsigned int imageColumns,const unsigned int imageRows,
-  const unsigned int matte,cl_mem resizedImage,
-  const unsigned int resizedColumns,const unsigned int resizedRows,
+static MagickBooleanType resizeVerticalFilter(MagickCLEnv clEnv,
+  cl_command_queue queue,cl_mem image,cl_uint number_channels,cl_uint columns,
+  cl_uint rows,cl_mem resizedImage,cl_uint resizedColumns,cl_uint resizedRows,
   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
-  const float yFactor,MagickCLEnv clEnv,cl_command_queue queue,
-  ExceptionInfo *exception)
+  const float yFactor,ExceptionInfo *exception)
 {
   cl_kernel
     verticalKernel;
@@ -5102,7 +5097,7 @@ RestoreMSCWarning
     cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
     cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
     numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
-    imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
+    imageCacheLocalMemorySize = numCachedPixels * sizeof(CLQuantum) * number_channels;
     totalLocalMemorySize = imageCacheLocalMemorySize;
 
     /* local size for the pixel accumulator */
@@ -5114,10 +5109,10 @@ RestoreMSCWarning
     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
 
     /* local memory size for the gamma accumulator */
-    if (matte == 0)
-      gammaAccumulatorLocalMemorySize = sizeof(float);
-    else
+    if ((number_channels == 4) || (number_channels == 2))
       gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
+    else
+      gammaAccumulatorLocalMemorySize = sizeof(float);
     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
 
     if (totalLocalMemorySize <= deviceLocalMemorySize)
@@ -5147,14 +5142,13 @@ RestoreMSCWarning
 
   i = 0;
   clStatus = clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&image);
-  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
-  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
-  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&matte);
-  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&yFactor);
+  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&number_channels);
+  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&columns);
+  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&rows);
   clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
-
-  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
-  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
+  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&resizedColumns);
+  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&resizedRows);
+  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&yFactor);
 
   clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), (void*)&resizeFilterType);
   clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), (void*)&resizeWindowType);
@@ -5237,15 +5231,12 @@ static Image *ComputeResizeImage(const Image* image,
     imageBuffer,
     tempImageBuffer;
 
-  cl_mem_flags
-    mem_flags;
+  cl_uint
+    number_channels;
 
   const double
     *resizeFilterCoefficient;
 
-  const void
-    *inputPixels;
-
   float
     *mappedCoefficientBuffer,
     xFactor,
@@ -5265,12 +5256,10 @@ static Image *ComputeResizeImage(const Image* image,
     *filteredImage;
 
   unsigned int
-    i,
-    matte;
+    i;
 
   void
-    *filteredPixels,
-    *hostPtr;
+    *filteredPixels;
 
   outputReady = MagickFalse;
   filteredImage = NULL;
@@ -5280,40 +5269,17 @@ static Image *ComputeResizeImage(const Image* image,
   imageBuffer = NULL;
   tempImageBuffer = NULL;
   filteredImageBuffer = NULL;
+  filteredPixels = NULL;
   cubicCoefficientsBuffer = NULL;
   queue = NULL;
 
   clEnv = GetDefaultOpenCLEnv();
   context = GetOpenCLContext(clEnv);
 
-  /* Create and initialize OpenCL buffers. */
-  image_view=AcquireVirtualCacheView(image,exception);
-  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
-  if (inputPixels == (const void *) NULL)
-  {
-    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
+  image_view = AcquireVirtualCacheView(image, exception);
+  imageBuffer=createReadBuffer(image,image_view,clEnv,context,exception);
+  if (imageBuffer == (cl_mem) NULL)
     goto cleanup;
-  }
-
-  /* If the host pointer is aligned to the size of CLPixelPacket, 
-     then use the host buffer directly from the GPU; otherwise, 
-     create a buffer on the GPU and copy the data over */
-  if (ALIGNED(inputPixels,CLPixelPacket)) 
-  {
-    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
-  }
-  else 
-  {
-    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
-  }
-  /* create a CL buffer from image pixel buffer */
-  length = image->columns * image->rows;
-  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
-    goto cleanup;
-  }
 
   cubicCoefficientsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
@@ -5342,108 +5308,74 @@ static Image *ComputeResizeImage(const Image* image,
   }
 
   filteredImage = CloneImage(image,resizedColumns,resizedRows,MagickTrue,exception);
-  if (filteredImage == NULL)
+  if (filteredImage == (Image *) NULL)
     goto cleanup;
-
-  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+  if (SetImageStorageClass(filteredImage, DirectClass, exception) != MagickTrue)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
     goto cleanup;
   }
-  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
-  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
-  if (filteredPixels == (void *) NULL)
-  {
-    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
-    goto cleanup;
-  }
-
-  if (ALIGNED(filteredPixels,CLPixelPacket)) 
-  {
-    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
-    hostPtr = filteredPixels;
-  }
-  else 
-  {
-    mem_flags = CL_MEM_WRITE_ONLY;
-    hostPtr = NULL;
-  }
-
-  /* create a CL buffer from image pixel buffer */
-  length = filteredImage->columns * filteredImage->rows;
-  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+  filteredImage_view = AcquireAuthenticCacheView(filteredImage, exception);
+  filteredImageBuffer=createWriteBuffer(filteredImage,filteredImage_view,clEnv,
+    context,filteredPixels,exception);
+  if (filteredImageBuffer == (cl_mem) NULL)
     goto cleanup;
-  }
 
+  number_channels = image->number_channels;
   xFactor=(float) resizedColumns/(float) image->columns;
   yFactor=(float) resizedRows/(float) image->rows;
-  matte=(image->alpha_trait > CopyPixelTrait)?1:0;
   if (xFactor > yFactor)
   {
-
-    length = resizedColumns*image->rows;
-    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
+    length = resizedColumns*image->rows*number_channels;
+    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLQuantum), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
-    
-    status = resizeHorizontalFilter(imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, matte
-          , tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows
-          , resizeFilter, cubicCoefficientsBuffer
-          , xFactor, clEnv, queue, exception);
+
+    status = resizeHorizontalFilter(clEnv,queue,imageBuffer,number_channels,
+      (cl_uint) image->columns,(cl_uint) image->rows,tempImageBuffer,
+      (cl_uint) resizedColumns,(cl_uint) image->rows,resizeFilter,
+      cubicCoefficientsBuffer,xFactor,exception);
     if (status != MagickTrue)
       goto cleanup;
-    
-    status = resizeVerticalFilter(tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows, matte
-       , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows
-       , resizeFilter, cubicCoefficientsBuffer
-       , yFactor, clEnv, queue, exception);
+
+    status = resizeVerticalFilter(clEnv,queue,tempImageBuffer,number_channels,
+      (cl_uint) resizedColumns,(cl_uint) image->rows,filteredImageBuffer,
+      (cl_uint) resizedColumns,(cl_uint) resizedRows,resizeFilter,
+      cubicCoefficientsBuffer,yFactor,exception);
     if (status != MagickTrue)
       goto cleanup;
   }
   else
   {
-    length = image->columns*resizedRows;
-    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
+    length = image->columns*resizedRows*number_channels;
+    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLQuantum), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
 
-    status = resizeVerticalFilter(imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, matte
-       , tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows
-       , resizeFilter, cubicCoefficientsBuffer
-       , yFactor, clEnv, queue, exception);
+    status = resizeVerticalFilter(clEnv,queue,imageBuffer,number_channels,
+      (cl_uint) image->columns,(cl_int) image->rows,tempImageBuffer,
+      (cl_uint) image->columns,(cl_uint) resizedRows,resizeFilter,
+      cubicCoefficientsBuffer,yFactor,exception);
     if (status != MagickTrue)
       goto cleanup;
 
-    status = resizeHorizontalFilter(tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows, matte
-       , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows
-       , resizeFilter, cubicCoefficientsBuffer
-       , xFactor, clEnv, queue, exception);
+    status = resizeHorizontalFilter(clEnv,queue,tempImageBuffer,number_channels,
+      (cl_uint) image->columns, (cl_uint) resizedRows,filteredImageBuffer,
+      (cl_uint) resizedColumns, (cl_uint) resizedRows,resizeFilter,
+      cubicCoefficientsBuffer,xFactor,exception);
     if (status != MagickTrue)
       goto cleanup;
   }
-  length = resizedColumns*resizedRows;
-  if (ALIGNED(filteredPixels,CLPixelPacket)) 
-  {
-    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
-  }
-  else 
-  {
-    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
-  }
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+
+  if (copyWriteBuffer(filteredImage,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse)
     goto cleanup;
-  }
+
   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
 
 cleanup:
@@ -5489,7 +5421,7 @@ MagickExport Image *AccelerateResizeImage(const Image *image,
   assert(image != NULL);
   assert(exception != (ExceptionInfo *) NULL);
 
-  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
+  if ((checkAccelerateCondition(image) == MagickFalse) ||
       (checkOpenCLEnvironment(exception) == MagickFalse))
     return NULL;
 
@@ -6070,7 +6002,7 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius,
   }
 
   /* get result */
-  if (copyWriteBuffer(image,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse)
+  if (copyWriteBuffer(filteredImage,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse)
   {
     (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
     goto cleanup;