From: dirk Date: Thu, 7 Apr 2016 20:40:20 +0000 (+0200) Subject: AccelerateResizeImage now supports R/RA/RGB images. X-Git-Tag: 7.0.1-0~58 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=25cc6903d104376b6815ceb97ed13fb56548d30c;p=imagemagick AccelerateResizeImage now supports R/RA/RGB images. --- diff --git a/MagickCore/accelerate-private.h b/MagickCore/accelerate-private.h index b227b239d..66fe4ecb2 100644 --- a/MagickCore/accelerate-private.h +++ b/MagickCore/accelerate-private.h @@ -54,7 +54,6 @@ const char* accelerateKernels = /* Define declarations. */ - OPENCL_DEFINE(GetPixelAlpha(pixel), pixel.w) OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f)) OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f)) OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f)) @@ -367,12 +366,12 @@ OPENCL_ENDIF() inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; } inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); } inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); } - inline float getPixelAlpha(const __global CLQuantum *p) { return (float)*(p+3); } + inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); } inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; } inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; } inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; } - inline void setPixelAlpha(__global CLQuantum *p,const CLQuantum value) { *(p+3)=value; } + inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; } inline CLQuantum getBlue(CLPixelType p) { return p.x; } inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; } @@ -411,7 +410,7 @@ OPENCL_ENDIF() if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) - *alpha=getPixelAlpha(p); + *alpha=getPixelAlpha(p,number_channels); } inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels, @@ -445,7 +444,7 @@ OPENCL_ENDIF() if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) - setPixelAlpha(p,ClampToQuantum(alpha)); + setPixelAlpha(p,number_channels,ClampToQuantum(alpha)); } inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels, @@ -2662,16 +2661,16 @@ STRINGIFY( ) STRINGIFY( - __kernel __attribute__((reqd_work_group_size(256, 1, 1))) - void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte - , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows - , const int resizeFilterType, const int resizeWindowType - , const __global float* resizeFilterCubicCoefficients - , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur - , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize - , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) { - - + __kernel __attribute__((reqd_work_group_size(256, 1, 1))) + void ResizeHorizontalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels, + const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage, + const unsigned int filteredColumns, const unsigned int filteredRows, const float xFactor, + const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients, + const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, + const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels, + const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize, + __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache) + { // calculate the range of resized image pixels computed by this workgroup const unsigned int startX = get_group_id(0)*pixelPerWorkgroup; const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns); @@ -2687,13 +2686,14 @@ STRINGIFY( // cache the input pixels into local memory const unsigned int y = get_global_id(1); - event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0); - wait_group_events(1,&e); + const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y); + const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels; + event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0); + wait_group_events(1, &e); unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize; for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++) { - const unsigned int chunkStartX = startX + chunk*pixelChunkSize; const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX); const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX; @@ -2701,15 +2701,15 @@ STRINGIFY( // determine which resized pixel computed by this workitem const unsigned int itemID = get_local_id(0); const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0)); - + const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0)); float4 filteredPixel = (float4)0.0f; float density = 0.0f; float gamma = 0.0f; // -1 means this workitem doesn't participate in the computation - if (pixelIndex != -1) { - + if (pixelIndex != -1) + { // x coordinated of the resized pixel computed by this workitem const int x = chunkStartX + pixelIndex; @@ -2724,44 +2724,46 @@ STRINGIFY( numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1); const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem; - if (startStep < n) { + if (startStep < n) + { const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n); unsigned int cacheIndex = start+startStep-cacheRangeStartX; - if (matte == 0) { - for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) { - float4 cp = convert_float4(inputImageCache[cacheIndex]); - - float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType - , (ResizeWeightingFunctionType)resizeWindowType - , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5)); - - filteredPixel += ((float4)weight)*cp; - density+=weight; + for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++) + { + float weight = getResizeFilterWeight(resizeFilterCubicCoefficients, + (ResizeWeightingFunctionType) resizeFilterType, + (ResizeWeightingFunctionType) resizeWindowType, + resizeFilterScale, resizeFilterWindowSupport, + resizeFilterBlur, scale*(start + i - bisect + 0.5)); + + float4 cp = (float4) 0; + + __local float *p = inputImageCache + (cacheIndex*number_channels); + cp.x = *(p); + if (number_channels > 2) + { + cp.y = *(p + 1); + cp.z = *(p + 2); } + if ((number_channels == 4) || (number_channels == 2)) + { + cp.w = *(p + number_channels - 1); - } - else { - for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) { - CLPixelType p = inputImageCache[cacheIndex]; - - float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType - , (ResizeWeightingFunctionType)resizeWindowType - , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5)); - - float alpha = weight * QuantumScale * GetPixelAlpha(p); - float4 cp = convert_float4(p); + float alpha = weight * QuantumScale * cp.w; filteredPixel.x += alpha * cp.x; filteredPixel.y += alpha * cp.y; filteredPixel.z += alpha * cp.z; filteredPixel.w += weight * cp.w; - - density+=weight; - gamma+=alpha; + gamma += alpha; } + else + filteredPixel += ((float4) weight)*cp; + + density += weight; } } } @@ -2770,7 +2772,7 @@ STRINGIFY( if (itemID < actualNumPixelInThisChunk) { outputPixelCache[itemID] = (float4)0.0f; densityCache[itemID] = 0.0f; - if (matte != 0) + if ((number_channels == 4) || (number_channels == 2)) gammaCache[itemID] = 0.0f; } barrier(CLK_LOCAL_MEM_FENCE); @@ -2781,67 +2783,50 @@ STRINGIFY( if (itemID%numItems == i) { outputPixelCache[pixelIndex]+=filteredPixel; densityCache[pixelIndex]+=density; - if (matte!=0) { + if ((number_channels == 4) || (number_channels == 2)) gammaCache[pixelIndex]+=gamma; - } } } barrier(CLK_LOCAL_MEM_FENCE); } - if (itemID < actualNumPixelInThisChunk) { - if (matte==0) { - float density = densityCache[itemID]; - float4 filteredPixel = outputPixelCache[itemID]; - if (density!= 0.0f && density != 1.0) - { - density = PerceptibleReciprocal(density); - filteredPixel *= (float4)density; - } - filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x) - , ClampToQuantum(filteredPixel.y) - , ClampToQuantum(filteredPixel.z) - , ClampToQuantum(filteredPixel.w)); - } - else { - float density = densityCache[itemID]; - float gamma = gammaCache[itemID]; - float4 filteredPixel = outputPixelCache[itemID]; - - if (density!= 0.0f && density != 1.0) { - density = PerceptibleReciprocal(density); - filteredPixel *= (float4)density; - gamma *= density; - } - gamma = PerceptibleReciprocal(gamma); - - CLPixelType fp; - fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x) - , ClampToQuantum(gamma*filteredPixel.y) - , ClampToQuantum(gamma*filteredPixel.z) - , ClampToQuantum(filteredPixel.w)); + if (itemID < actualNumPixelInThisChunk) + { + float density = densityCache[itemID]; + float4 filteredPixel = outputPixelCache[itemID]; - filteredImage[y*filteredColumns+chunkStartX+itemID] = fp; + if ((density != 0.0f) && (density != 1.0f)) + { + density = PerceptibleReciprocal(density); + filteredPixel *= (float4) density; + } + if ((number_channels == 4) || (number_channels == 2)) + { + float gamma = PerceptibleReciprocal(gammaCache[itemID]*density); + filteredPixel.x *= gamma; + filteredPixel.y *= gamma; + filteredPixel.z *= gamma; } - } - } // end of chunking loop + WriteFloat4(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, AllChannels, filteredPixel); + } + } } ) STRINGIFY( __kernel __attribute__((reqd_work_group_size(1, 256, 1))) - void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte - , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows - , const int resizeFilterType, const int resizeWindowType - , const __global float* resizeFilterCubicCoefficients - , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur - , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize - , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) { - - + void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels, + const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage, + const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor, + const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients, + const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, + const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels, + const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize, + __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache) + { // calculate the range of resized image pixels computed by this workgroup const unsigned int startY = get_group_id(1)*pixelPerWorkgroup; const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows); @@ -2857,13 +2842,18 @@ STRINGIFY( // cache the input pixels into local memory const unsigned int x = get_global_id(0); - event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0); - wait_group_events(1,&e); + unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY); + unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY; + unsigned int stride = inputColumns * number_channels; + for (unsigned int i = 0; i < number_channels; i++) + { + event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0); + wait_group_events(1,&e); + } unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize; for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++) { - const unsigned int chunkStartY = startY + chunk*pixelChunkSize; const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY); const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY; @@ -2871,15 +2861,15 @@ STRINGIFY( // determine which resized pixel computed by this workitem const unsigned int itemID = get_local_id(1); const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1)); - + const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1)); float4 filteredPixel = (float4)0.0f; float density = 0.0f; float gamma = 0.0f; // -1 means this workitem doesn't participate in the computation - if (pixelIndex != -1) { - + if (pixelIndex != -1) + { // x coordinated of the resized pixel computed by this workitem const int y = chunkStartY + pixelIndex; @@ -2894,44 +2884,45 @@ STRINGIFY( numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1); const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem; - if (startStep < n) { + if (startStep < n) + { const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n); unsigned int cacheIndex = start+startStep-cacheRangeStartY; - if (matte == 0) { - - for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) { - float4 cp = convert_float4(inputImageCache[cacheIndex]); - - float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType - , (ResizeWeightingFunctionType)resizeWindowType - , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5)); - - filteredPixel += ((float4)weight)*cp; - density+=weight; + for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++) + { + float weight = getResizeFilterWeight(resizeFilterCubicCoefficients, + (ResizeWeightingFunctionType) resizeFilterType, + (ResizeWeightingFunctionType) resizeWindowType, + resizeFilterScale, resizeFilterWindowSupport, + resizeFilterBlur, scale*(start + i - bisect + 0.5)); + + float4 cp = (float4)0.0f; + + __local float *p = inputImageCache + cacheIndex; + cp.x = *(p); + if (number_channels > 2) + { + cp.y = *(p + rangeLength); + cp.z = *(p + (rangeLength * 2)); } + if ((number_channels == 4) || (number_channels == 2)) + { + cp.w = *(p + (rangeLength * (number_channels - 1))); - } - else { - for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) { - CLPixelType p = inputImageCache[cacheIndex]; - - float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType - , (ResizeWeightingFunctionType)resizeWindowType - , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5)); - - float alpha = weight * QuantumScale * GetPixelAlpha(p); - float4 cp = convert_float4(p); + float alpha = weight * QuantumScale * cp.w; filteredPixel.x += alpha * cp.x; filteredPixel.y += alpha * cp.y; filteredPixel.z += alpha * cp.z; filteredPixel.w += weight * cp.w; - - density+=weight; - gamma+=alpha; + gamma += alpha; } + else + filteredPixel += ((float4) weight)*cp; + + density += weight; } } } @@ -2940,7 +2931,7 @@ STRINGIFY( if (itemID < actualNumPixelInThisChunk) { outputPixelCache[itemID] = (float4)0.0f; densityCache[itemID] = 0.0f; - if (matte != 0) + if ((number_channels == 4) || (number_channels == 2)) gammaCache[itemID] = 0.0f; } barrier(CLK_LOCAL_MEM_FENCE); @@ -2951,52 +2942,35 @@ STRINGIFY( if (itemID%numItems == i) { outputPixelCache[pixelIndex]+=filteredPixel; densityCache[pixelIndex]+=density; - if (matte!=0) { + if ((number_channels == 4) || (number_channels == 2)) gammaCache[pixelIndex]+=gamma; - } } } barrier(CLK_LOCAL_MEM_FENCE); } - if (itemID < actualNumPixelInThisChunk) { - if (matte==0) { - float density = densityCache[itemID]; - float4 filteredPixel = outputPixelCache[itemID]; - if (density!= 0.0f && density != 1.0) - { - density = PerceptibleReciprocal(density); - filteredPixel *= (float4)density; - } - filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x) - , ClampToQuantum(filteredPixel.y) - , ClampToQuantum(filteredPixel.z) - , ClampToQuantum(filteredPixel.w)); - } - else { - float density = densityCache[itemID]; - float gamma = gammaCache[itemID]; - float4 filteredPixel = outputPixelCache[itemID]; - - if (density!= 0.0f && density != 1.0) { - density = PerceptibleReciprocal(density); - filteredPixel *= (float4)density; - gamma *= density; - } - gamma = PerceptibleReciprocal(gamma); - - CLPixelType fp; - fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x) - , ClampToQuantum(gamma*filteredPixel.y) - , ClampToQuantum(gamma*filteredPixel.z) - , ClampToQuantum(filteredPixel.w)); + if (itemID < actualNumPixelInThisChunk) + { + float density = densityCache[itemID]; + float4 filteredPixel = outputPixelCache[itemID]; - filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp; + if ((density != 0.0f) && (density != 1.0f)) + { + density = PerceptibleReciprocal(density); + filteredPixel *= (float4) density; + } + if ((number_channels == 4) || (number_channels == 2)) + { + float gamma = PerceptibleReciprocal(gammaCache[itemID]*density); + filteredPixel.x *= gamma; + filteredPixel.y *= gamma; + filteredPixel.z *= gamma; } - } - } // end of chunking loop + WriteFloat4(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, AllChannels, filteredPixel); + } + } } ) diff --git a/MagickCore/accelerate.c b/MagickCore/accelerate.c index 889381692..f25095083 100644 --- a/MagickCore/accelerate.c +++ b/MagickCore/accelerate.c @@ -4803,13 +4803,11 @@ MagickExport MagickBooleanType AccelerateRandomImage(Image *image, %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ -static MagickBooleanType resizeHorizontalFilter(cl_mem image, - const unsigned int imageColumns,const unsigned int imageRows, - const unsigned int matte,cl_mem resizedImage, - const unsigned int resizedColumns,const unsigned int resizedRows, +static MagickBooleanType resizeHorizontalFilter(MagickCLEnv clEnv, + cl_command_queue queue,cl_mem image,cl_uint number_channels,cl_uint columns, + cl_uint rows,cl_mem resizedImage,cl_uint resizedColumns,cl_uint resizedRows, const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients, - const float xFactor,MagickCLEnv clEnv,cl_command_queue queue, - ExceptionInfo *exception) + const float xFactor,ExceptionInfo *exception) { cl_kernel horizontalKernel; @@ -4896,7 +4894,7 @@ RestoreMSCWarning cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5); cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5); numCachedPixels = cacheRangeEnd - cacheRangeStart + 1; - imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket); + imageCacheLocalMemorySize = numCachedPixels * sizeof(CLQuantum) * number_channels; totalLocalMemorySize = imageCacheLocalMemorySize; /* local size for the pixel accumulator */ @@ -4908,10 +4906,10 @@ RestoreMSCWarning totalLocalMemorySize+=weightAccumulatorLocalMemorySize; /* local memory size for the gamma accumulator */ - if (matte == 0) - gammaAccumulatorLocalMemorySize = sizeof(float); - else + if ((number_channels == 4) || (number_channels == 2)) gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float); + else + gammaAccumulatorLocalMemorySize = sizeof(float); totalLocalMemorySize+=gammaAccumulatorLocalMemorySize; if (totalLocalMemorySize <= deviceLocalMemorySize) @@ -4941,14 +4939,13 @@ RestoreMSCWarning i = 0; clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&image); - clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageColumns); - clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageRows); - clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte); - clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&number_channels); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&columns); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&rows); clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage); - - clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns); - clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&resizedColumns); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&resizedRows); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor); clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType); clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType); @@ -5009,13 +5006,11 @@ cleanup: return(status); } -static MagickBooleanType resizeVerticalFilter(cl_mem image, - const unsigned int imageColumns,const unsigned int imageRows, - const unsigned int matte,cl_mem resizedImage, - const unsigned int resizedColumns,const unsigned int resizedRows, +static MagickBooleanType resizeVerticalFilter(MagickCLEnv clEnv, + cl_command_queue queue,cl_mem image,cl_uint number_channels,cl_uint columns, + cl_uint rows,cl_mem resizedImage,cl_uint resizedColumns,cl_uint resizedRows, const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients, - const float yFactor,MagickCLEnv clEnv,cl_command_queue queue, - ExceptionInfo *exception) + const float yFactor,ExceptionInfo *exception) { cl_kernel verticalKernel; @@ -5102,7 +5097,7 @@ RestoreMSCWarning cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5); cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5); numCachedPixels = cacheRangeEnd - cacheRangeStart + 1; - imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket); + imageCacheLocalMemorySize = numCachedPixels * sizeof(CLQuantum) * number_channels; totalLocalMemorySize = imageCacheLocalMemorySize; /* local size for the pixel accumulator */ @@ -5114,10 +5109,10 @@ RestoreMSCWarning totalLocalMemorySize+=weightAccumulatorLocalMemorySize; /* local memory size for the gamma accumulator */ - if (matte == 0) - gammaAccumulatorLocalMemorySize = sizeof(float); - else + if ((number_channels == 4) || (number_channels == 2)) gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float); + else + gammaAccumulatorLocalMemorySize = sizeof(float); totalLocalMemorySize+=gammaAccumulatorLocalMemorySize; if (totalLocalMemorySize <= deviceLocalMemorySize) @@ -5147,14 +5142,13 @@ RestoreMSCWarning i = 0; clStatus = clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&image); - clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&imageColumns); - clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&imageRows); - clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&matte); - clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&yFactor); + clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&number_channels); + clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&columns); + clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&rows); clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&resizedImage); - - clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns); - clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&resizedRows); + clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&resizedColumns); + clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&resizedRows); + clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&yFactor); clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), (void*)&resizeFilterType); clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), (void*)&resizeWindowType); @@ -5237,15 +5231,12 @@ static Image *ComputeResizeImage(const Image* image, imageBuffer, tempImageBuffer; - cl_mem_flags - mem_flags; + cl_uint + number_channels; const double *resizeFilterCoefficient; - const void - *inputPixels; - float *mappedCoefficientBuffer, xFactor, @@ -5265,12 +5256,10 @@ static Image *ComputeResizeImage(const Image* image, *filteredImage; unsigned int - i, - matte; + i; void - *filteredPixels, - *hostPtr; + *filteredPixels; outputReady = MagickFalse; filteredImage = NULL; @@ -5280,40 +5269,17 @@ static Image *ComputeResizeImage(const Image* image, imageBuffer = NULL; tempImageBuffer = NULL; filteredImageBuffer = NULL; + filteredPixels = NULL; cubicCoefficientsBuffer = NULL; queue = NULL; clEnv = GetDefaultOpenCLEnv(); context = GetOpenCLContext(clEnv); - /* Create and initialize OpenCL buffers. */ - image_view=AcquireVirtualCacheView(image,exception); - inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception); - if (inputPixels == (const void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename); + image_view = AcquireVirtualCacheView(image, exception); + imageBuffer=createReadBuffer(image,image_view,clEnv,context,exception); + if (imageBuffer == (cl_mem) NULL) goto cleanup; - } - - /* If the host pointer is aligned to the size of CLPixelPacket, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ - if (ALIGNED(inputPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } cubicCoefficientsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) @@ -5342,108 +5308,74 @@ static Image *ComputeResizeImage(const Image* image, } filteredImage = CloneImage(image,resizedColumns,resizedRows,MagickTrue,exception); - if (filteredImage == NULL) + if (filteredImage == (Image *) NULL) goto cleanup; - - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage, DirectClass, exception) != MagickTrue) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); goto cleanup; } - filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception); - filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception); - if (filteredPixels == (void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); - goto cleanup; - } - - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; - hostPtr = filteredPixels; - } - else - { - mem_flags = CL_MEM_WRITE_ONLY; - hostPtr = NULL; - } - - /* create a CL buffer from image pixel buffer */ - length = filteredImage->columns * filteredImage->rows; - filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + filteredImage_view = AcquireAuthenticCacheView(filteredImage, exception); + filteredImageBuffer=createWriteBuffer(filteredImage,filteredImage_view,clEnv, + context,filteredPixels,exception); + if (filteredImageBuffer == (cl_mem) NULL) goto cleanup; - } + number_channels = image->number_channels; xFactor=(float) resizedColumns/(float) image->columns; yFactor=(float) resizedRows/(float) image->rows; - matte=(image->alpha_trait > CopyPixelTrait)?1:0; if (xFactor > yFactor) { - - length = resizedColumns*image->rows; - tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus); + length = resizedColumns*image->rows*number_channels; + tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLQuantum), NULL, &clStatus); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - - status = resizeHorizontalFilter(imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, matte - , tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows - , resizeFilter, cubicCoefficientsBuffer - , xFactor, clEnv, queue, exception); + + status = resizeHorizontalFilter(clEnv,queue,imageBuffer,number_channels, + (cl_uint) image->columns,(cl_uint) image->rows,tempImageBuffer, + (cl_uint) resizedColumns,(cl_uint) image->rows,resizeFilter, + cubicCoefficientsBuffer,xFactor,exception); if (status != MagickTrue) goto cleanup; - - status = resizeVerticalFilter(tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows, matte - , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows - , resizeFilter, cubicCoefficientsBuffer - , yFactor, clEnv, queue, exception); + + status = resizeVerticalFilter(clEnv,queue,tempImageBuffer,number_channels, + (cl_uint) resizedColumns,(cl_uint) image->rows,filteredImageBuffer, + (cl_uint) resizedColumns,(cl_uint) resizedRows,resizeFilter, + cubicCoefficientsBuffer,yFactor,exception); if (status != MagickTrue) goto cleanup; } else { - length = image->columns*resizedRows; - tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus); + length = image->columns*resizedRows*number_channels; + tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLQuantum), NULL, &clStatus); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - status = resizeVerticalFilter(imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, matte - , tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows - , resizeFilter, cubicCoefficientsBuffer - , yFactor, clEnv, queue, exception); + status = resizeVerticalFilter(clEnv,queue,imageBuffer,number_channels, + (cl_uint) image->columns,(cl_int) image->rows,tempImageBuffer, + (cl_uint) image->columns,(cl_uint) resizedRows,resizeFilter, + cubicCoefficientsBuffer,yFactor,exception); if (status != MagickTrue) goto cleanup; - status = resizeHorizontalFilter(tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows, matte - , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows - , resizeFilter, cubicCoefficientsBuffer - , xFactor, clEnv, queue, exception); + status = resizeHorizontalFilter(clEnv,queue,tempImageBuffer,number_channels, + (cl_uint) image->columns, (cl_uint) resizedRows,filteredImageBuffer, + (cl_uint) resizedColumns, (cl_uint) resizedRows,resizeFilter, + cubicCoefficientsBuffer,xFactor,exception); if (status != MagickTrue) goto cleanup; } - length = resizedColumns*resizedRows; - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); - } - else - { - clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); - } - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + + if (copyWriteBuffer(filteredImage,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse) goto cleanup; - } + outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception); cleanup: @@ -5489,7 +5421,7 @@ MagickExport Image *AccelerateResizeImage(const Image *image, assert(image != NULL); assert(exception != (ExceptionInfo *) NULL); - if ((checkAccelerateConditionRGBA(image) == MagickFalse) || + if ((checkAccelerateCondition(image) == MagickFalse) || (checkOpenCLEnvironment(exception) == MagickFalse)) return NULL; @@ -6070,7 +6002,7 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius, } /* get result */ - if (copyWriteBuffer(image,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse) + if (copyWriteBuffer(filteredImage,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse) { (void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); goto cleanup;