/*
Define declarations.
*/
- OPENCL_DEFINE(GetPixelAlpha(pixel), pixel.w)
OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; }
inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); }
inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); }
- inline float getPixelAlpha(const __global CLQuantum *p) { return (float)*(p+3); }
+ inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); }
inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; }
inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; }
inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; }
- inline void setPixelAlpha(__global CLQuantum *p,const CLQuantum value) { *(p+3)=value; }
+ inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; }
inline CLQuantum getBlue(CLPixelType p) { return p.x; }
inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
if (((number_channels == 4) || (number_channels == 2)) &&
((channel & AlphaChannel) != 0))
- *alpha=getPixelAlpha(p);
+ *alpha=getPixelAlpha(p,number_channels);
}
inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels,
if (((number_channels == 4) || (number_channels == 2)) &&
((channel & AlphaChannel) != 0))
- setPixelAlpha(p,ClampToQuantum(alpha));
+ setPixelAlpha(p,number_channels,ClampToQuantum(alpha));
}
inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels,
)
STRINGIFY(
- __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
- void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
- , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
- , const int resizeFilterType, const int resizeWindowType
- , const __global float* resizeFilterCubicCoefficients
- , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
- , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
- , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
-
-
+ __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
+ void ResizeHorizontalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
+ const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
+ const unsigned int filteredColumns, const unsigned int filteredRows, const float xFactor,
+ const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
+ const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
+ const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
+ const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
+ __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
+ {
// calculate the range of resized image pixels computed by this workgroup
const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
// cache the input pixels into local memory
const unsigned int y = get_global_id(1);
- event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
- wait_group_events(1,&e);
+ const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
+ const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
+ event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
+ wait_group_events(1, &e);
unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
{
-
const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
// determine which resized pixel computed by this workitem
const unsigned int itemID = get_local_id(0);
const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
-
+
const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
float4 filteredPixel = (float4)0.0f;
float density = 0.0f;
float gamma = 0.0f;
// -1 means this workitem doesn't participate in the computation
- if (pixelIndex != -1) {
-
+ if (pixelIndex != -1)
+ {
// x coordinated of the resized pixel computed by this workitem
const int x = chunkStartX + pixelIndex;
numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
- if (startStep < n) {
+ if (startStep < n)
+ {
const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
unsigned int cacheIndex = start+startStep-cacheRangeStartX;
- if (matte == 0) {
- for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
- float4 cp = convert_float4(inputImageCache[cacheIndex]);
-
- float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
- , (ResizeWeightingFunctionType)resizeWindowType
- , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
-
- filteredPixel += ((float4)weight)*cp;
- density+=weight;
+ for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
+ {
+ float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
+ (ResizeWeightingFunctionType) resizeFilterType,
+ (ResizeWeightingFunctionType) resizeWindowType,
+ resizeFilterScale, resizeFilterWindowSupport,
+ resizeFilterBlur, scale*(start + i - bisect + 0.5));
+
+ float4 cp = (float4) 0;
+
+ __local float *p = inputImageCache + (cacheIndex*number_channels);
+ cp.x = *(p);
+ if (number_channels > 2)
+ {
+ cp.y = *(p + 1);
+ cp.z = *(p + 2);
}
+ if ((number_channels == 4) || (number_channels == 2))
+ {
+ cp.w = *(p + number_channels - 1);
- }
- else {
- for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
- CLPixelType p = inputImageCache[cacheIndex];
-
- float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
- , (ResizeWeightingFunctionType)resizeWindowType
- , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
-
- float alpha = weight * QuantumScale * GetPixelAlpha(p);
- float4 cp = convert_float4(p);
+ float alpha = weight * QuantumScale * cp.w;
filteredPixel.x += alpha * cp.x;
filteredPixel.y += alpha * cp.y;
filteredPixel.z += alpha * cp.z;
filteredPixel.w += weight * cp.w;
-
- density+=weight;
- gamma+=alpha;
+ gamma += alpha;
}
+ else
+ filteredPixel += ((float4) weight)*cp;
+
+ density += weight;
}
}
}
if (itemID < actualNumPixelInThisChunk) {
outputPixelCache[itemID] = (float4)0.0f;
densityCache[itemID] = 0.0f;
- if (matte != 0)
+ if ((number_channels == 4) || (number_channels == 2))
gammaCache[itemID] = 0.0f;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (itemID%numItems == i) {
outputPixelCache[pixelIndex]+=filteredPixel;
densityCache[pixelIndex]+=density;
- if (matte!=0) {
+ if ((number_channels == 4) || (number_channels == 2))
gammaCache[pixelIndex]+=gamma;
- }
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
- if (itemID < actualNumPixelInThisChunk) {
- if (matte==0) {
- float density = densityCache[itemID];
- float4 filteredPixel = outputPixelCache[itemID];
- if (density!= 0.0f && density != 1.0)
- {
- density = PerceptibleReciprocal(density);
- filteredPixel *= (float4)density;
- }
- filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
- , ClampToQuantum(filteredPixel.y)
- , ClampToQuantum(filteredPixel.z)
- , ClampToQuantum(filteredPixel.w));
- }
- else {
- float density = densityCache[itemID];
- float gamma = gammaCache[itemID];
- float4 filteredPixel = outputPixelCache[itemID];
-
- if (density!= 0.0f && density != 1.0) {
- density = PerceptibleReciprocal(density);
- filteredPixel *= (float4)density;
- gamma *= density;
- }
- gamma = PerceptibleReciprocal(gamma);
-
- CLPixelType fp;
- fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
- , ClampToQuantum(gamma*filteredPixel.y)
- , ClampToQuantum(gamma*filteredPixel.z)
- , ClampToQuantum(filteredPixel.w));
+ if (itemID < actualNumPixelInThisChunk)
+ {
+ float density = densityCache[itemID];
+ float4 filteredPixel = outputPixelCache[itemID];
- filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
+ if ((density != 0.0f) && (density != 1.0f))
+ {
+ density = PerceptibleReciprocal(density);
+ filteredPixel *= (float4) density;
+ }
+ if ((number_channels == 4) || (number_channels == 2))
+ {
+ float gamma = PerceptibleReciprocal(gammaCache[itemID]*density);
+ filteredPixel.x *= gamma;
+ filteredPixel.y *= gamma;
+ filteredPixel.z *= gamma;
}
- }
- } // end of chunking loop
+ WriteFloat4(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, AllChannels, filteredPixel);
+ }
+ }
}
)
STRINGIFY(
__kernel __attribute__((reqd_work_group_size(1, 256, 1)))
- void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
- , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
- , const int resizeFilterType, const int resizeWindowType
- , const __global float* resizeFilterCubicCoefficients
- , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
- , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
- , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
-
-
+ void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
+ const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
+ const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor,
+ const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
+ const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
+ const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
+ const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
+ __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
+ {
// calculate the range of resized image pixels computed by this workgroup
const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
// cache the input pixels into local memory
const unsigned int x = get_global_id(0);
- event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
- wait_group_events(1,&e);
+ unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
+ unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
+ unsigned int stride = inputColumns * number_channels;
+ for (unsigned int i = 0; i < number_channels; i++)
+ {
+ event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
+ wait_group_events(1,&e);
+ }
unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
{
-
const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
// determine which resized pixel computed by this workitem
const unsigned int itemID = get_local_id(1);
const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
-
+
const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
float4 filteredPixel = (float4)0.0f;
float density = 0.0f;
float gamma = 0.0f;
// -1 means this workitem doesn't participate in the computation
- if (pixelIndex != -1) {
-
+ if (pixelIndex != -1)
+ {
// x coordinated of the resized pixel computed by this workitem
const int y = chunkStartY + pixelIndex;
numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
- if (startStep < n) {
+ if (startStep < n)
+ {
const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
unsigned int cacheIndex = start+startStep-cacheRangeStartY;
- if (matte == 0) {
-
- for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
- float4 cp = convert_float4(inputImageCache[cacheIndex]);
-
- float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
- , (ResizeWeightingFunctionType)resizeWindowType
- , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
-
- filteredPixel += ((float4)weight)*cp;
- density+=weight;
+ for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
+ {
+ float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
+ (ResizeWeightingFunctionType) resizeFilterType,
+ (ResizeWeightingFunctionType) resizeWindowType,
+ resizeFilterScale, resizeFilterWindowSupport,
+ resizeFilterBlur, scale*(start + i - bisect + 0.5));
+
+ float4 cp = (float4)0.0f;
+
+ __local float *p = inputImageCache + cacheIndex;
+ cp.x = *(p);
+ if (number_channels > 2)
+ {
+ cp.y = *(p + rangeLength);
+ cp.z = *(p + (rangeLength * 2));
}
+ if ((number_channels == 4) || (number_channels == 2))
+ {
+ cp.w = *(p + (rangeLength * (number_channels - 1)));
- }
- else {
- for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
- CLPixelType p = inputImageCache[cacheIndex];
-
- float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
- , (ResizeWeightingFunctionType)resizeWindowType
- , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
-
- float alpha = weight * QuantumScale * GetPixelAlpha(p);
- float4 cp = convert_float4(p);
+ float alpha = weight * QuantumScale * cp.w;
filteredPixel.x += alpha * cp.x;
filteredPixel.y += alpha * cp.y;
filteredPixel.z += alpha * cp.z;
filteredPixel.w += weight * cp.w;
-
- density+=weight;
- gamma+=alpha;
+ gamma += alpha;
}
+ else
+ filteredPixel += ((float4) weight)*cp;
+
+ density += weight;
}
}
}
if (itemID < actualNumPixelInThisChunk) {
outputPixelCache[itemID] = (float4)0.0f;
densityCache[itemID] = 0.0f;
- if (matte != 0)
+ if ((number_channels == 4) || (number_channels == 2))
gammaCache[itemID] = 0.0f;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (itemID%numItems == i) {
outputPixelCache[pixelIndex]+=filteredPixel;
densityCache[pixelIndex]+=density;
- if (matte!=0) {
+ if ((number_channels == 4) || (number_channels == 2))
gammaCache[pixelIndex]+=gamma;
- }
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
- if (itemID < actualNumPixelInThisChunk) {
- if (matte==0) {
- float density = densityCache[itemID];
- float4 filteredPixel = outputPixelCache[itemID];
- if (density!= 0.0f && density != 1.0)
- {
- density = PerceptibleReciprocal(density);
- filteredPixel *= (float4)density;
- }
- filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
- , ClampToQuantum(filteredPixel.y)
- , ClampToQuantum(filteredPixel.z)
- , ClampToQuantum(filteredPixel.w));
- }
- else {
- float density = densityCache[itemID];
- float gamma = gammaCache[itemID];
- float4 filteredPixel = outputPixelCache[itemID];
-
- if (density!= 0.0f && density != 1.0) {
- density = PerceptibleReciprocal(density);
- filteredPixel *= (float4)density;
- gamma *= density;
- }
- gamma = PerceptibleReciprocal(gamma);
-
- CLPixelType fp;
- fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
- , ClampToQuantum(gamma*filteredPixel.y)
- , ClampToQuantum(gamma*filteredPixel.z)
- , ClampToQuantum(filteredPixel.w));
+ if (itemID < actualNumPixelInThisChunk)
+ {
+ float density = densityCache[itemID];
+ float4 filteredPixel = outputPixelCache[itemID];
- filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
+ if ((density != 0.0f) && (density != 1.0f))
+ {
+ density = PerceptibleReciprocal(density);
+ filteredPixel *= (float4) density;
+ }
+ if ((number_channels == 4) || (number_channels == 2))
+ {
+ float gamma = PerceptibleReciprocal(gammaCache[itemID]*density);
+ filteredPixel.x *= gamma;
+ filteredPixel.y *= gamma;
+ filteredPixel.z *= gamma;
}
- }
- } // end of chunking loop
+ WriteFloat4(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, AllChannels, filteredPixel);
+ }
+ }
}
)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
*/
-static MagickBooleanType resizeHorizontalFilter(cl_mem image,
- const unsigned int imageColumns,const unsigned int imageRows,
- const unsigned int matte,cl_mem resizedImage,
- const unsigned int resizedColumns,const unsigned int resizedRows,
+static MagickBooleanType resizeHorizontalFilter(MagickCLEnv clEnv,
+ cl_command_queue queue,cl_mem image,cl_uint number_channels,cl_uint columns,
+ cl_uint rows,cl_mem resizedImage,cl_uint resizedColumns,cl_uint resizedRows,
const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
- const float xFactor,MagickCLEnv clEnv,cl_command_queue queue,
- ExceptionInfo *exception)
+ const float xFactor,ExceptionInfo *exception)
{
cl_kernel
horizontalKernel;
cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
- imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
+ imageCacheLocalMemorySize = numCachedPixels * sizeof(CLQuantum) * number_channels;
totalLocalMemorySize = imageCacheLocalMemorySize;
/* local size for the pixel accumulator */
totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
/* local memory size for the gamma accumulator */
- if (matte == 0)
- gammaAccumulatorLocalMemorySize = sizeof(float);
- else
+ if ((number_channels == 4) || (number_channels == 2))
gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
+ else
+ gammaAccumulatorLocalMemorySize = sizeof(float);
totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
if (totalLocalMemorySize <= deviceLocalMemorySize)
i = 0;
clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&image);
- clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
- clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
- clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
- clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
+ clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&number_channels);
+ clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&columns);
+ clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&rows);
clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
-
- clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
- clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
+ clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&resizedColumns);
+ clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_uint), (void*)&resizedRows);
+ clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
return(status);
}
-static MagickBooleanType resizeVerticalFilter(cl_mem image,
- const unsigned int imageColumns,const unsigned int imageRows,
- const unsigned int matte,cl_mem resizedImage,
- const unsigned int resizedColumns,const unsigned int resizedRows,
+static MagickBooleanType resizeVerticalFilter(MagickCLEnv clEnv,
+ cl_command_queue queue,cl_mem image,cl_uint number_channels,cl_uint columns,
+ cl_uint rows,cl_mem resizedImage,cl_uint resizedColumns,cl_uint resizedRows,
const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
- const float yFactor,MagickCLEnv clEnv,cl_command_queue queue,
- ExceptionInfo *exception)
+ const float yFactor,ExceptionInfo *exception)
{
cl_kernel
verticalKernel;
cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
- imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
+ imageCacheLocalMemorySize = numCachedPixels * sizeof(CLQuantum) * number_channels;
totalLocalMemorySize = imageCacheLocalMemorySize;
/* local size for the pixel accumulator */
totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
/* local memory size for the gamma accumulator */
- if (matte == 0)
- gammaAccumulatorLocalMemorySize = sizeof(float);
- else
+ if ((number_channels == 4) || (number_channels == 2))
gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
+ else
+ gammaAccumulatorLocalMemorySize = sizeof(float);
totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
if (totalLocalMemorySize <= deviceLocalMemorySize)
i = 0;
clStatus = clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&image);
- clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
- clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
- clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&matte);
- clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&yFactor);
+ clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&number_channels);
+ clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&columns);
+ clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&rows);
clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
-
- clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
- clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
+ clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&resizedColumns);
+ clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_uint), (void*)&resizedRows);
+ clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&yFactor);
clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), (void*)&resizeFilterType);
clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), (void*)&resizeWindowType);
imageBuffer,
tempImageBuffer;
- cl_mem_flags
- mem_flags;
+ cl_uint
+ number_channels;
const double
*resizeFilterCoefficient;
- const void
- *inputPixels;
-
float
*mappedCoefficientBuffer,
xFactor,
*filteredImage;
unsigned int
- i,
- matte;
+ i;
void
- *filteredPixels,
- *hostPtr;
+ *filteredPixels;
outputReady = MagickFalse;
filteredImage = NULL;
imageBuffer = NULL;
tempImageBuffer = NULL;
filteredImageBuffer = NULL;
+ filteredPixels = NULL;
cubicCoefficientsBuffer = NULL;
queue = NULL;
clEnv = GetDefaultOpenCLEnv();
context = GetOpenCLContext(clEnv);
- /* Create and initialize OpenCL buffers. */
- image_view=AcquireVirtualCacheView(image,exception);
- inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
- if (inputPixels == (const void *) NULL)
- {
- (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
+ image_view = AcquireVirtualCacheView(image, exception);
+ imageBuffer=createReadBuffer(image,image_view,clEnv,context,exception);
+ if (imageBuffer == (cl_mem) NULL)
goto cleanup;
- }
-
- /* If the host pointer is aligned to the size of CLPixelPacket,
- then use the host buffer directly from the GPU; otherwise,
- create a buffer on the GPU and copy the data over */
- if (ALIGNED(inputPixels,CLPixelPacket))
- {
- mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
- }
- else
- {
- mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
- }
- /* create a CL buffer from image pixel buffer */
- length = image->columns * image->rows;
- imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
- if (clStatus != CL_SUCCESS)
- {
- (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
- goto cleanup;
- }
cubicCoefficientsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
if (clStatus != CL_SUCCESS)
}
filteredImage = CloneImage(image,resizedColumns,resizedRows,MagickTrue,exception);
- if (filteredImage == NULL)
+ if (filteredImage == (Image *) NULL)
goto cleanup;
-
- if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+ if (SetImageStorageClass(filteredImage, DirectClass, exception) != MagickTrue)
{
(void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
goto cleanup;
}
- filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
- filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
- if (filteredPixels == (void *) NULL)
- {
- (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
- goto cleanup;
- }
-
- if (ALIGNED(filteredPixels,CLPixelPacket))
- {
- mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
- hostPtr = filteredPixels;
- }
- else
- {
- mem_flags = CL_MEM_WRITE_ONLY;
- hostPtr = NULL;
- }
-
- /* create a CL buffer from image pixel buffer */
- length = filteredImage->columns * filteredImage->rows;
- filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
- if (clStatus != CL_SUCCESS)
- {
- (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+ filteredImage_view = AcquireAuthenticCacheView(filteredImage, exception);
+ filteredImageBuffer=createWriteBuffer(filteredImage,filteredImage_view,clEnv,
+ context,filteredPixels,exception);
+ if (filteredImageBuffer == (cl_mem) NULL)
goto cleanup;
- }
+ number_channels = image->number_channels;
xFactor=(float) resizedColumns/(float) image->columns;
yFactor=(float) resizedRows/(float) image->rows;
- matte=(image->alpha_trait > CopyPixelTrait)?1:0;
if (xFactor > yFactor)
{
-
- length = resizedColumns*image->rows;
- tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
+ length = resizedColumns*image->rows*number_channels;
+ tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLQuantum), NULL, &clStatus);
if (clStatus != CL_SUCCESS)
{
(void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
goto cleanup;
}
-
- status = resizeHorizontalFilter(imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, matte
- , tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows
- , resizeFilter, cubicCoefficientsBuffer
- , xFactor, clEnv, queue, exception);
+
+ status = resizeHorizontalFilter(clEnv,queue,imageBuffer,number_channels,
+ (cl_uint) image->columns,(cl_uint) image->rows,tempImageBuffer,
+ (cl_uint) resizedColumns,(cl_uint) image->rows,resizeFilter,
+ cubicCoefficientsBuffer,xFactor,exception);
if (status != MagickTrue)
goto cleanup;
-
- status = resizeVerticalFilter(tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows, matte
- , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows
- , resizeFilter, cubicCoefficientsBuffer
- , yFactor, clEnv, queue, exception);
+
+ status = resizeVerticalFilter(clEnv,queue,tempImageBuffer,number_channels,
+ (cl_uint) resizedColumns,(cl_uint) image->rows,filteredImageBuffer,
+ (cl_uint) resizedColumns,(cl_uint) resizedRows,resizeFilter,
+ cubicCoefficientsBuffer,yFactor,exception);
if (status != MagickTrue)
goto cleanup;
}
else
{
- length = image->columns*resizedRows;
- tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
+ length = image->columns*resizedRows*number_channels;
+ tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLQuantum), NULL, &clStatus);
if (clStatus != CL_SUCCESS)
{
(void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
goto cleanup;
}
- status = resizeVerticalFilter(imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, matte
- , tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows
- , resizeFilter, cubicCoefficientsBuffer
- , yFactor, clEnv, queue, exception);
+ status = resizeVerticalFilter(clEnv,queue,imageBuffer,number_channels,
+ (cl_uint) image->columns,(cl_int) image->rows,tempImageBuffer,
+ (cl_uint) image->columns,(cl_uint) resizedRows,resizeFilter,
+ cubicCoefficientsBuffer,yFactor,exception);
if (status != MagickTrue)
goto cleanup;
- status = resizeHorizontalFilter(tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows, matte
- , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows
- , resizeFilter, cubicCoefficientsBuffer
- , xFactor, clEnv, queue, exception);
+ status = resizeHorizontalFilter(clEnv,queue,tempImageBuffer,number_channels,
+ (cl_uint) image->columns, (cl_uint) resizedRows,filteredImageBuffer,
+ (cl_uint) resizedColumns, (cl_uint) resizedRows,resizeFilter,
+ cubicCoefficientsBuffer,xFactor,exception);
if (status != MagickTrue)
goto cleanup;
}
- length = resizedColumns*resizedRows;
- if (ALIGNED(filteredPixels,CLPixelPacket))
- {
- clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
- }
- else
- {
- clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
- }
- if (clStatus != CL_SUCCESS)
- {
- (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+
+ if (copyWriteBuffer(filteredImage,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse)
goto cleanup;
- }
+
outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
cleanup:
assert(image != NULL);
assert(exception != (ExceptionInfo *) NULL);
- if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
+ if ((checkAccelerateCondition(image) == MagickFalse) ||
(checkOpenCLEnvironment(exception) == MagickFalse))
return NULL;
}
/* get result */
- if (copyWriteBuffer(image,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse)
+ if (copyWriteBuffer(filteredImage,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse)
{
(void)OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
goto cleanup;