From d7a9cc48ee5c6c2003db9ed06ca88cbff3d97130 Mon Sep 17 00:00:00 2001 From: dirk Date: Sat, 2 Apr 2016 21:18:05 +0200 Subject: [PATCH] ComputeBlurImage now supports R/RA/RGB images. --- MagickCore/accelerate-private.h | 129 +++++++------------- MagickCore/accelerate.c | 206 ++++++++------------------------ 2 files changed, 94 insertions(+), 241 deletions(-) diff --git a/MagickCore/accelerate-private.h b/MagickCore/accelerate-private.h index bb375f80d..2ac7fbd54 100644 --- a/MagickCore/accelerate-private.h +++ b/MagickCore/accelerate-private.h @@ -432,20 +432,20 @@ OPENCL_ENDIF() const ChannelType channel, float red, float green, float blue, float alpha) { if ((channel & RedChannel) != 0) - setPixelRed(p,red); + setPixelRed(p,ClampToQuantum(red)); if (number_channels > 2) { if ((channel & GreenChannel) != 0) - setPixelGreen(p,green); + setPixelGreen(p,ClampToQuantum(green)); if ((channel & BlueChannel) != 0) - setPixelBlue(p,blue); + setPixelBlue(p,ClampToQuantum(blue)); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) - setPixelAlpha(p,alpha); + setPixelAlpha(p,ClampToQuantum(alpha)); } inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels, @@ -865,16 +865,12 @@ OPENCL_ENDIF() STRINGIFY( /* Reduce image noise and reduce detail levels by row - im: input pixels filtered_in filtered_im: output pixels - filter : convolve kernel width: convolve kernel size - channel : define which channel is blured - is_RGBA_BGRA : define the input is RGBA or BGRA */ - __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im, - const ChannelType channel, __constant float *filter, - const unsigned int width, - const unsigned int imageColumns, const unsigned int imageRows, - __local CLPixelType *temp) + __kernel void BlurRow(__global CLQuantum *image, + const unsigned int number_channels,const ChannelType channel, + __constant float *filter,const unsigned int width, + const unsigned int imageColumns,const unsigned int imageRows, + __local float4 *temp,__global float4 *tempImage) { const int x = get_global_id(0); const int y = get_global_id(1); @@ -887,51 +883,37 @@ OPENCL_ENDIF() //group coordinate const int groupX=get_local_size(0)*get_group_id(0); - const int groupY=get_local_size(1)*get_group_id(1); //parallel load and clamp for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0)) { - //int cx = ClampToCanvas(groupX+i, columns); - temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)]; + int cx = ClampToCanvas(i + groupX - radius, columns); + temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel); } // barrier barrier(CLK_LOCAL_MEM_FENCE); // only do the work if this is not a patched item - if (get_global_id(0) < columns) + if (get_global_id(0) < columns) { // compute float4 result = (float4) 0; int i = 0; - \n #ifndef UFACTOR \n - \n #define UFACTOR 8 \n - \n #endif \n - - for ( ; i+UFACTOR < width; ) + for ( ; i+7 < width; ) { - \n #pragma unroll UFACTOR\n - for (int j=0; j < UFACTOR; j++, i++) - { - result+=filter[i]*convert_float4(temp[i+get_local_id(0)]); - } + for (int j=0; j < 8; j++) + result+=filter[i+j]*temp[i+j+get_local_id(0)]; + i+=8; } for ( ; i < width; i++) - { - result+=filter[i]*convert_float4(temp[i+get_local_id(0)]); - } - - result.x = ClampToQuantum(result.x); - result.y = ClampToQuantum(result.y); - result.z = ClampToQuantum(result.z); - result.w = ClampToQuantum(result.w); + result+=filter[i]*temp[i+get_local_id(0)]; // write back to global - filtered_im[y*columns+x] = result; + tempImage[y*columns+x] = result; } } ) @@ -939,16 +921,12 @@ OPENCL_ENDIF() STRINGIFY( /* Reduce image noise and reduce detail levels by line - im: input pixels filtered_in filtered_im: output pixels - filter : convolve kernel width: convolve kernel size - channel : define which channel is blured\ - is_RGBA_BGRA : define the input is RGBA or BGRA */ - __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im, - const ChannelType channel, __constant float *filter, - const unsigned int width, - const unsigned int imageColumns, const unsigned int imageRows, - __local float4 *temp) + __kernel void BlurColumn(const __global float4 *blurRowData, + const unsigned int number_channels,const ChannelType channel, + __constant float *filter,const unsigned int width, + const unsigned int imageColumns,const unsigned int imageRows, + __local float4 *temp,__global CLQuantum *filteredImage) { const int x = get_global_id(0); const int y = get_global_id(1); @@ -968,9 +946,7 @@ OPENCL_ENDIF() //parallel load and clamp for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1)) - { temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX]; - } // barrier barrier(CLK_LOCAL_MEM_FENCE); @@ -983,31 +959,18 @@ OPENCL_ENDIF() int i = 0; - \n #ifndef UFACTOR \n - \n #define UFACTOR 8 \n - \n #endif \n - - for ( ; i+UFACTOR < width; ) + for ( ; i+7 < width; ) { - \n #pragma unroll UFACTOR \n - for (int j=0; j < UFACTOR; j++, i++) - { - result+=filter[i]*temp[i+get_local_id(1)]; - } + for (int j=0; j < 8; j++) + result+=filter[i+j]*temp[i+j+get_local_id(1)]; + i+=8; } for ( ; i < width; i++) - { result+=filter[i]*temp[i+get_local_id(1)]; - } - - result.x = ClampToQuantum(result.x); - result.y = ClampToQuantum(result.y); - result.z = ClampToQuantum(result.z); - result.w = ClampToQuantum(result.w); // write back to global - filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w); + WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result); } } ) @@ -1942,9 +1905,9 @@ OPENCL_ENDIF() const MagickFunction function,const unsigned int number_parameters, __constant float *parameters) { - const int x = get_global_id(0); - const int y = get_global_id(1); - const int columns = get_global_size(0); + const unsigned int x = get_global_id(0); + const unsigned int y = get_global_id(1); + const unsigned int columns = get_global_size(0); __global CLQuantum *p = getPixel(image, number_channels, columns, x, y); float red; @@ -1990,9 +1953,9 @@ OPENCL_ENDIF() __kernel void Grayscale(__global CLQuantum *image,const int number_channels, const unsigned int colorspace,const unsigned int method) { - const int x = get_global_id(0); - const int y = get_global_id(1); - const int columns = get_global_size(0); + const unsigned int x = get_global_id(0); + const unsigned int y = get_global_id(1); + const unsigned int columns = get_global_size(0); __global CLQuantum *p = getPixel(image, number_channels, columns, x, y); float @@ -3218,12 +3181,12 @@ STRINGIFY( STRINGIFY( __kernel void UnsharpMask(__global CLQuantum *image,const unsigned int number_channels, const ChannelType channel,__constant float *filter,const unsigned int width, - const unsigned int imageColumns,const unsigned int imageRows,__local float4 *pixels, + const unsigned int columns,const unsigned int rows,__local float4 *pixels, const float gain,const float threshold, const unsigned int justBlur, __global CLQuantum *filteredImage) { - const int x = get_global_id(0); - const int y = get_global_id(1); + const unsigned int x = get_global_id(0); + const unsigned int y = get_global_id(1); const unsigned int radius = (width - 1) / 2; @@ -3232,8 +3195,8 @@ STRINGIFY( int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius; while (row < endRow) { - int srcy = (row < 0) ? -row : row; // mirror pad - srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy; + int srcy = (row < 0) ? -row : row; // mirror pad + srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy; float4 value = 0.0f; @@ -3244,8 +3207,8 @@ STRINGIFY( for (int j = 0; j < 8; ++j) { // unrolled int srcx = ix + j; srcx = (srcx < 0) ? -srcx : srcx; - srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx; - value += filter[i + j] * ReadFloat4(image, number_channels, imageColumns, srcx, srcy, channel); + srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx; + value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel); } ix += 8; i += 8; @@ -3253,8 +3216,8 @@ STRINGIFY( while (i < width) { int srcx = (ix < 0) ? -ix : ix; // mirror pad - srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx; - value += filter[i] * ReadFloat4(image, number_channels, imageColumns, srcx, srcy, channel); + srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx; + value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel); ++i; ++ix; } @@ -3281,7 +3244,7 @@ STRINGIFY( } if (justBlur == 0) { // apply sharpening - float4 srcPixel = ReadFloat4(image, number_channels, imageColumns, x, y, channel); + float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel); float4 diff = srcPixel - value; float quantumThreshold = QuantumRange*threshold; @@ -3290,8 +3253,8 @@ STRINGIFY( value = select(srcPixel + diff * gain, srcPixel, mask); } - if ((x < imageColumns) && (y < imageRows)) - WriteFloat4(filteredImage, number_channels, imageColumns, x, y, channel, value); + if ((x < columns) && (y < rows)) + WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value); } ) diff --git a/MagickCore/accelerate.c b/MagickCore/accelerate.c index eec096b53..5e813adc3 100644 --- a/MagickCore/accelerate.c +++ b/MagickCore/accelerate.c @@ -344,7 +344,7 @@ static inline MagickBooleanType copyWriteBuffer(const Image *image, length=image->columns*image->rows*image->number_channels; if (ALIGNED(pixels,CLQuantum)) - clEnv->library->clEnqueueMapBuffer(queue,buffer,CL_TRUE,CL_MAP_READ| + clEnv->library->clEnqueueMapBuffer(queue,buffer,CL_TRUE,CL_MAP_READ | CL_MAP_WRITE,0,length*sizeof(CLQuantum),0,NULL,NULL,&status); else status=clEnv->library->clEnqueueReadBuffer(queue,buffer,CL_TRUE,0,length* @@ -686,9 +686,6 @@ static Image *ComputeBlurImage(const Image* image,const double radius, *filteredImage_view, *image_view; - char - geometry[MagickPathExtent]; - cl_command_queue queue; @@ -710,15 +707,12 @@ static Image *ComputeBlurImage(const Image* image,const double radius, imageBuffer, imageKernelBuffer, tempImageBuffer; - - cl_mem_flags - mem_flags; - const void - *inputPixels; - - float - *kernelBufferPtr; + cl_uint + imageColumns, + imageRows, + kernelWidth, + number_channels; Image *filteredImage; @@ -732,18 +726,11 @@ static Image *ComputeBlurImage(const Image* image,const double radius, MagickSizeType length; - KernelInfo - *kernel; - unsigned int - i, - imageColumns, - imageRows, - kernelWidth; + i; void - *filteredPixels, - *hostPtr; + *filteredPixels; context = NULL; filteredImage = NULL; @@ -751,11 +738,11 @@ static Image *ComputeBlurImage(const Image* image,const double radius, imageBuffer = NULL; tempImageBuffer = NULL; filteredImageBuffer = NULL; + filteredPixels = NULL; imageKernelBuffer = NULL; blurRowKernel = NULL; blurColumnKernel = NULL; queue = NULL; - kernel = NULL; outputReady = MagickFalse; @@ -763,115 +750,34 @@ static Image *ComputeBlurImage(const Image* image,const double radius, context = GetOpenCLContext(clEnv); queue = AcquireOpenCLCommandQueue(clEnv); - /* Create and initialize OpenCL buffers. */ - { - image_view=AcquireVirtualCacheView(image,exception); - inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception); - if (inputPixels == (const void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename); - goto cleanup; - } - /* If the host pointer is aligned to the size of CLPixelPacket, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ - if (ALIGNED(inputPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - } + image_view=AcquireVirtualCacheView(image,exception); + imageBuffer=createReadBuffer(image,image_view,clEnv,context,exception); + if (imageBuffer == (cl_mem) NULL) + goto cleanup; - /* create output */ + filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,exception); + if (filteredImage == (Image *) NULL) + goto cleanup; + if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) { - filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception); - assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); - goto cleanup; - } - filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception); - filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception); - if (filteredPixels == (void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); - goto cleanup; - } - - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; - hostPtr = filteredPixels; - } - else - { - mem_flags = CL_MEM_WRITE_ONLY; - hostPtr = NULL; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); + goto cleanup; } - /* create processing kernel */ - { - (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma); - kernel=AcquireKernelInfo(geometry,exception); - if (kernel == (KernelInfo *) NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.","."); - goto cleanup; - } - - imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); - goto cleanup; - } - - for (i = 0; i < kernel->width; i++) - { - kernelBufferPtr[i] = (float) kernel->values[i]; - } + filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception); + filteredImageBuffer=createWriteBuffer(filteredImage,filteredImage_view,clEnv, + context,filteredPixels,exception); + if (filteredImageBuffer == (void *) NULL) + goto cleanup; - clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); - goto cleanup; - } - } + imageKernelBuffer=createKernelInfo(clEnv,context,queue,radius,sigma, + &kernelWidth,exception); { - /* create temp buffer */ { length = image->columns * image->rows; - tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); + tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * sizeof(cl_float4), NULL, &clStatus); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); @@ -896,25 +802,26 @@ static Image *ComputeBlurImage(const Image* image,const double radius, }; } + number_channels = (cl_uint) image->number_channels; + imageColumns = (cl_uint) image->columns; + imageRows = (cl_uint) image->rows; + { /* need logic to decide this value */ int chunkSize = 256; { - imageColumns = (unsigned int) image->columns; - imageRows = (unsigned int) image->rows; - /* set the kernel arguments */ i = 0; clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels); clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask); clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - kernelWidth = (unsigned int) kernel->width; - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); @@ -932,7 +839,7 @@ static Image *ComputeBlurImage(const Image* image,const double radius, wsize[0] = chunkSize; wsize[1] = 1; - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); @@ -949,20 +856,17 @@ static Image *ComputeBlurImage(const Image* image,const double radius, int chunkSize = 256; { - imageColumns = (unsigned int) image->columns; - imageRows = (unsigned int) image->rows; - /* set the kernel arguments */ i = 0; clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels); clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask); clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - kernelWidth = (unsigned int) kernel->width; - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *) NULL); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); @@ -980,7 +884,7 @@ static Image *ComputeBlurImage(const Image* image,const double radius, wsize[0] = 1; wsize[1] = chunkSize; - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); @@ -994,22 +898,9 @@ static Image *ComputeBlurImage(const Image* image,const double radius, } - /* get result */ - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - length = image->columns * image->rows; - clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); - } - else - { - length = image->columns * image->rows; - clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); - } - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + /* get result */ + if (copyWriteBuffer(filteredImage,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse) goto cleanup; - } outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception); @@ -1027,7 +918,6 @@ cleanup: if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel); if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - if (kernel!=NULL) DestroyKernelInfo(kernel); if (outputReady == MagickFalse && filteredImage != NULL) filteredImage=DestroyImage(filteredImage); return(filteredImage); @@ -1048,7 +938,7 @@ MagickExport Image* AccelerateBlurImage(const Image *image, assert(image != NULL); assert(exception != (ExceptionInfo *) NULL); - if ((checkAccelerateConditionRGBA(image) == MagickFalse) || + if ((checkAccelerateCondition(image) == MagickFalse) || (checkOpenCLEnvironment(exception) == MagickFalse)) return NULL; -- 2.40.0