From: dirk Date: Sat, 2 Apr 2016 09:22:39 +0000 (+0200) Subject: ComputeUnsharpMaskImageSingle now supports R/RA/RGB images. X-Git-Tag: 7.0.1-0~81 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3ff41c0420bbd524069a201fe74c7cbcb47b3229;p=imagemagick ComputeUnsharpMaskImageSingle now supports R/RA/RGB images. --- diff --git a/MagickCore/accelerate-private.h b/MagickCore/accelerate-private.h index 52539b774..8686647f5 100644 --- a/MagickCore/accelerate-private.h +++ b/MagickCore/accelerate-private.h @@ -365,6 +365,12 @@ OPENCL_ENDIF() STRINGIFY( + inline __global CLQuantum *getPixel(__global CLQuantum *image, const unsigned int number_channels, + const unsigned int columns, const unsigned int x, const unsigned int y) + { + return image + (x * number_channels) + (y * columns * number_channels); + } + inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; } inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); } inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); } @@ -415,6 +421,20 @@ OPENCL_ENDIF() *alpha=getPixelAlpha(p); } + inline float4 ReadFloat4(__global CLQuantum *image, const unsigned int number_channels, + const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel) + { + const __global CLQuantum *p = getPixel(image, number_channels, columns, x, y); + + float red = 0.0f; + float green = 0.0f; + float blue = 0.0f; + float alpha = 0.0f; + + ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha); + return (float4)(red, green, blue, alpha); + } + inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels, const ChannelType channel, float red, float green, float blue, float alpha) { @@ -435,6 +455,14 @@ OPENCL_ENDIF() setPixelAlpha(p,alpha); } + inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels, + const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel, + float4 pixel) + { + __global CLQuantum *p = getPixel(image, number_channels, columns, x, y); + WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w); + } + inline float GetPixelIntensity(const unsigned int colorspace, const unsigned int method,float red,float green,float blue) { @@ -2127,7 +2155,7 @@ OPENCL_ENDIF() const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); - __global CLQuantum *p = image+(x * number_channels) + (y * columns * number_channels); + __global CLQuantum *p = getPixel(image, number_channels, columns, x, y); float red; float green; @@ -2175,7 +2203,7 @@ OPENCL_ENDIF() const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); - __global CLQuantum *p = image+(x * number_channels) + (y * columns * number_channels); + __global CLQuantum *p = getPixel(image, number_channels, columns, x, y); float blue, @@ -3485,11 +3513,11 @@ STRINGIFY( STRINGIFY( - __kernel void UnsharpMask(__global CLPixelType *im, - __global CLPixelType *filtered_im,__constant float *filter, - const unsigned int width,const unsigned int imageColumns, - const unsigned int imageRows,__local float4 *pixels,const float gain, - const float threshold, const unsigned int justBlur) + __kernel void UnsharpMask(__global CLQuantum *image,const unsigned int number_channels, + const ChannelType channel,__constant float *filter,const unsigned int width, + const unsigned int imageColumns,const unsigned int imageRows,__local float4 *pixels, + const float gain,const float threshold, const unsigned int justBlur, + __global CLQuantum *filteredImage) { const int x = get_global_id(0); const int y = get_global_id(1); @@ -3501,7 +3529,7 @@ STRINGIFY( int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius; while (row < endRow) { - int srcy = (row < 0) ? -row : row; // mirror pad + int srcy = (row < 0) ? -row : row; // mirror pad srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy; float4 value = 0.0f; @@ -3514,7 +3542,7 @@ STRINGIFY( int srcx = ix + j; srcx = (srcx < 0) ? -srcx : srcx; srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx; - value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]); + value += filter[i + j] * ReadFloat4(image, number_channels, imageColumns, srcx, srcy, channel); } ix += 8; i += 8; @@ -3523,7 +3551,7 @@ STRINGIFY( while (i < width) { int srcx = (ix < 0) ? -ix : ix; // mirror pad srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx; - value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]); + value += filter[i] * ReadFloat4(image, number_channels, imageColumns, srcx, srcy, channel); ++i; ++ix; } @@ -3539,15 +3567,9 @@ STRINGIFY( float4 value = (float4)(0.0f); int i = 0; - while (i + 7 < width) { // unrolled - value += (float4)(filter[i]) * pixels[px + (py + i) * prp]; - value += (float4)(filter[i]) * pixels[px + (py + i + 1) * prp]; - value += (float4)(filter[i]) * pixels[px + (py + i + 2) * prp]; - value += (float4)(filter[i]) * pixels[px + (py + i + 3) * prp]; - value += (float4)(filter[i]) * pixels[px + (py + i + 4) * prp]; - value += (float4)(filter[i]) * pixels[px + (py + i + 5) * prp]; - value += (float4)(filter[i]) * pixels[px + (py + i + 6) * prp]; - value += (float4)(filter[i]) * pixels[px + (py + i + 7) * prp]; + while (i + 7 < width) { + for (int j = 0; j < 8; ++j) // unrolled + value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp]; i += 8; } while (i < width) { @@ -3556,7 +3578,7 @@ STRINGIFY( } if (justBlur == 0) { // apply sharpening - float4 srcPixel = convert_float4(im[x + y * imageColumns]); + float4 srcPixel = ReadFloat4(image, number_channels, imageColumns, x, y, channel); float4 diff = srcPixel - value; float quantumThreshold = QuantumRange*threshold; @@ -3566,17 +3588,17 @@ STRINGIFY( } if ((x < imageColumns) && (y < imageRows)) - filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0), ClampToQuantum(value.s1), ClampToQuantum(value.s2), ClampToQuantum(value.s3)); + WriteFloat4(filteredImage, number_channels, imageColumns, x, y, channel, value); } ) STRINGIFY( - __kernel __attribute__((reqd_work_group_size(64, 4, 1))) - void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage, - const unsigned int number_channels,const unsigned int max_channels, - const float threshold,const int passes,const unsigned int imageWidth, - const unsigned int imageHeight) + __kernel __attribute__((reqd_work_group_size(64, 4, 1))) + void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage, + const unsigned int number_channels,const unsigned int max_channels, + const float threshold,const int passes,const unsigned int imageWidth, + const unsigned int imageHeight) { const int pad = (1 << (passes - 1)); const int tileSize = 64; diff --git a/MagickCore/accelerate.c b/MagickCore/accelerate.c index eba6ee7fa..4ce20b61b 100644 --- a/MagickCore/accelerate.c +++ b/MagickCore/accelerate.c @@ -380,6 +380,80 @@ static inline MagickBooleanType copyWriteBuffer(const Image *image, return(MagickTrue); } +static cl_mem createKernelInfo(MagickCLEnv clEnv,cl_context context, + cl_command_queue queue,const double radius,const double sigma,cl_uint *width, + ExceptionInfo *exception) +{ + char + geometry[MagickPathExtent]; + + cl_int + status; + + cl_mem + imageKernelBuffer; + + float + *kernelBufferPtr; + + KernelInfo + *kernel; + + size_t + i; + + (void) FormatLocaleString(geometry,MagickPathExtent, + "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma); + kernel=AcquireKernelInfo(geometry,exception); + if (kernel == (KernelInfo *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(), + ResourceLimitWarning,"AcquireKernelInfo failed.","."); + return((cl_mem) NULL); + } + + imageKernelBuffer=clEnv->library->clCreateBuffer(context,CL_MEM_READ_ONLY, + kernel->width*sizeof(float),NULL,&status); + if (status != CL_SUCCESS) + { + kernel=DestroyKernelInfo(kernel); + (void) OpenCLThrowMagickException(exception,GetMagickModule(), + ResourceLimitWarning,"clEnv->library->clCreateBuffer failed.","."); + return((cl_mem) NULL); + } + + kernelBufferPtr=(float*)clEnv->library->clEnqueueMapBuffer(queue, + imageKernelBuffer,CL_TRUE,CL_MAP_WRITE,0,kernel->width*sizeof(float),0, + NULL,NULL,&status); + if (status != CL_SUCCESS) + { + kernel=DestroyKernelInfo(kernel); + clEnv->library->clReleaseMemObject(imageKernelBuffer); + (void) OpenCLThrowMagickException(exception,GetMagickModule(), + ResourceLimitWarning,"clEnv->library->clEnqueueMapBuffer failed.","."); + return((cl_mem) NULL); + } + for (i = 0; i < kernel->width; i++) + { + kernelBufferPtr[i]=(float)kernel->values[i]; + } + + *width=(cl_uint) kernel->width; + kernel=DestroyKernelInfo(kernel); + + status=clEnv->library->clEnqueueUnmapMemObject(queue,imageKernelBuffer, + kernelBufferPtr,0,NULL,NULL); + if (status != CL_SUCCESS) + { + clEnv->library->clReleaseMemObject(imageKernelBuffer); + (void) OpenCLThrowMagickException(exception,GetMagickModule(), + ResourceLimitWarning,"clEnv->library->clEnqueueUnmapMemObject failed.", + "'%s'","."); + return((cl_mem) NULL); + } + return(imageKernelBuffer); +} + /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % @@ -3796,9 +3870,6 @@ static MagickBooleanType ComputeFunctionImage(Image *image, imageBuffer, parametersBuffer; - cl_mem_flags - mem_flags; - cl_uint number_channels; @@ -3811,9 +3882,6 @@ static MagickBooleanType ComputeFunctionImage(Image *image, MagickCLEnv clEnv; - MagickSizeType - length; - size_t globalWorkSize[2]; @@ -3989,9 +4057,6 @@ static MagickBooleanType ComputeGrayscaleImage(Image *image, MagickCLEnv clEnv; - MagickSizeType - length; - register ssize_t i; @@ -7050,15 +7115,12 @@ cleanup: static Image *ComputeUnsharpMaskImageSingle(const Image *image, const double radius,const double sigma,const double gain, - const double threshold,int blurOnly, ExceptionInfo *exception) + const double threshold,int blurOnly,ExceptionInfo *exception) { CacheView *filteredImage_view, *image_view; - char - geometry[MagickPathExtent]; - cl_command_queue queue; @@ -7080,46 +7142,33 @@ static Image *ComputeUnsharpMaskImageSingle(const Image *image, imageBuffer, imageKernelBuffer; - cl_mem_flags - mem_flags; - - const void - *inputPixels; + cl_uint + i, + imageColumns, + imageRows, + kernelWidth, + number_channels; float fGain, - fThreshold, - *kernelBufferPtr; + fThreshold; Image *filteredImage; - KernelInfo - *kernel; - MagickBooleanType outputReady; MagickCLEnv clEnv; - MagickSizeType - length; - void - *filteredPixels, - *hostPtr; - - unsigned int - i, - imageColumns, - imageRows, - kernelWidth; + *filteredPixels; clEnv = NULL; filteredImage = NULL; filteredImage_view = NULL; - kernel = NULL; + filteredPixels = NULL; context = NULL; imageBuffer = NULL; filteredImageBuffer = NULL; @@ -7132,110 +7181,23 @@ static Image *ComputeUnsharpMaskImageSingle(const Image *image, context = GetOpenCLContext(clEnv); queue = AcquireOpenCLCommandQueue(clEnv); - /* Create and initialize OpenCL buffers. */ - { - image_view=AcquireVirtualCacheView(image,exception); - inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception); - if (inputPixels == (const void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename); - goto cleanup; - } - - /* If the host pointer is aligned to the size of CLPixelPacket, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ - if (ALIGNED(inputPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - } - - /* create output */ - { - filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception); - assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); - goto cleanup; - } - filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception); - filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception); - if (filteredPixels == (void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); - goto cleanup; - } - - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; - hostPtr = filteredPixels; - } - else - { - mem_flags = CL_MEM_WRITE_ONLY; - hostPtr = NULL; - } - - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - } - - /* create the blur kernel */ - { - (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma); - kernel=AcquireKernelInfo(geometry,exception); - if (kernel == (KernelInfo *) NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.","."); - goto cleanup; - } + image_view=AcquireVirtualCacheView(image,exception); + imageBuffer=createReadBuffer(image,image_view,clEnv,context,exception); + if (imageBuffer == (cl_mem) NULL) + goto cleanup; - imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } + filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception); + if (filteredImage == (Image *) NULL) + goto cleanup; + filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception); + filteredImageBuffer=createWriteBuffer(filteredImage,filteredImage_view,clEnv, + context,filteredPixels,exception); + if (filteredImageBuffer == (void *) NULL) + goto cleanup; - kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); - goto cleanup; - } - for (i = 0; i < kernel->width; i++) - { - kernelBufferPtr[i] = (float) kernel->values[i]; - } - clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); - goto cleanup; - } - } + imageKernelBuffer=createKernelInfo(clEnv,context,queue,radius,sigma, + &kernelWidth,exception); { /* get the opencl kernel */ @@ -7249,9 +7211,9 @@ static Image *ComputeUnsharpMaskImageSingle(const Image *image, } { - imageColumns = (unsigned int) image->columns; - imageRows = (unsigned int) image->rows; - kernelWidth = (unsigned int) kernel->width; + imageColumns = (cl_uint) image->columns; + imageRows = (cl_uint) image->rows; + number_channels = (cl_uint) image->number_channels; fGain = (float) gain; fThreshold = (float) threshold; justBlur = blurOnly; @@ -7259,15 +7221,17 @@ static Image *ComputeUnsharpMaskImageSingle(const Image *image, /* set the kernel arguments */ i = 0; clStatus=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask); clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernel->width)),(void *) NULL); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL); clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain); clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold); clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&justBlur); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); @@ -7285,7 +7249,7 @@ static Image *ComputeUnsharpMaskImageSingle(const Image *image, wsize[0] = 8; wsize[1] = 32; - clStatus = clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskKernel, 2, NULL, gsize, wsize, 0, NULL, &event); + clStatus = clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskKernel, 2, NULL, gsize, wsize, 0, NULL, &event); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); @@ -7297,22 +7261,8 @@ static Image *ComputeUnsharpMaskImageSingle(const Image *image, } } - /* get result */ - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - length = image->columns * image->rows; - clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); - } - else - { - length = image->columns * image->rows; - clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); - } - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + if (copyWriteBuffer(filteredImage,clEnv,queue,filteredImageBuffer,filteredPixels,exception) == MagickFalse) goto cleanup; - } outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception); @@ -7323,7 +7273,6 @@ cleanup: if (filteredImage_view != NULL) filteredImage_view=DestroyCacheView(filteredImage_view); - if (kernel != NULL) kernel=DestroyKernelInfo(kernel); if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); if (imageKernelBuffer!=NULL) clEnv->library->clReleaseMemObject(imageKernelBuffer);