From: dirk Date: Sat, 2 Apr 2016 19:07:09 +0000 (+0200) Subject: Removed section kernels. X-Git-Tag: 7.0.1-0~79 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4c8e9b102e0e8fcff0bc57c56b1ac7a620ce4225;p=imagemagick Removed section kernels. --- diff --git a/MagickCore/accelerate-private.h b/MagickCore/accelerate-private.h index 7e2c9593d..bb375f80d 100644 --- a/MagickCore/accelerate-private.h +++ b/MagickCore/accelerate-private.h @@ -324,13 +324,6 @@ OPENCL_ENDIF() } ) - STRINGIFY( - inline int ClampToCanvasWithHalo(const int offset,const int range, const int edge, const int section) - { - return clamp(offset, section?(int)(0-edge):(int)0, section?(range-1):(range-1+edge)); - } - ) - STRINGIFY( inline CLQuantum ClampToQuantum(const float value) { @@ -869,176 +862,6 @@ OPENCL_ENDIF() %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ - STRINGIFY( - /* - Reduce image noise and reduce detail levels by row - im: input pixels filtered_in filtered_im: output pixels - filter : convolve kernel width: convolve kernel size - channel : define which channel is blured - is_RGBA_BGRA : define the input is RGBA or BGRA - */ - __kernel void BlurSectionRow(__global CLPixelType *im, __global float4 *filtered_im, - const ChannelType channel, __constant float *filter, - const unsigned int width, - const unsigned int imageColumns, const unsigned int imageRows, - __local CLPixelType *temp, - const unsigned int offsetRows, const unsigned int section) - { - const int x = get_global_id(0); - const int y = get_global_id(1); - - const int columns = imageColumns; - - const unsigned int radius = (width-1)/2; - const int wsize = get_local_size(0); - const unsigned int loadSize = wsize+width; - - //group coordinate - const int groupX=get_local_size(0)*get_group_id(0); - const int groupY=get_local_size(1)*get_group_id(1); - - //offset the input data, assuming section is 0, 1 - im += imageColumns * (offsetRows - radius * section); - - //parallel load and clamp - for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0)) - { - //int cx = ClampToCanvas(groupX+i, columns); - temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)]; - - /*if (0 && y==0 && get_group_id(1) == 0) - { - printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX); - }*/ - } - - // barrier - barrier(CLK_LOCAL_MEM_FENCE); - - // only do the work if this is not a patched item - if (get_global_id(0) < columns) - { - // compute - float4 result = (float4) 0; - - int i = 0; - - \n #ifndef UFACTOR \n - \n #define UFACTOR 8 \n - \n #endif \n - - for ( ; i+UFACTOR < width; ) - { - \n #pragma unroll UFACTOR\n - for (int j=0; j < UFACTOR; j++, i++) - { - result+=filter[i]*convert_float4(temp[i+get_local_id(0)]); - } - } - - for ( ; i < width; i++) - { - result+=filter[i]*convert_float4(temp[i+get_local_id(0)]); - } - - result.x = ClampToQuantum(result.x); - result.y = ClampToQuantum(result.y); - result.z = ClampToQuantum(result.z); - result.w = ClampToQuantum(result.w); - - // write back to global - filtered_im[y*columns+x] = result; - } - - } - ) - - STRINGIFY( - /* - Reduce image noise and reduce detail levels by line - im: input pixels filtered_in filtered_im: output pixels - filter : convolve kernel width: convolve kernel size - channel : define which channel is blured\ - is_RGBA_BGRA : define the input is RGBA or BGRA - */ - __kernel void BlurSectionColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im, - const ChannelType channel, __constant float *filter, - const unsigned int width, - const unsigned int imageColumns, const unsigned int imageRows, - __local float4 *temp, - const unsigned int offsetRows, const unsigned int section) - { - const int x = get_global_id(0); - const int y = get_global_id(1); - - //const int columns = get_global_size(0); - //const int rows = get_global_size(1); - const int columns = imageColumns; - const int rows = imageRows; - - unsigned int radius = (width-1)/2; - const int wsize = get_local_size(1); - const unsigned int loadSize = wsize+width; - - //group coordinate - const int groupX=get_local_size(0)*get_group_id(0); - const int groupY=get_local_size(1)*get_group_id(1); - //notice that get_local_size(0) is 1, so - //groupX=get_group_id(0); - - // offset the input data - blurRowData += imageColumns * radius * section; - - //parallel load and clamp - for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1)) - { - int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX; - temp[i] = *(blurRowData+pos); - } - - // barrier - barrier(CLK_LOCAL_MEM_FENCE); - - // only do the work if this is not a patched item - if (get_global_id(1) < rows) - { - // compute - float4 result = (float4) 0; - - int i = 0; - - \n #ifndef UFACTOR \n - \n #define UFACTOR 8 \n - \n #endif \n - - for ( ; i+UFACTOR < width; ) - { - \n #pragma unroll UFACTOR \n - for (int j=0; j < UFACTOR; j++, i++) - { - result+=filter[i]*temp[i+get_local_id(1)]; - } - } - for ( ; i < width; i++) - { - result+=filter[i]*temp[i+get_local_id(1)]; - } - - result.x = ClampToQuantum(result.x); - result.y = ClampToQuantum(result.y); - result.z = ClampToQuantum(result.z); - result.w = ClampToQuantum(result.w); - - // offset the output data - filtered_im += imageColumns * offsetRows; - - // write back to global - filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w); - } - - } - ) - STRINGIFY( /* Reduce image noise and reduce detail levels by row @@ -3390,94 +3213,7 @@ STRINGIFY( } } - - __kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage, - const __global float4 *blurRowData, __global CLPixelType *filtered_im, - const unsigned int imageColumns, const unsigned int imageRows, - __local float4* cachedData, __local float* cachedFilter, - const ChannelType channel, const __global float *filter, const unsigned int width, - const float gain, const float threshold, - const unsigned int offsetRows, const unsigned int section) - { - const unsigned int radius = (width-1)/2; - - // cache the pixel shared by the workgroup - const int groupX = get_group_id(0); - const int groupStartY = get_group_id(1)*get_local_size(1) - radius; - const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius; - - // offset the input data - blurRowData += imageColumns * radius * section; - - if (groupStartY >= 0 - && groupStopY < imageRows) { - event_t e = async_work_group_strided_copy(cachedData - ,blurRowData+groupStartY*imageColumns+groupX - ,groupStopY-groupStartY,imageColumns,0); - wait_group_events(1,&e); - } - else { - for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) { - int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX; - cachedData[i] = *(blurRowData + pos); - } - barrier(CLK_LOCAL_MEM_FENCE); - } - // cache the filter as well - event_t e = async_work_group_copy(cachedFilter,filter,width,0); - wait_group_events(1,&e); - - // only do the work if this is not a patched item - //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1); - const int cy = get_global_id(1); - - if (cy < imageRows) { - float4 blurredPixel = (float4) 0.0f; - - int i = 0; - - \n #ifndef UFACTOR \n - \n #define UFACTOR 8 \n - \n #endif \n - - for ( ; i+UFACTOR < width; ) - { - \n #pragma unroll UFACTOR \n - for (int j=0; j < UFACTOR; j++, i++) - { - blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)]; - } - } - - for ( ; i < width; i++) - { - blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)]; - } - - blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y) - ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w))); - - // offset the output data - inputImage += imageColumns * offsetRows; - filtered_im += imageColumns * offsetRows; - - float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]); - float4 outputPixel = inputImagePixel - blurredPixel; - - float quantumThreshold = QuantumRange*threshold; - - int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold); - outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask); - - //write back - filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y) - ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w)); - - } - - } - ) - + ) STRINGIFY( __kernel void UnsharpMask(__global CLQuantum *image,const unsigned int number_channels, diff --git a/MagickCore/accelerate.c b/MagickCore/accelerate.c index 4ce20b61b..eec096b53 100644 --- a/MagickCore/accelerate.c +++ b/MagickCore/accelerate.c @@ -257,27 +257,6 @@ inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize( return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize); } -static MagickBooleanType splitImage(const Image* image) -{ - MagickBooleanType - split; - - MagickCLEnv - clEnv; - - unsigned long - allocSize, - tempSize; - - clEnv=GetDefaultOpenCLEnv(); - - allocSize=GetOpenCLDeviceMaxMemAllocSize(clEnv); - tempSize=(unsigned long) (image->columns * image->rows * 4 * 4); - - split = ((tempSize > allocSize) ? MagickTrue : MagickFalse); - return(split); -} - static cl_mem createBuffer(const Image *image,CacheView *image_view, MagickCLEnv clEnv,cl_context context,cl_mem_flags flags,void *pixels, ExceptionInfo *exception) @@ -1054,55 +1033,139 @@ cleanup: return(filteredImage); } -static Image* ComputeBlurImageSection(const Image* image, +static Image* ComputeBlurImageSingle(const Image* image, const double radius,const double sigma,ExceptionInfo *exception) { - CacheView - *filteredImage_view, - *image_view; + return ComputeUnsharpMaskImageSingle(image,radius,sigma,0.0,0.0,1,exception); +} - char - geometry[MagickPathExtent]; +MagickExport Image* AccelerateBlurImage(const Image *image, + const double radius,const double sigma,ExceptionInfo *exception) +{ + Image + *filteredImage; - cl_command_queue - queue; + assert(image != NULL); + assert(exception != (ExceptionInfo *) NULL); + + if ((checkAccelerateConditionRGBA(image) == MagickFalse) || + (checkOpenCLEnvironment(exception) == MagickFalse)) + return NULL; + + if (radius < 12.1) + filteredImage=ComputeBlurImageSingle(image,radius,sigma,exception); + else + filteredImage=ComputeBlurImage(image,radius,sigma,exception); + return(filteredImage); +} + +/* +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% % +% % +% % +% A c c e l e r a t e C o m p o s i t e I m a g e % +% % +% % +% % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +*/ +static MagickBooleanType LaunchCompositeKernel(MagickCLEnv clEnv, + cl_command_queue queue,cl_mem imageBuffer,const unsigned int inputWidth, + const unsigned int inputHeight,const unsigned int matte, + const ChannelType channel,const CompositeOperator compose, + const cl_mem compositeImageBuffer,const unsigned int compositeWidth, + const unsigned int compositeHeight,const float destination_dissolve, + const float source_dissolve,ExceptionInfo *magick_unused(exception)) +{ cl_int clStatus; cl_kernel - blurColumnKernel, - blurRowKernel; + compositeKernel; cl_event event; - cl_mem - imageBuffer, - tempImageBuffer, - filteredImageBuffer, - imageKernelBuffer; + int + k; - cl_mem_flags - mem_flags; + size_t + global_work_size[2], + local_work_size[2]; + + unsigned int + composeOp; + + magick_unreferenced(exception); + + compositeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, + "Composite"); + + k = 0; + clStatus=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&imageBuffer); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputWidth); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputHeight); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&compositeImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeWidth); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeHeight); + composeOp = (unsigned int)compose; + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&composeOp); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(ChannelType),(void*)&channel); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&matte); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&destination_dissolve); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&source_dissolve); + + if (clStatus!=CL_SUCCESS) + return MagickFalse; + + local_work_size[0] = 64; + local_work_size[1] = 1; + + global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth, + (unsigned int) local_work_size[0]); + global_work_size[1] = inputHeight; + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL, + global_work_size, local_work_size, 0, NULL, &event); + + RecordProfileData(clEnv,CompositeKernel,event); + clEnv->library->clReleaseEvent(event); + + RelinquishOpenCLKernel(clEnv, compositeKernel); + + return((clStatus==CL_SUCCESS) ? MagickTrue : MagickFalse); +} + +static MagickBooleanType ComputeCompositeImage(Image *image, + const CompositeOperator compose,const Image *compositeImage, + const float destination_dissolve,const float source_dissolve,ExceptionInfo *exception) +{ + CacheView + *image_view; + + cl_command_queue + queue; cl_context context; - - const void - *inputPixels; - float - *kernelBufferPtr; + cl_int + clStatus; - Image - *filteredImage; + cl_mem_flags + mem_flags; - KernelInfo - *kernel; + cl_mem + compositeImageBuffer, + imageBuffer; + + const void + *composePixels; MagickBooleanType - outputReady; + outputReady, + status; MagickCLEnv clEnv; @@ -1110,357 +1173,154 @@ static Image* ComputeBlurImageSection(const Image* image, MagickSizeType length; - unsigned int - i, - imageColumns, - imageRows, - kernelWidth; - void - *filteredPixels, - *hostPtr; - - context = NULL; - filteredImage = NULL; - filteredImage_view = NULL; - imageBuffer = NULL; - tempImageBuffer = NULL; - filteredImageBuffer = NULL; - imageKernelBuffer = NULL; - blurRowKernel = NULL; - blurColumnKernel = NULL; - queue = NULL; - kernel = NULL; + *inputPixels; + status = MagickFalse; outputReady = MagickFalse; + composePixels = NULL; + imageBuffer = NULL; + compositeImageBuffer = NULL; clEnv = GetDefaultOpenCLEnv(); context = GetOpenCLContext(clEnv); queue = AcquireOpenCLCommandQueue(clEnv); /* Create and initialize OpenCL buffers. */ + image_view=AcquireAuthenticCacheView(image,exception); + inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception); + if (inputPixels == (void *) NULL) { - image_view=AcquireVirtualCacheView(image,exception); - inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception); - if (inputPixels == (const void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename); - goto cleanup; - } - /* If the host pointer is aligned to the size of CLPixelPacket, + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, + "UnableToReadPixelCache.","`%s'",image->filename); + goto cleanup; + } + + /* If the host pointer is aligned to the size of CLPixelPacket, then use the host buffer directly from the GPU; otherwise, create a buffer on the GPU and copy the data over */ - if (ALIGNED(inputPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } + if (ALIGNED(inputPixels,CLPixelPacket)) + { + mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; } - - /* create output */ + else { - filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception); - assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); - goto cleanup; - } - filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception); - filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception); - if (filteredPixels == (void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); - goto cleanup; - } - - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; - hostPtr = filteredPixels; - } - else - { - mem_flags = CL_MEM_WRITE_ONLY; - hostPtr = NULL; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } + mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; } - - /* create processing kernel */ + /* create a CL buffer from image pixel buffer */ + length = image->columns * image->rows; + imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, + length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + if (clStatus != CL_SUCCESS) { - (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma); - kernel=AcquireKernelInfo(geometry,exception); - if (kernel == (KernelInfo *) NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.","."); - goto cleanup; - } - - imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); - goto cleanup; - } - - for (i = 0; i < kernel->width; i++) - { - kernelBufferPtr[i] = (float) kernel->values[i]; - } - - clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); - goto cleanup; - } + (void) OpenCLThrowMagickException(exception, GetMagickModule(), + ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; } - { - unsigned int offsetRows; - unsigned int sec; - - /* create temp buffer */ - { - length = image->columns * (image->rows / 2 + 1 + (kernel->width-1) / 2); - tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - } - - /* get the OpenCL kernels */ - { - blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurSectionRow"); - if (blurRowKernel == NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); - goto cleanup; - }; - - blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurSectionColumn"); - if (blurColumnKernel == NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); - goto cleanup; - }; - } - - for (sec = 0; sec < 2; sec++) - { - { - /* need logic to decide this value */ - int chunkSize = 256; - - { - imageColumns = (unsigned int) image->columns; - if (sec == 0) - imageRows = (unsigned int) (image->rows / 2 + (kernel->width-1) / 2); - else - imageRows = (unsigned int) ((image->rows - image->rows / 2) + (kernel->width-1) / 2); - - offsetRows = (unsigned int) (sec * image->rows / 2); - - kernelWidth = (unsigned int) kernel->width; - - /* set the kernel arguments */ - i = 0; - clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); - goto cleanup; - } - } - - /* launch the kernel */ - { - size_t gsize[2]; - size_t wsize[2]; - - gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize); - gsize[1] = imageRows; - wsize[0] = chunkSize; - wsize[1] = 1; - - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - clEnv->library->clFlush(queue); - RecordProfileData(clEnv,BlurRowKernel,event); - clEnv->library->clReleaseEvent(event); - } - } - - { - /* need logic to decide this value */ - int chunkSize = 256; - - { - imageColumns = (unsigned int) image->columns; - if (sec == 0) - imageRows = (unsigned int) (image->rows / 2); - else - imageRows = (unsigned int) ((image->rows - image->rows / 2)); - - offsetRows = (unsigned int) (sec * image->rows / 2); - - kernelWidth = (unsigned int) kernel->width; - - /* set the kernel arguments */ - i = 0; - clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *) NULL); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows); - clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); - goto cleanup; - } - } - - /* launch the kernel */ - { - size_t gsize[2]; - size_t wsize[2]; - - gsize[0] = imageColumns; - gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize); - wsize[0] = 1; - wsize[1] = chunkSize; - - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - clEnv->library->clFlush(queue); - RecordProfileData(clEnv,BlurColumnKernel,event); - clEnv->library->clReleaseEvent(event); - } - } - } + /* Create and initialize OpenCL buffers. */ + composePixels = AcquirePixelCachePixels(compositeImage, &length, exception); + if (composePixels == (void *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, + "UnableToReadPixelCache.","`%s'",compositeImage->filename); + goto cleanup; } - /* get result */ - if (ALIGNED(filteredPixels,CLPixelPacket)) + /* If the host pointer is aligned to the size of CLPixelPacket, + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over */ + if (ALIGNED(composePixels,CLPixelPacket)) { - length = image->columns * image->rows; - clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; } else { - length = image->columns * image->rows; - clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; } + /* create a CL buffer from image pixel buffer */ + length = compositeImage->columns * compositeImage->rows; + compositeImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, + length * sizeof(CLPixelPacket), (void*)composePixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), + ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } + + status = LaunchCompositeKernel(clEnv,queue,imageBuffer, + (unsigned int) image->columns, + (unsigned int) image->rows, + (unsigned int) (image->alpha_trait > CopyPixelTrait) ? 1 : 0, + image->channel_mask, compose, compositeImageBuffer, + (unsigned int) compositeImage->columns, + (unsigned int) compositeImage->rows, + destination_dissolve,source_dissolve, + exception); - outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception); + if (status==MagickFalse) + goto cleanup; + + length = image->columns * image->rows; + if (ALIGNED(inputPixels,CLPixelPacket)) + { + clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, + CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, + NULL, &clStatus); + } + else + { + clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, + length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); + } + if (clStatus==CL_SUCCESS) + outputReady=SyncCacheViewAuthenticPixels(image_view,exception); cleanup: - OpenCLLogException(__FUNCTION__,__LINE__,exception); image_view=DestroyCacheView(image_view); - if (filteredImage_view != NULL) - filteredImage_view=DestroyCacheView(filteredImage_view); - - if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); - if (tempImageBuffer!=NULL) clEnv->library->clReleaseMemObject(tempImageBuffer); - if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); - if (imageKernelBuffer!=NULL) clEnv->library->clReleaseMemObject(imageKernelBuffer); - if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel); - if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel); - if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - if (kernel!=NULL) DestroyKernelInfo(kernel); - if (outputReady == MagickFalse) - { - if (filteredImage != NULL) - { - DestroyImage(filteredImage); - filteredImage = NULL; - } - } - return filteredImage; -} + if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); + if (compositeImageBuffer!=NULL) clEnv->library->clReleaseMemObject(compositeImageBuffer); + if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); -static Image* ComputeBlurImageSingle(const Image* image, - const double radius,const double sigma,ExceptionInfo *exception) -{ - return ComputeUnsharpMaskImageSingle(image,radius,sigma,0.0,0.0,1,exception); + return(outputReady); } -MagickExport Image* AccelerateBlurImage(const Image *image, - const double radius,const double sigma,ExceptionInfo *exception) +MagickExport MagickBooleanType AccelerateCompositeImage(Image *image, + const CompositeOperator compose,const Image *composite, + const float destination_dissolve,const float source_dissolve, + ExceptionInfo *exception) { - Image - *filteredImage; + MagickBooleanType + status; assert(image != NULL); assert(exception != (ExceptionInfo *) NULL); if ((checkAccelerateConditionRGBA(image) == MagickFalse) || (checkOpenCLEnvironment(exception) == MagickFalse)) - return NULL; + return(MagickFalse); - if (radius < 12.1) - filteredImage=ComputeBlurImageSingle(image,radius,sigma,exception); - else if (splitImage(image) && (image->rows / 2 > radius)) - filteredImage=ComputeBlurImageSection(image,radius,sigma,exception); - else - filteredImage=ComputeBlurImage(image,radius,sigma,exception); - return(filteredImage); + /* only support images with the size for now */ + if ((image->columns != composite->columns) || + (image->rows != composite->rows)) + return MagickFalse; + + switch(compose) + { + case ColorDodgeCompositeOp: + case BlendCompositeOp: + break; + default: + // unsupported compose operator, quit + return MagickFalse; + }; + + status=ComputeCompositeImage(image,compose,composite,destination_dissolve, + source_dissolve,exception); + return(status); } /* @@ -1468,135 +1328,76 @@ MagickExport Image* AccelerateBlurImage(const Image *image, % % % % % % -% A c c e l e r a t e C o m p o s i t e I m a g e % +% A c c e l e r a t e C o n t r a s t I m a g e % % % % % % % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ -static MagickBooleanType LaunchCompositeKernel(MagickCLEnv clEnv, - cl_command_queue queue,cl_mem imageBuffer,const unsigned int inputWidth, - const unsigned int inputHeight,const unsigned int matte, - const ChannelType channel,const CompositeOperator compose, - const cl_mem compositeImageBuffer,const unsigned int compositeWidth, - const unsigned int compositeHeight,const float destination_dissolve, - const float source_dissolve,ExceptionInfo *magick_unused(exception)) +static MagickBooleanType ComputeContrastImage(Image *image, + const MagickBooleanType sharpen,ExceptionInfo *exception) { + CacheView + *image_view; + + cl_command_queue + queue; + + cl_context + context; + cl_int clStatus; cl_kernel - compositeKernel; + filterKernel; cl_event event; - int - k; + cl_mem + imageBuffer; + + cl_mem_flags + mem_flags; + + MagickBooleanType + outputReady; + + MagickCLEnv + clEnv; + + MagickSizeType + length; size_t - global_work_size[2], - local_work_size[2]; + global_work_size[2]; unsigned int - composeOp; + i, + uSharpen; - magick_unreferenced(exception); + void + *inputPixels; - compositeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, - "Composite"); - - k = 0; - clStatus=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputWidth); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputHeight); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&compositeImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeWidth); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeHeight); - composeOp = (unsigned int)compose; - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&composeOp); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(ChannelType),(void*)&channel); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&matte); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&destination_dissolve); - clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&source_dissolve); - - if (clStatus!=CL_SUCCESS) - return MagickFalse; - - local_work_size[0] = 64; - local_work_size[1] = 1; - - global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth, - (unsigned int) local_work_size[0]); - global_work_size[1] = inputHeight; - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL, - global_work_size, local_work_size, 0, NULL, &event); - - RecordProfileData(clEnv,CompositeKernel,event); - clEnv->library->clReleaseEvent(event); - - RelinquishOpenCLKernel(clEnv, compositeKernel); - - return((clStatus==CL_SUCCESS) ? MagickTrue : MagickFalse); -} - -static MagickBooleanType ComputeCompositeImage(Image *image, - const CompositeOperator compose,const Image *compositeImage, - const float destination_dissolve,const float source_dissolve,ExceptionInfo *exception) -{ - CacheView - *image_view; - - cl_command_queue - queue; - - cl_context - context; - - cl_int - clStatus; - - cl_mem_flags - mem_flags; - - cl_mem - compositeImageBuffer, - imageBuffer; - - const void - *composePixels; - - MagickBooleanType - outputReady, - status; - - MagickCLEnv - clEnv; - - MagickSizeType - length; - - void - *inputPixels; - - status = MagickFalse; - outputReady = MagickFalse; - composePixels = NULL; - imageBuffer = NULL; - compositeImageBuffer = NULL; + outputReady = MagickFalse; + clEnv = NULL; + inputPixels = NULL; + context = NULL; + imageBuffer = NULL; + filterKernel = NULL; + queue = NULL; clEnv = GetDefaultOpenCLEnv(); context = GetOpenCLContext(clEnv); - queue = AcquireOpenCLCommandQueue(clEnv); /* Create and initialize OpenCL buffers. */ image_view=AcquireAuthenticCacheView(image,exception); inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception); if (inputPixels == (void *) NULL) { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, - "UnableToReadPixelCache.","`%s'",image->filename); + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename); goto cleanup; } @@ -1613,89 +1414,75 @@ static MagickBooleanType ComputeCompositeImage(Image *image, } /* create a CL buffer from image pixel buffer */ length = image->columns * image->rows; - imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, - length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), - ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - - - /* Create and initialize OpenCL buffers. */ - composePixels = AcquirePixelCachePixels(compositeImage, &length, exception); - if (composePixels == (void *) NULL) + + filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast"); + if (filterKernel == NULL) { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, - "UnableToReadPixelCache.","`%s'",compositeImage->filename); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); goto cleanup; } - /* If the host pointer is aligned to the size of CLPixelPacket, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ - if (ALIGNED(composePixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = compositeImage->columns * compositeImage->rows; - compositeImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, - length * sizeof(CLPixelPacket), (void*)composePixels, &clStatus); + i = 0; + clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); + + uSharpen = (sharpen == MagickFalse)?0:1; + clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), - ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } - - status = LaunchCompositeKernel(clEnv,queue,imageBuffer, - (unsigned int) image->columns, - (unsigned int) image->rows, - (unsigned int) (image->alpha_trait > CopyPixelTrait) ? 1 : 0, - image->channel_mask, compose, compositeImageBuffer, - (unsigned int) compositeImage->columns, - (unsigned int) compositeImage->rows, - destination_dissolve,source_dissolve, - exception); - if (status==MagickFalse) + global_work_size[0] = image->columns; + global_work_size[1] = image->rows; + /* launch the kernel */ + queue = AcquireOpenCLCommandQueue(clEnv); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; + } + clEnv->library->clFlush(queue); + RecordProfileData(clEnv,ContrastKernel,event); + clEnv->library->clReleaseEvent(event); - length = image->columns * image->rows; if (ALIGNED(inputPixels,CLPixelPacket)) { - clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, - CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, - NULL, &clStatus); + length = image->columns * image->rows; + clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } - else + else { - clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, - length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); + length = image->columns * image->rows; + clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); } - if (clStatus==CL_SUCCESS) - outputReady=SyncCacheViewAuthenticPixels(image_view,exception); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + goto cleanup; + } + outputReady=SyncCacheViewAuthenticPixels(image_view,exception); cleanup: + OpenCLLogException(__FUNCTION__,__LINE__,exception); image_view=DestroyCacheView(image_view); - if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); - if (compositeImageBuffer!=NULL) clEnv->library->clReleaseMemObject(compositeImageBuffer); - if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); + if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); + if (filterKernel!=NULL) RelinquishOpenCLKernel(clEnv, filterKernel); + if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); return(outputReady); } -MagickExport MagickBooleanType AccelerateCompositeImage(Image *image, - const CompositeOperator compose,const Image *composite, - const float destination_dissolve,const float source_dissolve, - ExceptionInfo *exception) +MagickExport MagickBooleanType AccelerateContrastImage(Image *image, + const MagickBooleanType sharpen,ExceptionInfo *exception) { MagickBooleanType status; @@ -1707,23 +1494,7 @@ MagickExport MagickBooleanType AccelerateCompositeImage(Image *image, (checkOpenCLEnvironment(exception) == MagickFalse)) return(MagickFalse); - /* only support images with the size for now */ - if ((image->columns != composite->columns) || - (image->rows != composite->rows)) - return MagickFalse; - - switch(compose) - { - case ColorDodgeCompositeOp: - case BlendCompositeOp: - break; - default: - // unsupported compose operator, quit - return MagickFalse; - }; - - status=ComputeCompositeImage(image,compose,composite,destination_dissolve, - source_dissolve,exception); + status=ComputeContrastImage(image,sharpen,exception); return(status); } @@ -1732,223 +1503,48 @@ MagickExport MagickBooleanType AccelerateCompositeImage(Image *image, % % % % % % -% A c c e l e r a t e C o n t r a s t I m a g e % +% A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e % % % % % % % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ -static MagickBooleanType ComputeContrastImage(Image *image, - const MagickBooleanType sharpen,ExceptionInfo *exception) +static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv, + cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer, + Image *image,const ChannelType channel,ExceptionInfo *exception) { - CacheView - *image_view; - - cl_command_queue - queue; - - cl_context - context; + MagickBooleanType + outputReady; cl_int clStatus; cl_kernel - filterKernel; + histogramKernel; cl_event event; - cl_mem - imageBuffer; - - cl_mem_flags - mem_flags; - - MagickBooleanType - outputReady; - - MagickCLEnv - clEnv; + cl_uint + colorspace, + method; - MagickSizeType - length; + register ssize_t + i; size_t global_work_size[2]; - unsigned int - i, - uSharpen; - - void - *inputPixels; + histogramKernel = NULL; outputReady = MagickFalse; - clEnv = NULL; - inputPixels = NULL; - context = NULL; - imageBuffer = NULL; - filterKernel = NULL; - queue = NULL; - - clEnv = GetDefaultOpenCLEnv(); - context = GetOpenCLContext(clEnv); + colorspace = image->colorspace; + method = image->intensity; - /* Create and initialize OpenCL buffers. */ - image_view=AcquireAuthenticCacheView(image,exception); - inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception); - if (inputPixels == (void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename); - goto cleanup; - } - - /* If the host pointer is aligned to the size of CLPixelPacket, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ - if (ALIGNED(inputPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - - filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast"); - if (filterKernel == NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); - goto cleanup; - } - - i = 0; - clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - - uSharpen = (sharpen == MagickFalse)?0:1; - clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); - goto cleanup; - } - - global_work_size[0] = image->columns; - global_work_size[1] = image->rows; - /* launch the kernel */ - queue = AcquireOpenCLCommandQueue(clEnv); - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - clEnv->library->clFlush(queue); - RecordProfileData(clEnv,ContrastKernel,event); - clEnv->library->clReleaseEvent(event); - - if (ALIGNED(inputPixels,CLPixelPacket)) - { - length = image->columns * image->rows; - clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); - } - else - { - length = image->columns * image->rows; - clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); - } - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); - goto cleanup; - } - outputReady=SyncCacheViewAuthenticPixels(image_view,exception); - -cleanup: - OpenCLLogException(__FUNCTION__,__LINE__,exception); - - image_view=DestroyCacheView(image_view); - - if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); - if (filterKernel!=NULL) RelinquishOpenCLKernel(clEnv, filterKernel); - if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - return(outputReady); -} - -MagickExport MagickBooleanType AccelerateContrastImage(Image *image, - const MagickBooleanType sharpen,ExceptionInfo *exception) -{ - MagickBooleanType - status; - - assert(image != NULL); - assert(exception != (ExceptionInfo *) NULL); - - if ((checkAccelerateConditionRGBA(image) == MagickFalse) || - (checkOpenCLEnvironment(exception) == MagickFalse)) - return(MagickFalse); - - status=ComputeContrastImage(image,sharpen,exception); - return(status); -} - -/* -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% % -% % -% % -% A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e % -% % -% % -% % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -*/ - -static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv, - cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer, - Image *image,const ChannelType channel,ExceptionInfo *exception) -{ - MagickBooleanType - outputReady; - - cl_int - clStatus; - - cl_kernel - histogramKernel; - - cl_event - event; - - cl_uint - colorspace, - method; - - register ssize_t - i; - - size_t - global_work_size[2]; - - histogramKernel = NULL; - - outputReady = MagickFalse; - colorspace = image->colorspace; - method = image->intensity; - - /* get the OpenCL kernel */ - histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram"); - if (histogramKernel == NULL) + /* get the OpenCL kernel */ + histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram"); + if (histogramKernel == NULL) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); goto cleanup; @@ -6244,442 +5840,48 @@ static Image* ComputeRotationalBlurImage(const Image *image,const double angle, goto cleanup; } - - /* set the kernel arguments */ - i = 0; - clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - - GetPixelInfo(image,&bias); - biasPixel.s[0] = bias.red; - biasPixel.s[1] = bias.green; - biasPixel.s[2] = bias.blue; - biasPixel.s[3] = bias.alpha; - clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float4), &biasPixel); - clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask); - - matte = (image->alpha_trait > CopyPixelTrait)?1:0; - clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &matte); - - clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter); - - clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer); - clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer); - clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); - goto cleanup; - } - - - global_work_size[0] = image->columns; - global_work_size[1] = image->rows; - /* launch the kernel */ - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, rotationalBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - clEnv->library->clFlush(queue); - RecordProfileData(clEnv,RotationalBlurKernel,event); - clEnv->library->clReleaseEvent(event); - - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - length = image->columns * image->rows; - clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); - } - else - { - length = image->columns * image->rows; - clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); - } - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); - goto cleanup; - } - outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception); - -cleanup: - OpenCLLogException(__FUNCTION__,__LINE__,exception); - - image_view=DestroyCacheView(image_view); - if (filteredImage_view != NULL) - filteredImage_view=DestroyCacheView(filteredImage_view); - - if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); - if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); - if (sinThetaBuffer!=NULL) clEnv->library->clReleaseMemObject(sinThetaBuffer); - if (cosThetaBuffer!=NULL) clEnv->library->clReleaseMemObject(cosThetaBuffer); - if (rotationalBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, rotationalBlurKernel); - if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - if (outputReady == MagickFalse) - { - if (filteredImage != NULL) - { - DestroyImage(filteredImage); - filteredImage = NULL; - } - } - return filteredImage; -} - -MagickExport Image* AccelerateRotationalBlurImage(const Image *image, - const double angle,ExceptionInfo *exception) -{ - Image - *filteredImage; - - assert(image != NULL); - assert(exception != (ExceptionInfo *) NULL); - - if ((checkAccelerateConditionRGBA(image) == MagickFalse) || - (checkOpenCLEnvironment(exception) == MagickFalse)) - return NULL; - - filteredImage=ComputeRotationalBlurImage(image,angle,exception); - return filteredImage; -} - -/* -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% % -% % -% % -% A c c e l e r a t e U n s h a r p M a s k I m a g e % -% % -% % -% % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -*/ - -static Image *ComputeUnsharpMaskImage(const Image *image,const double radius, - const double sigma,const double gain,const double threshold, - ExceptionInfo *exception) -{ - CacheView - *filteredImage_view, - *image_view; - - char - geometry[MagickPathExtent]; - - cl_command_queue - queue; - - cl_context - context; - - cl_int - clStatus; - - cl_kernel - blurRowKernel, - unsharpMaskBlurColumnKernel; - - cl_event - event; - - cl_mem - filteredImageBuffer, - imageBuffer, - imageKernelBuffer, - tempImageBuffer; - - cl_mem_flags - mem_flags; - - const void - *inputPixels; - - float - fGain, - fThreshold, - *kernelBufferPtr; - - Image - *filteredImage; - - int - chunkSize; - - KernelInfo - *kernel; - - MagickBooleanType - outputReady; - - MagickCLEnv - clEnv; - - MagickSizeType - length; - - void - *filteredPixels, - *hostPtr; - - unsigned int - i, - imageColumns, - imageRows, - kernelWidth; - - clEnv = NULL; - filteredImage = NULL; - filteredImage_view = NULL; - kernel = NULL; - context = NULL; - imageBuffer = NULL; - filteredImageBuffer = NULL; - tempImageBuffer = NULL; - imageKernelBuffer = NULL; - blurRowKernel = NULL; - unsharpMaskBlurColumnKernel = NULL; - queue = NULL; - outputReady = MagickFalse; - - clEnv = GetDefaultOpenCLEnv(); - context = GetOpenCLContext(clEnv); - queue = AcquireOpenCLCommandQueue(clEnv); - - /* Create and initialize OpenCL buffers. */ - { - image_view=AcquireVirtualCacheView(image,exception); - inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception); - if (inputPixels == (const void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename); - goto cleanup; - } - - /* If the host pointer is aligned to the size of CLPixelPacket, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ - if (ALIGNED(inputPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - } - - /* create output */ - { - filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception); - assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); - goto cleanup; - } - filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception); - filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception); - if (filteredPixels == (void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); - goto cleanup; - } - - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; - hostPtr = filteredPixels; - } - else - { - mem_flags = CL_MEM_WRITE_ONLY; - hostPtr = NULL; - } - - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - } - - /* create the blur kernel */ - { - (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma); - kernel=AcquireKernelInfo(geometry,exception); - if (kernel == (KernelInfo *) NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.","."); - goto cleanup; - } - - imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - - - kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); - goto cleanup; - } - for (i = 0; i < kernel->width; i++) - { - kernelBufferPtr[i] = (float) kernel->values[i]; - } - clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); - goto cleanup; - } - } - - { - /* create temp buffer */ - { - length = image->columns * image->rows; - tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); - goto cleanup; - } - } - - /* get the opencl kernel */ - { - blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow"); - if (blurRowKernel == NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); - goto cleanup; - }; - - unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn"); - if (unsharpMaskBlurColumnKernel == NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); - goto cleanup; - }; - } - - { - chunkSize = 256; - - imageColumns = (unsigned int) image->columns; - imageRows = (unsigned int) image->rows; - - kernelWidth = (unsigned int) kernel->width; - - /* set the kernel arguments */ - i = 0; - clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); - goto cleanup; - } - } - - /* launch the kernel */ - { - size_t gsize[2]; - size_t wsize[2]; - - gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize); - gsize[1] = image->rows; - wsize[0] = chunkSize; - wsize[1] = 1; - - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - clEnv->library->clFlush(queue); - RecordProfileData(clEnv,BlurRowKernel,event); - clEnv->library->clReleaseEvent(event); - } - - - { - chunkSize = 256; - imageColumns = (unsigned int) image->columns; - imageRows = (unsigned int) image->rows; - kernelWidth = (unsigned int) kernel->width; - fGain = (float) gain; - fThreshold = (float) threshold; - - i = 0; - clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold); + + /* set the kernel arguments */ + i = 0; + clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); + clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); - goto cleanup; - } - } + GetPixelInfo(image,&bias); + biasPixel.s[0] = bias.red; + biasPixel.s[1] = bias.green; + biasPixel.s[2] = bias.blue; + biasPixel.s[3] = bias.alpha; + clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float4), &biasPixel); + clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask); - /* launch the kernel */ - { - size_t gsize[2]; - size_t wsize[2]; + matte = (image->alpha_trait > CopyPixelTrait)?1:0; + clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &matte); - gsize[0] = image->columns; - gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize); - wsize[0] = 1; - wsize[1] = chunkSize; + clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter); + + clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer); + clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer); + clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); + goto cleanup; + } - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - clEnv->library->clFlush(queue); - RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event); - clEnv->library->clReleaseEvent(event); - } + global_work_size[0] = image->columns; + global_work_size[1] = image->rows; + /* launch the kernel */ + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, rotationalBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); + goto cleanup; } + clEnv->library->clFlush(queue); + RecordProfileData(clEnv,RotationalBlurKernel,event); + clEnv->library->clReleaseEvent(event); - /* get result */ if (ALIGNED(filteredPixels,CLPixelPacket)) { length = image->columns * image->rows; @@ -6695,7 +5897,6 @@ static Image *ComputeUnsharpMaskImage(const Image *image,const double radius, (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); goto cleanup; } - outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception); cleanup: @@ -6705,14 +5906,12 @@ cleanup: if (filteredImage_view != NULL) filteredImage_view=DestroyCacheView(filteredImage_view); - if (kernel != NULL) kernel=DestroyKernelInfo(kernel); - if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); - if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); - if (tempImageBuffer!=NULL) clEnv->library->clReleaseMemObject(tempImageBuffer); - if (imageKernelBuffer!=NULL) clEnv->library->clReleaseMemObject(imageKernelBuffer); - if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel); - if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel); - if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); + if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (imageBuffer!=NULL) clEnv->library->clReleaseMemObject(imageBuffer); + if (sinThetaBuffer!=NULL) clEnv->library->clReleaseMemObject(sinThetaBuffer); + if (cosThetaBuffer!=NULL) clEnv->library->clReleaseMemObject(cosThetaBuffer); + if (rotationalBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, rotationalBlurKernel); + if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); if (outputReady == MagickFalse) { if (filteredImage != NULL) @@ -6721,12 +5920,41 @@ cleanup: filteredImage = NULL; } } - return(filteredImage); + return filteredImage; } -static Image *ComputeUnsharpMaskImageSection(const Image *image, - const double radius,const double sigma,const double gain, - const double threshold,ExceptionInfo *exception) +MagickExport Image* AccelerateRotationalBlurImage(const Image *image, + const double angle,ExceptionInfo *exception) +{ + Image + *filteredImage; + + assert(image != NULL); + assert(exception != (ExceptionInfo *) NULL); + + if ((checkAccelerateConditionRGBA(image) == MagickFalse) || + (checkOpenCLEnvironment(exception) == MagickFalse)) + return NULL; + + filteredImage=ComputeRotationalBlurImage(image,angle,exception); + return filteredImage; +} + +/* +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% % +% % +% % +% A c c e l e r a t e U n s h a r p M a s k I m a g e % +% % +% % +% % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +*/ + +static Image *ComputeUnsharpMaskImage(const Image *image,const double radius, + const double sigma,const double gain,const double threshold, + ExceptionInfo *exception) { CacheView *filteredImage_view, @@ -6920,12 +6148,9 @@ static Image *ComputeUnsharpMaskImageSection(const Image *image, } { - unsigned int offsetRows; - unsigned int sec; - /* create temp buffer */ { - length = image->columns * (image->rows / 2 + 1 + (kernel->width-1) / 2); + length = image->columns * image->rows; tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { @@ -6936,14 +6161,14 @@ static Image *ComputeUnsharpMaskImageSection(const Image *image, /* get the opencl kernel */ { - blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection"); + blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow"); if (blurRowKernel == NULL) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); goto cleanup; }; - unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection"); + unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn"); if (unsharpMaskBlurColumnKernel == NULL) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); @@ -6951,121 +6176,103 @@ static Image *ComputeUnsharpMaskImageSection(const Image *image, }; } - for (sec = 0; sec < 2; sec++) { - { - chunkSize = 256; - - imageColumns = (unsigned int) image->columns; - if (sec == 0) - imageRows = (unsigned int) (image->rows / 2 + (kernel->width-1) / 2); - else - imageRows = (unsigned int) ((image->rows - image->rows / 2) + (kernel->width-1) / 2); + chunkSize = 256; - offsetRows = (unsigned int) (sec * image->rows / 2); + imageColumns = (unsigned int) image->columns; + imageRows = (unsigned int) image->rows; - kernelWidth = (unsigned int) kernel->width; + kernelWidth = (unsigned int) kernel->width; - /* set the kernel arguments */ - i = 0; - clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows); - clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); - goto cleanup; - } - } - /* launch the kernel */ + /* set the kernel arguments */ + i = 0; + clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL); + if (clStatus != CL_SUCCESS) { - size_t gsize[2]; - size_t wsize[2]; - - gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize); - gsize[1] = imageRows; - wsize[0] = chunkSize; - wsize[1] = 1; - - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - clEnv->library->clFlush(queue); - RecordProfileData(clEnv,BlurRowKernel,event); - clEnv->library->clReleaseEvent(event); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); + goto cleanup; } + } + /* launch the kernel */ + { + size_t gsize[2]; + size_t wsize[2]; - { - chunkSize = 256; - - imageColumns = (unsigned int) image->columns; - if (sec == 0) - imageRows = (unsigned int) (image->rows / 2); - else - imageRows = (unsigned int) (image->rows - image->rows / 2); + gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize); + gsize[1] = image->rows; + wsize[0] = chunkSize; + wsize[1] = 1; - offsetRows = (unsigned int) (sec * image->rows / 2); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); + goto cleanup; + } + clEnv->library->clFlush(queue); + RecordProfileData(clEnv,BlurRowKernel,event); + clEnv->library->clReleaseEvent(event); + } - kernelWidth = (unsigned int) kernel->width; - fGain = (float) gain; - fThreshold = (float) threshold; + { + chunkSize = 256; + imageColumns = (unsigned int) image->columns; + imageRows = (unsigned int) image->rows; + kernelWidth = (unsigned int) kernel->width; + fGain = (float) gain; + fThreshold = (float) threshold; - i = 0; - clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows); - clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec); + i = 0; + clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); - goto cleanup; - } + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); + goto cleanup; } + } - /* launch the kernel */ - { - size_t gsize[2]; - size_t wsize[2]; + /* launch the kernel */ + { + size_t gsize[2]; + size_t wsize[2]; - gsize[0] = imageColumns; - gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize); - wsize[0] = 1; - wsize[1] = chunkSize; + gsize[0] = image->columns; + gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize); + wsize[0] = 1; + wsize[1] = chunkSize; - clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - clEnv->library->clFlush(queue); - RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event); - clEnv->library->clReleaseEvent(event); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); + goto cleanup; } + clEnv->library->clFlush(queue); + RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event); + clEnv->library->clReleaseEvent(event); } + } /* get result */ @@ -7110,7 +6317,7 @@ cleanup: filteredImage = NULL; } } - return filteredImage; + return(filteredImage); } static Image *ComputeUnsharpMaskImageSingle(const Image *image, @@ -7306,9 +6513,6 @@ MagickExport Image *AccelerateUnsharpMaskImage(const Image *image, if (radius < 12.1) filteredImage=ComputeUnsharpMaskImageSingle(image,radius,sigma,gain, threshold,0,exception); - else if (splitImage(image) && (image->rows / 2 > radius)) - filteredImage=ComputeUnsharpMaskImageSection(image,radius,sigma,gain, - threshold,exception); else filteredImage=ComputeUnsharpMaskImage(image,radius,sigma,gain,threshold, exception);