From 60e79250bc35563ce447febd3b3b2f49b2a4a22c Mon Sep 17 00:00:00 2001 From: dirk Date: Fri, 1 Apr 2016 23:42:39 +0200 Subject: [PATCH] AccelerateFunctionImage now supports R/RA/RGB images. --- MagickCore/accelerate-private.h | 195 ++++++++++++++++++++------------ MagickCore/accelerate.c | 55 +++------ MagickCore/opencl-private.h | 8 +- 3 files changed, 140 insertions(+), 118 deletions(-) diff --git a/MagickCore/accelerate-private.h b/MagickCore/accelerate-private.h index 8528566a4..16b5f7037 100644 --- a/MagickCore/accelerate-private.h +++ b/MagickCore/accelerate-private.h @@ -191,10 +191,10 @@ const char* accelerateKernels = typedef enum { UndefinedFunction, - PolynomialFunction, - SinusoidFunction, ArcsinFunction, - ArctanFunction + ArctanFunction, + PolynomialFunction, + SinusoidFunction } MagickFunction; ) @@ -334,7 +334,7 @@ OPENCL_ENDIF() STRINGIFY( inline CLQuantum ClampToQuantum(const float value) { - return (CLQuantum) (clamp(value, 0.0f, (float) QuantumRange) + 0.5f); + return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f); } ) @@ -365,10 +365,10 @@ OPENCL_ENDIF() STRINGIFY( - inline CLQuantum getPixelRed(const __global CLQuantum *p) { return *p; } - inline CLQuantum getPixelGreen(const __global CLQuantum *p) { return *(p+1); } - inline CLQuantum getPixelBlue(const __global CLQuantum *p) { return *(p+2); } - inline CLQuantum getPixelAlpha(const __global CLQuantum *p) { return *(p+3); } + inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; } + inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); } + inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); } + inline float getPixelAlpha(const __global CLQuantum *p) { return (float)*(p+3); } inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; } inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; } @@ -395,6 +395,46 @@ OPENCL_ENDIF() inline float getAlphaF4(float4 p) { 
return p.w; } inline void setAlphaF4(float4* p, float value) { (*p).w = value; } + inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels, + const ChannelType channel, float *red, float *green, float *blue, float *alpha) + { /* Reads the channels selected by the channel mask from the pixel at p into the float outputs; outputs of unselected or absent channels are not written. p is const-qualified because a kernel in this patch passes a const __global pointer. */ + if ((channel & RedChannel) != 0) + *red=getPixelRed(p); + + if (number_channels > 2) + { + if ((channel & GreenChannel) != 0) + *green=getPixelGreen(p); + + if ((channel & BlueChannel) != 0) + *blue=getPixelBlue(p); + } + + if (((number_channels == 4) || (number_channels == 2)) && + ((channel & AlphaChannel) != 0)) + *alpha=(float)*(p+number_channels-1); /* last slot of the pixel: p+3 for 4 channels as before, p+1 for 2 channels where p+3 lies outside the pixel. NOTE(review): assumes alpha is stored last - confirm against the host pixel layout */ + } + + inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels, + const ChannelType channel, float red, float green, float blue, float alpha) + { /* Stores the channels selected by the channel mask back to the pixel at p. Every value goes through ClampToQuantum because the setters take a CLQuantum and the noise kernel in this patch hands over unclamped floats. NOTE(review): values coming from ApplyFunction are already clamped and get clamped a second time - confirm ClampToQuantum is idempotent when CLQuantum is float */ + if ((channel & RedChannel) != 0) + setPixelRed(p,ClampToQuantum(red)); + + if (number_channels > 2) + { + if ((channel & GreenChannel) != 0) + setPixelGreen(p,ClampToQuantum(green)); + + if ((channel & BlueChannel) != 0) + setPixelBlue(p,ClampToQuantum(blue)); + } + + if (((number_channels == 4) || (number_channels == 2)) && + ((channel & AlphaChannel) != 0)) + *(p+number_channels-1)=ClampToQuantum(alpha); /* same slot that ReadChannels reads alpha from */ + } + inline float GetPixelIntensity(const unsigned int colorspace, const unsigned int method,float red,float green,float blue) { @@ -639,7 +679,7 @@ OPENCL_ENDIF() return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); // normalized to 1.0 } - float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type, float attenuate) + float mwcGenerateDifferentialNoise(mwc64x_state_t* r, float pixel, NoiseType noise_type, float attenuate) { float alpha, @@ -669,8 +709,7 @@ OPENCL_ENDIF() gamma=sqrt(-2.0f*log(alpha)); sigma=gamma*cospi((2.0f*beta)); tau=gamma*sinpi((2.0f*beta)); - noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+ - QuantumRange*TauGaussian*tau); + noise=pixel+sqrt(pixel)*SigmaGaussian*sigma+QuantumRange*TauGaussian*tau; break; } case ImpulseNoise: @@ -679,9 +718,9 @@ OPENCL_ENDIF() noise=0.0f; else if (alpha >= 
(1.0f-(SigmaImpulse/2.0f))) - noise=(float)QuantumRange; + noise=QuantumRange; else - noise=(float)pixel; + noise=pixel; break; } case LaplacianNoise: @@ -689,17 +728,17 @@ OPENCL_ENDIF() if (alpha <= 0.5f) { if (alpha <= MagickEpsilon) - noise=(float) (pixel-QuantumRange); + noise=(pixel-QuantumRange); else - noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+ + noise=(pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+ 0.5f); break; } beta=1.0f-alpha; if (beta <= (0.5f*MagickEpsilon)) - noise=(float) (pixel+QuantumRange); + noise=(pixel+QuantumRange); else - noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f); + noise=(pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f); break; } case MultiplicativeGaussianNoise: @@ -708,8 +747,8 @@ OPENCL_ENDIF() if (alpha > MagickEpsilon) sigma=sqrt(-2.0f*log(alpha)); beta=mwcReadPseudoRandomValue(r); - noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma* - cospi((float) (2.0f*beta))/2.0f); + noise=(pixel+pixel*SigmaMultiplicativeGaussian*sigma* + cospi((2.0f*beta))/2.0f); break; } case PoissonNoise: @@ -723,12 +762,12 @@ OPENCL_ENDIF() beta=mwcReadPseudoRandomValue(r); alpha*=beta; } - noise=(float) (QuantumRange*i/SigmaPoisson); + noise=(QuantumRange*i/SigmaPoisson); break; } case RandomNoise: { - noise=(float) (QuantumRange*SigmaRandom*alpha); + noise=(QuantumRange*SigmaRandom*alpha); break; } } @@ -759,21 +798,30 @@ OPENCL_ENDIF() const __global CLQuantum *p = image + pos; __global CLQuantum *q = filteredImage + pos; + float red; + float green; + float blue; + float alpha; + + ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha); + if ((channel & RedChannel) != 0) - setPixelRed(q,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getPixelRed(p),noise_type,attenuate))); + red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate); if (number_channels > 2) { if ((channel & GreenChannel) != 0) - 
setPixelGreen(q,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getPixelGreen(p),noise_type,attenuate))); + green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate); if ((channel & BlueChannel) != 0) - setPixelBlue(q,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getPixelBlue(p),noise_type,attenuate))); + blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) - setPixelAlpha(q,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getPixelAlpha(p),noise_type,attenuate))); + alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate); + + WriteChannels(q, number_channels, channel, red, green, blue, alpha); pos += (get_local_size(0) * number_channels); count--; @@ -1239,7 +1287,7 @@ OPENCL_ENDIF() Sa=QuantumScale*alpha; Da=QuantumScale*beta; gamma=RoundToUnity(Sa+Da); /* 'Plus' blending -- not 'Over' blending */ - setAlphaF4(composite,(float) QuantumRange*gamma); + setAlphaF4(composite,QuantumRange*gamma); gamma=PerceptibleReciprocal(gamma); setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q))); setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q))); @@ -2002,21 +2050,21 @@ OPENCL_ENDIF() */ STRINGIFY( - /* apply FunctionImageChannel(braightness-contrast) */ - CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function, + CLQuantum ApplyFunction(float pixel,const MagickFunction function, const unsigned int number_parameters,__constant float *parameters) { - float4 result = (float4) 0.0f; + float result = 0.0f; + switch (function) { case PolynomialFunction: { for (unsigned int i=0; i < number_parameters; i++) - result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i]; - result *= (float4)QuantumRange; + result = result*QuantumScale*pixel + parameters[i]; + result *= QuantumRange; break; } case SinusoidFunction: @@ -2026,14 +2074,8 @@ OPENCL_ENDIF() phase = ( number_parameters >= 2 ) ? 
parameters[1] : 0.0f; ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; - result.x = QuantumRange*(ampl*sin(2.0f*MagickPI* - (freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias); - result.y = QuantumRange*(ampl*sin(2.0f*MagickPI* - (freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias); - result.z = QuantumRange*(ampl*sin(2.0f*MagickPI* - (freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias); - result.w = QuantumRange*(ampl*sin(2.0f*MagickPI* - (freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias); + result = QuantumRange*(ampl*sin(2.0f*MagickPI* + (freq*QuantumScale*pixel + phase/360.0f)) + bias); break; } case ArcsinFunction: @@ -2044,28 +2086,11 @@ OPENCL_ENDIF() range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; - result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center); - result.x = range/MagickPI*asin(result.x)+bias; - result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x; - result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x; - - result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center); - result.y = range/MagickPI*asin(result.y)+bias; - result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y; - result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y; - - result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center); - result.z = range/MagickPI*asin(result.z)+bias; - result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x; - result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x; - - - result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center); - result.w = range/MagickPI*asin(result.w)+bias; - result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w; - result.w = ( result.w >= 1.0f ) ? 
bias + range/2.0f : result.w; - - result *= (float4)QuantumRange; + result = 2.0f/width*(QuantumScale*pixel - center); /* asin argument; values outside (-1,1) saturate to bias -/+ range/2 instead of being passed to asin, and the tests now look at the argument rather than at the transformed result */ + result = ( result <= -1.0f ) ? bias - range/2.0f : + ( result >= 1.0f ) ? bias + range/2.0f : + range/MagickPI*asin(result)+bias; + result *= QuantumRange; break; } case ArctanFunction: @@ -2075,15 +2100,14 @@ OPENCL_ENDIF() center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f; range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; - result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center); - result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias); + result = MagickPI*slope*(QuantumScale*pixel-center); + result = QuantumRange*(range/MagickPI*atan(result) + bias); break; } case UndefinedFunction: break; } - return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y), - ClampToQuantum(result.z), ClampToQuantum(result.w)); + return(ClampToQuantum(result)); } ) @@ -2095,15 +2119,40 @@ OPENCL_ENDIF() number_parameters : numbers of parameters parameters : the parameter */ - __kernel void ComputeFunction(__global CLPixelType *im, - const ChannelType channel,const MagickFunction function, - const unsigned int number_parameters, __constant float *parameters) + __kernel void ComputeFunction(__global CLQuantum *image, + const unsigned int number_channels,const ChannelType channel, + const MagickFunction function,const unsigned int number_parameters, + __constant float *parameters) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); - const int c = x + y * columns; - im[c] = ApplyFunction(im[c], function, number_parameters, parameters); + __global CLQuantum *p = image+(x * number_channels) + (y * columns * number_channels); + + float red = 0.0f; /* the four channel values are zero-initialised: ReadChannels writes only the selected channels, yet all four are passed to WriteChannels by value */ + float green = 0.0f; + float blue = 0.0f; + float alpha = 0.0f; + + ReadChannels(p, 
number_channels, channel, &red, &green, &blue, &alpha); + + if ((channel & RedChannel) != 0) + red=ApplyFunction(red, function, number_parameters, parameters); + + if (number_channels > 2) + { + if ((channel & GreenChannel) != 0) + green=ApplyFunction(green, function, number_parameters, parameters); + + if ((channel & BlueChannel) != 0) + blue=ApplyFunction(blue, function, number_parameters, parameters); + } + + if (((number_channels == 4) || (number_channels == 2)) && + ((channel & AlphaChannel) != 0)) + alpha=ApplyFunction(alpha, function, number_parameters, parameters); + + WriteChannels(p, number_channels, channel, red, green, blue, alpha); } ) @@ -2133,9 +2182,9 @@ OPENCL_ENDIF() green, red; - red=(float)getPixelRed(p); - green=(float)getPixelGreen(p); - blue=(float)getPixelBlue(p); + red=getPixelRed(p); + green=getPixelGreen(p); + blue=getPixelBlue(p); CLQuantum intensity=ClampToQuantum(GetPixelIntensity(colorspace, method, red, green, blue)); diff --git a/MagickCore/accelerate.c b/MagickCore/accelerate.c index 7f089c042..eba6ee7fa 100644 --- a/MagickCore/accelerate.c +++ b/MagickCore/accelerate.c @@ -3799,6 +3799,9 @@ static MagickBooleanType ComputeFunctionImage(Image *image, cl_mem_flags mem_flags; + cl_uint + number_channels; + float *parametersBufferPtr; @@ -3827,37 +3830,16 @@ static MagickBooleanType ComputeFunctionImage(Image *image, queue = NULL; imageBuffer = NULL; parametersBuffer = NULL; + pixels = NULL; clEnv = GetDefaultOpenCLEnv(); context = GetOpenCLContext(clEnv); image_view=AcquireAuthenticCacheView(image,exception); - pixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception); - if (pixels == (void *) NULL) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), CacheWarning, - "GetPixelCachePixels failed.", - "'%s'", image->filename); - goto cleanup; - } - - - if (ALIGNED(pixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = 
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = image->columns * image->rows; - imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + imageBuffer=createReadWriteBuffer(image,image_view,clEnv,context,pixels, + exception); + if (imageBuffer == (cl_mem) NULL) goto cleanup; - } parametersBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) @@ -3894,9 +3876,12 @@ static MagickBooleanType ComputeFunctionImage(Image *image, goto cleanup; } + number_channels = (cl_uint) image->number_channels; + /* set the kernel arguments */ i = 0; clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_uint),(void *)&number_channels); clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask); clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function); clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters); @@ -3920,21 +3905,9 @@ static MagickBooleanType ComputeFunctionImage(Image *image, RecordProfileData(clEnv,ComputeFunctionKernel,event); clEnv->library->clReleaseEvent(event); - if (ALIGNED(pixels,CLPixelPacket)) - { - length = image->columns * image->rows; - clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); - } - else - { - length = image->columns * image->rows; - clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length 
* sizeof(CLPixelPacket), pixels, 0, NULL, NULL); - } - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); - goto cleanup; - } + if (copyWriteBuffer(image,clEnv,queue,imageBuffer,pixels,exception) == MagickFalse) + goto cleanup; + status=SyncCacheViewAuthenticPixels(image_view,exception); cleanup: @@ -3960,7 +3933,7 @@ MagickExport MagickBooleanType AccelerateFunctionImage(Image *image, assert(image != NULL); assert(exception != (ExceptionInfo *) NULL); - if ((checkAccelerateConditionRGBA(image) == MagickFalse) || + if ((checkAccelerateCondition(image) == MagickFalse) || (checkOpenCLEnvironment(exception) == MagickFalse)) return(MagickFalse); diff --git a/MagickCore/opencl-private.h b/MagickCore/opencl-private.h index bfd6b8d72..6558728bd 100644 --- a/MagickCore/opencl-private.h +++ b/MagickCore/opencl-private.h @@ -315,7 +315,7 @@ struct _MagickCLEnv { #if defined(MAGICKCORE_HDRI_SUPPORT) #define CLOptions "-cl-single-precision-constant -cl-mad-enable -DMAGICKCORE_HDRI_SUPPORT=1 "\ - "-DCLQuantum=float -DCLSignedQuantum=float -DCLPixelType=float4 -DQuantumRange=%f " \ + "-DCLQuantum=float -DCLSignedQuantum=float -DCLPixelType=float4 -DQuantumRange=%ff " \ "-DQuantumScale=%f -DCharQuantumScale=%f -DMagickEpsilon=%f -DMagickPI=%f "\ "-DMaxMap=%u -DMAGICKCORE_QUANTUM_DEPTH=%u" #define CLQuantum cl_float @@ -331,7 +331,7 @@ struct _MagickCLEnv { #define CLCharQuantumScale 1.0f #elif (MAGICKCORE_QUANTUM_DEPTH == 16) #define CLOptions "-cl-single-precision-constant -cl-mad-enable " \ - "-DCLQuantum=ushort -DCLSignedQuantum=short -DCLPixelType=ushort4 -DQuantumRange=%f "\ + "-DCLQuantum=ushort -DCLSignedQuantum=short -DCLPixelType=ushort4 -DQuantumRange=%ff "\ "-DQuantumScale=%f -DCharQuantumScale=%f -DMagickEpsilon=%f -DMagickPI=%f "\ "-DMaxMap=%u -DMAGICKCORE_QUANTUM_DEPTH=%u" #define CLQuantum cl_ushort @@ -339,7 +339,7 @@ struct 
_MagickCLEnv { #define CLCharQuantumScale 257.0f #elif (MAGICKCORE_QUANTUM_DEPTH == 32) #define CLOptions "-cl-single-precision-constant -cl-mad-enable " \ - "-DCLQuantum=uint -DCLSignedQuantum=int -DCLPixelType=uint4 -DQuantumRange=%f "\ + "-DCLQuantum=uint -DCLSignedQuantum=int -DCLPixelType=uint4 -DQuantumRange=%ff "\ "-DQuantumScale=%f -DCharQuantumScale=%f -DMagickEpsilon=%f -DMagickPI=%f "\ "-DMaxMap=%u -DMAGICKCORE_QUANTUM_DEPTH=%u" #define CLQuantum cl_uint @@ -347,7 +347,7 @@ struct _MagickCLEnv { #define CLCharQuantumScale 16843009.0f #elif (MAGICKCORE_QUANTUM_DEPTH == 64) #define CLOptions "-cl-single-precision-constant -cl-mad-enable " \ - "-DCLQuantum=ulong -DCLSignedQuantum=long -DCLPixelType=ulong4 -DQuantumRange=%f "\ + "-DCLQuantum=ulong -DCLSignedQuantum=long -DCLPixelType=ulong4 -DQuantumRange=%ff "\ "-DQuantumScale=%f -DCharQuantumScale=%f -DMagickEpsilon=%f -DMagickPI=%f "\ "-DMaxMap=%u -DMAGICKCORE_QUANTUM_DEPTH=%u" #define CLQuantum cl_ulong -- 2.40.0