From 0c832c68ae950d35e8166671cdcb29ec46ef7b3f Mon Sep 17 00:00:00 2001 From: cristy Date: Fri, 7 Mar 2014 22:21:04 +0000 Subject: [PATCH] --- MagickCore/accelerate-private.h | 889 +++++- MagickCore/accelerate.c | 4569 ++++++++++++++++++++++--------- MagickCore/accelerate.h | 13 +- MagickCore/opencl-private.h | 313 ++- MagickCore/opencl.c | 542 +++- MagickCore/version.h | 4 +- configure | 5 +- m4/ax_opencl.m4 | 3 + 8 files changed, 4765 insertions(+), 1573 deletions(-) diff --git a/MagickCore/accelerate-private.h b/MagickCore/accelerate-private.h index fda847282..f882701c2 100644 --- a/MagickCore/accelerate-private.h +++ b/MagickCore/accelerate-private.h @@ -152,6 +152,69 @@ const char* accelerateKernels = OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w)) + STRINGIFY( + typedef enum + { + UndefinedPixelIntensityMethod = 0, + AveragePixelIntensityMethod, + BrightnessPixelIntensityMethod, + LightnessPixelIntensityMethod, + Rec601LumaPixelIntensityMethod, + Rec601LuminancePixelIntensityMethod, + Rec709LumaPixelIntensityMethod, + Rec709LuminancePixelIntensityMethod, + RMSPixelIntensityMethod, + MSPixelIntensityMethod + } PixelIntensityMethod; + ) + + STRINGIFY( + typedef enum + { + UndefinedColorspace, + RGBColorspace, /* Linear RGB colorspace */ + GRAYColorspace, /* greyscale (linear) image (faked 1 channel) */ + TransparentColorspace, + OHTAColorspace, + LabColorspace, + XYZColorspace, + YCbCrColorspace, + YCCColorspace, + YIQColorspace, + YPbPrColorspace, + YUVColorspace, + CMYKColorspace, /* negared linear RGB with black separated */ + sRGBColorspace, /* Default: non-lienar sRGB colorspace */ + HSBColorspace, + HSLColorspace, + HWBColorspace, + Rec601LumaColorspace, + Rec601YCbCrColorspace, + Rec709LumaColorspace, + Rec709YCbCrColorspace, + LogColorspace, + CMYColorspace, /* negated linear RGB colorspace */ + LuvColorspace, + HCLColorspace, + LCHColorspace, /* alias for LCHuv */ + LMSColorspace, + LCHabColorspace, /* Cylindrical (Polar) Lab */ + 
LCHuvColorspace, /* Cylindrical (Polar) Luv */ + scRGBColorspace, + HSIColorspace, + HSVColorspace, /* alias for HSB */ + HCLpColorspace, + YDbDrColorspace + } ColorspaceType; + ) + + STRINGIFY( + inline float RoundToUnity(const float value) + { + return clamp(value,0.0f,1.0f); + } + ) + STRINGIFY( inline CLQuantum getBlue(CLPixelType p) { return p.x; } @@ -174,20 +237,106 @@ const char* accelerateKernels = inline float getOpacityF4(float4 p) { return p.w; } inline void setOpacityF4(float4* p, float value) { (*p).w = value; } - inline float GetPixelIntensity(int colorspace, CLPixelType p) + inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; } + + inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p) { - // this is for default intensity and sRGB (not RGB) color space float red = getRed(p); float green = getGreen(p); float blue = getBlue(p); - if (colorspace == 0) - return 0.212656*red+0.715158*green+0.072186*blue; - else + float intensity; + + if (colorspace == GRAYColorspace) + return red; + + switch (method) { - // need encode gamma + case AveragePixelIntensityMethod: + { + intensity=(red+green+blue)/3.0; + break; + } + case BrightnessPixelIntensityMethod: + { + intensity=max(max(red,green),blue); + break; + } + case LightnessPixelIntensityMethod: + { + intensity=(min(min(red,green),blue)+ + max(max(red,green),blue))/2.0; + break; + } + case MSPixelIntensityMethod: + { + intensity=(float) (((float) red*red+green*green+blue*blue)/ + (3.0*QuantumRange)); + break; + } + case Rec601LumaPixelIntensityMethod: + { + /* + if (image->colorspace == RGBColorspace) + { + red=EncodePixelGamma(red); + green=EncodePixelGamma(green); + blue=EncodePixelGamma(blue); + } + */ + intensity=0.298839*red+0.586811*green+0.114350*blue; + break; + } + case Rec601LuminancePixelIntensityMethod: + { + /* + if (image->colorspace == sRGBColorspace) + { + red=DecodePixelGamma(red); + 
green=DecodePixelGamma(green); + blue=DecodePixelGamma(blue); + } + */ + intensity=0.298839*red+0.586811*green+0.114350*blue; + break; + } + case Rec709LumaPixelIntensityMethod: + default: + { + /* + if (image->colorspace == RGBColorspace) + { + red=EncodePixelGamma(red); + green=EncodePixelGamma(green); + blue=EncodePixelGamma(blue); + } + */ + intensity=0.212656*red+0.715158*green+0.072186*blue; + break; + } + case Rec709LuminancePixelIntensityMethod: + { + /* + if (image->colorspace == sRGBColorspace) + { + red=DecodePixelGamma(red); + green=DecodePixelGamma(green); + blue=DecodePixelGamma(blue); + } + */ + intensity=0.212656*red+0.715158*green+0.072186*blue; + break; + } + case RMSPixelIntensityMethod: + { + intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/ + sqrt(3.0)); + break; + } } - return 0.0; + + return intensity; + } ) @@ -317,6 +466,7 @@ const char* accelerateKernels = STRINGIFY( __kernel void Convolve(const __global CLPixelType *input, __global CLPixelType *output, + const uint imageWidth, const uint imageHeight, __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight, const uint matte, const ChannelType channel) { @@ -324,9 +474,10 @@ const char* accelerateKernels = imageIndex.x = get_global_id(0); imageIndex.y = get_global_id(1); + /* unsigned int imageWidth = get_global_size(0); unsigned int imageHeight = get_global_size(1); - + */ if (imageIndex.x >= imageWidth || imageIndex.y >= imageHeight) return; @@ -427,8 +578,8 @@ const char* accelerateKernels = case PolynomialFunction: { for (unsigned int i=0; i < number_parameters; i++) - result = result*QuantumScale*convert_float4(pixel) + parameters[i]; - result *= QuantumRange; + result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i]; + result *= (float4)QuantumRange; break; } case SinusoidFunction: @@ -438,8 +589,14 @@ const char* accelerateKernels = phase = ( number_parameters >= 2 ) ? 
parameters[1] : 0.0f; ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; - result = QuantumRange*(ampl*sin(2.0f*MagickPI* - (freq*QuantumScale*convert_float4(pixel) + phase/360.0f)) + bias); + result.x = QuantumRange*(ampl*sin(2.0f*MagickPI* + (freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias); + result.y = QuantumRange*(ampl*sin(2.0f*MagickPI* + (freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias); + result.z = QuantumRange*(ampl*sin(2.0f*MagickPI* + (freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias); + result.w = QuantumRange*(ampl*sin(2.0f*MagickPI* + (freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias); break; } case ArcsinFunction: @@ -449,18 +606,29 @@ const char* accelerateKernels = center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f; range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; - result = 2.0f/width*(QuantumScale*convert_float4(pixel) - center); - result = range/MagickPI*asin(result)+bias; + + result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center); + result.x = range/MagickPI*asin(result.x)+bias; result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x; result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x; + + result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center); + result.y = range/MagickPI*asin(result.y)+bias; result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y; result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y; + + result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center); + result.z = range/MagickPI*asin(result.z)+bias; result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x; result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x; + + + result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center); + result.w = range/MagickPI*asin(result.w)+bias; result.w = ( result.w <= -1.0f ) ? 
bias - range/2.0f : result.w; result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w; - - result *= QuantumRange; + + result *= (float4)QuantumRange; break; } case ArctanFunction: @@ -470,8 +638,8 @@ const char* accelerateKernels = center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f; range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; - result = MagickPI*slope*(QuantumScale*convert_float4(pixel)-center); - result = QuantumRange*(range/MagickPI*atan(result) + bias); + result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center); + result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias); break; } case UndefinedFunction: @@ -502,6 +670,73 @@ const char* accelerateKernels = } ) + STRINGIFY( + /* + */ + __kernel void Stretch(__global CLPixelType * restrict im, + const ChannelType channel, + __global CLPixelType * restrict stretch_map, + const float4 white, const float4 black) + { + const int x = get_global_id(0); + const int y = get_global_id(1); + const int columns = get_global_size(0); + const int c = x + y * columns; + + uint ePos; + CLPixelType oValue, eValue; + CLQuantum red, green, blue, opacity; + + //read from global + oValue=im[c]; + + if ((channel & RedChannel) != 0) + { + if (getRedF4(white) != getRedF4(black)) + { + ePos = ScaleQuantumToMap(getRed(oValue)); + eValue = stretch_map[ePos]; + red = getRed(eValue); + } + } + + if ((channel & GreenChannel) != 0) + { + if (getGreenF4(white) != getGreenF4(black)) + { + ePos = ScaleQuantumToMap(getGreen(oValue)); + eValue = stretch_map[ePos]; + green = getGreen(eValue); + } + } + + if ((channel & BlueChannel) != 0) + { + if (getBlueF4(white) != getBlueF4(black)) + { + ePos = ScaleQuantumToMap(getBlue(oValue)); + eValue = stretch_map[ePos]; + blue = getBlue(eValue); + } + } + + if ((channel & OpacityChannel) != 0) + { + if (getOpacityF4(white) != getOpacityF4(black)) 
+ { + ePos = ScaleQuantumToMap(getOpacity(oValue)); + eValue = stretch_map[ePos]; + opacity = getOpacity(eValue); + } + } + + //write back + im[c]=(CLPixelType)(blue, green, red, opacity); + + } + ) + + STRINGIFY( /* */ @@ -555,7 +790,9 @@ const char* accelerateKernels = /* */ __kernel void Histogram(__global CLPixelType * restrict im, - const ChannelType channel, const int colorspace, + const ChannelType channel, + const int method, + const int colorspace, __global uint4 * restrict histogram) { const int x = get_global_id(0); @@ -564,7 +801,7 @@ const char* accelerateKernels = const int c = x + y * columns; if ((channel & SyncChannels) != 0) { - float intensity = GetPixelIntensity(colorspace,im[c]); + float intensity = GetPixelIntensity(method, colorspace,im[c]); uint pos = ScaleQuantumToMap(ClampToQuantum(intensity)); atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position } @@ -1323,47 +1560,6 @@ const char* accelerateKernels = } ) - STRINGIFY( - typedef enum - { - UndefinedColorspace, - RGBColorspace, /* Linear RGB colorspace */ - GRAYColorspace, /* greyscale (linear) image (faked 1 channel) */ - TransparentColorspace, - OHTAColorspace, - LabColorspace, - XYZColorspace, - YCbCrColorspace, - YCCColorspace, - YIQColorspace, - YPbPrColorspace, - YUVColorspace, - CMYKColorspace, /* negared linear RGB with black separated */ - sRGBColorspace, /* Default: non-lienar sRGB colorspace */ - HSBColorspace, - HSLColorspace, - HWBColorspace, - Rec601LumaColorspace, - Rec601YCbCrColorspace, - Rec709LumaColorspace, - Rec709YCbCrColorspace, - LogColorspace, - CMYColorspace, /* negated linear RGB colorspace */ - LuvColorspace, - HCLColorspace, - LCHColorspace, /* alias for LCHuv */ - LMSColorspace, - LCHabColorspace, /* Cylindrical (Polar) Lab */ - LCHuvColorspace, /* Cylindrical (Polar) Luv */ - scRGBColorspace, - HSIColorspace, - HSVColorspace, /* alias for HSB */ - HCLpColorspace, - YDbDrColorspace - } ColorspaceType; - ) - - STRINGIFY( inline float3 
ConvertRGBToHSB(CLPixelType pixel) { @@ -1385,8 +1581,8 @@ const char* accelerateKernels = HueSaturationBrightness.z=QuantumScale*tmax; if (delta != 0.0f) { - HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f)); - HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta; + HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f)); + HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta; HueSaturationBrightness.x/=6.0f; HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f; } @@ -1421,18 +1617,18 @@ const char* accelerateKernels = float clamped_q = ClampToQuantum(QuantumRange*q); int ih = (int)h; setRed(&rgb, (ih == 1)?clamped_q: - (ih == 2 || ih == 3)?clamped_p: - (ih == 4)?clamped_t: + (ih == 2 || ih == 3)?clamped_p: + (ih == 4)?clamped_t: clampedBrightness); setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness: - (ih == 3)?clamped_q: - (ih == 4 || ih == 5)?clamped_p: + (ih == 3)?clamped_q: + (ih == 4 || ih == 5)?clamped_p: clamped_t); setBlue(&rgb, (ih == 2)?clamped_t: - (ih == 3 || ih == 4)?clampedBrightness: - (ih == 5)?clamped_q: + (ih == 3 || ih == 4)?clampedBrightness: + (ih == 5)?clamped_q: clamped_p); } return rgb; @@ -1654,6 +1850,162 @@ const char* accelerateKernels = } ) + STRINGIFY( + __kernel void Negate(__global CLPixelType *im, + const ChannelType channel) + { + + const int x = get_global_id(0); + const int y = get_global_id(1); + const int columns = get_global_size(0); + const int c = x + y * columns; + + CLPixelType pixel = im[c]; + + CLQuantum + blue, + green, + red; + + red=getRed(pixel); + green=getGreen(pixel); + blue=getBlue(pixel); + + CLPixelType filteredPixel; + + if ((channel & RedChannel) !=0) + setRed(&filteredPixel, QuantumRange-red); + if ((channel & GreenChannel) !=0) + setGreen(&filteredPixel, QuantumRange-green); + if ((channel & BlueChannel) !=0) + setBlue(&filteredPixel, QuantumRange-blue); + + filteredPixel.w = pixel.w; + + 
im[c] = filteredPixel; + } + ) + + STRINGIFY( + __kernel void Grayscale(__global CLPixelType *im, + const int method, const int colorspace) + { + + const int x = get_global_id(0); + const int y = get_global_id(1); + const int columns = get_global_size(0); + const int c = x + y * columns; + + CLPixelType pixel = im[c]; + + float + blue, + green, + intensity, + red; + + red=(float)getRed(pixel); + green=(float)getGreen(pixel); + blue=(float)getBlue(pixel); + + intensity=0.0; + + CLPixelType filteredPixel; + + switch (method) + { + case AveragePixelIntensityMethod: + { + intensity=(red+green+blue)/3.0; + break; + } + case BrightnessPixelIntensityMethod: + { + intensity=max(max(red,green),blue); + break; + } + case LightnessPixelIntensityMethod: + { + intensity=(min(min(red,green),blue)+ + max(max(red,green),blue))/2.0; + break; + } + case MSPixelIntensityMethod: + { + intensity=(float) (((float) red*red+green*green+ + blue*blue)/(3.0*QuantumRange)); + break; + } + case Rec601LumaPixelIntensityMethod: + { + /* + if (colorspace == RGBColorspace) + { + red=EncodePixelGamma(red); + green=EncodePixelGamma(green); + blue=EncodePixelGamma(blue); + } + */ + intensity=0.298839*red+0.586811*green+0.114350*blue; + break; + } + case Rec601LuminancePixelIntensityMethod: + { + /* + if (image->colorspace == sRGBColorspace) + { + red=DecodePixelGamma(red); + green=DecodePixelGamma(green); + blue=DecodePixelGamma(blue); + } + */ + intensity=0.298839*red+0.586811*green+0.114350*blue; + break; + } + case Rec709LumaPixelIntensityMethod: + default: + { + /* + if (image->colorspace == RGBColorspace) + { + red=EncodePixelGamma(red); + green=EncodePixelGamma(green); + blue=EncodePixelGamma(blue); + } + */ + intensity=0.212656*red+0.715158*green+0.072186*blue; + break; + } + case Rec709LuminancePixelIntensityMethod: + { + /* + if (image->colorspace == sRGBColorspace) + { + red=DecodePixelGamma(red); + green=DecodePixelGamma(green); + blue=DecodePixelGamma(blue); + } + */ + 
intensity=0.212656*red+0.715158*green+0.072186*blue; + break; + } + case RMSPixelIntensityMethod: + { + intensity=(float) (sqrt((float) red*red+green*green+ + blue*blue)/sqrt(3.0)); + break; + } + + } + + setGray(&filteredPixel, ClampToQuantum(intensity)); + + filteredPixel.w = pixel.w; + + im[c] = filteredPixel; + } + ) + STRINGIFY( // Based on Box from resize.c float BoxResizeFilter(const float x) @@ -1881,7 +2233,7 @@ const char* accelerateKernels = const unsigned int actualNumPixelToCompute = stopX - startX; // calculate the range of input image pixels to cache - float scale = max(1.0/xFactor+MagickEpsilon ,1.0f); + float scale = max(1.0f/xFactor+MagickEpsilon ,1.0f); const float support = max(scale*resizeFilterSupport,0.5f); scale = PerceptibleReciprocal(scale); @@ -2074,7 +2426,7 @@ const char* accelerateKernels = const unsigned int actualNumPixelToCompute = stopY - startY; // calculate the range of input image pixels to cache - float scale = max(1.0/yFactor+MagickEpsilon ,1.0f); + float scale = max(1.0f/yFactor+MagickEpsilon ,1.0f); const float support = max(scale*resizeFilterSupport,0.5f); scale = PerceptibleReciprocal(scale); @@ -2249,6 +2601,18 @@ const char* accelerateKernels = STRINGIFY( + inline float GetPseudoRandomValue(uint4* seed, const float normalizeRand) { + uint4 s = *seed; + do { + unsigned int alpha = (unsigned int) (s.y ^ (s.y << 11)); + s.y=s.z; + s.z=s.w; + s.w=s.x; + s.x = (s.x ^ (s.x >> 19)) ^ (alpha ^ (alpha >> 8)); + } while (s.x == ~0UL); + *seed = s; + return (normalizeRand*s.x); + } __kernel void randomNumberGeneratorKernel(__global uint* seeds, const float normalizeRand , __global float* randomNumbers, const uint init @@ -2317,7 +2681,7 @@ const char* accelerateKernels = } RandomNumbers; - float GetPseudoRandomValue(RandomNumbers* r) { + float ReadPseudoRandomValue(RandomNumbers* r) { float v = *r->rns; r->rns++; return v; @@ -2343,7 +2707,7 @@ const char* accelerateKernels = sigma; noise = 0.0f; - alpha=GetPseudoRandomValue(r); + 
alpha=ReadPseudoRandomValue(r); switch(noise_type) { case UniformNoise: default: @@ -2359,7 +2723,7 @@ const char* accelerateKernels = if (alpha == 0.0f) alpha=1.0f; - beta=GetPseudoRandomValue(r); + beta=ReadPseudoRandomValue(r); gamma=sqrt(-2.0f*log(alpha)); sigma=gamma*cospi((2.0f*beta)); tau=gamma*sinpi((2.0f*beta)); @@ -2403,7 +2767,7 @@ const char* accelerateKernels = sigma=1.0f; if (alpha > MagickEpsilon) sigma=sqrt(-2.0f*log(alpha)); - beta=GetPseudoRandomValue(r); + beta=ReadPseudoRandomValue(r); noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma* cospi((float) (2.0f*beta))/2.0f); break; @@ -2416,7 +2780,7 @@ const char* accelerateKernels = poisson=exp(-SigmaPoisson*QuantumScale*pixel); for (i=0; alpha > poisson; i++) { - beta=GetPseudoRandomValue(r); + beta=ReadPseudoRandomValue(r); alpha*=beta; } noise=(float) (QuantumRange*i/SigmaPoisson); @@ -2468,10 +2832,381 @@ const char* accelerateKernels = } ) - ; + STRINGIFY( + __kernel + void RandomImage(__global CLPixelType* inputImage, + const uint imageColumns, const uint imageRows, + __global uint* seeds, + const float randNormNumerator, + const uint randNormDenominator) { + + unsigned int numGenerators = get_global_size(0); + unsigned numRandPixelsPerWorkItem = ((imageColumns*imageRows) + (numGenerators-1)) + / numGenerators; + + uint4 s; + s.x = seeds[get_global_id(0)*4]; + s.y = seeds[get_global_id(0)*4+1]; + s.z = seeds[get_global_id(0)*4+2]; + s.w = seeds[get_global_id(0)*4+3]; + + unsigned int offset = get_group_id(0) * get_local_size(0) * numRandPixelsPerWorkItem; + for (unsigned int n = 0; n < numRandPixelsPerWorkItem; n++) + { + int i = offset + n*get_local_size(0) + get_local_id(0); + if (i >= imageColumns*imageRows) + break; + + float rand = GetPseudoRandomValue(&s,randNormNumerator/randNormDenominator); + CLQuantum v = ClampToQuantum(QuantumRange*rand); + + CLPixelType p; + setRed(&p,v); + setGreen(&p,v); + setBlue(&p,v); + setOpacity(&p,0); + + inputImage[i] = p; + } + + 
seeds[get_global_id(0)*4] = s.x; + seeds[get_global_id(0)*4+1] = s.y; + seeds[get_global_id(0)*4+2] = s.z; + seeds[get_global_id(0)*4+3] = s.w; + } + ) + + STRINGIFY( + __kernel + void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output, + const unsigned int imageWidth, const unsigned int imageHeight, + const __global float *filter, const unsigned int width, const __global int2* offset, + const float4 bias, + const ChannelType channel, const unsigned int matte) { + + int2 currentPixel; + currentPixel.x = get_global_id(0); + currentPixel.y = get_global_id(1); + + if (currentPixel.x >= imageWidth + || currentPixel.y >= imageHeight) + return; + + float4 pixel; + pixel.x = (float)bias.x; + pixel.y = (float)bias.y; + pixel.z = (float)bias.z; + pixel.w = (float)bias.w; + + if (((channel & OpacityChannel) == 0) || (matte == 0)) { + + for (int i = 0; i < width; i++) { + // only support EdgeVirtualPixelMethod through ClampToCanvas + // TODO: implement other virtual pixel method + int2 samplePixel = currentPixel + offset[i]; + samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth); + samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight); + CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x]; + + pixel.x += (filter[i] * (float)samplePixelValue.x); + pixel.y += (filter[i] * (float)samplePixelValue.y); + pixel.z += (filter[i] * (float)samplePixelValue.z); + pixel.w += (filter[i] * (float)samplePixelValue.w); + } + + CLPixelType outputPixel; + outputPixel.x = ClampToQuantum(pixel.x); + outputPixel.y = ClampToQuantum(pixel.y); + outputPixel.z = ClampToQuantum(pixel.z); + outputPixel.w = ClampToQuantum(pixel.w); + output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel; + } + else { + + float gamma = 0.0f; + for (int i = 0; i < width; i++) { + // only support EdgeVirtualPixelMethod through ClampToCanvas + // TODO: implement other virtual pixel method + int2 samplePixel = currentPixel + offset[i]; + samplePixel.x 
= ClampToCanvas(samplePixel.x, imageWidth); + samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight); + + CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x]; + float alpha = QuantumScale*(QuantumRange-samplePixelValue.w); + float k = filter[i]; + pixel.x = pixel.x + k * alpha * samplePixelValue.x; + pixel.y = pixel.y + k * alpha * samplePixelValue.y; + pixel.z = pixel.z + k * alpha * samplePixelValue.z; + pixel.w += k * alpha * samplePixelValue.w; + + gamma+=k*alpha; + } + gamma = PerceptibleReciprocal(gamma); + pixel.xyz = gamma*pixel.xyz; + + CLPixelType outputPixel; + outputPixel.x = ClampToQuantum(pixel.x); + outputPixel.y = ClampToQuantum(pixel.y); + outputPixel.z = ClampToQuantum(pixel.z); + outputPixel.w = ClampToQuantum(pixel.w); + output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel; + } + } + ) + + STRINGIFY( + typedef enum + { + UndefinedCompositeOp, + NoCompositeOp, + ModulusAddCompositeOp, + AtopCompositeOp, + BlendCompositeOp, + BumpmapCompositeOp, + ChangeMaskCompositeOp, + ClearCompositeOp, + ColorBurnCompositeOp, + ColorDodgeCompositeOp, + ColorizeCompositeOp, + CopyBlackCompositeOp, + CopyBlueCompositeOp, + CopyCompositeOp, + CopyCyanCompositeOp, + CopyGreenCompositeOp, + CopyMagentaCompositeOp, + CopyOpacityCompositeOp, + CopyRedCompositeOp, + CopyYellowCompositeOp, + DarkenCompositeOp, + DstAtopCompositeOp, + DstCompositeOp, + DstInCompositeOp, + DstOutCompositeOp, + DstOverCompositeOp, + DifferenceCompositeOp, + DisplaceCompositeOp, + DissolveCompositeOp, + ExclusionCompositeOp, + HardLightCompositeOp, + HueCompositeOp, + InCompositeOp, + LightenCompositeOp, + LinearLightCompositeOp, + LuminizeCompositeOp, + MinusDstCompositeOp, + ModulateCompositeOp, + MultiplyCompositeOp, + OutCompositeOp, + OverCompositeOp, + OverlayCompositeOp, + PlusCompositeOp, + ReplaceCompositeOp, + SaturateCompositeOp, + ScreenCompositeOp, + SoftLightCompositeOp, + SrcAtopCompositeOp, + SrcCompositeOp, + SrcInCompositeOp, 
+ SrcOutCompositeOp, + SrcOverCompositeOp, + ModulusSubtractCompositeOp, + ThresholdCompositeOp, + XorCompositeOp, + /* These are new operators, added after the above was last sorted. + * The list should be re-sorted only when a new library version is + * created. + */ + DivideDstCompositeOp, + DistortCompositeOp, + BlurCompositeOp, + PegtopLightCompositeOp, + VividLightCompositeOp, + PinLightCompositeOp, + LinearDodgeCompositeOp, + LinearBurnCompositeOp, + MathematicsCompositeOp, + DivideSrcCompositeOp, + MinusSrcCompositeOp, + DarkenIntensityCompositeOp, + LightenIntensityCompositeOp + } CompositeOperator; + ) + + STRINGIFY( + inline float ColorDodge(const float Sca, + const float Sa,const float Dca,const float Da) + { + /* + Oct 2004 SVG specification. + */ + if ((Sca*Da+Dca*Sa) >= Sa*Da) + return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa)); + return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa)); + + + /* + New specification, March 2009 SVG specification. This specification was + also wrong of non-overlap cases. + */ + /* + if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon)) + return(Sca*(1.0-Da)); + if (fabs(Sca-Sa) < MagickEpsilon) + return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa)); + return(Sa*MagickMin(Da,Dca*Sa/(Sa-Sca))); + */ + + /* + Working from first principles using the original formula: + + f(Sc,Dc) = Dc/(1-Sc) + + This works correctly! Looks like the 2004 model was right but just + required a extra condition for correct handling. 
+ */ + + /* + if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon)) + return(Sca*(1.0-Da)+Dca*(1.0-Sa)); + if (fabs(Sca-Sa) < MagickEpsilon) + return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa)); + return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa)); + */ + } + + inline void CompositeColorDodge(const float4 *p, + const float4 *q,float4 *composite) { + + float + Da, + gamma, + Sa; + + Sa=1.0f-QuantumScale*getOpacityF4(*p); /* simplify and speed up equations */ + Da=1.0f-QuantumScale*getOpacityF4(*q); + gamma=RoundToUnity(Sa+Da-Sa*Da); /* over blend, as per SVG doc */ + setOpacityF4(composite, QuantumRange*(1.0-gamma)); + gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma); + setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale* + getRedF4(*q)*Da,Da)); + setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale* + getGreenF4(*q)*Da,Da)); + setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale* + getBlueF4(*q)*Da,Da)); + } + ) + + STRINGIFY( + inline void MagickPixelCompositePlus(const float4 *p, + const float alpha,const float4 *q, + const float beta,float4 *composite) + { + float + gamma; + + float + Da, + Sa; + /* + Add two pixels with the given opacities. 
+ */ + Sa=1.0-QuantumScale*alpha; + Da=1.0-QuantumScale*beta; + gamma=RoundToUnity(Sa+Da); /* 'Plus' blending -- not 'Over' blending */ + setOpacityF4(composite,(float) QuantumRange*(1.0-gamma)); + gamma=PerceptibleReciprocal(gamma); + setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q))); + setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q))); + setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q))); + } + ) + + STRINGIFY( + inline void MagickPixelCompositeBlend(const float4 *p, + const float alpha,const float4 *q, + const float beta,float4 *composite) + { + MagickPixelCompositePlus(p,(float) (QuantumRange-alpha* + (QuantumRange-getOpacityF4(*p))),q,(float) (QuantumRange-beta* + (QuantumRange-getOpacityF4(*q))),composite); + } + ) + + STRINGIFY( + __kernel + void Composite(__global CLPixelType *image, + const unsigned int imageWidth, + const unsigned int imageHeight, + const __global CLPixelType *compositeImage, + const unsigned int compositeWidth, + const unsigned int compositeHeight, + const unsigned int compose, + const ChannelType channel, + const unsigned int matte, + const float destination_dissolve, + const float source_dissolve) { + + uint2 index; + index.x = get_global_id(0); + index.y = get_global_id(1); + + + if (index.x >= imageWidth + || index.y >= imageHeight) { + return; + } + const CLPixelType inputPixel = image[index.y*imageWidth+index.x]; + float4 destination; + setRedF4(&destination,getRed(inputPixel)); + setGreenF4(&destination,getGreen(inputPixel)); + setBlueF4(&destination,getBlue(inputPixel)); + + + const CLPixelType compositePixel + = compositeImage[index.y*imageWidth+index.x]; + float4 source; + setRedF4(&source,getRed(compositePixel)); + setGreenF4(&source,getGreen(compositePixel)); + setBlueF4(&source,getBlue(compositePixel)); + + if (matte != 0) { + setOpacityF4(&destination,getOpacity(inputPixel)); + setOpacityF4(&source,getOpacity(compositePixel)); + } + else { + setOpacityF4(&destination,0.0f); + 
setOpacityF4(&source,0.0f); + } + + float4 composite=destination; + + CompositeOperator op = (CompositeOperator)compose; + switch (op) { + case ColorDodgeCompositeOp: + CompositeColorDodge(&source,&destination,&composite); + break; + case BlendCompositeOp: + MagickPixelCompositeBlend(&source,source_dissolve,&destination, + destination_dissolve,&composite); + break; + default: + // unsupported operators + break; + }; + + CLPixelType outputPixel; + setRed(&outputPixel, ClampToQuantum(getRedF4(composite))); + setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite))); + setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite))); + setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite))); + image[index.y*imageWidth+index.x] = outputPixel; + } + ) + + ; #endif // MAGICKCORE_OPENCL_SUPPORT diff --git a/MagickCore/accelerate.c b/MagickCore/accelerate.c index 34806e29d..ce6e4f8cd 100644 --- a/MagickCore/accelerate.c +++ b/MagickCore/accelerate.c @@ -89,6 +89,15 @@ Include declarations. 
#define ALIGNED(pointer,type) ((((long)(pointer)) & (sizeof(type)-1)) == 0) /*#define ALIGNED(pointer,type) (0) */ +/* pad the global workgroup size to the next multiple of + the local workgroup size */ +inline static unsigned int + padGlobalWorkgroupSizeToLocalWorkgroupSize(const unsigned int orgGlobalSize, + const unsigned int localGroupSize) +{ + return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize); +} + static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception) { MagickBooleanType flag; @@ -122,7 +131,8 @@ static MagickBooleanType checkAccelerateCondition(const Image* image, const Chan { /* check if the image's colorspace is supported */ if (image->colorspace != RGBColorspace - && image->colorspace != sRGBColorspace) + && image->colorspace != sRGBColorspace + && image->colorspace != GRAYColorspace) return MagickFalse; /* check if the channel is supported */ @@ -142,6 +152,23 @@ static MagickBooleanType checkAccelerateCondition(const Image* image, const Chan return MagickTrue; } +static MagickBooleanType checkHistogramCondition(Image *image, const ChannelType channel) +{ + + /* ensure this is the only pass get in for now. 
*/ + if ((channel & SyncChannels) == 0) + return MagickFalse; + + if (image->intensity == Rec601LuminancePixelIntensityMethod || + image->intensity == Rec709LuminancePixelIntensityMethod) + return MagickFalse; + + if (image->colorspace != sRGBColorspace) + return MagickFalse; + + return MagickTrue; +} + static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception) { @@ -149,8 +176,8 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch MagickCLEnv clEnv; cl_int clStatus; - size_t global_work_size[2]; - size_t localGroupSize[2]; + size_t global_work_size[3]; + size_t localGroupSize[3]; size_t localMemoryRequirement; Image* filteredImage; MagickSizeType length; @@ -161,13 +188,14 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch unsigned kernelSize; unsigned int i; void *hostPtr; - unsigned int matte, filterWidth, filterHeight, imageWidth, imageHeight; + unsigned int matte, + filterWidth, filterHeight, + imageWidth, imageHeight; cl_context context; cl_kernel clkernel; cl_mem inputImageBuffer, filteredImageBuffer, convolutionKernel; cl_ulong deviceLocalMemorySize; - cl_device_id device; cl_command_queue queue; @@ -178,7 +206,6 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch convolutionKernel = NULL; clkernel = NULL; queue = NULL; - device = NULL; filteredImage = NULL; outputReady = MagickFalse; @@ -209,16 +236,16 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) 
OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); goto cleanup; @@ -242,41 +269,43 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } kernelSize = kernel->width * kernel->height; - convolutionKernel = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus); + convolutionKernel = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, 
"clEnv->library->clCreateBuffer failed.","."); goto cleanup; } queue = AcquireOpenCLCommandQueue(clEnv); - kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float) + kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float) , 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); goto cleanup; } for (i = 0; i < kernelSize; i++) { kernelBufferPtr[i] = (float) kernel->values[i]; } - clStatus = clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL); + if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); + + deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv); /* Compute the local memory requirement for a 16x16 workgroup. 
If it's larger than 16k, reduce the workgroup size to 8x8 */ @@ -284,19 +313,14 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch localGroupSize[1] = 16; localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket) + kernel->width*kernel->height*sizeof(float); - if (localMemoryRequirement > 16384) - { - + if (localMemoryRequirement > deviceLocalMemorySize) + { localGroupSize[0] = 8; localGroupSize[1] = 8; - localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket) + kernel->width*kernel->height*sizeof(float); } - - GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_DEVICE, sizeof(cl_device_id), &device, exception); - clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &deviceLocalMemorySize, NULL); if (localMemoryRequirement <= deviceLocalMemorySize) { /* get the OpenCL kernel */ @@ -309,25 +333,25 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch /* set the kernel arguments */ i = 0; - clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); imageWidth = inputImage->columns; imageHeight = inputImage->rows; - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void 
*)&imageHeight); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel); filterWidth = kernel->width; filterHeight = kernel->height; - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight); - matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0; - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel); - clStatus|=clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL); - clStatus|=clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight); + matte = (inputImage->matte==MagickTrue)?1:0; + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } @@ -336,10 +360,10 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch global_work_size[1] = ((inputImage->rows + localGroupSize[1] - 
1)/localGroupSize[1]) * localGroupSize[1]; /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } } @@ -355,44 +379,49 @@ static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType ch /* set the kernel arguments */ i = 0; - clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel); + clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + imageWidth = inputImage->columns; + imageHeight = inputImage->rows; + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel); filterWidth = kernel->width; filterHeight = kernel->height; - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight); - matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0; - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte); - 
clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight); + matte = (inputImage->matte==MagickTrue)?1:0; + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } - global_work_size[0] = inputImage->columns; - global_work_size[1] = inputImage->rows; - - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + localGroupSize[0] = 8; + localGroupSize[1] = 8; + global_work_size[0] = (inputImage->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0]; + global_work_size[1] = (inputImage->rows + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1]; + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL); + if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } } - clFlush(queue); + clEnv->library->clFlush(queue); if (ALIGNED(filteredPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, 
CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -407,13 +436,13 @@ cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); if (inputImageBuffer != NULL) - clReleaseMemObject(inputImageBuffer); + clEnv->library->clReleaseMemObject(inputImageBuffer); if (filteredImageBuffer != NULL) - clReleaseMemObject(filteredImageBuffer); + clEnv->library->clReleaseMemObject(filteredImageBuffer); if (convolutionKernel != NULL) - clReleaseMemObject(convolutionKernel); + clEnv->library->clReleaseMemObject(convolutionKernel); if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel); @@ -539,40 +568,40 @@ static MagickBooleanType ComputeFunctionImage(Image *image, const ChannelType ch } /* create a CL buffer from image pixel buffer */ length = image->columns * image->rows; - imageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus); + imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - parametersBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters 
* sizeof(float), NULL, &clStatus); + parametersBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } queue = AcquireOpenCLCommandQueue(clEnv); - parametersBufferPtr = (float*)clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float) + parametersBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float) , 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); goto cleanup; } for (i = 0; i < number_parameters; i++) { parametersBufferPtr[i] = (float)parameters[i]; } - clStatus = clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "FunctionImage"); if (clkernel == NULL) @@ -583,38 +612,38 @@ static MagickBooleanType 
ComputeFunctionImage(Image *image, const ChannelType ch /* set the kernel arguments */ i = 0; - clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters); - clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer); + clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters); + clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } globalWorkSize[0] = image->columns; globalWorkSize[1] = image->rows; /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + 
clEnv->library->clFlush(queue); if (ALIGNED(pixels,CLPixelPacket)) { length = image->columns * image->rows; - clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = image->columns * image->rows; - clStatus = clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -628,8 +657,8 @@ cleanup: if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - if (imageBuffer != NULL) clReleaseMemObject(imageBuffer); - if (parametersBuffer != NULL) clReleaseMemObject(parametersBuffer); + if (imageBuffer != NULL) clEnv->library->clReleaseMemObject(imageBuffer); + if (parametersBuffer != NULL) clEnv->library->clReleaseMemObject(parametersBuffer); return status; } @@ -749,10 +778,10 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -761,7 +790,7 @@ static Image* 
ComputeBlurImage(const Image* inputImage, const ChannelType channe { filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); goto cleanup; @@ -785,10 +814,10 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -803,16 +832,16 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe goto cleanup; } - imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus); + imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, 
imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); + kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); goto cleanup; } @@ -821,10 +850,10 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe kernelBufferPtr[i] = (float) kernel->values[i]; } - clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } } @@ -834,10 +863,10 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe /* create temp buffer */ { length = inputImage->columns * inputImage->rows; - tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); + tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } 
@@ -869,18 +898,18 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); kernelWidth = kernel->width; - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, 
"clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } } @@ -895,13 +924,13 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe wsize[0] = chunkSize; wsize[1] = 1; - clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } } @@ -915,18 +944,18 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); kernelWidth = kernel->width; - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - 
clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } } @@ -941,13 +970,13 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe wsize[0] = 1; wsize[1] = chunkSize; - clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } } @@ -957,12 +986,12 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe if (ALIGNED(filteredPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * 
sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -975,10 +1004,10 @@ static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channe cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer); - if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (tempImageBuffer!=NULL) clEnv->library->clReleaseMemObject(tempImageBuffer); + if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (imageKernelBuffer!=NULL) clEnv->library->clReleaseMemObject(imageKernelBuffer); if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel); if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); @@ -1060,10 +1089,10 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * 
sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -1072,7 +1101,7 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType { filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); goto cleanup; @@ -1096,10 +1125,10 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -1114,16 +1143,16 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType goto cleanup; } - imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus); + imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * 
sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); + kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); goto cleanup; } @@ -1132,10 +1161,10 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType kernelBufferPtr[i] = (float) kernel->values[i]; } - clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } } @@ -1147,10 +1176,10 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType /* create temp buffer */ { length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2); - tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); + 
tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -1191,19 +1220,19 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec); + clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + 
clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } } @@ -1218,13 +1247,13 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType wsize[0] = chunkSize; wsize[1] = 1; - clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } } @@ -1245,19 +1274,19 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel); - 
clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows); - clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec); + clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows); + clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg 
failed.", "'%s'", "."); goto cleanup; } } @@ -1272,13 +1301,13 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType wsize[0] = 1; wsize[1] = chunkSize; - clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } } } @@ -1289,12 +1318,12 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType if (ALIGNED(filteredPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -1307,10 +1336,10 @@ static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer); - if 
(filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (tempImageBuffer!=NULL) clEnv->library->clReleaseMemObject(tempImageBuffer); + if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (imageKernelBuffer!=NULL) clEnv->library->clReleaseMemObject(imageKernelBuffer); if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel); if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); @@ -1413,7 +1442,7 @@ static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType float* cosThetaPtr; MagickSizeType length; unsigned int matte; - PixelInfo bias; + MagickPixelPacket bias; cl_float4 biasPixel; cl_float2 blurCenter; float blurRadius; @@ -1460,17 +1489,17 @@ static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) 
{ (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); goto cleanup; @@ -1494,10 +1523,10 @@ static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } @@ -1507,29 +1536,29 @@ static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL); /* create a buffer for sin_theta and cos_theta */ - sinThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus); + sinThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - cosThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus); + cosThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, 
&clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } queue = AcquireOpenCLCommandQueue(clEnv); - sinThetaPtr = (float*) clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus); + sinThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.","."); goto cleanup; } - cosThetaPtr = (float*) clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus); + cosThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.","."); @@ -1544,11 +1573,11 @@ static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType sinThetaPtr[i]=(float)sin((double) (theta*i-offset)); } - clStatus = clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL); - clStatus |= clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL); + clStatus |= clEnv->library->clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, 
"clEnqueueUnmapMemObject failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } @@ -1563,28 +1592,28 @@ static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - GetPixelInfo(inputImage,&bias); + GetMagickPixelPacket(inputImage,&bias); biasPixel.s[0] = bias.red; biasPixel.s[1] = bias.green; biasPixel.s[2] = bias.blue; - biasPixel.s[3] = bias.alpha; - clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel); - clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel); + biasPixel.s[3] = bias.opacity; + clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel); + clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel); - matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0; - clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte); + matte = (inputImage->matte != MagickFalse)?1:0; + clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte); - clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter); + clStatus=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter); - clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer); - clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer); - 
clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size); + clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer); + clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer); + clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } @@ -1592,23 +1621,23 @@ static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType global_work_size[0] = inputImage->columns; global_work_size[1] = inputImage->rows; /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); if (ALIGNED(filteredPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * 
inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -1620,10 +1649,10 @@ static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); - if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - if (sinThetaBuffer!=NULL) clReleaseMemObject(sinThetaBuffer); - if (cosThetaBuffer!=NULL) clReleaseMemObject(cosThetaBuffer); + if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (sinThetaBuffer!=NULL) clEnv->library->clReleaseMemObject(sinThetaBuffer); + if (cosThetaBuffer!=NULL) clEnv->library->clReleaseMemObject(cosThetaBuffer); if (radialBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, radialBlurKernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); if (outputReady == MagickFalse) @@ -1757,10 +1786,10 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, 
"clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -1769,7 +1798,7 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType { filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); goto cleanup; @@ -1794,10 +1823,10 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -1812,28 +1841,28 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType goto cleanup; } - imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus); + imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - 
kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); + kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); goto cleanup; } for (i = 0; i < kernel->width; i++) { kernelBufferPtr[i] = (float) kernel->values[i]; } - clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } } @@ -1842,10 +1871,10 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType /* create temp buffer */ { length = inputImage->columns * inputImage->rows; - tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); + tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -1877,17 
+1906,17 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL); + clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto 
cleanup; } } @@ -1902,13 +1931,13 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType wsize[0] = chunkSize; wsize[1] = 1; - clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } @@ -1921,22 +1950,22 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType fThreshold = (float)threshold; i = 0; - clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain); - 
clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold); + clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } } @@ -1951,13 +1980,13 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType wsize[0] = 1; wsize[1] = chunkSize; - clStatus = clEnqueueNDRangeKernel(queue, 
unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } } @@ -1966,12 +1995,12 @@ static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType if (ALIGNED(filteredPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -1985,10 +2014,10 @@ cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); if (kernel != NULL) kernel=DestroyKernelInfo(kernel); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer); - if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if 
(filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (tempImageBuffer!=NULL) clEnv->library->clReleaseMemObject(tempImageBuffer); + if (imageKernelBuffer!=NULL) clEnv->library->clReleaseMemObject(imageKernelBuffer); if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel); if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); @@ -2066,10 +2095,10 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -2078,7 +2107,7 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan { filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); goto cleanup; @@ -2103,10 +2132,10 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - filteredImageBuffer = 
clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -2121,28 +2150,28 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan goto cleanup; } - imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus); + imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); + kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); goto cleanup; } for (i = 0; i < kernel->width; i++) { kernelBufferPtr[i] = (float) kernel->values[i]; } - clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, 
NULL); + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } } @@ -2154,10 +2183,10 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan /* create temp buffer */ { length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2); - tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); + tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } } @@ -2196,19 +2225,19 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); - 
clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows); - clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec); + clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows); + clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } } @@ -2222,13 +2251,13 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan wsize[0] = chunkSize; wsize[1] = 1; - clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); if 
(clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } @@ -2249,24 +2278,24 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan fThreshold = (float)threshold; i = 0; - clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows); - clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec); + 
clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows); + clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } } @@ -2281,13 +2310,13 @@ static Image* 
ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan wsize[0] = 1; wsize[1] = chunkSize; - clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } } } @@ -2296,12 +2325,12 @@ static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const Chan if (ALIGNED(filteredPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -2315,10 +2344,10 @@ cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); if (kernel != NULL) kernel=DestroyKernelInfo(kernel); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer); - if (imageKernelBuffer!=NULL) 
clReleaseMemObject(imageKernelBuffer); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (tempImageBuffer!=NULL) clEnv->library->clReleaseMemObject(tempImageBuffer); + if (imageKernelBuffer!=NULL) clEnv->library->clReleaseMemObject(imageKernelBuffer); if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel); if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); @@ -2524,46 +2553,46 @@ RestoreMSCWarning } i = 0; - clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage); + clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns); - clStatus |= 
clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients); resizeFilterScale = (float) GetResizeFilterScale(resizeFilter); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale); resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport); resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport); resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur); + clStatus |= 
clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur); - clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize); - clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL); - clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL); - clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } @@ -2572,13 +2601,13 @@ RestoreMSCWarning local_work_size[0] = workgroupSize; local_work_size[1] = 1; - clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, 
NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); status = MagickTrue; @@ -2704,46 +2733,46 @@ RestoreMSCWarning } i = 0; - clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage); + clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), 
(void*)&resizedRows); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients); resizeFilterScale = (float) GetResizeFilterScale(resizeFilter); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale); resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport); resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport); resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur); - 
clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup); - clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize); - clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL); - clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL); - clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL); + clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } @@ -2752,13 +2781,13 @@ RestoreMSCWarning local_work_size[0] = 1; local_work_size[1] = workgroupSize; - clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, 
global_work_size, local_work_size, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); status = MagickTrue; @@ -2825,25 +2854,25 @@ static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedCo } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - cubicCoefficientsBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus); + cubicCoefficientsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } queue = AcquireOpenCLCommandQueue(clEnv); - mappedCoefficientBuffer = (float*)clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float) + mappedCoefficientBuffer = (float*)clEnv->library->clEnqueueMapBuffer(queue, 
cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float) , 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); goto cleanup; } resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter); @@ -2851,10 +2880,10 @@ static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedCo { mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i]; } - clStatus = clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } @@ -2862,7 +2891,7 @@ static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedCo if (filteredImage == NULL) goto cleanup; - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); goto cleanup; @@ -2887,10 +2916,10 @@ static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedCo /* create a CL buffer from image pixel buffer */ length = filteredImage->columns * filteredImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + filteredImageBuffer = 
clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } @@ -2900,21 +2929,21 @@ static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedCo { length = resizedColumns*inputImage->rows; - tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus); + tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0 + status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->matte != MagickFalse)?1:0 , tempImageBuffer, resizedColumns, inputImage->rows , resizeFilter, cubicCoefficientsBuffer , xFactor, clEnv, queue, exception); if (status != MagickTrue) goto cleanup; - status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0 + status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->matte != MagickFalse)?1:0 , filteredImageBuffer, resizedColumns, resizedRows , resizeFilter, cubicCoefficientsBuffer , yFactor, clEnv, queue, exception); @@ -2924,21 +2953,21 @@ static Image* ComputeResizeImage(const Image* inputImage, const 
size_t resizedCo else { length = inputImage->columns*resizedRows; - tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus); + tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0 + status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->matte != MagickFalse)?1:0 , tempImageBuffer, inputImage->columns, resizedRows , resizeFilter, cubicCoefficientsBuffer , yFactor, clEnv, queue, exception); if (status != MagickTrue) goto cleanup; - status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->alpha_trait == BlendPixelTrait)?1:0 + status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->matte != MagickFalse)?1:0 , filteredImageBuffer, resizedColumns, resizedRows , resizeFilter, cubicCoefficientsBuffer , xFactor, clEnv, queue, exception); @@ -2948,11 +2977,11 @@ static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedCo length = resizedColumns*resizedRows; if (ALIGNED(filteredPixels,CLPixelPacket)) { - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 
0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -2964,10 +2993,10 @@ static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedCo cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer); - if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (cubicCoefficientsBuffer!=NULL) clReleaseMemObject(cubicCoefficientsBuffer); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (tempImageBuffer!=NULL) clEnv->library->clReleaseMemObject(tempImageBuffer); + if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (cubicCoefficientsBuffer!=NULL) clEnv->library->clReleaseMemObject(cubicCoefficientsBuffer); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); if (outputReady == MagickFalse) { @@ -3125,10 +3154,10 @@ static MagickBooleanType ComputeContrastImage(Image *inputImage, const MagickBoo } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } @@ -3140,13 +3169,13 @@ static MagickBooleanType ComputeContrastImage(Image 
*inputImage, const MagickBoo } i = 0; - clStatus=clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); uSharpen = (sharpen == MagickFalse)?0:1; - clStatus|=clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen); + clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } @@ -3154,23 +3183,23 @@ static MagickBooleanType ComputeContrastImage(Image *inputImage, const MagickBoo global_work_size[1] = inputImage->rows; /* launch the kernel */ queue = AcquireOpenCLCommandQueue(clEnv); - clStatus = clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); if (ALIGNED(inputPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = 
inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -3182,7 +3211,7 @@ static MagickBooleanType ComputeContrastImage(Image *inputImage, const MagickBoo cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); if (filterKernel!=NULL) RelinquishOpenCLKernel(clEnv, filterKernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); return outputReady; @@ -3269,6 +3298,7 @@ MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, Image * inputImage = image; + inputPixels = NULL; inputImageBuffer = NULL; modulateKernel = NULL; @@ -3311,10 +3341,10 @@ MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } @@ -3331,14 +3361,14 @@ MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, color=colorspace; i = 0; - clStatus=clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - 
clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright); - clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue); - clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation); - clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color); + clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright); + clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue); + clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation); + clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); printf("no kernel\n"); goto cleanup; } @@ -3348,24 +3378,24 @@ MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, global_work_size[0] = inputImage->columns; global_work_size[1] = inputImage->rows; /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } - clFlush(queue); + clEnv->library->clFlush(queue); } if (ALIGNED(inputPixels,CLPixelPacket)) { length = inputImage->columns * 
inputImage->rows; - clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -3384,7 +3414,7 @@ cleanup: } if (inputImageBuffer!=NULL) - clReleaseMemObject(inputImageBuffer); + clEnv->library->clReleaseMemObject(inputImageBuffer); if (modulateKernel!=NULL) RelinquishOpenCLKernel(clEnv, modulateKernel); if (queue != NULL) @@ -3448,77 +3478,41 @@ MagickBooleanType AccelerateModulateImage(Image* image, double percent_brightnes return status; } - -MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const ChannelType channel, ExceptionInfo * _exception) +MagickBooleanType ComputeNegateImageChannel(Image* image, const ChannelType channel, const MagickBooleanType magick_unused(grayscale), ExceptionInfo* exception) { -#define EqualizeImageTag "Equalize/Image" - - ExceptionInfo - *exception=_exception; - - FloatPixelPacket - white, - black, - intensity, - *map; - - cl_uint4 - *histogram; - - PixelPacket - *equalize_map; - register ssize_t i; - Image * image = inputImage; - MagickBooleanType outputReady; - MagickCLEnv clEnv; - cl_int clStatus; - size_t global_work_size[2]; + MagickCLEnv clEnv; void *inputPixels; - cl_mem_flags mem_flags; + + MagickSizeType length; cl_context context; - cl_mem inputImageBuffer; - cl_mem histogramBuffer; - cl_mem equalizeMapBuffer; - cl_kernel histogramKernel; - cl_kernel equalizeKernel; cl_command_queue queue; - cl_int 
colorspace; + cl_kernel negateKernel; - void* hostPtr; + cl_mem inputImageBuffer; + cl_mem_flags mem_flags; - MagickSizeType length; + cl_int clStatus; + + Image * inputImage = image; + + magick_unreferenced(grayscale); inputPixels = NULL; inputImageBuffer = NULL; - histogramBuffer = NULL; - histogramKernel = NULL; - equalizeKernel = NULL; - context = NULL; - queue = NULL; - outputReady = MagickFalse; + negateKernel = NULL; assert(inputImage != (Image *) NULL); assert(inputImage->signature == MagickSignature); if (inputImage->debug != MagickFalse) (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename); - /* - Allocate and initialize histogram arrays. - */ - histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram)); - if (histogram == (cl_uint4 *) NULL) - ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename); - - /* reset histogram */ - (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram)); - /* * initialize opencl env */ @@ -3526,115 +3520,80 @@ MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const Cha context = GetOpenCLContext(clEnv); queue = AcquireOpenCLCommandQueue(clEnv); - /* Create and initialize OpenCL buffers. */ - /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */ - /* assume this will get a writable image */ - inputPixels = GetPixelCachePixels(inputImage, &length, exception); + outputReady = MagickFalse; + /* Create and initialize OpenCL buffers. 
+ inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); + assume this will get a writable image + */ + inputPixels = GetPixelCachePixels(inputImage, &length, exception); if (inputPixels == (void *) NULL) { (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); goto cleanup; } + /* If the host pointer is aligned to the size of CLPixelPacket, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over + */ if (ALIGNED(inputPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); - goto cleanup; - } - - /* If the host pointer is aligned to the size of cl_uint, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ - if (ALIGNED(histogram,cl_uint4)) { mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; - hostPtr = histogram; } else { mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; - hostPtr = histogram; } - /* create a CL buffer for histogram */ - length = (MaxMap+1); - histogramBuffer = clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus); + /* create a CL buffer from image pixel buffer */ + length = inputImage->columns * inputImage->rows; + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), 
(void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - switch (inputImage->colorspace) - { - case RGBColorspace: - colorspace = 1; - break; - case sRGBColorspace: - colorspace = 0; - break; - default: - { - /* something is wrong, as we checked in checkAccelerateCondition */ - } - } - - /* get the OpenCL kernel */ - histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram"); - if (histogramKernel == NULL) + negateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Negate"); + if (negateKernel == NULL) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); goto cleanup; } - /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace); - clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer); + clStatus=clEnv->library->clSetKernelArg(negateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus=clEnv->library->clSetKernelArg(negateKernel,i++,sizeof(ChannelType),(void *)&channel); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); + printf("no kernel\n"); goto cleanup; } - /* launch the kernel */ - global_work_size[0] = inputImage->columns; - global_work_size[1] = inputImage->rows; - 
- clStatus = clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - - if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; + size_t global_work_size[2]; + global_work_size[0] = inputImage->columns; + global_work_size[1] = inputImage->rows; + /* launch the kernel */ + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, negateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); + goto cleanup; + } + clEnv->library->clFlush(queue); } - clFlush(queue); - /* read from the kenel output */ - if (ALIGNED(histogram,cl_uint4)) + if (ALIGNED(inputPixels,CLPixelPacket)) { - length = (MaxMap+1); - clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus); + length = inputImage->columns * inputImage->rows; + clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { - length = (MaxMap+1); - clStatus = clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL); + length = inputImage->columns * inputImage->rows; + clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -3642,154 +3601,132 @@ MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const Cha goto cleanup; } - /* unmap, don't block gpu to use this buffer again. 
*/ - if (ALIGNED(histogram,cl_uint4)) - { - clStatus = clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", "."); - goto cleanup; - } - } + outputReady = MagickTrue; - if (getenv("TEST")) { - unsigned int i; - for (i=0; i<(MaxMap+1UL); i++) - { - printf("histogram %d: red %d\n", i, histogram[i].s[2]); - printf("histogram %d: green %d\n", i, histogram[i].s[1]); - printf("histogram %d: blue %d\n", i, histogram[i].s[0]); - printf("histogram %d: alpha %d\n", i, histogram[i].s[3]); - } - } +cleanup: + OpenCLLogException(__FUNCTION__,__LINE__,exception); - /* cpu stuff */ - equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map)); - if (equalize_map == (PixelPacket *) NULL) - ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename); + if (inputPixels) { + //ReleasePixelCachePixels(); + inputPixels = NULL; + } - map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map)); - if (map == (FloatPixelPacket *) NULL) - ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename); + if (inputImageBuffer!=NULL) + clEnv->library->clReleaseMemObject(inputImageBuffer); + if (negateKernel!=NULL) + RelinquishOpenCLKernel(clEnv, negateKernel); + if (queue != NULL) + RelinquishOpenCLCommandQueue(clEnv, queue); - /* - Integrate the histogram to get the equalization map. 
- */ - (void) ResetMagickMemory(&intensity,0,sizeof(intensity)); - for (i=0; i <= (ssize_t) MaxMap; i++) - { - if ((channel & SyncChannels) != 0) - { - intensity.red+=histogram[i].s[2]; - map[i]=intensity; - continue; - } - if ((channel & RedChannel) != 0) - intensity.red+=histogram[i].s[2]; - if ((channel & GreenChannel) != 0) - intensity.green+=histogram[i].s[1]; - if ((channel & BlueChannel) != 0) - intensity.blue+=histogram[i].s[0]; - if ((channel & OpacityChannel) != 0) - intensity.alpha+=histogram[i].s[3]; - if (((channel & IndexChannel) != 0) && - (image->colorspace == CMYKColorspace)) - { - printf("something here\n"); - /*intensity.index+=histogram[i].index; */ - } - map[i]=intensity; - } - black=map[0]; - white=map[(int) MaxMap]; - (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map)); - for (i=0; i <= (ssize_t) MaxMap; i++) - { - if ((channel & SyncChannels) != 0) - { - if (white.red != black.red) - equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap* - (map[i].red-black.red))/(white.red-black.red))); - continue; - } - if (((channel & RedChannel) != 0) && (white.red != black.red)) - equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap* - (map[i].red-black.red))/(white.red-black.red))); - if (((channel & GreenChannel) != 0) && (white.green != black.green)) - equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap* - (map[i].green-black.green))/(white.green-black.green))); - if (((channel & BlueChannel) != 0) && (white.blue != black.blue)) - equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap* - (map[i].blue-black.blue))/(white.blue-black.blue))); - if (((channel & OpacityChannel) != 0) && (white.alpha != black.alpha)) - equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap* - (map[i].alpha-black.alpha))/(white.alpha-black.alpha))); - /* - if ((((channel & IndexChannel) != 0) && - (image->colorspace == CMYKColorspace)) && - (white.index != black.index)) - 
equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap* - (map[i].index-black.index))/(white.index-black.index))); - */ - } + return outputReady; - histogram=(cl_uint4 *) RelinquishMagickMemory(histogram); - map=(FloatPixelPacket *) RelinquishMagickMemory(map); +} - if (image->storage_class == PseudoClass) - { - /* - Equalize colormap. - */ - for (i=0; i < (ssize_t) image->colors; i++) - { - if ((channel & SyncChannels) != 0) - { - if (white.red != black.red) - { - image->colormap[i].red=equalize_map[ - ScaleQuantumToMap(image->colormap[i].red)].red; - image->colormap[i].green=equalize_map[ - ScaleQuantumToMap(image->colormap[i].green)].red; - image->colormap[i].blue=equalize_map[ - ScaleQuantumToMap(image->colormap[i].blue)].red; - image->colormap[i].alpha=equalize_map[ - ScaleQuantumToMap(image->colormap[i].alpha)].red; - } - continue; - } - if (((channel & RedChannel) != 0) && (white.red != black.red)) - image->colormap[i].red=equalize_map[ - ScaleQuantumToMap(image->colormap[i].red)].red; - if (((channel & GreenChannel) != 0) && (white.green != black.green)) - image->colormap[i].green=equalize_map[ - ScaleQuantumToMap(image->colormap[i].green)].green; - if (((channel & BlueChannel) != 0) && (white.blue != black.blue)) - image->colormap[i].blue=equalize_map[ - ScaleQuantumToMap(image->colormap[i].blue)].blue; - if (((channel & OpacityChannel) != 0) && - (white.alpha != black.alpha)) - image->colormap[i].alpha=equalize_map[ - ScaleQuantumToMap(image->colormap[i].alpha)].alpha; - } - } - /* - Equalize image. - */ +/* +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% % +% % +% % +% N e g a t e I m a g e w i t h O p e n C L % +% % +% % +% % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% +% A description of each parameter follows: +% +% o image: the image. +% +% o channel: the channel. +% +% o grayscale: If MagickTrue, only negate grayscale pixels within the image. 
+% +*/ - /* GPU can work on this again, image and equalize map as input - image: uchar4 (CLPixelPacket) - equalize_map: uchar4 (PixelPacket) - black, white: float4 (FloatPixelPacket) */ +MagickExport +MagickBooleanType AccelerateNegateImageChannel(Image* image, const ChannelType channel, const MagickBooleanType grayscale, ExceptionInfo* exception) +{ + MagickBooleanType status; + + assert(image != NULL); + assert(exception != NULL); + + status = checkOpenCLEnvironment(exception); + if (status == MagickFalse) + return MagickFalse; + + status = checkAccelerateCondition(image, AllChannels); + if (status == MagickFalse) + return MagickFalse; + + status = ComputeNegateImageChannel(image,channel,grayscale,exception); + + return status; +} + + +MagickBooleanType ComputeGrayscaleImage(Image* image, const PixelIntensityMethod method, ExceptionInfo* exception) +{ + register ssize_t + i; + + cl_int intensityMethod; + cl_int colorspace; + + MagickBooleanType outputReady; + + MagickCLEnv clEnv; + + void *inputPixels; + + MagickSizeType length; + + cl_context context; + cl_command_queue queue; + cl_kernel grayscaleKernel; + + cl_mem inputImageBuffer; + cl_mem_flags mem_flags; + + cl_int clStatus; + + Image * inputImage = image; + + inputPixels = NULL; + inputImageBuffer = NULL; + grayscaleKernel = NULL; + + assert(inputImage != (Image *) NULL); + assert(inputImage->signature == MagickSignature); + if (inputImage->debug != MagickFalse) + (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename); + + /* + * initialize opencl env + */ + clEnv = GetDefaultOpenCLEnv(); + context = GetOpenCLContext(clEnv); + queue = AcquireOpenCLCommandQueue(clEnv); + + outputReady = MagickFalse; + + /* Create and initialize OpenCL buffers. 
+ inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); + assume this will get a writable image + */ + inputPixels = GetPixelCachePixels(inputImage, &length, exception); + if (inputPixels == (void *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); + goto cleanup; + } - if (inputImageBuffer!=NULL) - clReleaseMemObject(inputImageBuffer); - /* If the host pointer is aligned to the size of CLPixelPacket, - then use the host buffer directly from the GPU; otherwise, - create a buffer on the GPU and copy the data over */ + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over + */ if (ALIGNED(inputPixels,CLPixelPacket)) { mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; @@ -3800,77 +3737,57 @@ MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const Cha } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - /* Create and initialize OpenCL buffers. 
*/ - if (ALIGNED(equalize_map, PixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - hostPtr = equalize_map; - } - else - { - mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; - hostPtr = equalize_map; - } - /* create a CL buffer for eqaulize_map */ - length = (MaxMap+1); - equalizeMapBuffer = clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); - goto cleanup; - } + intensityMethod = method; + colorspace = image->colorspace; - /* get the OpenCL kernel */ - equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize"); - if (equalizeKernel == NULL) + grayscaleKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Grayscale"); + if (grayscaleKernel == NULL) { (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); goto cleanup; } - /* set the kernel arguments */ i = 0; - clStatus=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel); - clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer); - clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white); - clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black); + clStatus=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&intensityMethod); + clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&colorspace); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) 
OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); + printf("no kernel\n"); goto cleanup; } - /* launch the kernel */ - global_work_size[0] = inputImage->columns; - global_work_size[1] = inputImage->rows; - - clStatus = clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - - if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; + size_t global_work_size[2]; + global_work_size[0] = inputImage->columns; + global_work_size[1] = inputImage->rows; + /* launch the kernel */ + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, grayscaleKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); + goto cleanup; + } + clEnv->library->clFlush(queue); } - clFlush(queue); - /* read the data back */ if (ALIGNED(inputPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -3879,47 +3796,42 @@ MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, 
const Cha } outputReady = MagickTrue; - - equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map); cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); if (inputPixels) { - /*ReleasePixelCachePixels();*/ + //ReleasePixelCachePixels(); inputPixels = NULL; } if (inputImageBuffer!=NULL) - clReleaseMemObject(inputImageBuffer); - if (histogramBuffer!=NULL) - clReleaseMemObject(histogramBuffer); - if (histogramKernel!=NULL) - RelinquishOpenCLKernel(clEnv, histogramKernel); + clEnv->library->clReleaseMemObject(inputImageBuffer); + if (grayscaleKernel!=NULL) + RelinquishOpenCLKernel(clEnv, grayscaleKernel); if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); return outputReady; -} +} /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % % % % % -% E q u a l i z e I m a g e w i t h O p e n C L % +% G r a y s c a l e I m a g e w i t h O p e n C L % % % % % % % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % -% EqualizeImage() applies a histogram equalization to the image. +% GrayscaleImage() converts the colors in the reference image to gray. 
% -% The format of the EqualizeImage method is: +% The format of the GrayscaleImageChannel method is: % -% MagickBooleanType EqualizeImage(Image *image) -% MagickBooleanType EqualizeImageChannel(Image *image, -% const ChannelType channel) +% MagickBooleanType GrayscaleImage(Image *image, +% const PixelIntensityMethod method) % % A description of each parameter follows: % @@ -3929,9 +3841,8 @@ cleanup: % */ - MagickExport -MagickBooleanType AccelerateEqualizeImage(Image* image, const ChannelType channel, ExceptionInfo* exception) +MagickBooleanType AccelerateGrayscaleImage(Image* image, const PixelIntensityMethod method, ExceptionInfo* exception) { MagickBooleanType status; @@ -3942,295 +3853,2031 @@ MagickBooleanType AccelerateEqualizeImage(Image* image, const ChannelType channe if (status == MagickFalse) return MagickFalse; - status = checkAccelerateCondition(image, channel); + status = checkAccelerateCondition(image, AllChannels); if (status == MagickFalse) return MagickFalse; - /* ensure this is the only pass get in for now. 
*/ - if ((channel & SyncChannels) == 0) + if (method == Rec601LuminancePixelIntensityMethod || method == Rec709LuminancePixelIntensityMethod) return MagickFalse; if (image->colorspace != sRGBColorspace) return MagickFalse; - status = ComputeEqualizeImage(image,channel,exception); + status = ComputeGrayscaleImage(image,method,exception); + return status; } - -static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exception) +static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv, + cl_command_queue queue, + cl_mem inputImageBuffer, + cl_mem histogramBuffer, + Image *inputImage, + const ChannelType channel, + ExceptionInfo * _exception) { + ExceptionInfo + *exception=_exception; - MagickBooleanType outputReady = MagickFalse; - MagickCLEnv clEnv = NULL; + register ssize_t + i; + + MagickBooleanType outputReady; cl_int clStatus; + size_t global_work_size[2]; - const void *inputPixels = NULL; - Image* filteredImage = NULL; - void *filteredPixels = NULL; - void *hostPtr; - MagickSizeType length; + cl_kernel histogramKernel; - cl_mem_flags mem_flags; - cl_context context = NULL; - cl_mem inputImageBuffer = NULL; - cl_mem tempImageBuffer[2]; - cl_mem filteredImageBuffer = NULL; - cl_command_queue queue = NULL; - cl_kernel hullPass1 = NULL; - cl_kernel hullPass2 = NULL; + cl_int method; + cl_int colorspace; - unsigned int imageWidth, imageHeight; - int matte; - int k; + histogramKernel = NULL; - static const int - X[4] = {0, 1, 1,-1}, - Y[4] = {1, 0, 1, 1}; + outputReady = MagickFalse; + method = inputImage->intensity; + colorspace = inputImage->colorspace; - tempImageBuffer[0] = tempImageBuffer[1] = NULL; - clEnv = GetDefaultOpenCLEnv(); - context = GetOpenCLContext(clEnv); - queue = AcquireOpenCLCommandQueue(clEnv); - - inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); - if (inputPixels == (void *) NULL) + /* get the OpenCL kernel */ + histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram"); + 
if (histogramKernel == NULL) { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); goto cleanup; } - if (ALIGNED(inputPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; - } - else - { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; - } - /* create a CL buffer from image pixel buffer */ - length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + /* set the kernel arguments */ + i = 0; + clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&method); + clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace); + clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); goto cleanup; } - mem_flags = CL_MEM_READ_WRITE; - length = inputImage->columns * inputImage->rows; - for (k = 0; k < 2; k++) - { - tempImageBuffer[k] = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); - goto cleanup; - } - } + /* launch the kernel */ + global_work_size[0] = 
inputImage->columns; + global_work_size[1] = inputImage->rows; - filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); - assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); - goto cleanup; - } - filteredPixels = GetPixelCachePixels(filteredImage, &length, exception); - if (filteredPixels == (void *) NULL) - { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); - goto cleanup; - } + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; - hostPtr = filteredPixels; - } - else - { - mem_flags = CL_MEM_WRITE_ONLY; - hostPtr = NULL; - } - /* create a CL buffer from image pixel buffer */ - length = inputImage->columns * inputImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); goto cleanup; } + clEnv->library->clFlush(queue); - hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1"); - hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2"); + outputReady = MagickTrue; - clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer); - clStatus |=clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1)); - imageWidth = inputImage->columns; - clStatus 
|=clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth); - imageHeight = inputImage->rows; - clStatus |=clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight); - matte = (inputImage->matte==MagickFalse)?0:1; - clStatus |=clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte); +cleanup: + OpenCLLogException(__FUNCTION__,__LINE__,exception); + + if (histogramKernel!=NULL) + RelinquishOpenCLKernel(clEnv, histogramKernel); + + return outputReady; +} + + +MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const ChannelType channel, ExceptionInfo * _exception) +{ +#define EqualizeImageTag "Equalize/Image" + + ExceptionInfo + *exception=_exception; + + FloatPixelPacket + white, + black, + intensity, + *map=NULL; + + cl_uint4 + *histogram=NULL; + + PixelPacket + *equalize_map=NULL; + + register ssize_t + i; + + Image * image = inputImage; + + MagickBooleanType outputReady; + + MagickCLEnv clEnv; + + cl_int clStatus; + MagickBooleanType status; + + size_t global_work_size[2]; + + void *inputPixels; + cl_mem_flags mem_flags; + + cl_context context; + cl_mem inputImageBuffer; + cl_mem histogramBuffer; + cl_mem equalizeMapBuffer; + cl_kernel histogramKernel; + cl_kernel equalizeKernel; + cl_command_queue queue; + + void* hostPtr; + + MagickSizeType length; + + inputPixels = NULL; + inputImageBuffer = NULL; + histogramBuffer = NULL; + equalizeMapBuffer = NULL; + histogramKernel = NULL; + equalizeKernel = NULL; + context = NULL; + queue = NULL; + outputReady = MagickFalse; + + assert(inputImage != (Image *) NULL); + assert(inputImage->signature == MagickSignature); + if (inputImage->debug != MagickFalse) + (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename); + + /* + * initialize opencl env + */ + clEnv = GetDefaultOpenCLEnv(); + context = GetOpenCLContext(clEnv); + queue = AcquireOpenCLCommandQueue(clEnv); + + /* + Allocate and initialize histogram arrays. 
+ */ + histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram)); + if (histogram == (cl_uint4 *) NULL) + ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename); + + /* reset histogram */ + (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram)); + + /* Create and initialize OpenCL buffers. */ + /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */ + /* assume this will get a writable image */ + inputPixels = GetPixelCachePixels(inputImage, &length, exception); + + if (inputPixels == (void *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); + goto cleanup; + } + /* If the host pointer is aligned to the size of CLPixelPacket, + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over */ + if (ALIGNED(inputPixels,CLPixelPacket)) + { + mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; + } + else + { + mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; + } + /* create a CL buffer from image pixel buffer */ + length = inputImage->columns * inputImage->rows; + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - clStatus = clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1)); - clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer); - imageWidth = inputImage->columns; - clStatus |=clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth); - imageHeight = inputImage->rows; - clStatus 
|=clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight); - matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0; - clStatus |=clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte); + /* If the host pointer is aligned to the size of cl_uint, + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over */ + if (ALIGNED(histogram,cl_uint4)) + { + mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; + hostPtr = histogram; + } + else + { + mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; + hostPtr = histogram; + } + /* create a CL buffer for histogram */ + length = (MaxMap+1); + histogramBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } + status = LaunchHistogramKernel(clEnv, queue, inputImageBuffer, histogramBuffer, image, channel, exception); + if (status == MagickFalse) + goto cleanup; - global_work_size[0] = inputImage->columns; - global_work_size[1] = inputImage->rows; - - - for (k = 0; k < 4; k++) + /* read from the kenel output */ + if (ALIGNED(histogram,cl_uint4)) { - cl_int2 offset; - int polarity; + length = (MaxMap+1); + clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus); + } + else + { + length = (MaxMap+1); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL); + } + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); 
+ goto cleanup; + } - - offset.s[0] = X[k]; - offset.s[1] = Y[k]; - polarity = 1; - clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset); - clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity); - clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset); - clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity); + /* unmap, don't block gpu to use this buffer again. */ + if (ALIGNED(histogram,cl_uint4)) + { + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); goto cleanup; } - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) + } + + /* recreate input buffer later, in case image updated */ +#ifdef RECREATEBUFFER + if (inputImageBuffer!=NULL) + clEnv->library->clReleaseMemObject(inputImageBuffer); +#endif + + /* CPU stuff */ + equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map)); + if (equalize_map == (PixelPacket *) NULL) + ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename); + + map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map)); + if (map == (FloatPixelPacket *) NULL) + ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename); + + /* + Integrate the histogram to get the equalization map. 
+ */ + (void) ResetMagickMemory(&intensity,0,sizeof(intensity)); + for (i=0; i <= (ssize_t) MaxMap; i++) + { + if ((channel & SyncChannels) != 0) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) + intensity.red+=histogram[i].s[2]; + map[i]=intensity; + continue; + } + if ((channel & RedChannel) != 0) + intensity.red+=histogram[i].s[2]; + if ((channel & GreenChannel) != 0) + intensity.green+=histogram[i].s[1]; + if ((channel & BlueChannel) != 0) + intensity.blue+=histogram[i].s[0]; + if ((channel & OpacityChannel) != 0) + intensity.opacity+=histogram[i].s[3]; + /* + if (((channel & IndexChannel) != 0) && + (image->colorspace == CMYKColorspace)) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } + intensity.index+=histogram[i].index; + } + */ + map[i]=intensity; + } + black=map[0]; + white=map[(int) MaxMap]; + (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map)); + for (i=0; i <= (ssize_t) MaxMap; i++) + { + if ((channel & SyncChannels) != 0) + { + if (white.red != black.red) + equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap* + (map[i].red-black.red))/(white.red-black.red))); + continue; + } + if (((channel & RedChannel) != 0) && (white.red != black.red)) + equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap* + (map[i].red-black.red))/(white.red-black.red))); + if (((channel & GreenChannel) != 0) && (white.green != black.green)) + equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap* + (map[i].green-black.green))/(white.green-black.green))); + if (((channel & BlueChannel) != 0) && (white.blue != black.blue)) + 
equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap* + (map[i].blue-black.blue))/(white.blue-black.blue))); + if (((channel & OpacityChannel) != 0) && (white.opacity != black.opacity)) + equalize_map[i].opacity=ScaleMapToQuantum((MagickRealType) ((MaxMap* + (map[i].opacity-black.opacity))/(white.opacity-black.opacity))); + /* + if ((((channel & IndexChannel) != 0) && + (image->colorspace == CMYKColorspace)) && + (white.index != black.index)) + equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap* + (map[i].index-black.index))/(white.index-black.index))); + */ + } + + if (image->storage_class == PseudoClass) + { + /* + Equalize colormap. + */ + for (i=0; i < (ssize_t) image->colors; i++) + { + if ((channel & SyncChannels) != 0) + { + if (white.red != black.red) + { + image->colormap[i].red=equalize_map[ + ScaleQuantumToMap(image->colormap[i].red)].red; + image->colormap[i].green=equalize_map[ + ScaleQuantumToMap(image->colormap[i].green)].red; + image->colormap[i].blue=equalize_map[ + ScaleQuantumToMap(image->colormap[i].blue)].red; + image->colormap[i].opacity=equalize_map[ + ScaleQuantumToMap(image->colormap[i].opacity)].red; + } + continue; + } + if (((channel & RedChannel) != 0) && (white.red != black.red)) + image->colormap[i].red=equalize_map[ + ScaleQuantumToMap(image->colormap[i].red)].red; + if (((channel & GreenChannel) != 0) && (white.green != black.green)) + image->colormap[i].green=equalize_map[ + ScaleQuantumToMap(image->colormap[i].green)].green; + if (((channel & BlueChannel) != 0) && (white.blue != black.blue)) + image->colormap[i].blue=equalize_map[ + ScaleQuantumToMap(image->colormap[i].blue)].blue; + if (((channel & OpacityChannel) != 0) && + (white.opacity != black.opacity)) + image->colormap[i].opacity=equalize_map[ + ScaleQuantumToMap(image->colormap[i].opacity)].opacity; + } + } + /* + Equalize image. 
+ */ - if (k == 0) - clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer)); - offset.s[0] = -X[k]; - offset.s[1] = -Y[k]; - polarity = 1; - clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset); - clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity); - clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset); - clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity); + /* GPU can work on this again, image and equalize map as input + image: uchar4 (CLPixelPacket) + equalize_map: uchar4 (PixelPacket) + black, white: float4 (FloatPixelPacket) */ + +#ifdef RECREATEBUFFER + /* If the host pointer is aligned to the size of CLPixelPacket, + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over */ + if (ALIGNED(inputPixels,CLPixelPacket)) + { + mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; + } + else + { + mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; + } + /* create a CL buffer from image pixel buffer */ + length = inputImage->columns * inputImage->rows; + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; + } +#endif + + /* Create and initialize OpenCL buffers. 
*/ + if (ALIGNED(equalize_map, PixelPacket)) + { + mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; + hostPtr = equalize_map; + } + else + { + mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; + hostPtr = equalize_map; + } + /* create a CL buffer for eqaulize_map */ + length = (MaxMap+1); + equalizeMapBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; + } + + /* get the OpenCL kernel */ + equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize"); + if (equalizeKernel == NULL) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", "."); + goto cleanup; + } + + /* set the kernel arguments */ + i = 0; + clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel); + clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer); + clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white); + clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", "."); + goto cleanup; + } + + /* launch the kernel */ + global_work_size[0] = inputImage->columns; + global_work_size[1] = inputImage->rows; + + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), 
ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); + goto cleanup; + } + clEnv->library->clFlush(queue); + + /* read the data back */ + if (ALIGNED(inputPixels,CLPixelPacket)) + { + length = inputImage->columns * inputImage->rows; + clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + } + else + { + length = inputImage->columns * inputImage->rows; + clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); + } + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + goto cleanup; + } + + outputReady = MagickTrue; + +cleanup: + OpenCLLogException(__FUNCTION__,__LINE__,exception); + + if (inputPixels) { + /*ReleasePixelCachePixels();*/ + inputPixels = NULL; + } + + if (inputImageBuffer!=NULL) + clEnv->library->clReleaseMemObject(inputImageBuffer); + + if (map!=NULL) + map=(FloatPixelPacket *) RelinquishMagickMemory(map); + + if (equalizeMapBuffer!=NULL) + clEnv->library->clReleaseMemObject(equalizeMapBuffer); + if (equalize_map!=NULL) + equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map); + + if (histogramBuffer!=NULL) + clEnv->library->clReleaseMemObject(histogramBuffer); + if (histogram!=NULL) + histogram=(cl_uint4 *) RelinquishMagickMemory(histogram); + + if (histogramKernel!=NULL) + RelinquishOpenCLKernel(clEnv, histogramKernel); + if (equalizeKernel!=NULL) + RelinquishOpenCLKernel(clEnv, equalizeKernel); + + if (queue != NULL) + RelinquishOpenCLCommandQueue(clEnv, queue); + + return outputReady; +} + +/* +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% % +% % +% % +% E q u a l i z e I m a g e w i t h O p e n C L % +% % +% % +% % 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%  EqualizeImage() applies a histogram equalization to the image.
+%
+%  The format of the EqualizeImage method is:
+%
+%      MagickBooleanType EqualizeImage(Image *image)
+%      MagickBooleanType EqualizeImageChannel(Image *image,
+%        const ChannelType channel)
+%
+%  A description of each parameter follows:
+%
+%    o image: the image.
+%
+%    o channel: the channel.
+%
+*/
+
+
+MagickExport
+MagickBooleanType AccelerateEqualizeImage(Image* image, const ChannelType channel, ExceptionInfo* exception)
+{
+  assert(image != NULL);
+  assert(exception != NULL);
+
+  /* Return MagickFalse as soon as any of the three precondition checks fails. */
+  if (checkOpenCLEnvironment(exception) == MagickFalse)
+    return MagickFalse;
+  if (checkAccelerateCondition(image, channel) == MagickFalse)
+    return MagickFalse;
+  if (checkHistogramCondition(image, channel) == MagickFalse)
+    return MagickFalse;
+
+  return ComputeEqualizeImage(image,channel,exception);
+}
+
+
+
+/*
+  FindStretchLevels() scans one component of the histogram for the black and
+  white levels used by the contrast stretch.
+
+  The black level is the first map index, counting up from 0, at which the
+  running sum of bin counts exceeds black_point (MaxMap+1 when it never does).
+  The white level is the first index, counting down from MaxMap, at which the
+  running sum exceeds white_cutoff (0 when it never does; bin 0 itself is
+  never added to the sum).
+*/
+static void FindStretchLevels(const cl_uint4 *histogram,const int component,
+  const double black_point,const double white_cutoff,ssize_t *black_level,
+  ssize_t *white_level)
+{
+  double
+    intensity;
+
+  ssize_t
+    i;
+
+  intensity=0.0;
+  for (i=0; i <= (ssize_t) MaxMap; i++)
+  {
+    intensity+=histogram[i].s[component];
+    if (intensity > black_point)
+      break;
+  }
+  *black_level=i;
+  intensity=0.0;
+  for (i=(ssize_t) MaxMap; i != 0; i--)
+  {
+    intensity+=histogram[i].s[component];
+    if (intensity > white_cutoff)
+      break;
+  }
+  *white_level=i;
+}
+
+/*
+  ComputeContrastStretchImageChannel() contrast-stretches the image in place:
+  a histogram is built by LaunchHistogramKernel(), the black/white levels and
+  the stretch map are computed on the host, and the "Stretch" kernel is then
+  run over the image buffer, whose contents are read back into the pixel
+  cache.
+
+    o image: the image; its pixel cache (and, for PseudoClass images, its
+      colormap) is modified.
+
+    o channel: the channels to stretch.
+
+    o black_point, white_point: pixel-count thresholds compared against the
+      running histogram sums.
+
+    o _exception: receives errors and warnings.
+
+  Returns MagickTrue once the stretched pixels have been read back, and
+  MagickFalse on any failure.  Every failure path leaves through the cleanup
+  label so the command queue, kernels, CL buffers and host arrays are
+  released.
+*/
+MagickExport MagickBooleanType ComputeContrastStretchImageChannel(Image *image,
+  const ChannelType channel,const double black_point,const double white_point,
+  ExceptionInfo * _exception)
+{
+#define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
+#define ContrastStretchImageTag  "ContrastStretch/Image"
+
+  ExceptionInfo
+    *exception=_exception;
+
+  double
+    white_cutoff;
+
+  FloatPixelPacket
+    black,
+    white;
+
+  cl_uint4
+    *histogram=NULL;
+
+  PixelPacket
+    *stretch_map=NULL;
+
+  register ssize_t
+    i;
+
+  ssize_t
+    black_level,
+    white_level;
+
+  Image * inputImage;
+
+  MagickBooleanType outputReady;
+
+  MagickCLEnv clEnv;
+
+  cl_int clStatus;
+  MagickBooleanType status;
+
+  size_t global_work_size[2];
+
+  void *inputPixels;
+  cl_mem_flags mem_flags;
+
+  cl_context context;
+  cl_mem inputImageBuffer;
+  cl_mem histogramBuffer;
+  cl_mem stretchMapBuffer;
+  cl_kernel stretchKernel;
+  cl_command_queue queue;
+
+  void* hostPtr;
+
+  MagickSizeType length;
+
+  inputImage = image;
+  inputPixels = NULL;
+  inputImageBuffer = NULL;
+  histogramBuffer = NULL;
+  stretchMapBuffer = NULL;
+  stretchKernel = NULL;
+  context = NULL;
+  queue = NULL;
+  outputReady = MagickFalse;
+
+
+  assert(image != (Image *) NULL);
+  assert(image->signature == MagickSignature);
+  if (image->debug != MagickFalse)
+    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
+
+  /*
+   * initialize opencl env
+   */
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+  queue = AcquireOpenCLCommandQueue(clEnv);
+
+  /*
+    Allocate and initialize histogram arrays.
+  */
+  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
+  if (histogram == (cl_uint4 *) NULL)
+    {
+      /* go through cleanup: the command queue is already acquired */
+      (void) ThrowMagickException(exception,GetMagickModule(),
+        ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
+      goto cleanup;
+    }
+
+  /* reset histogram */
+  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
+
+  status=MagickTrue;
+
+
+  /*
+    Form histogram.
+  */
+  /* Create and initialize OpenCL buffers. */
+  /* assume this will get a writable image */
+  inputPixels = GetPixelCachePixels(inputImage, &length, exception);
+
+  if (inputPixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
+    goto cleanup;
+  }
+  /* If the host pointer is aligned to the size of CLPixelPacket,
+     then use the host buffer directly from the GPU; otherwise,
+     create a buffer on the GPU and copy the data over.
+     The buffer is read-write: unless RECREATEBUFFER is defined, this same
+     buffer is handed to the Stretch kernel and its contents are read back
+     as the result, and kernel writes to a CL_MEM_READ_ONLY buffer are
+     undefined. */
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+  }
+  else
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  /* If the host pointer is aligned to the size of cl_uint4,
+     then use the host buffer directly from the GPU; otherwise,
+     create a buffer on the GPU and copy the data over */
+  hostPtr = histogram;
+  if (ALIGNED(histogram,cl_uint4))
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+  }
+  else
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer for histogram */
+  length = (MaxMap+1);
+  histogramBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  status = LaunchHistogramKernel(clEnv, queue, inputImageBuffer, histogramBuffer, image, channel, exception);
+  if (status == MagickFalse)
+    goto cleanup;
+
+  /* read from the kernel output */
+  length = (MaxMap+1);
+  if (ALIGNED(histogram,cl_uint4))
+  {
+    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
+  }
+  else
+  {
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
+  }
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* unmap, don't block gpu to use this buffer again.  */
+  if (ALIGNED(histogram,cl_uint4))
+  {
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
+    if (clStatus != CL_SUCCESS)
+    {
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
+      goto cleanup;
+    }
+  }
+
+  /* recreate input buffer later, in case image updated */
+#ifdef RECREATEBUFFER
+  if (inputImageBuffer!=NULL)
+    {
+      clEnv->library->clReleaseMemObject(inputImageBuffer);
+      /* clear the handle so cleanup cannot release it a second time */
+      inputImageBuffer = NULL;
+    }
+#endif
+
+  /* CPU stuff */
+  /*
+    Find the histogram boundaries by locating the black/white levels.
+    NOTE(review): every channel reads component 2 of the histogram, exactly
+    as the code did before it was factored into FindStretchLevels();
+    presumably the histogram kernel accumulates a single shared histogram
+    there -- confirm against the kernel source.
+    The index channel of CMYK images is not processed here.
+  */
+  white_cutoff=(double) image->columns*image->rows-white_point;
+  black.red=0.0;
+  white.red=MaxRange(QuantumRange);
+  if ((channel & RedChannel) != 0)
+    {
+      FindStretchLevels(histogram,2,black_point,white_cutoff,&black_level,
+        &white_level);
+      black.red=(MagickRealType) black_level;
+      white.red=(MagickRealType) white_level;
+    }
+  black.green=0.0;
+  white.green=MaxRange(QuantumRange);
+  if ((channel & GreenChannel) != 0)
+    {
+      FindStretchLevels(histogram,2,black_point,white_cutoff,&black_level,
+        &white_level);
+      black.green=(MagickRealType) black_level;
+      white.green=(MagickRealType) white_level;
+    }
+  black.blue=0.0;
+  white.blue=MaxRange(QuantumRange);
+  if ((channel & BlueChannel) != 0)
+    {
+      FindStretchLevels(histogram,2,black_point,white_cutoff,&black_level,
+        &white_level);
+      black.blue=(MagickRealType) black_level;
+      white.blue=(MagickRealType) white_level;
+    }
+  black.opacity=0.0;
+  white.opacity=MaxRange(QuantumRange);
+  if ((channel & OpacityChannel) != 0)
+    {
+      FindStretchLevels(histogram,2,black_point,white_cutoff,&black_level,
+        &white_level);
+      black.opacity=(MagickRealType) black_level;
+      white.opacity=(MagickRealType) white_level;
+    }
+
+  stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
+    sizeof(*stretch_map));
+  if (stretch_map == (PixelPacket *) NULL)
+    {
+      /* go through cleanup: CL buffers, histogram and queue are live here */
+      (void) ThrowMagickException(exception,GetMagickModule(),
+        ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
+      goto cleanup;
+    }
+
+  /*
+    Stretch the histogram to create the stretched image mapping.  Entries
+    whose channel has black == white keep the zero written by the reset.
+  */
+  (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
+  for (i=0; i <= (ssize_t) MaxMap; i++)
+  {
+    if ((channel & RedChannel) != 0)
+      {
+        if (i < (ssize_t) black.red)
+          stretch_map[i].red=(Quantum) 0;
+        else
+          if (i > (ssize_t) white.red)
+            stretch_map[i].red=QuantumRange;
+          else
+            if (black.red != white.red)
+              stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                (i-black.red)/(white.red-black.red)));
+      }
+    if ((channel & GreenChannel) != 0)
+      {
+        if (i < (ssize_t) black.green)
+          stretch_map[i].green=0;
+        else
+          if (i > (ssize_t) white.green)
+            stretch_map[i].green=QuantumRange;
+          else
+            if (black.green != white.green)
+              stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                (i-black.green)/(white.green-black.green)));
+      }
+    if ((channel & BlueChannel) != 0)
+      {
+        if (i < (ssize_t) black.blue)
+          stretch_map[i].blue=0;
+        else
+          if (i > (ssize_t) white.blue)
+            stretch_map[i].blue= QuantumRange;
+          else
+            if (black.blue != white.blue)
+              stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                (i-black.blue)/(white.blue-black.blue)));
+      }
+    if ((channel & OpacityChannel) != 0)
+      {
+        if (i < (ssize_t) black.opacity)
+          stretch_map[i].opacity=0;
+        else
+          if (i > (ssize_t) white.opacity)
+            stretch_map[i].opacity=QuantumRange;
+          else
+            if (black.opacity != white.opacity)
+              stretch_map[i].opacity=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                (i-black.opacity)/(white.opacity-black.opacity)));
+      }
+  }
+
+  /*
+    Stretch the image.
+  */
+  if (((channel & OpacityChannel) != 0) || (((channel & IndexChannel) != 0) &&
+      (image->colorspace == CMYKColorspace)))
+    image->storage_class=DirectClass;
+  if (image->storage_class == PseudoClass)
+    {
+      /*
+        Stretch colormap.
+      */
+      for (i=0; i < (ssize_t) image->colors; i++)
+      {
+        if ((channel & RedChannel) != 0)
+          {
+            if (black.red != white.red)
+              image->colormap[i].red=stretch_map[
+                ScaleQuantumToMap(image->colormap[i].red)].red;
+          }
+        if ((channel & GreenChannel) != 0)
+          {
+            if (black.green != white.green)
+              image->colormap[i].green=stretch_map[
+                ScaleQuantumToMap(image->colormap[i].green)].green;
+          }
+        if ((channel & BlueChannel) != 0)
+          {
+            if (black.blue != white.blue)
+              image->colormap[i].blue=stretch_map[
+                ScaleQuantumToMap(image->colormap[i].blue)].blue;
+          }
+        if ((channel & OpacityChannel) != 0)
+          {
+            if (black.opacity != white.opacity)
+              image->colormap[i].opacity=stretch_map[
+                ScaleQuantumToMap(image->colormap[i].opacity)].opacity;
+          }
+      }
+    }
+
+  /*
+    Stretch image.
+  */
+
+
+  /* GPU can work on this again, image and stretch map as input
+    image:        uchar4 (CLPixelPacket)
+    stretch_map:  uchar4 (PixelPacket)
+    black, white: float4 (FloatPixelPacket) */
+
+#ifdef RECREATEBUFFER
+  /* If the host pointer is aligned to the size of CLPixelPacket,
+     then use the host buffer directly from the GPU; otherwise,
+     create a buffer on the GPU and copy the data over */
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+  }
+  else
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+#endif
+
+  /* Create and initialize OpenCL buffers. */
+  hostPtr = stretch_map;
+  if (ALIGNED(stretch_map, PixelPacket))
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
+  }
+  else
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer for stretch_map */
+  length = (MaxMap+1);
+  stretchMapBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  /* get the OpenCL kernel */
+  stretchKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Stretch");
+  if (stretchKernel == NULL)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* set the kernel arguments (white is passed before black) */
+  i = 0;
+  clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&channel);
+  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
+  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* launch the kernel: one work-item per pixel */
+  global_work_size[0] = inputImage->columns;
+  global_work_size[1] = inputImage->rows;
+
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  clEnv->library->clFlush(queue);
+
+  /* read the data back */
+  length = inputImage->columns * inputImage->rows;
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+  }
+  else
+  {
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+  }
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  outputReady = MagickTrue;
+
+cleanup:
+  OpenCLLogException(__FUNCTION__,__LINE__,exception);
+
+  if (inputPixels) {
+    /*ReleasePixelCachePixels();*/
+    inputPixels = NULL;
+  }
+
+  if (inputImageBuffer!=NULL)
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
+
+  if (stretchMapBuffer!=NULL)
+    clEnv->library->clReleaseMemObject(stretchMapBuffer);
+  if (stretch_map!=NULL)
+    stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
+
+
+  if (histogramBuffer!=NULL)
+    clEnv->library->clReleaseMemObject(histogramBuffer);
+  if (histogram!=NULL)
+    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
+
+
+  if (stretchKernel!=NULL)
+    RelinquishOpenCLKernel(clEnv, stretchKernel);
+
+  if (queue != NULL)
+    RelinquishOpenCLCommandQueue(clEnv, queue);
+
+  return outputReady;
+}
+
+
+/*
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%     C o n t r a s t S t r e t c h I m a g e w i t h O p e n C L             %
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%  ContrastStretchImage() is a
simple image enhancement technique that attempts
+%  to improve the contrast in an image by `stretching' the range of intensity
+%  values it contains to span a desired range of values. It differs from the
+%  more sophisticated histogram equalization in that it can only apply a
+%  linear scaling function to the image pixel values.  As a result the
+%  `enhancement' is less harsh.
+%
+%  The format of the ContrastStretchImage method is:
+%
+%      MagickBooleanType ContrastStretchImage(Image *image,
+%        const char *levels)
+%      MagickBooleanType ContrastStretchImageChannel(Image *image,
+%        const size_t channel,const double black_point,
+%        const double white_point)
+%
+%  A description of each parameter follows:
+%
+%    o image: the image.
+%
+%    o channel: the channel.
+%
+%    o black_point: the black point.
+%
+%    o white_point: the white point.
+%
+%    o levels: Specify the levels where the black and white points have the
+%      range of 0 to number-of-pixels (e.g. 1%, 10x90%, etc.).
+%
+*/
+
+MagickExport MagickBooleanType AccelerateContrastStretchImageChannel(
+    Image * image, const ChannelType channel, const double black_point, const double white_point,
+    ExceptionInfo* exception)
+{
+  assert(image != NULL);
+  assert(exception != NULL);
+
+  /* Return MagickFalse as soon as any of the three precondition checks fails. */
+  if (checkOpenCLEnvironment(exception) == MagickFalse)
+    return MagickFalse;
+  if (checkAccelerateCondition(image, channel) == MagickFalse)
+    return MagickFalse;
+  if (checkHistogramCondition(image, channel) == MagickFalse)
+    return MagickFalse;
+
+  return ComputeContrastStretchImageChannel(image,channel, black_point, white_point, exception);
+}
+
+
+/*
+  EnqueueHullStep() sets kernel arguments 4 (offset) and 5 (polarity) on both
+  hull kernels and then enqueues hullPass1 followed by hullPass2 over the
+  given 2-D global work size.
+
+  Returns MagickTrue when both kernels were enqueued.  On the first OpenCL
+  failure an exception is raised and MagickFalse is returned; the caller
+  keeps ownership of every resource and is expected to clean up.
+*/
+static MagickBooleanType EnqueueHullStep(MagickCLEnv clEnv,
+  cl_command_queue queue,cl_kernel hullPass1,cl_kernel hullPass2,
+  const int x_offset,const int y_offset,const int polarity,
+  const size_t *global_work_size,ExceptionInfo *exception)
+{
+  cl_int clStatus;
+  cl_int2 offset;
+  int sign;
+
+  offset.s[0] = x_offset;
+  offset.s[1] = y_offset;
+  sign = polarity;
+  clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
+  clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&sign);
+  clStatus|= clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
+  clStatus|= clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&sign);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    return MagickFalse;
+  }
+  /* launch the first kernel */
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    return MagickFalse;
+  }
+  /* launch the second kernel */
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    return MagickFalse;
+  }
+  return MagickTrue;
+}
+
+/*
+  ComputeDespeckleImage() runs the "HullPass1"/"HullPass2" kernels over a copy
+  of the input image: for each of four offsets (X[k],Y[k]) the kernel pair is
+  enqueued four times, with the offset and its negation and with polarity +1
+  and -1.  The last enqueue writes into the buffer backing the cloned image.
+
+  Returns the filtered clone (owned by the caller) once its pixels have been
+  read back, or NULL on any failure; the clone is destroyed on failure.
+*/
+static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exception)
+{
+
+  MagickBooleanType outputReady = MagickFalse;
+  MagickCLEnv clEnv = NULL;
+
+  cl_int clStatus;
+  size_t global_work_size[2];
+
+  const void *inputPixels = NULL;
+  Image* filteredImage = NULL;
+  void *filteredPixels = NULL;
+  void *hostPtr;
+  MagickSizeType length;
+
+  cl_mem_flags mem_flags;
+  cl_context context = NULL;
+  cl_mem inputImageBuffer = NULL;
+  cl_mem tempImageBuffer[2];
+  cl_mem filteredImageBuffer = NULL;
+  cl_command_queue queue = NULL;
+  cl_kernel hullPass1 = NULL;
+  cl_kernel hullPass2 = NULL;
+
+  unsigned int imageWidth, imageHeight;
+  int matte;
+  int k;
+
+  static const int
+    X[4] = {0, 1, 1,-1},
+    Y[4] = {1, 0, 1, 1};
+
+  tempImageBuffer[0] = tempImageBuffer[1] = NULL;
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+  queue = AcquireOpenCLCommandQueue(clEnv);
+
+  inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
+  if (inputPixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
+    goto cleanup;
+  }
+
+  /* use the host pixels directly when aligned, otherwise copy them */
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
+  }
+  else
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  /* two device-only scratch buffers, each the size of the image */
+  mem_flags = CL_MEM_READ_WRITE;
+  length = inputImage->columns * inputImage->rows;
+  for (k = 0; k < 2; k++)
+  {
+    tempImageBuffer[k] = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
+    if (clStatus != CL_SUCCESS)
+    {
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+      goto cleanup;
+    }
+  }
+
+  filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
+  if (filteredImage == NULL)
+  {
+    /* checked explicitly: an assert() vanishes in NDEBUG builds */
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
+  if (filteredPixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
+    goto cleanup;
+  }
+
+  if (ALIGNED(filteredPixels,CLPixelPacket))
+  {
+    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
+    hostPtr = filteredPixels;
+  }
+  else
+  {
+    mem_flags = CL_MEM_WRITE_ONLY;
+    hostPtr = NULL;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
+  hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
+  if ((hullPass1 == NULL) || (hullPass2 == NULL))
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  imageWidth = inputImage->columns;
+  imageHeight = inputImage->rows;
+  matte = (inputImage->matte==MagickFalse)?0:1;
+
+  /* hullPass1: argument 0 is the input image, argument 1 is scratch buffer 1 */
+  clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
+  clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* hullPass2: argument 0 is scratch buffer 1, argument 1 is scratch buffer 0 */
+  clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
+  clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+
+  global_work_size[0] = inputImage->columns;
+  global_work_size[1] = inputImage->rows;
+
+
+  for (k = 0; k < 4; k++)
+  {
+    if (EnqueueHullStep(clEnv,queue,hullPass1,hullPass2,X[k],Y[k],1,
+          global_work_size,exception) == MagickFalse)
+      goto cleanup;
+
+    if (k == 0)
+    {
+      /* after the very first step hullPass1 takes scratch buffer 0 as
+         argument 0 instead of the input image; the status of this call is
+         now checked rather than overwritten */
+      clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
+      if (clStatus != CL_SUCCESS)
+      {
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+        goto cleanup;
+      }
+    }
+    if (EnqueueHullStep(clEnv,queue,hullPass1,hullPass2,-X[k],-Y[k],1,
+          global_work_size,exception) == MagickFalse)
+      goto cleanup;
+
+    if (EnqueueHullStep(clEnv,queue,hullPass1,hullPass2,-X[k],-Y[k],-1,
+          global_work_size,exception) == MagickFalse)
+      goto cleanup;
+
+    if (k == 3)
+    {
+      /* the final step gets the filtered-image buffer as hullPass2 argument 1 */
+      clStatus =clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
+      if (clStatus != CL_SUCCESS)
+      {
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+        goto cleanup;
+      }
+    }
+    if (EnqueueHullStep(clEnv,queue,hullPass1,hullPass2,X[k],Y[k],-1,
+          global_work_size,exception) == MagickFalse)
+      goto cleanup;
+  }
+
+  /* blocking map/read: returns once the filtered pixels are on the host */
+  length = inputImage->columns * inputImage->rows;
+  if (ALIGNED(filteredPixels,CLPixelPacket))
+  {
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+  }
+  else
+  {
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+  }
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  outputReady = MagickTrue;
+
+cleanup:
+  OpenCLLogException(__FUNCTION__,__LINE__,exception);
+
+  if (queue != NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
+  if (inputImageBuffer!=NULL)         clEnv->library->clReleaseMemObject(inputImageBuffer);
+  for (k = 0; k < 2; k++)
+  {
+    if (tempImageBuffer[k]!=NULL)     clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
+  }
+  if (filteredImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (hullPass1!=NULL)                RelinquishOpenCLKernel(clEnv, hullPass1);
+  if (hullPass2!=NULL)                RelinquishOpenCLKernel(clEnv, hullPass2);
+  if (outputReady == MagickFalse)
+  {
+    /* never hand a partially processed clone back to the caller */
+    if (filteredImage != NULL)
+    {
+      DestroyImage(filteredImage);
+      filteredImage = NULL;
+    }
+  }
+  return filteredImage;
+}
+
+/*
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%     D e s p e c k l e I m a g e w i t h O p e n C L                         %
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%  DespeckleImage() reduces the speckle noise in an image while preserving the
+%  edges of the original image.  A speckle removing filter uses a complementary
+%  hulling technique (raising pixels that are darker than their surrounding
+%  neighbors, then complementarily lowering pixels that are brighter than their
+%  surrounding neighbors) to reduce the speckle index of that image (reference
+%  Crimmins speckle removal).
+%
+%  The format of the DespeckleImage method is:
+%
+%      Image *DespeckleImage(const Image *image,ExceptionInfo *exception)
+%
+%  A description of each parameter follows:
+%
+%    o image: the image.
+%
+%    o exception: return any errors or warnings in this structure.
+% +*/ + +MagickExport +Image* AccelerateDespeckleImage(const Image* image, ExceptionInfo* exception) +{ + MagickBooleanType status; + Image* newImage = NULL; + + assert(image != NULL); + assert(exception != NULL); + + status = checkOpenCLEnvironment(exception); + if (status == MagickFalse) + return NULL; + + status = checkAccelerateCondition(image, AllChannels); + if (status == MagickFalse) + return NULL; + + newImage = ComputeDespeckleImage(image,exception); + return newImage; +} + +static Image* ComputeAddNoiseImage(const Image* inputImage, + const ChannelType channel, const NoiseType noise_type, + ExceptionInfo *exception) +{ + MagickBooleanType outputReady = MagickFalse; + MagickCLEnv clEnv = NULL; + + cl_int clStatus; + size_t global_work_size[2]; + + const void *inputPixels = NULL; + Image* filteredImage = NULL; + void *filteredPixels = NULL; + void *hostPtr; + unsigned int inputColumns, inputRows; + float attenuate; + float *randomNumberBufferPtr = NULL; + MagickSizeType length; + unsigned int numRandomNumberPerPixel; + unsigned int numRowsPerKernelLaunch; + unsigned int numRandomNumberPerBuffer; + unsigned int r; + unsigned int k; + int i; + + RandomInfo **restrict random_info; + const char *option; +#if defined(MAGICKCORE_OPENMP_SUPPORT) + unsigned long key; +#endif + + cl_mem_flags mem_flags; + cl_context context = NULL; + cl_mem inputImageBuffer = NULL; + cl_mem randomNumberBuffer = NULL; + cl_mem filteredImageBuffer = NULL; + cl_command_queue queue = NULL; + cl_kernel addNoiseKernel = NULL; + + + clEnv = GetDefaultOpenCLEnv(); + context = GetOpenCLContext(clEnv); + queue = AcquireOpenCLCommandQueue(clEnv); + + inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); + if (inputPixels == (void *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); + goto cleanup; + } + + if (ALIGNED(inputPixels,CLPixelPacket)) + { + mem_flags = 
CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; + } + else + { + mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; + } + /* create a CL buffer from image pixel buffer */ + length = inputImage->columns * inputImage->rows; + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; + } + + + filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); + assert(filteredImage != NULL); + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); + goto cleanup; + } + filteredPixels = GetPixelCachePixels(filteredImage, &length, exception); + if (filteredPixels == (void *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); + goto cleanup; + } + + if (ALIGNED(filteredPixels,CLPixelPacket)) + { + mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; + hostPtr = filteredPixels; + } + else + { + mem_flags = CL_MEM_WRITE_ONLY; + hostPtr = NULL; + } + /* create a CL buffer from image pixel buffer */ + length = inputImage->columns * inputImage->rows; + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; + } + + /* find out how many random numbers needed by pixel */ + numRandomNumberPerPixel = 0; + { + unsigned int numRandPerChannel = 0; + switch (noise_type) + { + case UniformNoise: + case ImpulseNoise: + case 
LaplacianNoise: + case RandomNoise: + default: + numRandPerChannel = 1; + break; + case GaussianNoise: + case MultiplicativeGaussianNoise: + case PoissonNoise: + numRandPerChannel = 2; + break; + }; + + if ((channel & RedChannel) != 0) + numRandomNumberPerPixel+=numRandPerChannel; + if ((channel & GreenChannel) != 0) + numRandomNumberPerPixel+=numRandPerChannel; + if ((channel & BlueChannel) != 0) + numRandomNumberPerPixel+=numRandPerChannel; + if ((channel & OpacityChannel) != 0) + numRandomNumberPerPixel+=numRandPerChannel; + } + + numRowsPerKernelLaunch = 512; + /* create a buffer for random numbers */ + numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel; + randomNumberBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, numRandomNumberPerBuffer*sizeof(float) + , NULL, &clStatus); + + + /* set up the random number generators */ + attenuate=1.0; + option=GetImageArtifact(inputImage,"attenuate"); + if (option != (char *) NULL) + attenuate=StringToDouble(option,(char **) NULL); + random_info=AcquireRandomInfoThreadSet(); +#if defined(MAGICKCORE_OPENMP_SUPPORT) + key=GetRandomSecretKey(random_info[0]); +#endif + + addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage"); + + k = 0; + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer); + inputColumns = inputImage->columns; + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns); + inputRows = inputImage->rows; + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type); + attenuate=1.0f; + 
option=GetImageArtifact(inputImage,"attenuate"); + if (option != (char *) NULL) + attenuate=(float)StringToDouble(option,(char **) NULL); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel); + + global_work_size[0] = inputColumns; + for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch) + { + /* Generate random numbers in the buffer */ + randomNumberBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, randomNumberBuffer, CL_TRUE, CL_MAP_WRITE, 0 + , numRandomNumberPerBuffer*sizeof(float), 0, NULL, NULL, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); + goto cleanup; + } + +#if defined(MAGICKCORE_OPENMP_SUPPORT) + #pragma omp parallel for schedule(static,4) \ + num_threads((key == ~0UL) == 0 ? 
1 : (size_t) GetMagickResourceLimit(ThreadResource)) +#endif + for (i = 0; i < numRandomNumberPerBuffer; i++) + { + const int id = GetOpenMPThreadId(); + randomNumberBufferPtr[i] = (float)GetPseudoRandomValue(random_info[id]); + } + + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, randomNumberBuffer, randomNumberBufferPtr, 0, NULL, NULL); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.","."); + goto cleanup; + } + + /* set the row offset */ + clEnv->library->clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r); + global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r); + clEnv->library->clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL); + } + + if (ALIGNED(filteredPixels,CLPixelPacket)) + { + length = inputImage->columns * inputImage->rows; + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + } + else + { + length = inputImage->columns * inputImage->rows; + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + } + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + goto cleanup; + } + + outputReady = MagickTrue; + +cleanup: + OpenCLLogException(__FUNCTION__,__LINE__,exception); + + if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue); + if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (randomNumberBuffer!=NULL) clEnv->library->clReleaseMemObject(randomNumberBuffer); + if (filteredImageBuffer!=NULL) 
clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (outputReady == MagickFalse + && filteredImage != NULL) + { + DestroyImage(filteredImage); + filteredImage = NULL; + } + return filteredImage; +} + + +static Image* ComputeAddNoiseImageOptRandomNum(const Image* inputImage, + const ChannelType channel, const NoiseType noise_type, + ExceptionInfo *exception) +{ + MagickBooleanType outputReady = MagickFalse; + MagickCLEnv clEnv = NULL; + + cl_int clStatus; + size_t global_work_size[2]; + size_t random_work_size; + + const void *inputPixels = NULL; + Image* filteredImage = NULL; + void *filteredPixels = NULL; + void *hostPtr; + unsigned int inputColumns, inputRows; + float attenuate; + MagickSizeType length; + unsigned int numRandomNumberPerPixel; + unsigned int numRowsPerKernelLaunch; + unsigned int numRandomNumberPerBuffer; + unsigned int numRandomNumberGenerators; + unsigned int initRandom; + float fNormalize; + unsigned int r; + unsigned int k; + int i; + const char *option; + + cl_mem_flags mem_flags; + cl_context context = NULL; + cl_mem inputImageBuffer = NULL; + cl_mem randomNumberBuffer = NULL; + cl_mem filteredImageBuffer = NULL; + cl_mem randomNumberSeedsBuffer = NULL; + cl_command_queue queue = NULL; + cl_kernel addNoiseKernel = NULL; + cl_kernel randomNumberGeneratorKernel = NULL; + + + clEnv = GetDefaultOpenCLEnv(); + context = GetOpenCLContext(clEnv); + queue = AcquireOpenCLCommandQueue(clEnv); + + inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); + if (inputPixels == (void *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); + goto cleanup; + } + + if (ALIGNED(inputPixels,CLPixelPacket)) + { + mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; + } + else + { + mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; + } + /* create a CL buffer from image pixel buffer */ + length = inputImage->columns * inputImage->rows; + 
inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; + } + + + filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); + assert(filteredImage != NULL); + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); + goto cleanup; + } + filteredPixels = GetPixelCachePixels(filteredImage, &length, exception); + if (filteredPixels == (void *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); + goto cleanup; + } + + if (ALIGNED(filteredPixels,CLPixelPacket)) + { + mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; + hostPtr = filteredPixels; + } + else + { + mem_flags = CL_MEM_WRITE_ONLY; + hostPtr = NULL; + } + /* create a CL buffer from image pixel buffer */ + length = inputImage->columns * inputImage->rows; + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; + } + + /* find out how many random numbers needed by pixel */ + numRandomNumberPerPixel = 0; + { + unsigned int numRandPerChannel = 0; + switch (noise_type) + { + case UniformNoise: + case ImpulseNoise: + case LaplacianNoise: + case RandomNoise: + default: + numRandPerChannel = 1; + break; + case GaussianNoise: + case MultiplicativeGaussianNoise: + case PoissonNoise: + numRandPerChannel = 2; + break; + }; + + if ((channel & 
RedChannel) != 0) + numRandomNumberPerPixel+=numRandPerChannel; + if ((channel & GreenChannel) != 0) + numRandomNumberPerPixel+=numRandPerChannel; + if ((channel & BlueChannel) != 0) + numRandomNumberPerPixel+=numRandPerChannel; + if ((channel & OpacityChannel) != 0) + numRandomNumberPerPixel+=numRandPerChannel; + } + + numRowsPerKernelLaunch = 512; + + /* create a buffer for random numbers */ + numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel; + randomNumberBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, numRandomNumberPerBuffer*sizeof(float) + , NULL, &clStatus); + + { + /* setup the random number generators */ + unsigned long* seeds; + numRandomNumberGenerators = 512; + randomNumberSeedsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_WRITE + , numRandomNumberGenerators * 4 * sizeof(unsigned long), NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + seeds = (unsigned long*) clEnv->library->clEnqueueMapBuffer(queue, randomNumberSeedsBuffer, CL_TRUE, CL_MAP_WRITE, 0 + , numRandomNumberGenerators*4*sizeof(unsigned long), 0, NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), 
ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.","."); goto cleanup; - } + } - offset.s[0] = -X[k]; - offset.s[1] = -Y[k]; - polarity = -1; - clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset); - clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity); - clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset); - clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); - goto cleanup; + for (i = 0; i < numRandomNumberGenerators; i++) { + RandomInfo* randomInfo = AcquireRandomInfo(); + const unsigned long* s = GetRandomInfoSeed(randomInfo); + + if (i == 0) + fNormalize = GetRandomInfoNormalize(randomInfo); + + seeds[i*4] = s[0]; + randomInfo = DestroyRandomInfo(randomInfo); } - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, randomNumberSeedsBuffer, seeds, 0, NULL, NULL); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.","."); goto cleanup; - } + } - offset.s[0] = X[k]; - offset.s[1] = Y[k]; - 
polarity = -1; - clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset); - clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity); - clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset); - clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity); + randomNumberGeneratorKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE + ,"randomNumberGeneratorKernel"); + + k = 0; + clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberSeedsBuffer); + clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(float),(void *)&fNormalize); + clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer); + initRandom = 1; + clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&initRandom); + clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerBuffer); - if (k == 3) - clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer); + random_work_size = numRandomNumberGenerators; + } - if (clStatus != CL_SUCCESS) + addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage"); + k = 0; + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer); + inputColumns = inputImage->columns; + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns); + inputRows = inputImage->rows; + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type); + attenuate=1.0f; + option=GetImageArtifact(inputImage,"attenuate"); + if 
(option != (char *) NULL) + attenuate=(float)StringToDouble(option,(char **) NULL); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer); + clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel); + + global_work_size[0] = inputColumns; + for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch) + { + size_t generator_local_size = 64; + /* Generate random numbers in the buffer */ + clEnv->library->clEnqueueNDRangeKernel(queue,randomNumberGeneratorKernel,1,NULL + ,&random_work_size,&generator_local_size,0,NULL,NULL); + if (initRandom != 0) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", "."); - goto cleanup; + /* make sure we only do init once */ + initRandom = 0; + clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,3,sizeof(unsigned int),(void *)&initRandom); } - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } - /* launch the kernel */ - clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", "."); - goto cleanup; - } + + /* set the row offset */ + clEnv->library->clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r); + global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r); + clEnv->library->clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL); } if 
(ALIGNED(filteredPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { @@ -4243,128 +5890,281 @@ static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exce cleanup: OpenCLLogException(__FUNCTION__,__LINE__,exception); - if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - for (k = 0; k < 2; k++) - { - if (tempImageBuffer[k]!=NULL) clReleaseMemObject(tempImageBuffer[k]); - } - if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (hullPass1!=NULL) RelinquishOpenCLKernel(clEnv, hullPass1); - if (hullPass2!=NULL) RelinquishOpenCLKernel(clEnv, hullPass2); - if (outputReady == MagickFalse) + if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue); + if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel); + if (randomNumberGeneratorKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomNumberGeneratorKernel); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (randomNumberBuffer!=NULL) clEnv->library->clReleaseMemObject(randomNumberBuffer); + if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (randomNumberSeedsBuffer!=NULL) 
clEnv->library->clReleaseMemObject(randomNumberSeedsBuffer); + if (outputReady == MagickFalse + && filteredImage != NULL) { - if (filteredImage != NULL) - { DestroyImage(filteredImage); filteredImage = NULL; - } } return filteredImage; } -/* -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% % -% % -% % -% D e s p e c k l e I m a g e w i t h O p e n C L % -% % -% % -% % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% DespeckleImage() reduces the speckle noise in an image while perserving the -% edges of the original image. A speckle removing filter uses a complementary -% hulling technique (raising pixels that are darker than their surrounding -% neighbors, then complementarily lowering pixels that are brighter than their -% surrounding neighbors) to reduce the speckle index of that image (reference -% Crimmins speckle removal). -% -% The format of the DespeckleImage method is: -% -% Image *DespeckleImage(const Image *image,ExceptionInfo *exception) -% -% A description of each parameter follows: -% -% o image: the image. -% -% o exception: return any errors or warnings in this structure. 
-% -*/ -MagickExport -Image* AccelerateDespeckleImage(const Image* image, ExceptionInfo* exception) + +MagickExport +Image* AccelerateAddNoiseImage(const Image *image, const ChannelType channel, + const NoiseType noise_type,ExceptionInfo *exception) { MagickBooleanType status; - Image* newImage = NULL; + Image* filteredImage = NULL; assert(image != NULL); assert(exception != NULL); status = checkOpenCLEnvironment(exception); - if (status == MagickFalse) - return NULL; + if (status == MagickFalse) + return NULL; + + status = checkAccelerateCondition(image, channel); + if (status == MagickFalse) + return NULL; + +DisableMSCWarning(4127) + if (sizeof(unsigned long) == 4) +RestoreMSCWarning + filteredImage = ComputeAddNoiseImageOptRandomNum(image,channel,noise_type,exception); + else + filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception); + + return filteredImage; +} + +static MagickBooleanType LaunchRandomImageKernel(MagickCLEnv clEnv, + cl_command_queue queue, + cl_mem inputImageBuffer, + const unsigned int imageColumns, + const unsigned int imageRows, + cl_mem seedBuffer, + const unsigned int numGenerators, + ExceptionInfo *exception) +{ + MagickBooleanType status = MagickFalse; + size_t global_work_size; + size_t local_work_size; + int k; + + cl_int clStatus; + cl_kernel randomImageKernel = NULL; + + randomImageKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RandomImage"); + + k = 0; + clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_mem),(void*)&inputImageBuffer); + clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&imageColumns); + clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&imageRows); + clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_mem),(void*)&seedBuffer); + { + const float randNormNumerator = 1.0f; + const unsigned int randNormDenominator = (unsigned int)(~0UL); + clEnv->library->clSetKernelArg(randomImageKernel,k++, + 
sizeof(float),(void*)&randNormNumerator); + clEnv->library->clSetKernelArg(randomImageKernel,k++, + sizeof(cl_uint),(void*)&randNormDenominator); + } + + + global_work_size = numGenerators; + local_work_size = 64; + + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue,randomImageKernel,1,NULL,&global_work_size, + &local_work_size,0,NULL,NULL); + + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, + "clEnv->library->clSetKernelArg failed.", "'%s'", "."); + goto cleanup; + } + status = MagickTrue; + +cleanup: + if (randomImageKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomImageKernel); + return status; +} + +static MagickBooleanType ComputeRandomImage(Image* inputImage, + ExceptionInfo* exception) +{ + MagickBooleanType status = MagickFalse; + + MagickBooleanType outputReady = MagickFalse; + MagickCLEnv clEnv = NULL; + + cl_int clStatus; + + void *inputPixels = NULL; + MagickSizeType length; + + cl_mem_flags mem_flags; + cl_context context = NULL; + cl_mem inputImageBuffer = NULL; + cl_command_queue queue = NULL; + + /* Don't release this buffer in this function !!! */ + cl_mem randomNumberSeedsBuffer; + + clEnv = GetDefaultOpenCLEnv(); + context = GetOpenCLContext(clEnv); + + /* Create and initialize OpenCL buffers. 
*/ + inputPixels = GetPixelCachePixels(inputImage, &length, exception); + if (inputPixels == (void *) NULL) + { + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); + goto cleanup; + } + + /* If the host pointer is aligned to the size of CLPixelPacket, + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over */ + if (ALIGNED(inputPixels,CLPixelPacket)) + { + mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; + } + else + { + mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; + } + /* create a CL buffer from image pixel buffer */ + length = inputImage->columns * inputImage->rows; + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; + } + + queue = AcquireOpenCLCommandQueue(clEnv); + + randomNumberSeedsBuffer = GetAndLockRandSeedBuffer(clEnv); + if (randomNumberSeedsBuffer==NULL) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), + ResourceLimitWarning, "Failed to get GPU random number generators.", + "'%s'", "."); + goto cleanup; + } + + status = LaunchRandomImageKernel(clEnv,queue, + inputImageBuffer, + inputImage->columns, + inputImage->rows, + randomNumberSeedsBuffer, + GetNumRandGenerators(clEnv), + exception); + if (status==MagickFalse) + { + goto cleanup; + } + + if (ALIGNED(inputPixels,CLPixelPacket)) + { + length = inputImage->columns * inputImage->rows; + clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + } + else + { + length = inputImage->columns * inputImage->rows; + clStatus = clEnv->library->clEnqueueReadBuffer(queue, 
inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); + } + if (clStatus != CL_SUCCESS) + { + (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + goto cleanup; + } + outputReady = MagickTrue; + +cleanup: + OpenCLLogException(__FUNCTION__,__LINE__,exception); + + UnlockRandSeedBuffer(clEnv); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); + return outputReady; +} + +MagickExport MagickBooleanType AccelerateRandomImage(Image* image, ExceptionInfo* exception) +{ + MagickBooleanType status = MagickFalse; + + status = checkOpenCLEnvironment(exception); + if (status==MagickFalse) + return status; status = checkAccelerateCondition(image, AllChannels); - if (status == MagickFalse) - return NULL; + if (status==MagickFalse) + return status; - newImage = ComputeDespeckleImage(image,exception); - return newImage; + status = ComputeRandomImage(image,exception); + return status; } -static Image* ComputeAddNoiseImage(const Image* inputImage, - const ChannelType channel, const NoiseType noise_type, - ExceptionInfo *exception) +static Image* ComputeMotionBlurImage(const Image *inputImage, + const ChannelType channel, const double *kernel, const size_t width, + const OffsetInfo *offset, ExceptionInfo *exception) { - MagickBooleanType outputReady = MagickFalse; - MagickCLEnv clEnv = NULL; + MagickBooleanType outputReady; + Image* filteredImage; + MagickCLEnv clEnv; cl_int clStatus; size_t global_work_size[2]; + size_t local_work_size[2]; - const void *inputPixels = NULL; - Image* filteredImage = NULL; - void *filteredPixels = NULL; - void *hostPtr; - unsigned int inputColumns, inputRows; - float attenuate; - float *randomNumberBufferPtr = NULL; + cl_context context; + cl_mem_flags mem_flags; + cl_mem inputImageBuffer, filteredImageBuffer, 
imageKernelBuffer, + offsetBuffer; + cl_kernel motionBlurKernel; + cl_command_queue queue; + + const void *inputPixels; + void *filteredPixels; + void* hostPtr; + float* kernelBufferPtr; + int* offsetBufferPtr; MagickSizeType length; - unsigned int numRandomNumberPerPixel; - unsigned int numRowsPerKernelLaunch; - unsigned int numRandomNumberPerBuffer; - unsigned int r; - unsigned int k; - int i; + unsigned int matte; + MagickPixelPacket bias; + cl_float4 biasPixel; + unsigned int imageWidth, imageHeight; - RandomInfo **restrict random_info; - const char *option; -#if defined(MAGICKCORE_OPENMP_SUPPORT) - unsigned long key; -#endif + unsigned int i; - cl_mem_flags mem_flags; - cl_context context = NULL; - cl_mem inputImageBuffer = NULL; - cl_mem randomNumberBuffer = NULL; - cl_mem filteredImageBuffer = NULL; - cl_command_queue queue = NULL; - cl_kernel addNoiseKernel = NULL; + outputReady = MagickFalse; + context = NULL; + filteredImage = NULL; + inputImageBuffer = NULL; + filteredImageBuffer = NULL; + imageKernelBuffer = NULL; + motionBlurKernel = NULL; + queue = NULL; clEnv = GetDefaultOpenCLEnv(); context = GetOpenCLContext(clEnv); - queue = AcquireOpenCLCommandQueue(clEnv); - + + /* Create and initialize OpenCL buffers. 
*/ + + inputPixels = NULL; inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); - if (inputPixels == (void *) NULL) + if (inputPixels == (const void *) NULL) { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); + (void) ThrowMagickException(exception,GetMagickModule(),CacheError, + "UnableToReadPixelCache.","`%s'",inputImage->filename); goto cleanup; } + // If the host pointer is aligned to the size of CLPixelPacket, + // then use the host buffer directly from the GPU; otherwise, + // create a buffer on the GPU and copy the data over if (ALIGNED(inputPixels,CLPixelPacket)) { mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; @@ -4373,27 +6173,32 @@ static Image* ComputeAddNoiseImage(const Image* inputImage, { mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; } - /* create a CL buffer from image pixel buffer */ + // create a CL buffer from image pixel buffer length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, + length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) ThrowMagickException(exception, GetMagickModule(), + ResourceLimitError, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); + filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows, + MagickTrue,exception); assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) + if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue) { - (void) 
OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); + (void) ThrowMagickException(exception, GetMagickModule(), + ResourceLimitError, "CloneImage failed.", "'%s'", "."); goto cleanup; } filteredPixels = GetPixelCachePixels(filteredImage, &length, exception); if (filteredPixels == (void *) NULL) { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); + (void) ThrowMagickException(exception,GetMagickModule(),CacheError, + "UnableToReadPixelCache.","`%s'",filteredImage->filename); goto cleanup; } @@ -4407,448 +6212,450 @@ static Image* ComputeAddNoiseImage(const Image* inputImage, mem_flags = CL_MEM_WRITE_ONLY; hostPtr = NULL; } - /* create a CL buffer from image pixel buffer */ + // create a CL buffer from image pixel buffer length = inputImage->columns * inputImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, + length * sizeof(CLPixelPacket), hostPtr, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) ThrowMagickException(exception, GetMagickModule(), + ResourceLimitError, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - /* find out how many random numbers needed by pixel */ - numRandomNumberPerPixel = 0; - { - unsigned int numRandPerChannel = 0; - switch (noise_type) - { - case UniformNoise: - case ImpulseNoise: - case LaplacianNoise: - case RandomNoise: - default: - numRandPerChannel = 1; - break; - case GaussianNoise: - case MultiplicativeGaussianNoise: - case PoissonNoise: - numRandPerChannel = 2; - break; - }; - if ((channel & RedChannel) != 0) - numRandomNumberPerPixel+=numRandPerChannel; - if ((channel & GreenChannel) != 0) - 
numRandomNumberPerPixel+=numRandPerChannel; - if ((channel & BlueChannel) != 0) - numRandomNumberPerPixel+=numRandPerChannel; - if ((channel & OpacityChannel) != 0) - numRandomNumberPerPixel+=numRandPerChannel; + imageKernelBuffer = clEnv->library->clCreateBuffer(context, + CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL, + &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) ThrowMagickException(exception, GetMagickModule(), + ResourceLimitError, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; } - numRowsPerKernelLaunch = 512; - /* create a buffer for random numbers */ - numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel; - randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, numRandomNumberPerBuffer*sizeof(float) - , NULL, &clStatus); - + queue = AcquireOpenCLCommandQueue(clEnv); + kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, + CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) ThrowMagickException(exception, GetMagickModule(), + ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.","."); + goto cleanup; + } + for (i = 0; i < width; i++) + { + kernelBufferPtr[i] = (float) kernel[i]; + } + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, + 0, NULL, NULL); + if (clStatus != CL_SUCCESS) + { + (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, + "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); + goto cleanup; + } - /* set up the random number generators */ - attenuate=1.0; - option=GetImageArtifact(inputImage,"attenuate"); - if (option != (char *) NULL) - attenuate=StringToDouble(option,(char **) NULL); - random_info=AcquireRandomInfoThreadSet(); -#if defined(MAGICKCORE_OPENMP_SUPPORT) - key=GetRandomSecretKey(random_info[0]); -#endif + offsetBuffer = 
clEnv->library->clCreateBuffer(context, + CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL, + &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) ThrowMagickException(exception, GetMagickModule(), + ResourceLimitError, "clEnv->library->clCreateBuffer failed.","."); + goto cleanup; + } - addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage"); + offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE, + CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus); + if (clStatus != CL_SUCCESS) + { + (void) ThrowMagickException(exception, GetMagickModule(), + ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.","."); + goto cleanup; + } + for (i = 0; i < width; i++) + { + offsetBufferPtr[2*i] = (int)offset[i].x; + offsetBufferPtr[2*i+1] = (int)offset[i].y; + } + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0, + NULL, NULL); + if (clStatus != CL_SUCCESS) + { + (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, + "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", "."); + goto cleanup; + } - k = 0; - clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer); - clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer); - inputColumns = inputImage->columns; - clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns); - inputRows = inputImage->rows; - clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows); - clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel); - clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type); - attenuate=1.0f; - option=GetImageArtifact(inputImage,"attenuate"); - if (option != (char *) NULL) - attenuate=(float)StringToDouble(option,(char **) NULL); - clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate); - 
clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer); - clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel); - global_work_size[0] = inputColumns; - for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch) + // get the OpenCL kernel + motionBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, + "MotionBlur"); + if (motionBlurKernel == NULL) { - /* Generate random numbers in the buffer */ - randomNumberBufferPtr = (float*)clEnqueueMapBuffer(queue, randomNumberBuffer, CL_TRUE, CL_MAP_WRITE, 0 - , numRandomNumberPerBuffer*sizeof(float), 0, NULL, NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); - goto cleanup; - } + (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, + "AcquireOpenCLKernel failed.", "'%s'", "."); + goto cleanup; + } + + // set the kernel arguments + i = 0; + clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem), + (void *)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem), + (void *)&filteredImageBuffer); + imageWidth = inputImage->columns; + imageHeight = inputImage->rows; + clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), + &imageWidth); + clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), + &imageHeight); + clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem), + (void *)&imageKernelBuffer); + clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), + &width); + clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem), + (void *)&offsetBuffer); + + GetMagickPixelPacket(inputImage,&bias); + biasPixel.s[0] = bias.red; + biasPixel.s[1] = bias.green; + biasPixel.s[2] = bias.blue; + biasPixel.s[3] = bias.opacity; + 
clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel); -#if defined(MAGICKCORE_OPENMP_SUPPORT) - #pragma omp parallel for schedule(static,4) \ - num_threads((key == ~0UL) == 0 ? 1 : (size_t) GetMagickResourceLimit(ThreadResource)) -#endif - for (i = 0; i < numRandomNumberPerBuffer; i++) - { - const int id = GetOpenMPThreadId(); - randomNumberBufferPtr[i] = (float)GetPseudoRandomValue(random_info[id]); - } + clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &channel); + matte = (inputImage->matte == MagickTrue)?1:0; + clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte); + if (clStatus != CL_SUCCESS) + { + (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, + "clEnv->library->clSetKernelArg failed.", "'%s'", "."); + goto cleanup; + } - clStatus = clEnqueueUnmapMemObject(queue, randomNumberBuffer, randomNumberBufferPtr, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.","."); - goto cleanup; - } + // launch the kernel + local_work_size[0] = 16; + local_work_size[1] = 16; + global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize( + inputImage->columns,local_work_size[0]); + global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize( + inputImage->rows,local_work_size[1]); + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL, + global_work_size, local_work_size, 0, NULL, NULL); - /* set the row offset */ - clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r); - global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r); - clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL); + if (clStatus != CL_SUCCESS) + { + (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, + 
"clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", "."); + goto cleanup; } + clEnv->library->clFlush(queue); if (ALIGNED(filteredPixels,CLPixelPacket)) { length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); + clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, + CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, + NULL, &clStatus); } else { length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, + length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); } if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); + (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, + "Reading output image from CL buffer failed.", "'%s'", "."); goto cleanup; } - outputReady = MagickTrue; cleanup: - OpenCLLogException(__FUNCTION__,__LINE__,exception); - if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer); - if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (outputReady == MagickFalse - && filteredImage != NULL) + if (filteredImageBuffer!=NULL) clEnv->library->clReleaseMemObject(filteredImageBuffer); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (imageKernelBuffer!=NULL) clEnv->library->clReleaseMemObject(imageKernelBuffer); + if 
(motionBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, motionBlurKernel); + if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); + if (outputReady == MagickFalse) { + if (filteredImage != NULL) + { DestroyImage(filteredImage); filteredImage = NULL; + } } + return filteredImage; } -static Image* ComputeAddNoiseImageOptRandomNum(const Image* inputImage, - const ChannelType channel, const NoiseType noise_type, - ExceptionInfo *exception) +MagickExport +Image* AccelerateMotionBlurImage(const Image *image, const ChannelType channel, + const double* kernel, const size_t width, const OffsetInfo *offset, + ExceptionInfo *exception) { - MagickBooleanType outputReady = MagickFalse; - MagickCLEnv clEnv = NULL; + MagickBooleanType status; + Image* filteredImage = NULL; - cl_int clStatus; + assert(image != NULL); + assert(kernel != (double *) NULL); + assert(offset != (OffsetInfo *) NULL); + assert(exception != (ExceptionInfo *) NULL); + + status = checkOpenCLEnvironment(exception); + if (status == MagickFalse) + return NULL; + + status = checkAccelerateCondition(image, channel); + if (status == MagickFalse) + return NULL; + + filteredImage = ComputeMotionBlurImage(image, channel, kernel, width, + offset, exception); + return filteredImage; + +} + + +static MagickBooleanType LaunchCompositeKernel(MagickCLEnv clEnv, + cl_command_queue queue, + cl_mem inputImageBuffer, + const unsigned int inputWidth, const unsigned int inputHeight, + const unsigned int matte, + const ChannelType channel,const CompositeOperator compose, + const cl_mem compositeImageBuffer, + const unsigned int compositeWidth, + const unsigned int compositeHeight, + const float destination_dissolve,const float source_dissolve, + ExceptionInfo *magick_unused(exception)) +{ size_t global_work_size[2]; - size_t random_work_size; + size_t local_work_size[2]; + unsigned int composeOp; + int k; + + cl_int clStatus; + cl_kernel compositeKernel = NULL; + + magick_unreferenced(exception); + + compositeKernel = 
AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, + "Composite"); + + k = 0; + clStatus=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&inputImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputWidth); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputHeight); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&compositeImageBuffer); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeWidth); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeHeight); + composeOp = (unsigned int)compose; + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&composeOp); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(ChannelType),(void*)&channel); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&matte); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&destination_dissolve); + clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&source_dissolve); + + if (clStatus!=CL_SUCCESS) + return MagickFalse; + + local_work_size[0] = 64; + local_work_size[1] = 1; + + global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth, + local_work_size[0]); + global_work_size[1] = inputHeight; + clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL, + global_work_size, local_work_size, 0, NULL, NULL); + + + RelinquishOpenCLKernel(clEnv, compositeKernel); + + return (clStatus==CL_SUCCESS)?MagickTrue:MagickFalse; +} + + +static MagickBooleanType ComputeCompositeImage(Image *inputImage, + const ChannelType channel,const CompositeOperator compose, + const Image *compositeImage,const ssize_t magick_unused(x_offset),const ssize_t 
magick_unused(y_offset), + const float destination_dissolve,const float source_dissolve, + ExceptionInfo *exception) +{ + MagickBooleanType status = MagickFalse; + + MagickBooleanType outputReady = MagickFalse; + MagickCLEnv clEnv = NULL; - const void *inputPixels = NULL; - Image* filteredImage = NULL; - void *filteredPixels = NULL; - void *hostPtr; - unsigned int inputColumns, inputRows; - float attenuate; + cl_int clStatus; + + void *inputPixels = NULL; + const void *composePixels = NULL; MagickSizeType length; - unsigned int numRandomNumberPerPixel; - unsigned int numRowsPerKernelLaunch; - unsigned int numRandomNumberPerBuffer; - unsigned int numRandomNumberGenerators; - unsigned int initRandom; - float fNormalize; - unsigned int r; - unsigned int k; - int i; - const char *option; cl_mem_flags mem_flags; cl_context context = NULL; cl_mem inputImageBuffer = NULL; - cl_mem randomNumberBuffer = NULL; - cl_mem filteredImageBuffer = NULL; - cl_mem randomNumberSeedsBuffer = NULL; + cl_mem compositeImageBuffer = NULL; cl_command_queue queue = NULL; - cl_kernel addNoiseKernel = NULL; - cl_kernel randomNumberGeneratorKernel = NULL; + magick_unreferenced(x_offset); + magick_unreferenced(y_offset); clEnv = GetDefaultOpenCLEnv(); context = GetOpenCLContext(clEnv); queue = AcquireOpenCLCommandQueue(clEnv); - - inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); + + /* Create and initialize OpenCL buffers. 
*/ + inputPixels = GetPixelCachePixels(inputImage, &length, exception); if (inputPixels == (void *) NULL) { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename); + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, + "UnableToReadPixelCache.","`%s'",inputImage->filename); goto cleanup; } + /* If the host pointer is aligned to the size of CLPixelPacket, + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over */ if (ALIGNED(inputPixels,CLPixelPacket)) { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; + mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR; } else { - mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; + mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR; } /* create a CL buffer from image pixel buffer */ length = inputImage->columns * inputImage->rows; - inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); + inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, + length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), + ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } - filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception); - assert(filteredImage != NULL); - if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", "."); - goto cleanup; - } - filteredPixels = GetPixelCachePixels(filteredImage, &length, exception); - if (filteredPixels == (void *) NULL) + /* Create and initialize 
OpenCL buffers. */ + composePixels = AcquirePixelCachePixels(compositeImage, &length, exception); + if (composePixels == (void *) NULL) { - (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename); + (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, + "UnableToReadPixelCache.","`%s'",compositeImage->filename); goto cleanup; } - if (ALIGNED(filteredPixels,CLPixelPacket)) + /* If the host pointer is aligned to the size of CLPixelPacket, + then use the host buffer directly from the GPU; otherwise, + create a buffer on the GPU and copy the data over */ + if (ALIGNED(composePixels,CLPixelPacket)) { - mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR; - hostPtr = filteredPixels; + mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR; } else { - mem_flags = CL_MEM_WRITE_ONLY; - hostPtr = NULL; + mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR; } /* create a CL buffer from image pixel buffer */ - length = inputImage->columns * inputImage->rows; - filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus); + length = compositeImage->columns * compositeImage->rows; + compositeImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, + length * sizeof(CLPixelPacket), (void*)composePixels, &clStatus); if (clStatus != CL_SUCCESS) { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); + (void) OpenCLThrowMagickException(exception, GetMagickModule(), + ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.","."); goto cleanup; } + + status = LaunchCompositeKernel(clEnv,queue,inputImageBuffer, + (unsigned int) inputImage->columns, + (unsigned int) inputImage->rows, + (unsigned int) inputImage->matte, + channel, compose, compositeImageBuffer, + (unsigned int) compositeImage->columns, + (unsigned int) compositeImage->rows, + 
destination_dissolve,source_dissolve, + exception); + + if (status==MagickFalse) + goto cleanup; - /* find out how many random numbers needed by pixel */ - numRandomNumberPerPixel = 0; - { - unsigned int numRandPerChannel = 0; - switch (noise_type) - { - case UniformNoise: - case ImpulseNoise: - case LaplacianNoise: - case RandomNoise: - default: - numRandPerChannel = 1; - break; - case GaussianNoise: - case MultiplicativeGaussianNoise: - case PoissonNoise: - numRandPerChannel = 2; - break; - }; - - if ((channel & RedChannel) != 0) - numRandomNumberPerPixel+=numRandPerChannel; - if ((channel & GreenChannel) != 0) - numRandomNumberPerPixel+=numRandPerChannel; - if ((channel & BlueChannel) != 0) - numRandomNumberPerPixel+=numRandPerChannel; - if ((channel & OpacityChannel) != 0) - numRandomNumberPerPixel+=numRandPerChannel; - } - - numRowsPerKernelLaunch = 512; - - /* create a buffer for random numbers */ - numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel; - randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, numRandomNumberPerBuffer*sizeof(float) - , NULL, &clStatus); - - { - /* setup the random number generators */ - unsigned long* seeds; - numRandomNumberGenerators = 512; - randomNumberSeedsBuffer = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_WRITE - , numRandomNumberGenerators * 4 * sizeof(unsigned long), NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.","."); - goto cleanup; - } - seeds = (unsigned long*) clEnqueueMapBuffer(queue, randomNumberSeedsBuffer, CL_TRUE, CL_MAP_WRITE, 0 - , numRandomNumberGenerators*4*sizeof(unsigned long), 0, NULL, NULL, &clStatus); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.","."); - goto cleanup; - } - - for (i = 0; i < 
numRandomNumberGenerators; i++) { - RandomInfo* randomInfo = AcquireRandomInfo(); - const unsigned long* s = GetRandomInfoSeed(randomInfo); - - if (i == 0) - fNormalize = GetRandomInfoNormalize(randomInfo); - - seeds[i*4] = s[0]; - randomInfo = DestroyRandomInfo(randomInfo); - } - - clStatus = clEnqueueUnmapMemObject(queue, randomNumberSeedsBuffer, seeds, 0, NULL, NULL); - if (clStatus != CL_SUCCESS) - { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.","."); - goto cleanup; - } - - randomNumberGeneratorKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE - ,"randomNumberGeneratorKernel"); - - k = 0; - clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberSeedsBuffer); - clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(float),(void *)&fNormalize); - clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer); - initRandom = 1; - clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&initRandom); - clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerBuffer); - - random_work_size = numRandomNumberGenerators; - } - - addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage"); - k = 0; - clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer); - clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer); - inputColumns = inputImage->columns; - clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns); - inputRows = inputImage->rows; - clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows); - clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel); - clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type); - attenuate=1.0f; - option=GetImageArtifact(inputImage,"attenuate"); - if (option != (char *) NULL) - 
attenuate=(float)StringToDouble(option,(char **) NULL); - clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate); - clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer); - clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel); - - global_work_size[0] = inputColumns; - for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch) - { - size_t generator_local_size = 64; - /* Generate random numbers in the buffer */ - clEnqueueNDRangeKernel(queue,randomNumberGeneratorKernel,1,NULL - ,&random_work_size,&generator_local_size,0,NULL,NULL); - if (initRandom != 0) - { - /* make sure we only do init once */ - initRandom = 0; - clSetKernelArg(randomNumberGeneratorKernel,3,sizeof(unsigned int),(void *)&initRandom); - } - - /* set the row offset */ - clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r); - global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r); - clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL); - } - - if (ALIGNED(filteredPixels,CLPixelPacket)) - { - length = inputImage->columns * inputImage->rows; - clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus); - } - else + length = inputImage->columns * inputImage->rows; + if (ALIGNED(inputPixels,CLPixelPacket)) { - length = inputImage->columns * inputImage->rows; - clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL); + clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, + CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, + NULL, &clStatus); } - if (clStatus != CL_SUCCESS) + else { - (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", "."); - goto cleanup; + clStatus = 
clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, + length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL); } - - outputReady = MagickTrue; + if (clStatus==CL_SUCCESS) + outputReady = MagickTrue; cleanup: - OpenCLLogException(__FUNCTION__,__LINE__,exception); + if (inputImageBuffer!=NULL) clEnv->library->clReleaseMemObject(inputImageBuffer); + if (compositeImageBuffer!=NULL) clEnv->library->clReleaseMemObject(compositeImageBuffer); + if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue); - if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel); - if (randomNumberGeneratorKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomNumberGeneratorKernel); - if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer); - if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer); - if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer); - if (randomNumberSeedsBuffer!=NULL) clReleaseMemObject(randomNumberSeedsBuffer); - if (outputReady == MagickFalse - && filteredImage != NULL) - { - DestroyImage(filteredImage); - filteredImage = NULL; - } - return filteredImage; + return outputReady; } - -MagickExport -Image* AccelerateAddNoiseImage(const Image *image, const ChannelType channel, - const NoiseType noise_type,ExceptionInfo *exception) +MagickExport +MagickBooleanType AccelerateCompositeImage(Image *image, + const ChannelType channel,const CompositeOperator compose, + const Image *composite,const ssize_t x_offset,const ssize_t y_offset, + const float destination_dissolve,const float source_dissolve, + ExceptionInfo *exception) { MagickBooleanType status; - Image* filteredImage = NULL; assert(image != NULL); - assert(exception != NULL); + assert(composite != NULL); + assert(exception != (ExceptionInfo *) NULL); status = checkOpenCLEnvironment(exception); if (status == MagickFalse) - return NULL; + return MagickFalse; status = 
checkAccelerateCondition(image, channel); if (status == MagickFalse) - return NULL; + return MagickFalse; -DisableMSCWarning(4127) - if (sizeof(unsigned long) == 4) -RestoreMSCWarning - filteredImage = ComputeAddNoiseImageOptRandomNum(image,channel,noise_type,exception); - else - filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception); - - return filteredImage; + /* only support zero offset and + images with the size for now */ + if (x_offset!=0 + || y_offset!=0 + || image->columns!=composite->columns + || image->rows!=composite->rows) + return MagickFalse; + + switch(compose) { + case ColorDodgeCompositeOp: + case BlendCompositeOp: + break; + default: + // unsupported compose operator, quit + return MagickFalse; + }; + + status = ComputeCompositeImage(image,channel,compose,composite, + x_offset,y_offset,destination_dissolve,source_dissolve,exception); + + return status; } + #else /* MAGICKCORE_OPENCL_SUPPORT */ MagickExport Image *AccelerateConvolveImageChannel( @@ -4936,6 +6743,19 @@ MagickExport MagickBooleanType AccelerateContrastImage( return MagickFalse; } +MagickExport MagickBooleanType AcceleratContrastStretchImageChannel( + Image * image, const ChannelType channel, const double black_point, const double white_point, + ExceptionInfo* magick_unused(exception)) +{ + magick_unreferenced(image); + magick_unreferenced(channel); + magick_unreferenced(black_point); + magick_unreferenced(white_point); + magick_unreferenced(exception); + + return MagickFalse; +} + MagickExport MagickBooleanType AccelerateEqualizeImage( Image* magick_unused(image), const ChannelType magick_unused(channel), ExceptionInfo* magick_unused(exception)) @@ -4971,7 +6791,6 @@ MagickExport Image *AccelerateResizeImage(const Image* magick_unused(image), return NULL; } - MagickExport MagickBooleanType AccelerateModulateImage( Image* image, double percent_brightness, double percent_hue, @@ -4986,6 +6805,27 @@ MagickBooleanType AccelerateModulateImage( return(MagickFalse); } 
+MagickExport +MagickBooleanType AccelerateNegateImageChannel( + Image* image, const ChannelType channel, const MagickBooleanType grayscale, ExceptionInfo* exception) +{ + magick_unreferenced(image); + magick_unreferenced(channel); + magick_unreferenced(grayscale); + magick_unreferenced(exception); + return(MagickFalse); +} + +MagickExport +MagickBooleanType AccelerateGrayscaleImage( + Image* image, const PixelIntensityMethod method, ExceptionInfo* exception) +{ + magick_unreferenced(image); + magick_unreferenced(method); + magick_unreferenced(exception); + return(MagickFalse); +} + MagickExport Image *AccelerateAddNoiseImage(const Image *image, const ChannelType channel, const NoiseType noise_type,ExceptionInfo *exception) { @@ -4996,6 +6836,29 @@ MagickExport Image *AccelerateAddNoiseImage(const Image *image, return NULL; } + +MagickExport MagickBooleanType AccelerateRandomImage(Image* image, ExceptionInfo* exception) +{ + magick_unreferenced(image); + magick_unreferenced(exception); + return MagickFalse; +} + +MagickExport +Image* AccelerateMotionBlurImage(const Image *image, const ChannelType channel, + const double* kernel, const size_t width, + const OffsetInfo *offset, + ExceptionInfo *exception) +{ + magick_unreferenced(image); + magick_unreferenced(channel); + magick_unreferenced(kernel); + magick_unreferenced(width); + magick_unreferenced(offset); + magick_unreferenced(exception); + return NULL; +} + #endif /* MAGICKCORE_OPENCL_SUPPORT */ MagickExport MagickBooleanType AccelerateConvolveImage( diff --git a/MagickCore/accelerate.h b/MagickCore/accelerate.h index 1030e2aeb..24e072cf6 100644 --- a/MagickCore/accelerate.h +++ b/MagickCore/accelerate.h @@ -29,14 +29,23 @@ extern "C" { #endif extern MagickExport MagickBooleanType + AccelerateCompositeImage(Image *,const ChannelType,const CompositeOperator, + const Image *,const ssize_t,const ssize_t,const float,const float,ExceptionInfo *), AccelerateContrastImage(Image *,const MagickBooleanType,ExceptionInfo 
*), AccelerateConvolveImage(const Image *,const KernelInfo *,Image *, ExceptionInfo *), + AccelerateContrastStretchImageChannel(Image *, const ChannelType, const double, const double, + ExceptionInfo*), AccelerateEqualizeImage(Image *,const ChannelType,ExceptionInfo *), AccelerateFunctionImage(Image *,const ChannelType,const MagickFunction, const size_t,const double *,ExceptionInfo *), + AccelerateGrayscaleImage(Image*, const PixelIntensityMethod, + ExceptionInfo *), AccelerateModulateImage(Image*, double, double, double, - ColorspaceType, ExceptionInfo*); + ColorspaceType, ExceptionInfo*), + AccelerateNegateImageChannel(Image*, const ChannelType, const MagickBooleanType, + ExceptionInfo *), + AccelerateRandomImage(Image*, ExceptionInfo*); extern MagickExport Image *AccelerateAddNoiseImage(const Image*,const ChannelType,const NoiseType, @@ -46,6 +55,8 @@ extern MagickExport Image *AccelerateConvolveImageChannel(const Image *,const ChannelType, const KernelInfo *,ExceptionInfo *), *AccelerateDespeckleImage(const Image *,ExceptionInfo *), + *AccelerateMotionBlurImage(const Image*, const ChannelType, + const double*,const size_t,const OffsetInfo*,ExceptionInfo*), *AccelerateRadialBlurImage(const Image *,const ChannelType,const double, ExceptionInfo *), *AccelerateResizeImage(const Image *,const size_t,const size_t, diff --git a/MagickCore/opencl-private.h b/MagickCore/opencl-private.h index 657166fb7..2e69e3ce3 100644 --- a/MagickCore/opencl-private.h +++ b/MagickCore/opencl-private.h @@ -1,23 +1,26 @@ /* - Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization - dedicated to making software imaging solutions freely available. +Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization +dedicated to making software imaging solutions freely available. - You may not use this file except in compliance with the License. - obtain a copy of the License at +You may not use this file except in compliance with the License. 
+obtain a copy of the License at - http://www.imagemagick.org/script/license.php +http://www.imagemagick.org/script/license.php - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. - MagickCore OpenCL private methods. +MagickCore OpenCL private methods. */ #ifndef _MAGICKCORE_OPENCL_PRIVATE_H #define _MAGICKCORE_OPENCL_PRIVATE_H +/* +Include declarations. +*/ #include "MagickCore/studio.h" #include "MagickCore/opencl.h" @@ -31,7 +34,258 @@ extern "C" { typedef void* cl_context; typedef void* cl_command_queue; typedef void* cl_kernel; + typedef void* cl_mem; typedef struct { unsigned char t[8]; } cl_device_type; /* 64-bit */ +#else +/* + * + * function pointer typedefs + * + */ + +/* Platform APIs */ +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetPlatformIDs)( + cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetPlatformInfo)( + cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetDeviceIDs)( + cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL 
*MAGICKpfn_clGetDeviceInfo)( + cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Context APIs */ +typedef CL_API_ENTRY cl_context (CL_API_CALL *MAGICKpfn_clCreateContext)( + const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ +typedef CL_API_ENTRY cl_command_queue (CL_API_CALL *MAGICKpfn_clCreateCommandQueue)( + cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +typedef CL_API_ENTRY cl_mem (CL_API_CALL *MAGICKpfn_clCreateBuffer)( + cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseMemObject)(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +typedef CL_API_ENTRY cl_program (CL_API_CALL *MAGICKpfn_clCreateProgramWithSource)( + cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_program (CL_API_CALL *MAGICKpfn_clCreateProgramWithBinary)( + cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int 
(CL_API_CALL *MAGICKpfn_clReleaseProgram)(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clBuildProgram)( + cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK *pfn_notify)(cl_program program, void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetProgramInfo)( + cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetProgramBuildInfo)( + cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +typedef CL_API_ENTRY cl_kernel (CL_API_CALL *MAGICKpfn_clCreateKernel)( + cl_program program, + const char * kernel_name, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseKernel)(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clSetKernelArg)( + cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clFlush)(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clFinish)(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clEnqueueReadBuffer)( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t cb, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) 
CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clEnqueueWriteBuffer)( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t cb, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void * (CL_API_CALL *MAGICKpfn_clEnqueueMapBuffer)( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t cb, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clEnqueueUnmapMemObject)( + cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clEnqueueNDRangeKernel)( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +/* + * + * vendor dispatch table structure + * + * note that the types in the structure KHRicdVendorDispatch mirror the function + * names listed in the string table khrIcdVendorDispatchFunctionNames + * + */ + +typedef struct MagickLibraryRec MagickLibrary; + +struct MagickLibraryRec +{ + void * base; + + MAGICKpfn_clGetPlatformIDs clGetPlatformIDs; + MAGICKpfn_clGetPlatformInfo clGetPlatformInfo; + MAGICKpfn_clGetDeviceIDs clGetDeviceIDs; + MAGICKpfn_clGetDeviceInfo clGetDeviceInfo; + MAGICKpfn_clCreateContext clCreateContext; + MAGICKpfn_clCreateCommandQueue clCreateCommandQueue; + MAGICKpfn_clReleaseCommandQueue 
clReleaseCommandQueue; + MAGICKpfn_clCreateBuffer clCreateBuffer; + MAGICKpfn_clReleaseMemObject clReleaseMemObject; + MAGICKpfn_clCreateProgramWithSource clCreateProgramWithSource; + MAGICKpfn_clCreateProgramWithBinary clCreateProgramWithBinary; + MAGICKpfn_clReleaseProgram clReleaseProgram; + MAGICKpfn_clBuildProgram clBuildProgram; + MAGICKpfn_clGetProgramInfo clGetProgramInfo; + MAGICKpfn_clGetProgramBuildInfo clGetProgramBuildInfo; + MAGICKpfn_clCreateKernel clCreateKernel; + MAGICKpfn_clReleaseKernel clReleaseKernel; + MAGICKpfn_clSetKernelArg clSetKernelArg; + MAGICKpfn_clFlush clFlush; + MAGICKpfn_clFinish clFinish; + MAGICKpfn_clEnqueueReadBuffer clEnqueueReadBuffer; + MAGICKpfn_clEnqueueWriteBuffer clEnqueueWriteBuffer; + MAGICKpfn_clEnqueueMapBuffer clEnqueueMapBuffer; + MAGICKpfn_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; + MAGICKpfn_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; +}; + +struct _MagickCLEnv { + MagickBooleanType OpenCLInitialized; /* whether OpenCL environment is initialized. */ + MagickBooleanType OpenCLDisabled; /* whether OpenCL has been explicitly disabled. 
*/ + + MagickLibrary * library; + + /*OpenCL objects */ + cl_platform_id platform; + cl_device_type deviceType; + cl_device_id device; + cl_context context; + + MagickBooleanType disableProgramCache; /* disable the OpenCL program cache */ + cl_program programs[MAGICK_OPENCL_NUM_PROGRAMS]; /* one program object maps one kernel source file */ + + MagickBooleanType regenerateProfile; /* re-run the microbenchmark in auto device selection mode */ + + /* Random number generator seeds */ + unsigned int numGenerators; + float randNormalize; + cl_mem seeds; + SemaphoreInfo* seedsLock; + + SemaphoreInfo* lock; +}; + #endif #if defined(MAGICKCORE_HDRI_SUPPORT) @@ -43,8 +297,8 @@ extern "C" { #define CLCharQuantumScale 1.0f #elif (MAGICKCORE_QUANTUM_DEPTH == 8) #define CLOptions "-cl-single-precision-constant -cl-mad-enable " \ - "-DCLQuantum=uchar -DCLSignedQuantum=char -DCLPixelType=uchar4 -DQuantumRange=%f " \ - "-DQuantumScale=%f -DCharQuantumScale=%f -DMagickEpsilon=%f -DMagickPI=%f "\ + "-DCLQuantum=uchar -DCLSignedQuantum=char -DCLPixelType=uchar4 -DQuantumRange=%ff " \ + "-DQuantumScale=%ff -DCharQuantumScale=%ff -DMagickEpsilon=%ff -DMagickPI=%ff "\ "-DMaxMap=%u -DMAGICKCORE_QUANTUM_DEPTH=%u" #define CLPixelPacket cl_uchar4 #define CLCharQuantumScale 1.0f @@ -81,6 +335,9 @@ extern MagickPrivate cl_command_queue AcquireOpenCLCommandQueue(MagickCLEnv); extern MagickPrivate MagickBooleanType + OpenCLThrowMagickException(ExceptionInfo *, + const char *,const char *,const size_t, + const ExceptionType,const char *,const char *,...), RelinquishOpenCLCommandQueue(MagickCLEnv, cl_command_queue), RelinquishOpenCLKernel(MagickCLEnv, cl_kernel); @@ -91,9 +348,32 @@ extern MagickPrivate unsigned long extern MagickPrivate const char* GetOpenCLCachedFilesDirectory(); -extern MagickPrivate void +extern MagickPrivate void + UnlockRandSeedBuffer(MagickCLEnv), OpenCLLog(const char*); +extern MagickPrivate cl_mem + GetAndLockRandSeedBuffer(MagickCLEnv); + +extern MagickPrivate unsigned 
int + GetNumRandGenerators(MagickCLEnv); + +extern MagickPrivate float + GetRandNormalize(MagickCLEnv clEnv); + +typedef struct _AccelerateTimer { + long long _freq; + long long _clocks; + long long _start; +} AccelerateTimer; + + +void startAccelerateTimer(AccelerateTimer* timer); +void stopAccelerateTimer(AccelerateTimer* timer); +void resetAccelerateTimer(AccelerateTimer* timer); +void initAccelerateTimer(AccelerateTimer* timer); +double readAccelerateTimer(AccelerateTimer* timer); + /* #define OPENCLLOG_ENABLED 1 */ static inline void OpenCLLogException(const char* function, const unsigned int line, @@ -102,8 +382,8 @@ static inline void OpenCLLogException(const char* function, if (exception->severity!=0) { char message[MaxTextExtent]; /* dump the source into a file */ - (void) FormatLocaleString(message,MaxTextExtent,"%s:%d Exception(%d)" - ,function,line,exception->severity); + (void) FormatLocaleString(message,MaxTextExtent,"%s:%d Exception(%d):%s " + ,function,line,exception->severity,exception->reason); OpenCLLog(message); } #else @@ -113,6 +393,7 @@ static inline void OpenCLLogException(const char* function, #endif } + #if defined(__cplusplus) || defined(c_plusplus) } #endif diff --git a/MagickCore/opencl.c b/MagickCore/opencl.c index 000f44cdf..98c9888d7 100644 --- a/MagickCore/opencl.c +++ b/MagickCore/opencl.c @@ -71,6 +71,8 @@ Include declarations. #include "MagickCore/property.h" #include "MagickCore/quantize.h" #include "MagickCore/quantum.h" +#include "MagickCore/random_.h" +#include "MagickCore/random-private.h" #include "MagickCore/resample.h" #include "MagickCore/resource_.h" #include "MagickCore/splay-tree.h" @@ -87,22 +89,60 @@ Include declarations. #if defined(MAGICKCORE_OPENCL_SUPPORT) -struct _MagickCLEnv { - MagickBooleanType OpenCLInitialized; /* whether OpenCL environment is initialized. */ - MagickBooleanType OpenCLDisabled; /* whether if OpenCL has been explicitely disabled. 
*/ +#ifdef MAGICKCORE_HAVE_OPENCL_CL_H +#define MAGICKCORE_OPENCL_MACOSX 1 +#endif - /*OpenCL objects */ - cl_platform_id platform; - cl_device_type deviceType; - cl_device_id device; - cl_context context; - MagickBooleanType disableProgramCache; /* disable the OpenCL program cache */ - cl_program programs[MAGICK_OPENCL_NUM_PROGRAMS]; /* one program object maps one kernel source file */ +#define NUM_CL_RAND_GENERATORS 1024 /* number of random number generators running in parallel */ - MagickBooleanType regenerateProfile; /* re-run the microbenchmark in auto device selection mode */ - SemaphoreInfo* lock; -}; +/* + * + * Dynamic library loading functions + * + */ +#ifdef MAGICKCORE_WINDOWS_SUPPORT +#else +#include <dlfcn.h> +#endif + +// dynamically load a library. returns NULL on failure +void *OsLibraryLoad(const char *libraryName) +{ +#ifdef MAGICKCORE_WINDOWS_SUPPORT + return (void *)LoadLibraryA(libraryName); +#else + return (void *)dlopen(libraryName, RTLD_NOW); +#endif +} + +// get a function pointer from a loaded library. returns NULL on failure. +void *OsLibraryGetFunctionAddress(void *library, const char *functionName) +{ +#ifdef MAGICKCORE_WINDOWS_SUPPORT + if (!library || !functionName) + { + return NULL; + } + return (void *) GetProcAddress( (HMODULE)library, functionName); +#else + if (!library || !functionName) + { + return NULL; + } + return (void *)dlsym(library, functionName); +#endif +} + +// unload a library. 
+void OsLibraryUnload(void *library) +{ +#ifdef MAGICKCORE_WINDOWS_SUPPORT + FreeLibrary( (HMODULE)library); +#else + dlclose(library); +#endif +} /* @@ -160,7 +200,7 @@ MagickExport MagickBooleanType RelinquishMagickOpenCLEnv(MagickCLEnv clEnv) { if (clEnv != (MagickCLEnv)NULL) { - RelinquishSemaphoreInfo(clEnv->lock); + DestroySemaphoreInfo(&clEnv->lock); RelinquishMagickMemory(clEnv); return MagickTrue; } @@ -174,6 +214,103 @@ MagickExport MagickBooleanType RelinquishMagickOpenCLEnv(MagickCLEnv clEnv) MagickCLEnv defaultCLEnv; SemaphoreInfo* defaultCLEnvLock; +/* +* OpenCL library +*/ +MagickLibrary * OpenCLLib; +SemaphoreInfo* OpenCLLibLock; + + +static MagickBooleanType bindOpenCLFunctions(void* library) +{ +#ifdef MAGICKCORE_OPENCL_MACOSX +#define BIND(X) OpenCLLib->X= &X; +#else +#define BIND(X)\ + if ((OpenCLLib->X=(MAGICKpfn_##X)OsLibraryGetFunctionAddress(library,#X)) == NULL)\ + return MagickFalse; +#endif + + BIND(clGetPlatformIDs); + BIND(clGetPlatformInfo); + + BIND(clGetDeviceIDs); + BIND(clGetDeviceInfo); + + BIND(clCreateContext); + + BIND(clCreateBuffer); + BIND(clReleaseMemObject); + + BIND(clCreateProgramWithSource); + BIND(clCreateProgramWithBinary); + BIND(clBuildProgram); + BIND(clGetProgramInfo); + BIND(clGetProgramBuildInfo); + + BIND(clCreateKernel); + BIND(clReleaseKernel); + BIND(clSetKernelArg); + + BIND(clFlush); + BIND(clFinish); + + BIND(clEnqueueNDRangeKernel); + BIND(clEnqueueReadBuffer); + BIND(clEnqueueMapBuffer); + BIND(clEnqueueUnmapMemObject); + + BIND(clCreateCommandQueue); + BIND(clReleaseCommandQueue); + + return MagickTrue; +} + +MagickLibrary * GetOpenCLLib() +{ + if (OpenCLLib == NULL) + { + if (OpenCLLibLock == NULL) + { + ActivateSemaphoreInfo(&OpenCLLibLock); + } + + LockSemaphoreInfo(OpenCLLibLock); + + OpenCLLib = (MagickLibrary *) AcquireMagickMemory (sizeof (MagickLibrary)); + + if (OpenCLLib != NULL) + { + MagickBooleanType status = MagickFalse; + void * library = NULL; + +#ifdef MAGICKCORE_OPENCL_MACOSX + status 
= bindOpenCLFunctions(library); +#else + + memset(OpenCLLib, 0, sizeof(MagickLibrary)); +#ifdef MAGICKCORE_WINDOWS_SUPPORT + library = OsLibraryLoad("OpenCL.dll"); +#else + library = OsLibraryLoad("libOpenCL.so"); +#endif + if (library) + status = bindOpenCLFunctions(library); + + if (status==MagickTrue) + OpenCLLib->base=library; + else + OpenCLLib=(MagickLibrary *)RelinquishMagickMemory(OpenCLLib); +#endif + } + + UnlockSemaphoreInfo(OpenCLLibLock); + } + + + return OpenCLLib; +} + /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -498,7 +635,7 @@ static char* getBinaryCLProgramName(MagickCLEnv clEnv, MagickOpenCLProgram prog, char path[MaxTextExtent]; char deviceName[MaxTextExtent]; const char* prefix = "magick_opencl"; - clGetDeviceInfo(clEnv->device, CL_DEVICE_NAME, MaxTextExtent, deviceName, NULL); + clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_NAME, MaxTextExtent, deviceName, NULL); ptr=deviceName; /* strip out illegal characters for file names */ while (*ptr != '\0') @@ -536,7 +673,7 @@ static MagickBooleanType saveBinaryCLProgram(MagickCLEnv clEnv, MagickOpenCLProg fileHandle = NULL; saveSuccessful = MagickFalse; - clStatus = clGetProgramInfo(clEnv->programs[prog], CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binaryProgramSize, NULL); + clStatus = clEnv->library->clGetProgramInfo(clEnv->programs[prog], CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binaryProgramSize, NULL); if (clStatus != CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clGetProgramInfo failed.", "'%s'", "."); @@ -544,7 +681,7 @@ static MagickBooleanType saveBinaryCLProgram(MagickCLEnv clEnv, MagickOpenCLProg } binaryProgram = (unsigned char*) AcquireMagickMemory(binaryProgramSize); - clStatus = clGetProgramInfo(clEnv->programs[prog], CL_PROGRAM_BINARIES, sizeof(char*), &binaryProgram, NULL); + clStatus = clEnv->library->clGetProgramInfo(clEnv->programs[prog], CL_PROGRAM_BINARIES, sizeof(char*), 
&binaryProgram, NULL); if (clStatus != CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clGetProgramInfo failed.", "'%s'", "."); @@ -619,7 +756,7 @@ static MagickBooleanType loadBinaryCLProgram(MagickCLEnv clEnv, MagickOpenCLProg memset(binaryProgram, 0, length); b_error |= fread(binaryProgram, 1, length, fileHandle) != length; - clEnv->programs[prog] = clCreateProgramWithBinary(clEnv->context, 1, &clEnv->device, &length, (const unsigned char**)&binaryProgram, &clBinaryStatus, &clStatus); + clEnv->programs[prog] = clEnv->library->clCreateProgramWithBinary(clEnv->context, 1, &clEnv->device, &length, (const unsigned char**)&binaryProgram, &clBinaryStatus, &clStatus); if (clStatus != CL_SUCCESS || clBinaryStatus != CL_SUCCESS) goto cleanup; @@ -745,7 +882,7 @@ static MagickBooleanType CompileOpenCLKernels(MagickCLEnv clEnv, ExceptionInfo* { /* Binary CL program unavailable, compile the program from source */ size_t programLength = strlen(MagickOpenCLProgramStrings[i]); - clEnv->programs[i] = clCreateProgramWithSource(clEnv->context, 1, &(MagickOpenCLProgramStrings[i]), &programLength, &clStatus); + clEnv->programs[i] = clEnv->library->clCreateProgramWithSource(clEnv->context, 1, &(MagickOpenCLProgramStrings[i]), &programLength, &clStatus); if (clStatus!=CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning, @@ -755,7 +892,7 @@ static MagickBooleanType CompileOpenCLKernels(MagickCLEnv clEnv, ExceptionInfo* } } - clStatus = clBuildProgram(clEnv->programs[i], 1, &clEnv->device, options, NULL, NULL); + clStatus = clEnv->library->clBuildProgram(clEnv->programs[i], 1, &clEnv->device, options, NULL, NULL); if (clStatus!=CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning, @@ -781,9 +918,9 @@ static MagickBooleanType CompileOpenCLKernels(MagickCLEnv clEnv, ExceptionInfo* { char* log; size_t logSize; - clGetProgramBuildInfo(clEnv->programs[i], clEnv->device, 
CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); + clEnv->library->clGetProgramBuildInfo(clEnv->programs[i], clEnv->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); log = (char*)AcquireMagickMemory(logSize); - clGetProgramBuildInfo(clEnv->programs[i], clEnv->device, CL_PROGRAM_BUILD_LOG, logSize, log, &logSize); + clEnv->library->clGetProgramBuildInfo(clEnv->programs[i], clEnv->device, CL_PROGRAM_BUILD_LOG, logSize, log, &logSize); (void) FormatLocaleString(path,MaxTextExtent,"%s%s%s" ,GetOpenCLCachedFilesDirectory() @@ -860,7 +997,7 @@ static MagickBooleanType InitOpenCLPlatformDevice(MagickCLEnv clEnv, ExceptionIn if (clEnv->device != NULL) { - status = clGetDeviceInfo(clEnv->device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &clEnv->platform, NULL); + status = clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &clEnv->platform, NULL); if (status != CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning, "Failed to get OpenCL platform from the selected device.", "(%d)", status); @@ -884,7 +1021,7 @@ static MagickBooleanType InitOpenCLPlatformDevice(MagickCLEnv clEnv, ExceptionIn clEnv->device = NULL; /* Get the number of OpenCL platforms available */ - status = clGetPlatformIDs(0, NULL, &numPlatforms); + status = clEnv->library->clGetPlatformIDs(0, NULL, &numPlatforms); if (status != CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning, @@ -905,7 +1042,7 @@ static MagickBooleanType InitOpenCLPlatformDevice(MagickCLEnv clEnv, ExceptionIn goto cleanup; } - status = clGetPlatformIDs(numPlatforms, platforms, NULL); + status = clEnv->library->clGetPlatformIDs(numPlatforms, platforms, NULL); if (status != CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning, @@ -937,11 +1074,11 @@ static MagickBooleanType InitOpenCLPlatformDevice(MagickCLEnv clEnv, ExceptionIn for (i = 0; i < numPlatforms; i++) { cl_uint numDevices; - status = 
clGetDeviceIDs(platforms[i], deviceType, 1, &(clEnv->device), &numDevices); + status = clEnv->library->clGetDeviceIDs(platforms[i], deviceType, 1, &(clEnv->device), &numDevices); if (status != CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning, - "clGetPlatformIDs failed.", "(%d)", status); + "clGetDeviceIDs failed.", "(%d)", status); goto cleanup; } if (clEnv->device != NULL) @@ -1010,8 +1147,31 @@ MagickBooleanType InitOpenCLEnvInternal(MagickCLEnv clEnv, ExceptionInfo* except cl_int clStatus; cl_context_properties cps[3]; - +#ifdef MAGICKCORE_CLPERFMARKER + { + int status = clInitializePerfMarkerAMD(); + if (status == AP_SUCCESS) { + //printf("PerfMarker successfully initialized\n"); + } + } +#endif clEnv->OpenCLInitialized = MagickTrue; + + /* check and init the global lib */ + OpenCLLib=GetOpenCLLib(); + if (OpenCLLib) + { + clEnv->library=OpenCLLib; + } + else + { + /* turn off opencl */ + MagickBooleanType flag; + flag = MagickTrue; + SetMagickOpenCLEnvParamInternal(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED + , sizeof(MagickBooleanType), &flag, exception); + } + if (clEnv->OpenCLDisabled != MagickFalse) goto cleanup; @@ -1027,7 +1187,7 @@ MagickBooleanType InitOpenCLEnvInternal(MagickCLEnv clEnv, ExceptionInfo* except cps[0] = CL_CONTEXT_PLATFORM; cps[1] = (cl_context_properties)clEnv->platform; cps[2] = 0; - clEnv->context = clCreateContext(cps, 1, &(clEnv->device), NULL, NULL, &clStatus); + clEnv->context = clEnv->library->clCreateContext(cps, 1, &(clEnv->device), NULL, NULL, &clStatus); if (clStatus != CL_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning, @@ -1046,6 +1206,7 @@ MagickBooleanType InitOpenCLEnvInternal(MagickCLEnv clEnv, ExceptionInfo* except } status = EnableOpenCLInternal(clEnv); + cleanup: return status; } @@ -1106,7 +1267,7 @@ MagickPrivate cl_command_queue AcquireOpenCLCommandQueue(MagickCLEnv clEnv) { if (clEnv != NULL) - return 
clCreateCommandQueue(clEnv->context, clEnv->device, 0, NULL); + return clEnv->library->clCreateCommandQueue(clEnv->context, clEnv->device, 0, NULL); else return NULL; } @@ -1143,7 +1304,7 @@ MagickBooleanType RelinquishOpenCLCommandQueue(MagickCLEnv clEnv, cl_command_que { if (clEnv != NULL) { - return ((clReleaseCommandQueue(queue) == CL_SUCCESS) ? MagickTrue:MagickFalse); + return ((clEnv->library->clReleaseCommandQueue(queue) == CL_SUCCESS) ? MagickTrue:MagickFalse); } else return MagickFalse; @@ -1186,7 +1347,7 @@ MagickPrivate cl_kernel kernel = NULL; if (clEnv != NULL && kernelName!=NULL) { - kernel = clCreateKernel(clEnv->programs[program], kernelName, &clStatus); + kernel = clEnv->library->clCreateKernel(clEnv->programs[program], kernelName, &clStatus); } return kernel; } @@ -1225,7 +1386,7 @@ MagickPrivate MagickBooleanType status = MagickFalse; if (clEnv != NULL && kernel != NULL) { - status = ((clReleaseKernel(kernel) == CL_SUCCESS)?MagickTrue:MagickFalse); + status = ((clEnv->library->clReleaseKernel(kernel) == CL_SUCCESS)?MagickTrue:MagickFalse); } return status; } @@ -1258,7 +1419,7 @@ MagickPrivate unsigned long GetOpenCLDeviceLocalMemorySize(MagickCLEnv clEnv) { cl_ulong localMemorySize; - clGetDeviceInfo(clEnv->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMemorySize, NULL); + clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMemorySize, NULL); return (unsigned long)localMemorySize; } @@ -1266,7 +1427,7 @@ MagickPrivate unsigned long GetOpenCLDeviceMaxMemAllocSize(MagickCLEnv clEnv) { cl_ulong maxMemAllocSize; - clGetDeviceInfo(clEnv->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &maxMemAllocSize, NULL); + clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &maxMemAllocSize, NULL); return (unsigned long)maxMemAllocSize; } @@ -1363,17 +1524,17 @@ static ds_status initDSProfile(ds_profile** p, const char* version) { memset(profile, 0, 
sizeof(ds_profile)); - clGetPlatformIDs(0, NULL, &numPlatforms); + OpenCLLib->clGetPlatformIDs(0, NULL, &numPlatforms); if (numPlatforms > 0) { platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id)); if (platforms == NULL) { status = DS_MEMORY_ERROR; goto cleanup; } - clGetPlatformIDs(numPlatforms, platforms, NULL); + OpenCLLib->clGetPlatformIDs(numPlatforms, platforms, NULL); for (i = 0; i < (unsigned int)numPlatforms; i++) { cl_uint num; - if (clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, 0, NULL, &num) == CL_SUCCESS) + if (OpenCLLib->clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, 0, NULL, &num) == CL_SUCCESS) numDevices+=num; } } @@ -1412,7 +1573,7 @@ static ds_status initDSProfile(ds_profile** p, const char* version) { continue; break; } - if (clGetDeviceIDs(platforms[i], deviceType, numDevices, devices, &num) != CL_SUCCESS) + if (OpenCLLib->clGetDeviceIDs(platforms[i], deviceType, numDevices, devices, &num) != CL_SUCCESS) continue; for (j = 0; j < num; j++, next++) { size_t length; @@ -1420,22 +1581,22 @@ static ds_status initDSProfile(ds_profile** p, const char* version) { profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE; profile->devices[next].oclDeviceID = devices[j]; - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME + OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME , 0, NULL, &length); profile->devices[next].oclDeviceName = (char*)malloc(sizeof(char)*length); - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME + OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME , length, profile->devices[next].oclDeviceName, NULL); - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION + OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION , 0, NULL, &length); profile->devices[next].oclDriverVersion = (char*)malloc(sizeof(char)*length); - 
clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION + OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION , length, profile->devices[next].oclDriverVersion, NULL); - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_MAX_CLOCK_FREQUENCY + OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_MAX_CLOCK_FREQUENCY , sizeof(cl_uint), &profile->devices[next].oclMaxClockFrequency, NULL); - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_MAX_COMPUTE_UNITS + OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_MAX_COMPUTE_UNITS , sizeof(cl_uint), &profile->devices[next].oclMaxComputeUnits, NULL); } } @@ -1923,57 +2084,6 @@ static ds_status getNumDeviceWithEmptyScore(ds_profile* profile, unsigned int* n */ - -typedef struct _AccelerateTimer { - long long _freq; - long long _clocks; - long long _start; -} AccelerateTimer; - -static void startAccelerateTimer(AccelerateTimer* timer) { -#ifdef _WIN32 - QueryPerformanceCounter((LARGE_INTEGER*)&timer->_start); - - -#else - struct timeval s; - gettimeofday(&s, 0); - timer->_start = (long long)s.tv_sec * (long long)1.0E3 + (long long)s.tv_usec / (long long)1.0E3; -#endif -} - -static void stopAccelerateTimer(AccelerateTimer* timer) { - long long n=0; -#ifdef _WIN32 - QueryPerformanceCounter((LARGE_INTEGER*)&(n)); -#else - struct timeval s; - gettimeofday(&s, 0); - n = (long long)s.tv_sec * (long long)1.0E3+ (long long)s.tv_usec / (long long)1.0E3; -#endif - n -= timer->_start; - timer->_start = 0; - timer->_clocks += n; -} - -static void resetAccelerateTimer(AccelerateTimer* timer) { - timer->_clocks = 0; - timer->_start = 0; -} - - -static void initAccelerateTimer(AccelerateTimer* timer) { -#ifdef _WIN32 - QueryPerformanceFrequency((LARGE_INTEGER*)&timer->_freq); -#else - timer->_freq = (long long)1.0E3; -#endif - resetAccelerateTimer(timer); -} - -double readAccelerateTimer(AccelerateTimer* timer) { return 
(double)timer->_clocks/(double)timer->_freq; }; - - typedef double AccelerateScoreType; static ds_status AcceleratePerfEvaluator(ds_device *device, @@ -2067,7 +2177,7 @@ static ds_status AcceleratePerfEvaluator(ds_device *device, bluredImage=BlurImage(inputImage,10.0f,3.5f,exception); unsharpedImage=UnsharpMaskImage(bluredImage,2.0f,2.0f,50.0f,10.0f, exception); - resizedImage=ResizeImage(unsharpedImage,640,480,LanczosFilter, + resizedImage=ResizeImage(unsharpedImage,640,480,LanczosFilter,1.0, exception); #ifdef MAGICKCORE_CLPERFMARKER @@ -2156,6 +2266,14 @@ static MagickBooleanType autoSelectDevice(MagickCLEnv clEnv, ExceptionInfo* exce SetMagickOpenCLEnvParamInternal(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED , sizeof(MagickBooleanType), &flag, exception); + /* check and init the global lib */ + OpenCLLib=GetOpenCLLib(); + if (OpenCLLib==NULL) + { + mStatus=InitOpenCLEnvInternal(clEnv, exception); + goto cleanup; + } + status = initDSProfile(&profile, IMAGEMAGICK_PROFILE_VERSION); if (status!=DS_SUCCESS) { (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Error when initializing the profile", "'%s'", "."); @@ -2353,10 +2471,10 @@ MagickBooleanType OpenCLThrowMagickException(ExceptionInfo *exception, if (severity!=0) { cl_device_type dType; - clGetDeviceInfo(clEnv->device,CL_DEVICE_TYPE ,sizeof(cl_device_type),&dType,NULL); + clEnv->library->clGetDeviceInfo(clEnv->device,CL_DEVICE_TYPE ,sizeof(cl_device_type),&dType,NULL); if (dType == CL_DEVICE_TYPE_CPU) { char buffer[MaxTextExtent]; - clGetPlatformInfo(clEnv->platform, CL_PLATFORM_NAME, MaxTextExtent, buffer, NULL); + clEnv->library->clGetPlatformInfo(clEnv->platform, CL_PLATFORM_NAME, MaxTextExtent, buffer, NULL); /* Workaround for Intel OpenCL CPU runtime bug */ /* Turn off OpenCL when a problem is detected! 
*/ @@ -2386,6 +2504,88 @@ MagickBooleanType OpenCLThrowMagickException(ExceptionInfo *exception, return(status); } +MagickPrivate cl_mem GetAndLockRandSeedBuffer(MagickCLEnv clEnv) +{ + LockSemaphoreInfo(clEnv->lock); + if (clEnv->seedsLock == NULL) + { + ActivateSemaphoreInfo(&clEnv->seedsLock); + } + LockSemaphoreInfo(clEnv->seedsLock); + + if (clEnv->seeds == NULL) + { + cl_int clStatus; + clEnv->numGenerators = NUM_CL_RAND_GENERATORS; + clEnv->seeds = clEnv->library->clCreateBuffer(clEnv->context, CL_MEM_READ_WRITE, + clEnv->numGenerators*4*sizeof(unsigned int), + NULL, &clStatus); + if (clStatus != CL_SUCCESS) + { + clEnv->seeds = NULL; + } + else + { + unsigned int i; + cl_command_queue queue = NULL; + unsigned int *seeds; + + queue = AcquireOpenCLCommandQueue(clEnv); + seeds = (unsigned int*) clEnv->library->clEnqueueMapBuffer(queue, clEnv->seeds, CL_TRUE, + CL_MAP_WRITE, 0, + clEnv->numGenerators*4 + *sizeof(unsigned int), + 0, NULL, NULL, &clStatus); + if (clStatus!=CL_SUCCESS) + { + clEnv->library->clReleaseMemObject(clEnv->seeds); + goto cleanup; + } + + for (i = 0; i < clEnv->numGenerators; i++) { + RandomInfo* randomInfo = AcquireRandomInfo(); + const unsigned long* s = GetRandomInfoSeed(randomInfo); + if (i == 0) + clEnv->randNormalize = GetRandomInfoNormalize(randomInfo); + + seeds[i*4] = (unsigned int) s[0]; + seeds[i*4+1] = (unsigned int) 0x50a7f451; + seeds[i*4+2] = (unsigned int) 0x5365417e; + seeds[i*4+3] = (unsigned int) 0xc3a4171a; + + randomInfo = DestroyRandomInfo(randomInfo); + } + clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, clEnv->seeds, seeds, 0, + NULL, NULL); + clEnv->library->clFinish(queue); +cleanup: + if (queue != NULL) + RelinquishOpenCLCommandQueue(clEnv, queue); + } + } + UnlockSemaphoreInfo(clEnv->lock); + return clEnv->seeds; +} + +MagickPrivate void UnlockRandSeedBuffer(MagickCLEnv clEnv) { + if (clEnv->seedsLock == NULL) + { + ActivateSemaphoreInfo(&clEnv->seedsLock); + } + else + 
UnlockSemaphoreInfo(clEnv->seedsLock); +} + +MagickPrivate unsigned int GetNumRandGenerators(MagickCLEnv clEnv) +{ + return clEnv->numGenerators; +} + + +MagickPrivate float GetRandNormalize(MagickCLEnv clEnv) +{ + return clEnv->randNormalize; +} #else @@ -2393,12 +2593,12 @@ struct _MagickCLEnv { MagickBooleanType OpenCLInitialized; /* whether OpenCL environment is initialized. */ }; -extern MagickExport MagickCLEnv AcquireMagickOpenCLEnv() +MagickExport MagickCLEnv AcquireMagickOpenCLEnv() { return NULL; } -extern MagickExport MagickBooleanType RelinquishMagickOpenCLEnv( +MagickExport MagickBooleanType RelinquishMagickOpenCLEnv( MagickCLEnv magick_unused(clEnv)) { magick_unreferenced(clEnv); @@ -2470,7 +2670,7 @@ MagickPrivate cl_command_queue AcquireOpenCLCommandQueue( return (cl_command_queue) NULL; } -MagickExport MagickBooleanType RelinquishCommandQueue( +MagickPrivate MagickBooleanType RelinquishCommandQueue( MagickCLEnv magick_unused(clEnv),cl_command_queue magick_unused(queue)) { magick_unreferenced(clEnv); @@ -2534,6 +2734,32 @@ MagickBooleanType OpenCLThrowMagickException(ExceptionInfo *exception, magick_unreferenced(format); return(MagickFalse); } + + +MagickPrivate cl_mem GetAndLockRandSeedBuffer(MagickCLEnv clEnv) +{ + magick_unreferenced(clEnv); + return NULL; +} + + +MagickPrivate void UnlockRandSeedBuffer(MagickCLEnv clEnv) +{ + magick_unreferenced(clEnv); +} + +MagickPrivate unsigned int GetNumRandGenerators(MagickCLEnv clEnv) +{ + magick_unreferenced(clEnv); + return 0; +} + +MagickPrivate float GetRandNormalize(MagickCLEnv clEnv) +{ + magick_unreferenced(clEnv); + return 0.0f; +} + #endif /* MAGICKCORE_OPENCL_SUPPORT */ char* openclCachedFilesDirectory; @@ -2554,43 +2780,65 @@ const char* GetOpenCLCachedFilesDirectory() { struct stat attributes; MagickBooleanType status; -#ifdef MAGICKCORE_WINDOWS_SUPPORT - home=GetEnvironmentValue("LOCALAPPDATA"); - if (home == (char *) NULL) - home=GetEnvironmentValue("APPDATA"); + + + 
home=GetEnvironmentValue("IMAGEMAGICK_OPENCL_CACHE_DIR"); if (home == (char *) NULL) - home=GetEnvironmentValue("USERPROFILE"); + { +#ifdef MAGICKCORE_WINDOWS_SUPPORT + home=GetEnvironmentValue("LOCALAPPDATA"); + if (home == (char *) NULL) + home=GetEnvironmentValue("APPDATA"); + if (home == (char *) NULL) + home=GetEnvironmentValue("USERPROFILE"); #else - home=GetEnvironmentValue("HOME"); + home=GetEnvironmentValue("HOME"); #endif + } + if (home != (char *) NULL) { + int mkdirStatus = 0; /* - Search $HOME/.config/ImageMagick. */ - (void) FormatLocaleString(path,MaxTextExtent,"%s%s.config",home, - DirectorySeparator); + + /* first check if $HOME/.config exists */ + (void) FormatLocaleString(path,MaxTextExtent,"%s%s.config", + home,DirectorySeparator); status=GetPathAttributes(path,&attributes); - if (status == MagickFalse) { + if (status == MagickFalse) + { + #ifdef MAGICKCORE_WINDOWS_SUPPORT - mkdir(path); + mkdirStatus = mkdir(path); #else - mkdir(path, 0777); + mkdirStatus = mkdir(path, 0777); #endif } - (void) FormatLocaleString(path,MaxTextExtent,"%s%s.config%sImageMagick", - home,DirectorySeparator,DirectorySeparator); - home=DestroyString(home); - temp = (char*)AcquireMagickMemory(strlen(path)+1); - CopyMagickString(temp,path,strlen(path)+1); - status=GetPathAttributes(path,&attributes); - if (status == MagickFalse) { + + /* first check if $HOME/.config/ImageMagick exists */ + if (mkdirStatus==0) + { + (void) FormatLocaleString(path,MaxTextExtent,"%s%s.config%sImageMagick", + home,DirectorySeparator,DirectorySeparator); + + status=GetPathAttributes(path,&attributes); + if (status == MagickFalse) + { #ifdef MAGICKCORE_WINDOWS_SUPPORT - mkdir(path); + mkdirStatus = mkdir(path); #else - mkdir(path, 0777); + mkdirStatus = mkdir(path, 0777); #endif + } + } + + if (mkdirStatus==0) + { + temp = (char*)AcquireMagickMemory(strlen(path)+1); + CopyMagickString(temp,path,strlen(path)+1); } + home=DestroyString(home); } openclCachedFilesDirectory = temp; } @@ -2599,6 
+2847,52 @@ const char* GetOpenCLCachedFilesDirectory() { return openclCachedFilesDirectory; } +void startAccelerateTimer(AccelerateTimer* timer) { +#ifdef _WIN32 + QueryPerformanceCounter((LARGE_INTEGER*)&timer->_start); + + +#else + struct timeval s; + gettimeofday(&s, 0); + timer->_start = (long long)s.tv_sec * (long long)1.0E3 + (long long)s.tv_usec / (long long)1.0E3; +#endif +} + +void stopAccelerateTimer(AccelerateTimer* timer) { + long long n=0; +#ifdef _WIN32 + QueryPerformanceCounter((LARGE_INTEGER*)&(n)); +#else + struct timeval s; + gettimeofday(&s, 0); + n = (long long)s.tv_sec * (long long)1.0E3+ (long long)s.tv_usec / (long long)1.0E3; +#endif + n -= timer->_start; + timer->_start = 0; + timer->_clocks += n; +} + +void resetAccelerateTimer(AccelerateTimer* timer) { + timer->_clocks = 0; + timer->_start = 0; +} + + +void initAccelerateTimer(AccelerateTimer* timer) { +#ifdef _WIN32 + QueryPerformanceFrequency((LARGE_INTEGER*)&timer->_freq); +#else + timer->_freq = (long long)1.0E3; +#endif + resetAccelerateTimer(timer); +} + +double readAccelerateTimer(AccelerateTimer* timer) { + return (double)timer->_clocks/(double)timer->_freq; +}; + + /* create a function for OpenCL log */ MagickPrivate void OpenCLLog(const char* message) { @@ -2640,3 +2934,5 @@ void OpenCLLog(const char* message) { magick_unreferenced(message); #endif } + + diff --git a/MagickCore/version.h b/MagickCore/version.h index efc89ca1c..a876602e9 100644 --- a/MagickCore/version.h +++ b/MagickCore/version.h @@ -27,7 +27,7 @@ extern "C" { */ #define MagickPackageName "ImageMagick" #define MagickCopyright "Copyright (C) 1999-2014 ImageMagick Studio LLC" -#define MagickSVNRevision "15035M" +#define MagickSVNRevision "15038M" #define MagickLibVersion 0x700 #define MagickLibVersionText "7.0.0" #define MagickLibVersionNumber 1,0,0 @@ -48,7 +48,7 @@ extern "C" { #define MagickppLibAddendum "-0" #define MagickppLibInterface 1 #define MagickppLibMinInterface 1 -#define MagickReleaseDate 
"2014-03-06" +#define MagickReleaseDate "2014-03-07" #define MagickChangeDate "20120427" #define MagickAuthoritativeURL "http://www.imagemagick.org" #define MagickFeatures "DPC HDRI OpenMP" diff --git a/configure b/configure index 16ddf164e..a482eaf22 100755 --- a/configure +++ b/configure @@ -3702,7 +3702,7 @@ MAGICK_PATCHLEVEL_VERSION=0 MAGICK_VERSION=7.0.0-0 -MAGICK_SVN_REVISION=15035M +MAGICK_SVN_REVISION=15038M # Substitute library versioning @@ -10524,6 +10524,9 @@ fi +#remove static link on Linux +CL_LIBS=`echo $CL_LIBS | $SED -e 's/-lOpenCL //'` + CFLAGS="$CL_CFLAGS $CFLAGS" CPPFLAGS="$CL_CFLAGS $CPPFLAGS" diff --git a/m4/ax_opencl.m4 b/m4/ax_opencl.m4 index 834b61800..a646346be 100644 --- a/m4/ax_opencl.m4 +++ b/m4/ax_opencl.m4 @@ -143,5 +143,8 @@ if test "$enable_opencl" = 'yes'; then fi AC_SUBST([CL_CFLAGS]) + +#remove static link on Linux +CL_LIBS=`echo $CL_LIBS | $SED -e 's/-lOpenCL //'` AC_SUBST([CL_LIBS]) ])dnl -- 2.40.0