From 560a910a25df2b4ca46af343321575ca6fb2c3bf Mon Sep 17 00:00:00 2001 From: dirk Date: Tue, 16 Dec 2014 21:55:06 +0000 Subject: [PATCH] Improved performance of dds:cluster-fit when OpenMP is enabled. --- coders/dds.c | 69 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/coders/dds.c b/coders/dds.c index c62197789..89c59f562 100644 --- a/coders/dds.c +++ b/coders/dds.c @@ -63,6 +63,7 @@ #include "MagickCore/profile.h" #include "MagickCore/quantum.h" #include "MagickCore/quantum-private.h" +#include "MagickCore/resource_.h" #include "MagickCore/static.h" #include "MagickCore/string_.h" #include "MagickCore/string-private.h" @@ -1061,10 +1062,6 @@ static void CompressClusterFit(const size_t count, gridrcp, half, onethird_onethird2, - part0, - part1, - part2, - part3, pointsWeights[16], two, twonineths, @@ -1079,11 +1076,10 @@ static void CompressClusterFit(const size_t count, besti = 0, bestj = 0, bestk = 0, - iterationIndex, - i, - j, - k, - kmin; + iterationIndex; + + ssize_t + i; unsigned char *o, @@ -1118,11 +1114,29 @@ static void CompressClusterFit(const size_t count, for (iterationIndex = 0;;) { - VectorInit(part0,0.0f); - for (i=0; i < count; i++) +#if defined(MAGICKCORE_OPENMP_SUPPORT) + #pragma omp parallel for schedule(dynamic,1) \ + num_threads(GetMagickResourceLimit(ThreadResource)) +#endif + for (i=0; i < (ssize_t) count; i++) { + DDSVector4 + part0, + part1, + part2; + + size_t + ii, + j, + k, + kmin; + + VectorInit(part0,0.0f); + for(ii=0; ii < (size_t) i; ii++) + VectorAdd(pointsWeights[ii],part0,&part0); + VectorInit(part1,0.0f); - for (j=i;;) + for (j=(size_t) i;;) { if (j == 0) { @@ -1147,7 +1161,8 @@ static void CompressClusterFit(const size_t count, betax_sum, e1, e2, - factor; + factor, + part3; float error; @@ -1206,15 +1221,23 @@ static void CompressClusterFit(const size_t count, error = e2.x + e2.y + e2.z; if (error < bestError) - { - VectorCopy43(a,start); - VectorCopy43(b,end); - bestError = error; - besti = i; - bestj = j; - bestk = k; - bestIteration = iterationIndex; - } + { +#if defined(MAGICKCORE_OPENMP_SUPPORT) + #pragma omp critical (DDS_CompressClusterFit) +#endif + { + if (error < bestError) + { + VectorCopy43(a,start); + VectorCopy43(b,end); + bestError = error; + besti = i; + bestj = j; + bestk = k; + bestIteration = iterationIndex; + } + } + } if (k == count) break; @@ -1229,8 +1252,6 @@ static void CompressClusterFit(const size_t count, VectorAdd(pointsWeights[j],part1,&part1); j++; } - - VectorAdd(pointsWeights[i],part0,&part0); } if (bestIteration != iterationIndex) -- 2.40.0