From: Bborie Park Date: Wed, 18 May 2011 14:18:15 +0000 (+0000) Subject: ST_SummaryStats returns the sum as part of the summary stats. X-Git-Tag: 2.0.0alpha1~1618 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=616b91bc702a7cac6f196eb34fcc80aff848dbf4;p=postgis ST_SummaryStats returns the sum as part of the summary stats. _ST_SummaryStats(rastertable, rastercolumn) function changed to make use of the sum and compute a straight mean (sum / count) rather than computing a weighted mean. git-svn-id: http://svn.osgeo.org/postgis/trunk@7193 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/raster/rt_core/rt_api.c b/raster/rt_core/rt_api.c index 035afe507..62be5de40 100644 --- a/raster/rt_core/rt_api.c +++ b/raster/rt_core/rt_api.c @@ -1489,6 +1489,7 @@ struct rt_bandstats_t { double min; double max; + double sum; double mean; double stddev; @@ -1584,6 +1585,7 @@ rt_band_get_summary_stats(rt_band band, int hasnodata, double sample, stats->sample = 1; stats->count = band->width * band->height; stats->min = stats->max = nodata; + stats->sum = stats->count * nodata; stats->mean = nodata; stats->stddev = 0; stats->values = NULL; @@ -1696,6 +1698,7 @@ rt_band_get_summary_stats(rt_band band, int hasnodata, double sample, stats->sample = sample; stats->count = 0; + stats->sum = 0; stats->mean = 0; stats->stddev = -1; stats->min = stats->max = value; @@ -1731,6 +1734,7 @@ rt_band_get_summary_stats(rt_band band, int hasnodata, double sample, } stats->count = k; + stats->sum = sum; stats->mean = sum / k; /* standard deviation */ diff --git a/raster/rt_pg/rt_pg.c b/raster/rt_pg/rt_pg.c index bb42addd3..d6d94238c 100644 --- a/raster/rt_pg/rt_pg.c +++ b/raster/rt_pg/rt_pg.c @@ -2922,6 +2922,7 @@ struct rt_bandstats_t { double min; double max; + double sum; double mean; double stddev; @@ -2935,188 +2936,158 @@ struct rt_bandstats_t { PG_FUNCTION_INFO_V1(RASTER_summaryStats); Datum RASTER_summaryStats(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; + rt_pgraster *pgraster = (rt_pgraster *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + rt_raster raster = NULL; + rt_band band = NULL; + int32_t bandindex = 0; + bool hasnodata = TRUE; + int num_bands = 0; + double sample = 0; + rt_bandstats stats = NULL; + TupleDesc tupdesc; AttInMetadata *attinmeta; - rt_bandstats stats; - rt_bandstats stats2; - int call_cntr; - int max_calls; - - /* first call of function */ - if (SRF_IS_FIRSTCALL()) { - MemoryContext oldcontext; + char **values; + HeapTuple tuple; + Datum result; - rt_pgraster *pgraster = (rt_pgraster *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - rt_raster raster = NULL; - rt_band band = NULL; - int32_t bandindex = 0; - bool hasnodata = TRUE; - int num_bands = 0; - double sample = 0; - - /* create a function context for cross-call persistence */ - funcctx = SRF_FIRSTCALL_INIT(); + raster = rt_raster_deserialize(pgraster); + if (!raster) { + elog(ERROR, "RASTER_summaryStats: Could not deserialize raster"); + PG_RETURN_NULL(); + } - /* switch to memory context appropriate for multiple function calls */ - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + /* band index is 1-based */ + bandindex = PG_GETARG_INT32(1); + num_bands = rt_raster_get_num_bands(raster); + if (bandindex < 1 || bandindex > num_bands) { + elog(NOTICE, "Invalid band index (must use 1-based). Returning NULL"); + rt_raster_destroy(raster); + PG_RETURN_NULL(); + } + assert(0 <= (bandindex - 1)); - raster = rt_raster_deserialize(pgraster); - if (!raster) { - elog(ERROR, "RASTER_summaryStats: Could not deserialize raster"); - PG_RETURN_NULL(); - } + /* hasnodata flag */ + if (!PG_ARGISNULL(2)) + hasnodata = PG_GETARG_BOOL(2); - /* band index is 1-based */ - bandindex = PG_GETARG_INT32(1); - num_bands = rt_raster_get_num_bands(raster); - if (bandindex < 1 || bandindex > num_bands) { - elog(NOTICE, "Invalid band index (must use 1-based). Returning NULL"); + /* sample % */ + if (!PG_ARGISNULL(3)) { + sample = PG_GETARG_FLOAT8(3); + if (sample < 0 || sample > 1) { + elog(NOTICE, "Invalid sample percentage (must be between 0 and 1). Returning NULL"); rt_raster_destroy(raster); PG_RETURN_NULL(); } - assert(0 <= (bandindex - 1)); - - /* hasnodata flag */ - if (!PG_ARGISNULL(2)) - hasnodata = PG_GETARG_BOOL(2); - - /* sample % */ - if (!PG_ARGISNULL(3)) { - sample = PG_GETARG_FLOAT8(3); - if (sample < 0 || sample > 1) { - elog(NOTICE, "Invalid sample percentage (must be between 0 and 1). Returning NULL"); - rt_raster_destroy(raster); - PG_RETURN_NULL(); - } - else if (sample == 0) - sample = 1; - } - else + else if (sample == 0) sample = 1; + } + else + sample = 1; - /* get band */ - band = rt_raster_get_band(raster, bandindex - 1); - if (!band) { - elog(NOTICE, "Could not find raster band of index %d. Returning NULL", bandindex); - rt_raster_destroy(raster); - PG_RETURN_NULL(); - } - - /* we don't need the raw values, hence the zero parameter */ - stats = rt_band_get_summary_stats(band, (int) hasnodata, sample, 0); - rt_band_destroy(band); + /* get band */ + band = rt_raster_get_band(raster, bandindex - 1); + if (!band) { + elog(NOTICE, "Could not find raster band of index %d. Returning NULL", bandindex); rt_raster_destroy(raster); - if (NULL == stats) { - elog(NOTICE, "Could not retrieve summary statistics of band of index %d. Returning NULL", bandindex); - PG_RETURN_NULL(); - } - - /* store needed data */ - funcctx->user_fctx = stats; - - /* total number of tuples to return */ - funcctx->max_calls = 1; - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) { - ereport(ERROR, ( - errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg( - "function returning record called in context " - "that cannot accept type record" - ) - )); - } - - /* - * generate attribute metadata needed later to produce tuples from raw - * C strings - */ - attinmeta = TupleDescGetAttInMetadata(tupdesc); - funcctx->attinmeta = attinmeta; - MemoryContextSwitchTo(oldcontext); - + PG_RETURN_NULL(); } - /* stuff done on every call of the function */ - funcctx = SRF_PERCALL_SETUP(); - - call_cntr = funcctx->call_cntr; - max_calls = funcctx->max_calls; - attinmeta = funcctx->attinmeta; - stats2 = funcctx->user_fctx; - - /* do when there is more left to send */ - if (call_cntr < max_calls) { - char **values; - HeapTuple tuple; - Datum result; - - POSTGIS_RT_DEBUGF(3, "tuple #%d", call_cntr); - - /* - * Prepare a values array for building the returned tuple. - * This should be an array of C strings which will - * be processed later by the type input functions. - */ - values = (char **) palloc(5 * sizeof(char *)); - - values[0] = (char *) palloc(sizeof(char) * (MAX_INT_CHARLEN + 1)); - values[1] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); - values[2] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); - values[3] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); - values[4] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); - - snprintf( - values[0], - sizeof(char) * (MAX_INT_CHARLEN + 1), - "%d", - stats2[call_cntr].count - ); - snprintf( - values[1], - sizeof(char) * (MAX_DBL_CHARLEN + 1), - "%f", - stats2[call_cntr].mean - ); - snprintf( - values[2], - sizeof(char) * (MAX_DBL_CHARLEN + 1), - "%f", - stats2[call_cntr].stddev - ); - snprintf( - values[3], - sizeof(char) * (MAX_DBL_CHARLEN + 1), - "%f", - stats2[call_cntr].min - ); - snprintf( - values[4], - sizeof(char) * (MAX_DBL_CHARLEN + 1), - "%f", - stats2[call_cntr].max - ); - - /* build a tuple */ - tuple = BuildTupleFromCStrings(attinmeta, values); + /* we don't need the raw values, hence the zero parameter */ + stats = rt_band_get_summary_stats(band, (int) hasnodata, sample, 0); + rt_band_destroy(band); + rt_raster_destroy(raster); + if (NULL == stats) { + elog(NOTICE, "Could not retrieve summary statistics of band of index %d. Returning NULL", bandindex); + PG_RETURN_NULL(); + } - /* make the tuple into a datum */ - result = HeapTupleGetDatum(tuple); + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) { + ereport(ERROR, ( + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "function returning record called in context " + "that cannot accept type record" + ) + )); + } - /* clean up */ - pfree(values[1]); - pfree(values[0]); - pfree(values); + /* + * generate attribute metadata needed later to produce tuples from raw + * C strings + */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); - SRF_RETURN_NEXT(funcctx, result); - } - else { - pfree(stats2); - SRF_RETURN_DONE(funcctx); - } + /* + * Prepare a values array for building the returned tuple. + * This should be an array of C strings which will + * be processed later by the type input functions. + */ + values = (char **) palloc(6 * sizeof(char *)); + + values[0] = (char *) palloc(sizeof(char) * (MAX_INT_CHARLEN + 1)); + values[1] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); + values[2] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); + values[3] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); + values[4] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); + values[5] = (char *) palloc(sizeof(char) * (MAX_DBL_CHARLEN + 1)); + + snprintf( + values[0], + sizeof(char) * (MAX_INT_CHARLEN + 1), + "%d", + stats->count + ); + snprintf( + values[1], + sizeof(char) * (MAX_DBL_CHARLEN + 1), + "%f", + stats->sum + ); + snprintf( + values[2], + sizeof(char) * (MAX_DBL_CHARLEN + 1), + "%f", + stats->mean + ); + snprintf( + values[3], + sizeof(char) * (MAX_DBL_CHARLEN + 1), + "%f", + stats->stddev + ); + snprintf( + values[4], + sizeof(char) * (MAX_DBL_CHARLEN + 1), + "%f", + stats->min + ); + snprintf( + values[5], + sizeof(char) * (MAX_DBL_CHARLEN + 1), + "%f", + stats->max + ); + + /* build a tuple */ + tuple = BuildTupleFromCStrings(attinmeta, values); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + /* clean up */ + pfree(values[5]); + pfree(values[4]); + pfree(values[3]); + pfree(values[2]); + pfree(values[1]); + pfree(values[0]); + pfree(values); + pfree(stats); + + PG_RETURN_DATUM(result); } /* get histogram */ diff --git a/raster/rt_pg/rtpostgis.sql.in.c b/raster/rt_pg/rtpostgis.sql.in.c index 7d91e0a25..c5a5bb2ad 100644 --- a/raster/rt_pg/rtpostgis.sql.in.c +++ b/raster/rt_pg/rtpostgis.sql.in.c @@ -263,6 +263,7 @@ CREATE OR REPLACE FUNCTION st_band(rast raster) ----------------------------------------------------------------------- CREATE TYPE summarystats AS ( count integer, + sum double precision, mean double precision, stddev double precision, min double precision, @@ -276,47 +277,47 @@ CREATE OR REPLACE FUNCTION _st_summarystats(rast raster, nband int, hasnodata bo CREATE OR REPLACE FUNCTION st_summarystats(rast raster, nband int, hasnodata boolean) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, 1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, 1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_summarystats(rast raster, nband int) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, TRUE, 1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, TRUE, 1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_summarystats(rast raster, hasnodata boolean) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, 1, $2, 1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, 1, $2, 1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_summarystats(rast raster) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, 1, TRUE, 1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, 1, TRUE, 1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rast raster, nband int, hasnodata boolean, sample_percent double precision) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, $4) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, $4) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rast raster, nband int, sample_percent double precision) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, TRUE, $3) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, TRUE, $3) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rast raster, hasnodata boolean, sample_percent double precision) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, 1, $2, $3) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, 1, $2, $3) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rast raster, sample_percent double precision) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, 1, TRUE, $2) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, 1, TRUE, $2) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rast raster) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, 1, TRUE, 0.1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, 1, TRUE, 0.1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION _st_summarystats(rastertable text, rastercolumn text, nband integer, hasnodata boolean, sample_percent double precision) @@ -331,7 +332,8 @@ CREATE OR REPLACE FUNCTION _st_summarystats(rastertable text, rastercolumn text, stats summarystats; rtn summarystats; - mean double precision; + avg double precision; + wavg double precision; tmp double precision; BEGIN -- nband @@ -364,19 +366,21 @@ CREATE OR REPLACE FUNCTION _st_summarystats(rastertable text, rastercolumn text, RETURN NULL; END; - mean := 0; + avg := 0; + wavg := 0; tmp := 0; LOOP FETCH curs INTO rast; EXIT WHEN NOT FOUND; - SELECT * FROM _st_summarystats(rast, nband, hasnodata, sample_percent) INTO stats; + SELECT count, sum, mean, stddev, min, max FROM _st_summarystats(rast, nband, hasnodata, sample_percent) INTO stats; IF stats IS NULL OR stats.count < 1 THEN CONTINUE; END IF; --RAISE NOTICE 'stats = %', stats; - mean := mean + (stats.count * stats.mean); -- not the mean + avg := avg + stats.sum; -- not the mean + wavg := wavg + (stats.count * stats.mean); -- not the mean -- sum of "sum of squares" IF sample_percent > 0 AND sample_percent < 1 THEN tmp := tmp + (power(stats.stddev, 2) * (stats.count - 1)); @@ -401,9 +405,12 @@ CREATE OR REPLACE FUNCTION _st_summarystats(rastertable text, rastercolumn text, CLOSE curs; - -- weighted mean and cumulative stddev + -- mean and cumulative stddev IF rtn.count IS NOT NULL AND rtn.count > 0 THEN - rtn.mean := mean / rtn.count; + rtn.mean := avg / rtn.count; + wavg := wavg / rtn.count; + --RAISE NOTICE 'straight avg = %', avg; + --RAISE NOTICE 'weighted avg = %', wavg; rtn.stddev := sqrt(tmp / rtn.count); END IF; @@ -413,42 +420,42 @@ CREATE OR REPLACE FUNCTION _st_summarystats(rastertable text, rastercolumn text, CREATE OR REPLACE FUNCTION st_summarystats(rastertable text, rastercolumn text, nband integer, hasnodata boolean) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, $4, 1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, $4, 1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_summarystats(rastertable text, rastercolumn text, nband integer) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, TRUE, 1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, TRUE, 1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_summarystats(rastertable text, rastercolumn text) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, 1, TRUE, 1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, 1, TRUE, 1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rastertable text, rastercolumn text, nband integer, hasnodata boolean, sample_percent double precision) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, $4, $5) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, $4, $5) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rastertable text, rastercolumn text, nband integer, sample_percent double precision) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, TRUE, $4) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, TRUE, $4) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rastertable text, rastercolumn text, nband integer) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, TRUE, 0.1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, $3, TRUE, 0.1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rastertable text, rastercolumn text, sample_percent double precision) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, 1, TRUE, $3) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, 1, TRUE, $3) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION st_approxsummarystats(rastertable text, rastercolumn text) RETURNS summarystats - AS $$ SELECT count, mean, stddev, min, max FROM _st_summarystats($1, $2, 1, TRUE, 0.1) $$ + AS $$ SELECT count, sum, mean, stddev, min, max FROM _st_summarystats($1, $2, 1, TRUE, 0.1) $$ LANGUAGE 'SQL' IMMUTABLE STRICT; ----------------------------------------------------------------------- diff --git a/raster/test/core/testapi.c b/raster/test/core/testapi.c index 915c56177..0d9319e6d 100644 --- a/raster/test/core/testapi.c +++ b/raster/test/core/testapi.c @@ -993,6 +993,7 @@ struct rt_bandstats_t { double min; double max; + double sum; double mean; double stddev; diff --git a/raster/test/regress/rt_summarystats_expected b/raster/test/regress/rt_summarystats_expected index 98c347e5e..d366dc47d 100644 --- a/raster/test/regress/rt_summarystats_expected +++ b/raster/test/regress/rt_summarystats_expected @@ -1,12 +1,12 @@ -2|-3.429205|6.570795|-10|3.14159 -(2,-3.429205,6.570795,-10,3.14159) +2|-6.85841|-3.429205|6.570795|-10|3.14159 +(2,-6.85841,-3.429205,6.570795,-10,3.14159) 2 100 -3.429205|6.570795 -0.068584|1.045941 BEGIN -20|-3.429205|6.570795|-10|3.14159 -1000|-0.068584|1.045941|-10|3.14159 -20|-3.429205|6.570795|-10|3.14159 -20|-3.429205|6.570795|-10|3.14159 +20|-6.85841|-3.429205|6.570795|-10|3.14159 +1000|-6.85841|-0.0685841|1.045941|-10|3.14159 +20|-6.85841|-3.429205|6.570795|-10|3.14159 +20|-6.85841|-3.429205|6.570795|-10|3.14159 COMMIT diff --git a/raster/test/regress/run_test.in b/raster/test/regress/run_test.in index 8afe79aef..07799798e 100644 --- a/raster/test/regress/run_test.in +++ b/raster/test/regress/run_test.in @@ -113,6 +113,7 @@ run_simple_test () ${PSQL} -tA < "${_sql}" ${DB} > ${TMPFILE} 2>&1 cat ${TMPFILE} \ | grep -v "^$" \ + | grep -v "^SELECT" \ | grep -v "^INSERT" \ | grep -v "^DELETE" \ | grep -v "^CONTEXT" \