From: Mark Cave-Ayland Date: Mon, 5 Oct 2009 16:17:02 +0000 (+0000) Subject: Commit first attempt at a multi-D statistics histogram builder. Note that while geode... X-Git-Tag: 1.5.0b1~416 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=51339d603ddfb26fb1b2e85922dcc4970e2efd8d;p=postgis Commit first attempt at a multi-D statistics histogram builder. Note that while geodetic data is inherently 3D, the builder also contains code to handle lower dimension cartesian coordinates, which should make porting to ggeometry easier at a later date. At the moment there are no selectivity routines which actually use the histograms for real queries, however a reasonably convincing histogram is created in pg_statistic and all regression tests pass here. git-svn-id: http://svn.osgeo.org/postgis/trunk@4594 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/postgis/geography_estimate.c b/postgis/geography_estimate.c index 206a54f35..73b8c1ef4 100644 --- a/postgis/geography_estimate.c +++ b/postgis/geography_estimate.c @@ -14,6 +14,7 @@ #include "postgres.h" #include "commands/vacuum.h" +#include "libgeom.h" #include "lwgeom_pg.h" /* Prototypes */ @@ -42,6 +43,71 @@ Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS) } +/** + * Assign a number to the postgis statistics kind + * + * tgl suggested: + * + * 1-100: reserved for assignment by the core Postgres project + * 100-199: reserved for assignment by PostGIS + * 200-9999: reserved for other globally-known stats kinds + * 10000-32767: reserved for private site-local use + * + */ +#define STATISTIC_KIND_GEOGRAPHY 101 + +/* + * Define this if you want to use standard deviation based + * histogram extent computation. If you do, you can also + * tweak the deviation factor used in computation with + * SDFACTOR. + */ +#define USE_STANDARD_DEVIATION 1 +#define SDFACTOR 3.25 + + +/* Information about the dimensions stored in the sample */ +struct dimensions +{ + char axis; + double min; + double max; +}; + +/* Main geography statistics structure, stored in pg_statistics */ +typedef struct GEOG_STATS_T +{ + /* Dimensionality of this column */ + float4 dims; + + /* x . y . z = total boxes in grid */ + float4 unitsx; + float4 unitsy; + float4 unitsz; + + /* average feature coverage of not-null features: + this is either an area for the case of a 2D column + or a volume in the case of a 3D column */ + float4 avgFeatureCoverage; + + /* + * average number of histogram cells + * covered by the sample not-null features + */ + float4 avgFeatureCells; + + /* histogram extent */ + float4 xmin, ymin, zmin, xmax, ymax, zmax; + + /* + * variable length # of floats for histogram + */ + float4 value[1]; +} +GEOG_STATS; + + + /** * This function is called by the analyze function iff * the geography_analyze() function give it its pointer @@ -64,8 +130,651 @@ static void compute_geography_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows) { + MemoryContext old_context; + GEOG_STATS *geogstats; + + GBOX *sample_extent = NULL; + GBOX **sampleboxes; + GBOX histobox; + int histocells; + double sizex, sizey, sizez, edgelength; + int unitsx = 0, unitsy = 0, unitsz = 0; + int geog_stats_size; + struct dimensions histodims[3]; + int ndims; + + double total_width = 0; + int notnull_cnt = 0, examinedsamples = 0, total_count_cells=0, total_cells_coverage = 0; + +#if USE_STANDARD_DEVIATION + /* for standard deviation */ + double avgLOWx, avgLOWy, avgLOWz, avgHIGx, avgHIGy, avgHIGz; + double sumLOWx = 0, sumLOWy = 0, sumLOWz = 0, sumHIGx = 0, sumHIGy = 0, sumHIGz = 0; + double sdLOWx = 0, sdLOWy = 0, sdLOWz = 0, sdHIGx = 0, sdHIGy = 0, sdHIGz = 0; + GBOX *newhistobox = NULL; +#endif + + bool isnull; + int i; + + POSTGIS_DEBUG(2, "compute_geography_stats called"); + + /* + * We'll build an histogram having from 40 to 400 boxesPerSide + * Total number of cells is determined by attribute stat + * target. It can go from 1600 to 160000 (stat target: 10,1000) + */ + histocells = 160 * stats->attr->attstattarget; + + /* + * Memory to store the bounding boxes from all of the sampled rows + */ + sampleboxes = palloc(sizeof(GBOX *) * samplerows); + + /* + * First scan: + * o find extent of the sample rows + * o count null-infinite/not-null values + * o compute total_width + * o compute total features's box area (for avgFeatureArea) + * o sum features box coordinates (for standard deviation) + */ + for (i = 0; i < samplerows; i++) + { + GBOX gbox; + Datum datum; + GSERIALIZED *serialized; + LWGEOM *geometry; + + /* Fetch the datum and cast it into a geography */ + datum = fetchfunc(stats, i, &isnull); + + /* Skip nulls */ + if (isnull) + continue; + + serialized = (GSERIALIZED *)PG_DETOAST_DATUM(datum); + geometry = lwgeom_from_gserialized(serialized); + + /* Convert coordinates to 3D geodesic */ + if (!lwgeom_calculate_gbox_geodetic(geometry, &gbox)) + { + /* Unable to obtain or calculate a bounding box */ + POSTGIS_DEBUGF(3, "skipping geometry at position %d", i); + + continue; + } + + /* + * Skip infinite geoms + */ + if ( ! finite(gbox.xmin) || ! finite(gbox.xmax) || + ! finite(gbox.ymin) || ! finite(gbox.ymax) || + ! finite(gbox.zmin) || ! finite(gbox.zmax) ) + { + POSTGIS_DEBUGF(3, " skipped infinite geometry at position %d", i); + + continue; + } + + /* + * Store bounding box in array + */ + sampleboxes[notnull_cnt] = palloc(sizeof(GBOX)); + memcpy(sampleboxes[notnull_cnt], &gbox, sizeof(GBOX)); + + /* + * Add to sample extent union + */ + if ( ! sample_extent ) + { + sample_extent = palloc(sizeof(GBOX)); + memcpy(sample_extent, &gbox, sizeof(GBOX)); + } + else + { + sample_extent->xmax = LWGEOM_Maxf(sample_extent->xmax, gbox.xmax); + sample_extent->ymax = LWGEOM_Maxf(sample_extent->ymax, gbox.ymax); + sample_extent->zmax = LWGEOM_Maxf(sample_extent->zmax, gbox.zmax); + sample_extent->xmin = LWGEOM_Minf(sample_extent->xmin, gbox.xmin); + sample_extent->ymin = LWGEOM_Minf(sample_extent->ymin, gbox.ymin); + sample_extent->zmin = LWGEOM_Minf(sample_extent->zmin, gbox.zmin); + } + + /** TODO: ask if we need geom or bvol size for stawidth */ + total_width += serialized->size; + +#if USE_STANDARD_DEVIATION + /* + * Add bvol coordinates to sum for standard deviation + * computation. + */ + sumLOWx += gbox.xmin; + sumLOWy += gbox.ymin; + sumLOWz += gbox.zmin; + sumHIGx += gbox.xmax; + sumHIGy += gbox.ymax; + sumHIGz += gbox.zmax; +#endif + + notnull_cnt++; + + /* give backend a chance of interrupting us */ + vacuum_delay_point(); + } + + POSTGIS_DEBUG(3, "End of 1st scan:"); + POSTGIS_DEBUGF(3, " Sample extent (min, max): (%g %g %g), (%g %g %g)", sample_extent->xmin, sample_extent->ymin, + sample_extent->zmin, sample_extent->xmax, sample_extent->ymax, sample_extent->zmax); + POSTGIS_DEBUGF(3, " No. of geometries sampled: %d", samplerows); + POSTGIS_DEBUGF(3, " No. of non-null geometries sampled: %d", notnull_cnt); + + if ( ! notnull_cnt ) + { + elog(NOTICE, " no notnull values, invalid stats"); + stats->stats_valid = false; + return; + } + +#if USE_STANDARD_DEVIATION + + POSTGIS_DEBUG(3, "Standard deviation filter enabled"); + + /* + * Second scan: + * o compute standard deviation + */ + avgLOWx = sumLOWx / notnull_cnt; + avgLOWy = sumLOWy / notnull_cnt; + avgLOWz = sumLOWz / notnull_cnt; + avgHIGx = sumHIGx / notnull_cnt; + avgHIGy = sumHIGy / notnull_cnt; + avgHIGz = sumHIGz / notnull_cnt; + + for (i = 0; i < notnull_cnt; i++) + { + GBOX *box; + box = (GBOX *)sampleboxes[i]; + + sdLOWx += (box->xmin - avgLOWx) * (box->xmin - avgLOWx); + sdLOWy += (box->ymin - avgLOWy) * (box->ymin - avgLOWy); + sdLOWz += (box->zmin - avgLOWz) * (box->zmin - avgLOWz); + sdHIGx += (box->xmax - avgHIGx) * (box->xmax - avgHIGx); + sdHIGy += (box->ymax - avgHIGy) * (box->ymax - avgHIGy); + sdHIGz += (box->zmax - avgHIGz) * (box->zmax - avgHIGz); + } + sdLOWx = sqrt(sdLOWx / notnull_cnt); + sdLOWy = sqrt(sdLOWy / notnull_cnt); + sdLOWz = sqrt(sdLOWz / notnull_cnt); + sdHIGx = sqrt(sdHIGx / notnull_cnt); + sdHIGy = sqrt(sdHIGy / notnull_cnt); + sdHIGz = sqrt(sdHIGz / notnull_cnt); + + POSTGIS_DEBUG(3, " standard deviations:"); + POSTGIS_DEBUGF(3, " LOWx - avg:%f sd:%f", avgLOWx, sdLOWx); + POSTGIS_DEBUGF(3, " LOWy - avg:%f sd:%f", avgLOWy, sdLOWy); + POSTGIS_DEBUGF(3, " LOWz - avg:%f sd:%f", avgLOWz, sdLOWz); + POSTGIS_DEBUGF(3, " HIGx - avg:%f sd:%f", avgHIGx, sdHIGx); + POSTGIS_DEBUGF(3, " HIGy - avg:%f sd:%f", avgHIGy, sdHIGy); + POSTGIS_DEBUGF(3, " HIGz - avg:%f sd:%f", avgHIGz, sdHIGz); + + histobox.xmin = LW_MAX((avgLOWx - SDFACTOR * sdLOWx), sample_extent->xmin); + histobox.ymin = LW_MAX((avgLOWy - SDFACTOR * sdLOWy), sample_extent->ymin); + histobox.zmin = LW_MAX((avgLOWz - SDFACTOR * sdLOWz), sample_extent->zmin); + histobox.xmax = LW_MIN((avgHIGx + SDFACTOR * sdHIGx), sample_extent->xmax); + histobox.ymax = LW_MIN((avgHIGy + SDFACTOR * sdHIGy), sample_extent->ymax); + histobox.zmax = LW_MIN((avgHIGz + SDFACTOR * sdHIGz), sample_extent->zmax); + + POSTGIS_DEBUGF(3, " sd_extent: xmin, ymin, zmin: %f, %f, %f", + histobox.xmin, histobox.ymin, histobox.zmin); + POSTGIS_DEBUGF(3, " sd_extent: xmax, ymax, zmax: %f, %f, %f", + histobox.xmax, histobox.ymax, histobox.zmax); + + /* + * Third scan: + * o skip hard deviants + * o compute new histogram box + */ + for (i = 0; i < notnull_cnt; i++) + { + GBOX *box; + box = (GBOX *)sampleboxes[i]; + + if ( box->xmin > histobox.xmax || box->xmax < histobox.xmin || + box->ymin > histobox.ymax || box->ymax < histobox.ymin || + box->zmin > histobox.zmax || box->zmax < histobox.zmin) + { + POSTGIS_DEBUGF(4, " feat %d is an hard deviant, skipped", i); + + sampleboxes[i] = NULL; + continue; + } + + if ( ! newhistobox ) + { + newhistobox = palloc(sizeof(GBOX)); + memcpy(newhistobox, box, sizeof(GBOX)); + } + else + { + if ( box->xmin < newhistobox->xmin ) + newhistobox->xmin = box->xmin; + if ( box->ymin < newhistobox->ymin ) + newhistobox->ymin = box->ymin; + if ( box->zmin < newhistobox->zmin ) + newhistobox->zmin = box->zmin; + if ( box->xmax > newhistobox->xmax ) + newhistobox->xmax = box->xmax; + if ( box->ymax > newhistobox->ymax ) + newhistobox->ymax = box->ymax; + if ( box->zmax > newhistobox->zmax ) + newhistobox->zmax = box->zmax; + } + } + + /* + * Set histogram extent as the intersection between + * standard deviation based histogram extent + * and computed sample extent after removal of + * hard deviants (there might be no hard deviants). + */ + if ( histobox.xmin < newhistobox->xmin ) + histobox.xmin = newhistobox->xmin; + if ( histobox.ymin < newhistobox->ymin ) + histobox.ymin = newhistobox->ymin; + if ( histobox.zmin < newhistobox->zmin ) + histobox.zmin = newhistobox->zmin; + if ( histobox.xmax > newhistobox->xmax ) + histobox.xmax = newhistobox->xmax; + if ( histobox.ymax > newhistobox->ymax ) + histobox.ymax = newhistobox->ymax; + if ( histobox.zmax > newhistobox->zmax ) + histobox.zmax = newhistobox->zmax; + +#else /* ! USE_STANDARD_DEVIATION */ + + /* + * Set histogram extent box + */ + histobox.xmin = sample_extent->xmin; + histobox.ymin = sample_extent->ymin; + histobox.zmin = sample_extent->zmin; + histobox.xmax = sample_extent->xmax; + histobox.ymax = sample_extent->ymax; + histobox.zmax = sample_extent->zmax; + +#endif /* USE_STANDARD_DEVIATION */ + + + POSTGIS_DEBUGF(3, " histogram_extent: xmin, ymin, zmin: %f, %f, %f", + histobox.xmin, histobox.ymin, histobox.zmin); + POSTGIS_DEBUGF(3, " histogram_extent: xmax, ymax, zmax: %f, %f, %f", + histobox.xmax, histobox.ymax, histobox.zmax); + + /* Calculate the size of each dimension */ + sizex = histobox.xmax - histobox.xmin; + sizey = histobox.ymax - histobox.ymin; + sizez = histobox.zmax - histobox.zmin; + + /* In order to calculate a suitable aspect ratio for the histogram, we need + to work out how many dimensions exist within our sample data (which we + assume is representative of the whole data) */ + ndims = 0; + if (sizex != 0) + { + histodims[ndims].axis = 'X'; + histodims[ndims].min = histobox.xmin; + histodims[ndims].max = histobox.xmax; + ndims++; + } + + if (sizey != 0) + { + histodims[ndims].axis = 'Y'; + histodims[ndims].min = histobox.ymin; + histodims[ndims].max = histobox.ymax; + + ndims++; + } + + if (sizez != 0) + { + histodims[ndims].axis = 'Z'; + histodims[ndims].min = histobox.zmin; + histodims[ndims].max = histobox.zmax; + + ndims++; + } + + /* Based upon the number of dimensions, we now work out the number of units in each dimension. + The number of units is defined as the number of cell blocks in each dimension which make + up the total number of histocells; i.e. unitsx * unitsy * unitsz = histocells */ + + /* Note: geodetic data is currently indexed in 3 dimensions; however code for remaining dimensions + is also included to allow for indexing 3D cartesian data at a later date */ + + POSTGIS_DEBUGF(3, "Number of dimensions in sample set: %d", ndims); + + switch (ndims) + { + case 0: + /* An empty column, or multiple points in exactly the same + position in space */ + unitsx = 1; + unitsy = 1; + unitsz = 1; + histocells = 1; + break; + + case 1: + /* Sample data all lies on a single line, so set the correct + units variables depending upon which axis is in use */ + for (i = 0; i < ndims; i++) + { + if ( (histodims[i].max - histodims[i].min) != 0) + { + /* We've found the non-zero dimension, so set the + units variables accordingly */ + switch(histodims[i].axis) + { + case 'X': + unitsx = histocells; + unitsy = 1; + unitsz = 1; + break; + + case 'Y': + unitsx = 1; + unitsy = histocells; + unitsz = 1; + break; + + case 'Z': + unitsx = 1; + unitsy = 1; + unitsz = histocells; + break; + } + } + } + break; + + case 2: + /* Sample data lies within 2D space: divide the total area by the total + number of cells, and thus work out the edge size of the unit block */ + edgelength = sqrt( + abs(histodims[0].max - histodims[0].min) * + abs(histodims[1].max - histodims[1].min) / (double)histocells + ); + + /* The calculation is easy; the harder part is to work out which dimensions + we actually have to set the units variables appropriately */ + if (histodims[0].axis == 'X' && histodims[1].axis == 'Y') + { + /* X and Y */ + unitsx = abs(histodims[0].max - histodims[0].min) / edgelength; + unitsy = abs(histodims[1].max - histodims[1].min) / edgelength; + unitsz = 1; + } + else if (histodims[0].axis == 'Y' && histodims[1].axis == 'X') + { + /* Y and X */ + unitsx = abs(histodims[1].max - histodims[1].min) / edgelength; + unitsy = abs(histodims[0].max - histodims[0].min) / edgelength; + unitsz = 1; + } + else if (histodims[0].axis == 'X' && histodims[1].axis == 'Z') + { + /* X and Z */ + unitsx = abs(histodims[0].max - histodims[0].min) / edgelength; + unitsy = 1; + unitsz = abs(histodims[1].max - histodims[1].min) / edgelength; + } + else if (histodims[0].axis == 'Z' && histodims[1].axis == 'X') + { + /* Z and X */ + unitsx = abs(histodims[0].max - histodims[0].min) / edgelength; + unitsy = 1; + unitsz = abs(histodims[1].max - histodims[1].min) / edgelength; + } + else if (histodims[0].axis == 'Y' && histodims[1].axis == 'Z') + { + /* Y and Z */ + unitsx = 1; + unitsy = abs(histodims[0].max - histodims[0].min) / edgelength; + unitsz = (histodims[1].max - histodims[1].min) / edgelength; + } + else if (histodims[0].axis == 'Z' && histodims[1].axis == 'Y') + { + /* Z and X */ + unitsx = 1; + unitsy = abs(histodims[1].max - histodims[1].min) / edgelength; + unitsz = abs(histodims[0].max - histodims[0].min) / edgelength; + } + + break; + + case 3: + /* Sample data lies within 3D space: divide the total volume by the total + number of cells, and thus work out the edge size of the unit block */ + edgelength = pow( + abs(histodims[0].max - histodims[0].min) * + abs(histodims[1].max - histodims[1].min) * + abs(histodims[2].max - histodims[2].min) / (double)histocells, + (double)1/3); + + /* Units are simple in 3 dimensions */ + unitsx = abs(histodims[0].max - histodims[0].min) / edgelength; + unitsy = abs(histodims[1].max - histodims[1].min) / edgelength; + unitsz = abs(histodims[2].max - histodims[2].min) / edgelength; + + break; + } + + POSTGIS_DEBUGF(3, " computed histogram grid size (X,Y,Z): %d x %d x %d (%d out of %d cells)", unitsx, unitsy, unitsz, unitsx * unitsy * unitsz, histocells); + + /* + * Create the histogram (GEOG_STATS) + */ + old_context = MemoryContextSwitchTo(stats->anl_context); + geog_stats_size = sizeof(GEOG_STATS) + (histocells - 1) * sizeof(float4); + geogstats = palloc(geog_stats_size); + MemoryContextSwitchTo(old_context); + + geogstats->dims = ndims; + geogstats->xmin = histobox.xmin; + geogstats->ymin = histobox.ymin; + geogstats->zmin = histobox.zmin; + geogstats->xmax = histobox.xmax; + geogstats->ymax = histobox.ymax; + geogstats->zmax = histobox.zmax; + geogstats->unitsx = unitsx; + geogstats->unitsy = unitsy; + geogstats->unitsz = unitsz; + + /* Initialize all values to 0 */ + for (i = 0; i < histocells; i++) + geogstats->value[i] = 0; + + + /* + * Fourth scan: + * o fill histogram values with the number of + * features' bbox overlaps: a feature's bvol + * can fully overlap (1) or partially overlap + * (fraction of 1) an histogram cell. + * + * o compute total cells occupation + * + */ + + POSTGIS_DEBUG(3, "Beginning histogram intersection calculations"); + + for (i = 0; i < notnull_cnt; i++) + { + GBOX *box; + + /* Note these array index variables are zero-based */ + int x_idx_min, x_idx_max, x; + int y_idx_min, y_idx_max, y; + int z_idx_min, z_idx_max, z; + int numcells = 0; + + box = (GBOX *)sampleboxes[i]; + if ( ! box ) continue; /* hard deviant.. */ + + /* give backend a chance of interrupting us */ + vacuum_delay_point(); + + POSTGIS_DEBUGF(4, " feat %d box is %f %f %f, %f %f %f", + i, box->xmax, box->ymax, box->zmax, box->xmin, box->ymin, box->zmin); + + /* Find first overlapping unitsx cell */ + x_idx_min = (box->xmin - geogstats->xmin) / sizex * unitsx; + if (x_idx_min <0) x_idx_min = 0; + if (x_idx_min >= unitsx) x_idx_min = unitsx - 1; + + /* Find first overlapping unitsy cell */ + y_idx_min = (box->ymin - geogstats->ymin) / sizey * unitsy; + if (y_idx_min <0) y_idx_min = 0; + if (y_idx_min >= unitsy) y_idx_min = unitsy - 1; + + /* Find first overlapping unitsz cell */ + z_idx_min = (box->zmin - geogstats->zmin) / sizez * unitsz; + if (z_idx_min <0) z_idx_min = 0; + if (z_idx_min >= unitsz) z_idx_min = unitsz - 1; + + /* Find last overlapping unitsx cell */ + x_idx_max = (box->xmax - geogstats->xmin) / sizex * unitsx; + if (x_idx_max <0) x_idx_max = 0; + if (x_idx_max >= unitsx ) x_idx_max = unitsx - 1; + + /* Find last overlapping unitsy cell */ + y_idx_max = (box->ymax - geogstats->ymin) / sizey * unitsy; + if (y_idx_max <0) y_idx_max = 0; + if (y_idx_max >= unitsy) y_idx_max = unitsy - 1; + + /* Find last overlapping unitsz cell */ + z_idx_max = (box->zmax - geogstats->zmin) / sizez * unitsz; + if (z_idx_max <0) z_idx_max = 0; + if (z_idx_max >= unitsz) z_idx_max = unitsz - 1; + + POSTGIS_DEBUGF(4, " feat %d overlaps unitsx %d-%d, unitsy %d-%d, unitsz %d-%d", + i, x_idx_min, x_idx_max, y_idx_min, y_idx_max, z_idx_min, z_idx_max); + + /* Calculate the feature coverage - this of course depends upon the number of dims */ + switch (ndims) + { + case 1: + total_cells_coverage++; + break; + + case 2: + total_cells_coverage += (box->xmax - box->xmin) * (box->ymax - box->ymin); + break; + + case 3: + total_cells_coverage += (box->xmax - box->xmin) * (box->ymax - box->ymin) * + (box->zmax - box->zmin); + break; + } + + /* + * the {x,y,z}_idx_{min,max} + * define the grid squares that the box intersects + */ + + for (z = z_idx_min; z <= z_idx_max; z++) + { + for (y = y_idx_min; y <= y_idx_max; y++) + { + for (x = x_idx_min; x <= x_idx_max; x++) + { + geogstats->value[x + y * unitsx + z * unitsx * unitsy] += 1; + numcells++; + } + } + } + + /* + * before adding to the total cells + * we could decide if we really + * want this feature to count + */ + total_count_cells += numcells; + + examinedsamples++; + } + + POSTGIS_DEBUGF(3, " examined_samples: %d/%d", examinedsamples, samplerows); + + if ( ! examinedsamples ) + { + elog(NOTICE, " no examined values, invalid stats"); + stats->stats_valid = false; + + POSTGIS_DEBUG(3, " no stats have been gathered"); + + return; + } + + /** TODO: what about null features (TODO) */ + geogstats->avgFeatureCells = (float4)total_count_cells / examinedsamples; + geogstats->avgFeatureCoverage = total_cells_coverage / examinedsamples; + + POSTGIS_DEBUGF(3, " histo: total_boxes_cells: %d", total_count_cells); + POSTGIS_DEBUGF(3, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells); + POSTGIS_DEBUGF(3, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage); + + /* + * Normalize histogram + * + * We divide each histogram cell value + * by the number of samples examined. + * + */ + for (i = 0; i < histocells; i++) + geogstats->value[i] /= examinedsamples; + +#if POSTGIS_DEBUG_LEVEL >= 4 + /* Dump the resulting histogram for analysis */ + { + int x, y, z; + for (x = 0; x < unitsx; x++) + { + for (y = 0; y < unitsy; y++) + { + for (z = 0; z < unitsz; z++) + { + POSTGIS_DEBUGF(4, " histo[%d,%d,%d] = %.15f", x, y, z, + geogstats->value[x + y * unitsx + z * unitsx * unitsy]); + } + } + } + } +#endif + + /* + * Write the statistics data + */ + stats->stakind[0] = STATISTIC_KIND_GEOGRAPHY; + stats->staop[0] = InvalidOid; + stats->stanumbers[0] = (float4 *)geogstats; + stats->numnumbers[0] = geog_stats_size/sizeof(float4); + + stats->stanullfrac = (float4)(samplerows - notnull_cnt)/samplerows; + stats->stawidth = total_width/notnull_cnt; + stats->stadistinct = -1.0; + + POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_GEOGRAPHY)", + stats->stakind[0]); + POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]); + POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]); + POSTGIS_DEBUGF(3, " out: null fraction: %d/%d=%g", (samplerows - notnull_cnt), samplerows, stats->stanullfrac); + POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth); + POSTGIS_DEBUG(3, " out: distinct values: all (no check done)"); - elog(NOTICE, "compute_geography_stats called!"); + stats->stats_valid = true; }