From: Paul Ramsey Date: Mon, 26 Sep 2011 20:39:00 +0000 (+0000) Subject: Add support for KNN-GiST ops. First cut only, much testing required. (#701) X-Git-Tag: 2.0.0alpha1~960 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3cc31d65d8ee9201105e3cc4ae4a8e82b678a2c6;p=postgis Add support for KNN-GiST ops. First cut only, much testing required. (#701) git-svn-id: http://svn.osgeo.org/postgis/trunk@7894 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/postgis/gserialized_gist_2d.c b/postgis/gserialized_gist_2d.c index 18a4ef914..f53e47b4b 100644 --- a/postgis/gserialized_gist_2d.c +++ b/postgis/gserialized_gist_2d.c @@ -63,6 +63,7 @@ Datum gserialized_gist_penalty_2d(PG_FUNCTION_ARGS); Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS); Datum gserialized_gist_union_2d(PG_FUNCTION_ARGS); Datum gserialized_gist_same_2d(PG_FUNCTION_ARGS); +Datum gserialized_gist_distance_2d(PG_FUNCTION_ARGS); /* ** GiST 2D operator prototypes @@ -79,6 +80,7 @@ Datum gserialized_overleft_2d(PG_FUNCTION_ARGS); Datum gserialized_overright_2d(PG_FUNCTION_ARGS); Datum gserialized_overabove_2d(PG_FUNCTION_ARGS); Datum gserialized_overbelow_2d(PG_FUNCTION_ARGS); +Datum gserialized_boxdistance_2d(PG_FUNCTION_ARGS); /* ** true/false test function type @@ -237,7 +239,7 @@ static bool box2df_overlaps(BOX2DF *a, BOX2DF *b) if ( (a->xmin > b->xmax) || (b->xmin > a->xmax) || (a->ymin > b->ymax) || (b->ymin > a->ymax) ) { - return FALSE; + return FALSE; } return TRUE; @@ -248,7 +250,7 @@ static bool box2df_contains(BOX2DF *a, BOX2DF *b) if ( (a->xmin > b->xmin) || (a->xmax < b->xmax) || (a->ymin > b->ymin) || (a->ymax < b->ymax) ) { - return FALSE; + return FALSE; } return TRUE; @@ -319,6 +321,98 @@ static bool box2df_overabove(BOX2DF *a, BOX2DF *b) return a->ymin >= b->ymin; } +/** +* Calculate the centroid->centroid distance between the boxes. +* We return the square distance to avoid a call to sqrt. 
+*/ +static double box2df_distance_leaf(BOX2DF *a, BOX2DF *b) +{ + /* The centroid->centroid distance between the boxes */ + double a_x = (a->xmax + a->xmin) / 2.0; + double a_y = (a->ymax + a->ymin) / 2.0; + double b_x = (b->xmax + b->xmin) / 2.0; + double b_y = (b->ymax + b->ymin) / 2.0; + + /* This "distance" is only used for comparisons, */ + /* so for speed we drop constants and skip the sqrt step. */ + return (a_x - b_x) * (a_x - b_x) + (a_y - b_y) * (a_y - b_y); +} + +/** +* Calculate the node_box_edge->query_centroid distance +* between the boxes. +* We return the square distance to avoid a call to sqrt. +*/ +static double box2df_distance_node(BOX2DF *node, BOX2DF *query) +{ + BOX2DF q; + double qx, qy; + double d = 0.0; + + /* Turn query into point */ + q.xmin = q.xmax = (query->xmin + query->xmax) / 2.0; + q.ymin = q.ymax = (query->ymin + query->ymax) / 2.0; + qx = q.xmin; + qy = q.ymin; + + /* Check for overlap */ + if ( box2df_overlaps(node, &q) == LW_TRUE ) + return 0.0; + + /* Above or below */ + if ( qx >= node->xmin && qx <= node->xmax ) + { + if( qy > node->ymax ) + d = qy - node->ymax; + else if ( qy < node->ymin ) + d = node->ymin - qy; + return d*d; + } + /* Left or right */ + else if ( qy >= node->ymin && qy <= node->ymax ) + { + if ( qx > node->xmax ) + d = qx - node->xmax; + else if ( qx < node->xmin ) + d = node->xmin - qx; + return d*d; + } + /* Corner quadrants */ + else + { + /* below/left of xmin/ymin */ + if ( qx < node->xmin && qy < node->ymin ) + { + d = (node->xmin - qx) * (node->xmin - qx) + + (node->ymin - qy) * (node->ymin - qy); + } + /* above/left of xmin/ymax */ + else if ( qx < node->xmin && qy > node->ymax ) + { + d = (node->xmin - qx) * (node->xmin - qx) + + (node->ymax - qy) * (node->ymax - qy); + } + /* above/right of xmax/ymax */ + else if ( qx > node->xmax && qy > node->ymax ) + { + d = (node->xmax - qx) * (node->xmax - qx) + + (node->ymax - qy) * (node->ymax - qy); + } + /* below/right of xmax/ymin */ + else if ( qx 
> node->xmax && qy < node->ymin ) + { + d = (node->xmax - qx) * (node->xmax - qx) + + (node->ymin - qy) * (node->ymin - qy); + } + else + { + /* Should only be reachable if a coordinate is NaN; d stays 0.0 */ + } + } + + return d; +} + /** * Peak into a #GSERIALIZED datum to find the bounding box. If the @@ -385,10 +479,9 @@ static int gserialized_datum_predicate_2d(Datum gs1, Datum gs2, box2df_predicate predicate) { BOX2DF b1, b2; - POSTGIS_DEBUG(3, "entered function"); - /* Must be able to build box for each arguement (ie, not empty geometry) + /* Must be able to build box for each argument (ie, not empty geometry) and overlap boxes to return true. */ if ( (gserialized_datum_get_box2df_p(gs1, &b1) == LW_SUCCESS) && (gserialized_datum_get_box2df_p(gs2, &b2) == LW_SUCCESS) && @@ -407,6 +500,26 @@ gserialized_datum_predicate_2d(Datum gs1, Datum gs2, box2df_predicate predicate) * GiST 2-D Index Operator Functions */ +PG_FUNCTION_INFO_V1(gserialized_boxdistance_2d); +Datum gserialized_boxdistance_2d(PG_FUNCTION_ARGS) +{ + BOX2DF b1, b2; + Datum gs1 = PG_GETARG_DATUM(0); + Datum gs2 = PG_GETARG_DATUM(1); + + POSTGIS_DEBUG(3, "entered function"); + + /* Must be able to build box for each argument (ie, not empty geometry). */ + if ( (gserialized_datum_get_box2df_p(gs1, &b1) == LW_SUCCESS) && + (gserialized_datum_get_box2df_p(gs2, &b2) == LW_SUCCESS) ) + { + double distance = box2df_distance_leaf(&b1, &b2); + POSTGIS_DEBUGF(3, "got boxes %s and %s", box2df_to_string(&b1), box2df_to_string(&b2)); + PG_RETURN_FLOAT8(distance); + } + PG_RETURN_FLOAT8(INFINITY); +} + PG_FUNCTION_INFO_V1(gserialized_same_2d); Datum gserialized_same_2d(PG_FUNCTION_ARGS) { @@ -738,6 +851,61 @@ Datum gserialized_gist_consistent_2d(PG_FUNCTION_ARGS) PG_RETURN_BOOL(result); } + +/* +** GiST support function. Take in a query and an entry and return the "distance" +** between them. +** +** Given an index entry p and a query value q, this function determines the +** index entry's "distance" from the query value. 
This function must be +** supplied if the operator class contains any ordering operators. A query +** using the ordering operator will be implemented by returning index entries +** with the smallest "distance" values first, so the results must be consistent +** with the operator's semantics. For a leaf index entry the result just +** represents the distance to the index entry; for an internal tree node, the +** result must be the smallest distance that any child entry could have. +** +*/ +PG_FUNCTION_INFO_V1(gserialized_gist_distance_2d); +Datum gserialized_gist_distance_2d(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY*) PG_GETARG_POINTER(0); + BOX2DF query_box; + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + double distance; + + POSTGIS_DEBUG(4, "[GIST] 'distance' function called"); + + /* '13' is the gist distance strategy number; it must match OPERATOR 13 (<->) in the gist_geometry_ops_2d operator class */ + if ( strategy != 13 ) { + elog(ERROR, "unrecognized strategy number: %d", strategy); + PG_RETURN_FLOAT8(INFINITY); + } + + /* Null box should never make it this far. */ + if ( gserialized_datum_get_box2df_p(PG_GETARG_DATUM(1), &query_box) == LW_FAILURE ) + { + POSTGIS_DEBUG(4, "[GIST] null query_gbox_index!"); + PG_RETURN_FLOAT8(INFINITY); + } + + /* Treat leaf node tests differently from internal nodes */ + if (GIST_LEAF(entry)) + { + /* Leaves: squared centroid-to-centroid distance */ + BOX2DF *leaf_box = (BOX2DF*)DatumGetPointer(entry->key); + distance = (double)box2df_distance_leaf(leaf_box, &query_box); + } + else + { + /* Internal nodes: squared distance from the query centroid to the nearest edge or corner of the node box (0 when the centroid lies inside it) */ + BOX2DF *node_box = (BOX2DF*)DatumGetPointer(entry->key); + distance = (double)box2df_distance_node(node_box, &query_box); + } + + PG_RETURN_FLOAT8(distance); +} + /* ** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry. ** Calculate the change in volume of the old entry once the new entry is added. 
diff --git a/postgis/postgis.sql.in.c b/postgis/postgis.sql.in.c index 8f5b6e849..72c19a449 100644 --- a/postgis/postgis.sql.in.c +++ b/postgis/postgis.sql.in.c @@ -519,6 +519,12 @@ CREATE TYPE box2df ( alignment = double ); +-- Availability: 2.0.0 (GiST distance support function; registered as FUNCTION 8 of gist_geometry_ops_2d) +CREATE OR REPLACE FUNCTION geometry_gist_boxdistance_2d(internal,geometry,int4) + RETURNS float8 + AS 'MODULE_PATHNAME' ,'gserialized_gist_distance_2d' + LANGUAGE 'C'; + -- Availability: 2.0.0 CREATE OR REPLACE FUNCTION geometry_gist_consistent_2d(internal,geometry,int4) RETURNS bool @@ -601,6 +607,17 @@ CREATE OPERATOR ~= ( RESTRICT = contsel, JOIN = contjoinsel ); +-- Availability: 2.0.0 (backs the <-> operator; returns the squared distance between the bounding-box centroids, not a true geometry distance) +CREATE OR REPLACE FUNCTION geometry_boxdistance(geometry, geometry) + RETURNS float8 + AS 'MODULE_PATHNAME' ,'gserialized_boxdistance_2d' + LANGUAGE 'C' IMMUTABLE STRICT; + +CREATE OPERATOR <-> ( + LEFTARG = geometry, RIGHTARG = geometry, PROCEDURE = geometry_boxdistance, + COMMUTATOR = '<->' +); + -- Availability: 2.0.0 CREATE OR REPLACE FUNCTION geometry_contains(geometry, geometry) RETURNS bool @@ -737,14 +754,16 @@ CREATE OPERATOR CLASS gist_geometry_ops_2d OPERATOR 10 <<| , OPERATOR 11 |>> , OPERATOR 12 |&> , + OPERATOR 13 <-> FOR ORDER BY pg_catalog.float_ops, FUNCTION 1 geometry_gist_consistent_2d (internal, geometry, int4), FUNCTION 2 geometry_gist_union_2d (bytea, internal), FUNCTION 3 geometry_gist_compress_2d (internal), FUNCTION 4 geometry_gist_decompress_2d (internal), FUNCTION 5 geometry_gist_penalty_2d (internal, internal, internal), FUNCTION 6 geometry_gist_picksplit_2d (internal, internal), - FUNCTION 7 geometry_gist_same_2d (geometry, geometry, internal); - + FUNCTION 7 geometry_gist_same_2d (geometry, geometry, internal), + FUNCTION 8 geometry_gist_boxdistance_2d (internal, geometry, int4); + #else ------------------------------------------------------------------- @@ -1097,7 +1116,6 @@ CREATE OR REPLACE FUNCTION ST_Area(geometry) AS 'MODULE_PATHNAME','LWGEOM_area_polygon' LANGUAGE 'C' IMMUTABLE STRICT; - -- 
Availability: 1.2.2 CREATE OR REPLACE FUNCTION ST_distance_spheroid(geometry,geometry,spheroid) RETURNS FLOAT8 @@ -1105,14 +1123,6 @@ CREATE OR REPLACE FUNCTION ST_distance_spheroid(geometry,geometry,spheroid) LANGUAGE 'C' IMMUTABLE STRICT COST 100; - --- Availability: 1.2.2 -CREATE OR REPLACE FUNCTION ST_distance_sphere(geometry,geometry) - RETURNS FLOAT8 - AS 'MODULE_PATHNAME','LWGEOM_distance_sphere' - LANGUAGE 'C' IMMUTABLE STRICT - COST 100; - -- Minimum distance. 2d only. -- PostGIS equivalent function: distance(geometry,geometry) @@ -4384,6 +4394,25 @@ LANGUAGE 'plpgsql' IMMUTABLE STRICT; +-- Availability: 1.2.2 +CREATE OR REPLACE FUNCTION ST_distance_sphere(geometry,geometry) + RETURNS FLOAT8 + AS $$ + select st_distance(geography($1),geography($2),false) + $$ + LANGUAGE 'SQL' IMMUTABLE STRICT + COST 300; + +-- Availability: 1.2.2 (NOTE(review): verbatim repeat of the ST_distance_sphere definition directly above; the second CREATE OR REPLACE looks redundant - confirm and drop) +CREATE OR REPLACE FUNCTION ST_distance_sphere(geometry,geometry) + RETURNS FLOAT8 + AS $$ + select st_distance(geography($1),geography($2),false) + $$ + LANGUAGE 'SQL' IMMUTABLE STRICT + COST 300; + + #ifdef GSERIALIZED_ON