From 85687453ca3a18d2434c42e986f62849dc2ba47b Mon Sep 17 00:00:00 2001 From: Sandro Santilli Date: Wed, 10 Jun 2015 16:18:37 +0000 Subject: [PATCH] Add |=| operator with CPA semantic and KNN support with PgSQL 9.5+ Includes regress test and documentation git-svn-id: http://svn.osgeo.org/postgis/trunk@13641 b70326c6-7e19-0410-871a-916f4a2858ee --- doc/reference_operator.xml | 80 +++++++++++++++++++++++++++++++++++ doc/reference_temporal.xml | 3 +- postgis/gserialized_gist_nd.c | 27 +++++++++--- postgis/postgis.sql.in | 22 ++++++++++ regress/Makefile.in | 3 +- regress/temporal_knn.sql | 75 ++++++++++++++++++++++++++++++++ regress/temporal_knn_expected | 7 +++ 7 files changed, 209 insertions(+), 8 deletions(-) create mode 100644 regress/temporal_knn.sql create mode 100644 regress/temporal_knn_expected diff --git a/doc/reference_operator.xml b/doc/reference_operator.xml index 7a314d047..14f651226 100644 --- a/doc/reference_operator.xml +++ b/doc/reference_operator.xml @@ -1204,6 +1204,86 @@ For users running with PostgreSQL < 9.5, use a hybrid query to find the true , , + + + + |=| + + +Returns the distance between A and B trajectories at their closest point of approach. + + + + + + + double precision |=| + + + geometry + + A + + + + geometry + + B + + + + + + + + Description + + +The |=| operator returns the 3D distance between +two trajectories (See ). +This is the same as but as an operator +it can be used for doing nearest neightbor searches using an N-dimensional +index (requires PostgreSQL 9.5.0 or higher). + + + This operand will make use of ND GiST indexes that may be available on the geometries. It is different from other operators that use spatial indexes in that the spatial index is only used when the operator is in the ORDER BY clause. + Index only kicks in if one of the geometries is a constant (not in a subquery/cte). e.g. 'SRID=3005;LINESTRINGM(0 0 0,0 0 1)'::geometry instead of a.geom + + Availability: 2.2.0. Index-supported only available for PostgreSQL 9.5+ + + + + + Examples + +-- Save a literal query trajectory in a psql variable... +\set qt 'ST_AddMeasure(ST_MakeLine(ST_MakePointM(-350,300,0),ST_MakePointM(-410,490,0)),10,20)' +-- Run the query ! +SELECT track_id, dist FROM ( + SELECT track_id, ST_DistanceCPA(tr,:qt) dist + FROM trajectories + ORDER BY tr |=| :qt + LIMIT 5 +) foo; + track_id dist +----------+------------------- + 395 | 0.576496831518066 + 380 | 5.06797130410151 + 390 | 7.72262293958322 + 385 | 9.8004461358071 + 405 | 10.9534397988433 +(5 rows) + + + + See Also + +, +, + + + + diff --git a/doc/reference_temporal.xml b/doc/reference_temporal.xml index 5d688ea39..37d1f58d7 100644 --- a/doc/reference_temporal.xml +++ b/doc/reference_temporal.xml @@ -211,7 +211,8 @@ SELECT ST_DistanceCPA(a,b) distance FROM inp; , , - +, + diff --git a/postgis/gserialized_gist_nd.c b/postgis/gserialized_gist_nd.c index 1f784c09d..d8b666ad0 100644 --- a/postgis/gserialized_gist_nd.c +++ b/postgis/gserialized_gist_nd.c @@ -505,7 +505,7 @@ static double gidx_distance_leaf_centroid(const GIDX *a, const GIDX *b) /** * Calculate the box->box distance. */ -static double gidx_distance(const GIDX *a, const GIDX *b) +static double gidx_distance(const GIDX *a, const GIDX *b, int m_is_time) { int ndims, i; double sum = 0; @@ -527,6 +527,10 @@ static double gidx_distance(const GIDX *a, const GIDX *b) /* overlaps */ d = 0; } + else if ( i == 4 && m_is_time ) + { + return FLT_MAX; + } else if ( bmax < amin ) { /* is "left" */ @@ -1200,7 +1204,7 @@ Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS) /* We scale up to "world units" so that the box-to-box distances */ /* compare reasonably with the over-the-spheroid distances that */ /* the recheck process will turn up */ - distance = WGS84_RADIUS * gidx_distance(entry_box, query_box); + distance = WGS84_RADIUS * gidx_distance(entry_box, query_box, 0); POSTGIS_DEBUGF(2, "[GIST] '%s' got distance %g", __func__, distance); PG_RETURN_FLOAT8(distance); @@ -1220,7 +1224,8 @@ Datum gserialized_gist_geog_distance(PG_FUNCTION_ARGS) ** represents the distance to the index entry; for an internal tree node, the ** result must be the smallest distance that any child entry could have. ** -** Strategy 13 = centroid-based distance tests <<->> +** Strategy 13 is centroid-based distance tests <<->> +** Strategy 20 is cpa based distance tests |=| */ PG_FUNCTION_INFO_V1(gserialized_gist_distance); Datum gserialized_gist_distance(PG_FUNCTION_ARGS) @@ -1238,8 +1243,9 @@ Datum gserialized_gist_distance(PG_FUNCTION_ARGS) POSTGIS_DEBUG(4, "[GIST] 'distance' function called"); - /* We are using '13' as the gist distance strategy number for <<->> */ - if ( strategy != 13 ) { + /* Strategy 13 is <<->> */ + /* Strategy 20 is |=| */ + if ( strategy != 13 && strategy != 20 ) { elog(ERROR, "unrecognized strategy number: %d", strategy); PG_RETURN_FLOAT8(FLT_MAX); } @@ -1256,11 +1262,20 @@ Datum gserialized_gist_distance(PG_FUNCTION_ARGS) #if POSTGIS_PGSQL_VERSION >= 95 - distance = gidx_distance(entry_box, query_box); + /* Strategy 20 is |=| */ + distance = gidx_distance(entry_box, query_box, strategy == 20); + /* Treat leaf node tests different from internal nodes */ if (GIST_LEAF(entry)) *recheck = true; #else + + if ( strategy == 20 ) + { + elog(ERROR, "You need PostgreSQL 9.5.0 or higher in order to use |=| with index"); + PG_RETURN_FLOAT8(FLT_MAX); + } + /* Treat leaf node tests different from internal nodes */ if (GIST_LEAF(entry)) { diff --git a/postgis/postgis.sql.in b/postgis/postgis.sql.in index 6c6103ad4..9d59ba5ca 100644 --- a/postgis/postgis.sql.in +++ b/postgis/postgis.sql.in @@ -828,6 +828,24 @@ CREATE OPERATOR <<->> ( COMMUTATOR = '<<->>' ); +-- +-- This is for use with |=| operator, which does not directly use +-- ST_DistanceCPA just in case it'll ever need to change behavior +-- (operators definition cannot be altered) +-- +-- Availability: 2.2.0 +CREATE OR REPLACE FUNCTION geometry_distance_cpa(geometry, geometry) + RETURNS float8 + AS 'MODULE_PATHNAME', 'ST_DistanceCPA' + LANGUAGE 'c' IMMUTABLE STRICT; + +-- Availability: 2.2.0 +CREATE OPERATOR |=| ( + LEFTARG = geometry, RIGHTARG = geometry, + PROCEDURE = geometry_distance_cpa, + COMMUTATOR = '|=|' +); + -- Availability: 2.2.0 CREATE OR REPLACE FUNCTION geometry_gist_distance_nd(internal,geometry,int4) RETURNS float8 @@ -846,6 +864,10 @@ CREATE OPERATOR CLASS gist_geometry_ops_nd #if POSTGIS_PGSQL_VERSION >= 91 -- Availability: 2.2.0 OPERATOR 13 <<->> FOR ORDER BY pg_catalog.float_ops, +#if POSTGIS_PGSQL_VERSION >= 95 + -- Availability: 2.2.0 + OPERATOR 20 |=| FOR ORDER BY pg_catalog.float_ops, +#endif -- Availability: 2.2.0 FUNCTION 8 geometry_gist_distance_nd (internal, geometry, int4), #endif diff --git a/regress/Makefile.in b/regress/Makefile.in index 6975d501e..56769292a 100644 --- a/regress/Makefile.in +++ b/regress/Makefile.in @@ -147,7 +147,8 @@ endif ifeq ($(shell expr $(POSTGIS_PGSQL_VERSION) ">=" 95),1) # Index supported KNN recheck only available in PostgreSQL 9.5 and higher - TESTS += knn_recheck + TESTS += knn_recheck \ + temporal_knn endif ifeq ($(shell expr $(POSTGIS_GEOS_VERSION) ">=" 32),1) diff --git a/regress/temporal_knn.sql b/regress/temporal_knn.sql new file mode 100644 index 000000000..dbf2a8901 --- /dev/null +++ b/regress/temporal_knn.sql @@ -0,0 +1,75 @@ +CREATE OR REPLACE FUNCTION qnodes(q text) RETURNS text +LANGUAGE 'plpgsql' AS +$$ +DECLARE + exp TEXT; + mat TEXT[]; + ret TEXT[]; +BEGIN + --RAISE NOTICE 'Q: %', q; + FOR exp IN EXECUTE 'EXPLAIN ' || q + LOOP + --RAISE NOTICE 'EXP: %', exp; + mat := regexp_matches(exp, ' *(?:-> *)?(.*Scan)'); + --RAISE NOTICE 'MAT: %', mat; + IF mat IS NOT NULL THEN + ret := array_append(ret, mat[1]); + END IF; + --RAISE NOTICE 'RET: %', ret; + END LOOP; + RETURN array_to_string(ret,','); +END; +$$; + +-- create table +CREATE TABLE knn_cpa AS +WITH points AS ( + SELECT t, + ST_MakePoint(x-t,x+t) p + FROM generate_series(0,1000,5) t -- trajectories + ,generate_series(-100,100,10) x +) +SELECT t, ST_AddMeasure( + CASE WHEN t%2 = 0 THEN ST_Reverse(ST_MakeLine(p)) + ELSE ST_MakeLine(p) END, + 10, 20) tr +FROM points GROUP BY t; +--ALTER TABLE knn_cpa ADD PRIMARY KEY(t); + +\set qt 'ST_AddMeasure(ST_MakeLine(ST_MakePointM(-260,380,0),ST_MakePointM(-360,540,0)),10,20)' + +SELECT '|=| no idx', qnodes('select * from knn_cpa ORDER BY tr |=| ' || quote_literal(:qt ::text) || ' LIMIT 1'); +CREATE TABLE knn_cpa_no_index AS +SELECT row_number() over () n, t, d FROM ( + SELECT t, + ST_DistanceCPA(tr,:qt) d + FROM knn_cpa ORDER BY tr |=| :qt LIMIT 5 +) foo; + +CREATE INDEX on knn_cpa USING gist (tr gist_geometry_ops_nd); +ANALYZE knn_cpa; +set enable_seqscan to off; + +SELECT '|=| idx', qnodes('select * from knn_cpa ORDER BY tr |=| ' || quote_literal(:qt ::text) || ' LIMIT 1'); +CREATE TABLE knn_cpa_index AS +SELECT row_number() over () n, t, d FROM ( + SELECT t, ST_DistanceCPA(tr,:qt) d + FROM knn_cpa ORDER BY tr |=| :qt LIMIT 5 +) foo; + +--SELECT * FROM knn_cpa_no_index; +--SELECT * FROM knn_cpa_index; + +SELECT a.n, + CASE WHEN a.t = b.t THEN a.t||'' ELSE a.t || ' vs ' || b.t END closest, + CASE WHEN a.d = b.d THEN 'dist:' || a.d::numeric(10,2) ELSE 'diff:' || (a.d - b.d) END dist +FROM knn_cpa_no_index a, knn_cpa_index b +WHERE a.n = b.n +ORDER BY a.n; + +-- Cleanup + +DROP FUNCTION qnodes(text); +DROP TABLE knn_cpa; +DROP TABLE knn_cpa_no_index; +DROP TABLE knn_cpa_index; diff --git a/regress/temporal_knn_expected b/regress/temporal_knn_expected new file mode 100644 index 000000000..d58640e1f --- /dev/null +++ b/regress/temporal_knn_expected @@ -0,0 +1,7 @@ +|=| no idx|Seq Scan +|=| idx|Index Scan +1|445|dist:2.97 +2|340|dist:3.21 +3|435|dist:8.26 +4|350|dist:9.10 +5|455|dist:14.21 -- 2.50.1