From 1cb0fb41c8bbb0c51ee42f9f019b12ac8eeced59 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Sat, 24 Nov 2018 09:30:36 +0000 Subject: [PATCH] Stabilize GiST ND indexes for mixed dimensions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Patch by Darafei Praliaskouski Thanks to Arthur Lesuisse for synthesizing test case, Andrew Gierth for finding runaway memcpy, Raúl Marín for pointing to memory problem in index construction. This is not backpatchable to 2.x, that requires separate solution. References #4139 Closes https://github.com/postgis/postgis/pull/336 git-svn-id: http://svn.osgeo.org/postgis/trunk@17070 b70326c6-7e19-0410-871a-916f4a2858ee --- NEWS | 2 ++ postgis/gserialized_gist_nd.c | 29 +++++++++++---------- regress/core/Makefile.in | 5 ++-- regress/core/regress_gist_index_nd.sql | 3 +-- regress/core/regress_gist_index_nd_expected | 4 +-- 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/NEWS b/NEWS index 2b054b215..6518e2414 100644 --- a/NEWS +++ b/NEWS @@ -43,6 +43,8 @@ PostGIS 3.0.0 - #4249, Fix undefined behaviour in raster intersection (Raúl Marín) - #4246, Fix undefined behaviour in ST_3DDistance (Raúl Marín) - #4244, Avoid unaligned memory access in BOX2D_out (Raúl Marín) + - #4139, Make mixed-dimension ND index build tree correctly (Darafei Praliaskouski, + Arthur Lesuisse, Andrew Gierth, Raúl Marín) PostGIS 2.5.0 2018/09/23 diff --git a/postgis/gserialized_gist_nd.c b/postgis/gserialized_gist_nd.c index 7b38adad1..150a7a630 100644 --- a/postgis/gserialized_gist_nd.c +++ b/postgis/gserialized_gist_nd.c @@ -154,8 +154,7 @@ gidx_set_unknown(GIDX *a) SET_VARSIZE(a, VARHDRSZ); } -/* Enlarge b_union to contain b_new. If b_new contains more - dimensions than b_union, expand b_union to contain those dimensions. */ +/* Enlarge b_union to contain b_new. */ void gidx_merge(GIDX **b_union, GIDX *b_new) { @@ -165,10 +164,12 @@ gidx_merge(GIDX **b_union, GIDX *b_new) Assert(b_new); /* Can't merge an unknown into any thing */ + /* Q: Unknown is 0 dimensions. Should we reset result to unknown instead? (ticket #4232) */ if (gidx_is_unknown(b_new)) return; /* Merge of unknown and known is known */ + /* Q: Unknown is 0 dimensions. Should we never modify unknown instead? (ticket #4232) */ if (gidx_is_unknown(*b_union)) { *b_union = b_new; @@ -178,26 +179,22 @@ gidx_merge(GIDX **b_union, GIDX *b_new) dims_union = GIDX_NDIMS(*b_union); dims_new = GIDX_NDIMS(b_new); - POSTGIS_DEBUGF(4, "merging gidx (%s) into gidx (%s)", gidx_to_string(b_new), gidx_to_string(*b_union)); - - if (dims_new > dims_union) + /* Shrink unshared dimensions. + * Unset dimension is essentially [-FLT_MAX, FLT_MAX], so we can either trim it or reset to that range.*/ + if (dims_new < dims_union) { - POSTGIS_DEBUGF(5, "reallocating b_union from %d dims to %d dims", dims_union, dims_new); *b_union = (GIDX *)repalloc(*b_union, GIDX_SIZE(dims_new)); SET_VARSIZE(*b_union, VARSIZE(b_new)); dims_union = dims_new; } - for (i = 0; i < dims_new; i++) + for (i = 0; i < dims_union; i++) { /* Adjust minimums */ GIDX_SET_MIN(*b_union, i, Min(GIDX_GET_MIN(*b_union, i), GIDX_GET_MIN(b_new, i))); /* Adjust maximums */ GIDX_SET_MAX(*b_union, i, Max(GIDX_GET_MAX(*b_union, i), GIDX_GET_MAX(b_new, i))); } - - POSTGIS_DEBUGF(5, "merge complete (%s)", gidx_to_string(*b_union)); - return; } /* Calculate the volume (in n-d units) of the GIDX */ @@ -1071,7 +1068,7 @@ gserialized_gist_consistent_leaf(GIDX *key, GIDX *query, StrategyNumber strategy retval = false; } - return (retval); + return retval; } /* @@ -1105,7 +1102,7 @@ gserialized_gist_consistent_internal(GIDX *key, GIDX *query, StrategyNumber stra retval = false; } - return (retval); + return retval; } /* @@ -1426,7 +1423,11 @@ gserialized_gist_picksplit_addlist(OffsetNumber *list, GIDX **box_union, GIDX *b if (*pos) gidx_merge(box_union, box_current); else - memcpy((void *)(*box_union), (void *)box_current, VARSIZE(box_current)); + { + pfree(*box_union); + *box_union = gidx_copy(box_current); + } + list[*pos] = num; (*pos)++; } @@ -1721,7 +1722,7 @@ Datum gserialized_gist_picksplit(PG_FUNCTION_ARGS) ** sides of the page union box can occasionally all end up in one place, leading ** to this condition. */ - if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion) == true) + if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion)) { /* ** Instead we split on center points and see if we do better. diff --git a/regress/core/Makefile.in b/regress/core/Makefile.in index 045b7667f..cea4af3db 100644 --- a/regress/core/Makefile.in +++ b/regress/core/Makefile.in @@ -107,6 +107,7 @@ TESTS = \ quantize_coordinates \ regress \ regress_bdpoly \ + regress_gist_index_nd \ regress_index \ regress_index_nulls \ regress_management \ @@ -189,7 +190,6 @@ TESTS += \ delaunaytriangles - ifeq ($(INTERRUPTTESTS),yes) # Allow CI servers to configure --with-interrupt-tests TESTS += \ @@ -234,8 +234,7 @@ ifeq ($(HAVE_SPGIST),yes) TESTS += \ regress_spgist_index_2d \ regress_spgist_index_3d \ - regress_spgist_index_nd #\ -# regress_gist_index_nd + regress_spgist_index_nd endif ifeq ($(HAVE_PROTOBUF),yes) diff --git a/regress/core/regress_gist_index_nd.sql b/regress/core/regress_gist_index_nd.sql index 810d962f3..9c0d6df81 100644 --- a/regress/core/regress_gist_index_nd.sql +++ b/regress/core/regress_gist_index_nd.sql @@ -85,5 +85,4 @@ select * from test_gist_idx_nd; DROP TABLE tbl_geomcollection_nd CASCADE; DROP TABLE test_gist_idx_nd CASCADE; -DROP FUNCTION qnodes; - +DROP FUNCTION qnodes (text); diff --git a/regress/core/regress_gist_index_nd_expected b/regress/core/regress_gist_index_nd_expected index aff493fff..7fc03582d 100644 --- a/regress/core/regress_gist_index_nd_expected +++ b/regress/core/regress_gist_index_nd_expected @@ -1,4 +1,4 @@ &&&|180502|Seq Scan|180502|Index Scan -~~ |35498|Seq Scan|35498|Index Scan -@@ |35498|Seq Scan|35498|Index Scan +~~ |39682|Seq Scan|39682|Index Scan +@@ |39682|Seq Scan|39682|Index Scan ~~=|480|Seq Scan|480|Index Scan -- 2.50.1