From 1f7ef548ec2e594fa8766781c490fb5b998ea46b Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Wed, 28 Jun 2006 12:00:14 +0000 Subject: [PATCH] Changes * new split algorithm (as proposed in http://archives.postgresql.org/pgsql-hackers/2006-06/msg00254.php) * possible call pickSplit() for second and below columns * add spl_(l|r)datum_exists to GIST_SPLITVEC - pickSplit should check its values to use already defined spl_(l|r)datum for splitting. pickSplit should set spl_(l|r)datum_exists to 'false' (if they was 'true') to signal to caller about using spl_(l|r)datum. * support for old pickSplit(): not very optimal but correct split * remove 'bytes' field from GISTENTRY: in any case size of value is defined by it's type. * split GIST_SPLITVEC to two structures: one for using in picksplit and second - for internal use. * some code refactoring * support of subsplit to rtree opclasses TODO: add support of subsplit to contrib modules --- contrib/btree_gist/btree_inet.c | 2 +- contrib/btree_gist/btree_interval.c | 4 +- contrib/btree_gist/btree_text.c | 2 +- contrib/btree_gist/btree_time.c | 2 +- contrib/btree_gist/btree_ts.c | 2 +- contrib/btree_gist/btree_utils_num.c | 2 +- contrib/btree_gist/btree_utils_var.c | 4 +- contrib/cube/cube.c | 6 +- contrib/intarray/_int_gist.c | 8 +- contrib/intarray/_intbig_gist.c | 4 +- contrib/ltree/_ltree_gist.c | 4 +- contrib/ltree/ltree_gist.c | 6 +- contrib/pg_trgm/trgm_gist.c | 4 +- contrib/seg/seg.c | 4 +- contrib/tsearch2/gistidx.c | 8 +- contrib/tsearch2/query_gist.c | 2 +- src/backend/access/gist/Makefile | 4 +- src/backend/access/gist/gist.c | 182 ++---------- src/backend/access/gist/gistget.c | 3 +- src/backend/access/gist/gistproc.c | 139 ++++++--- src/backend/access/gist/gistutil.c | 421 ++++----------------------- src/include/access/gist.h | 31 +- src/include/access/gist_private.h | 67 +++-- 23 files changed, 266 insertions(+), 645 deletions(-) diff --git a/contrib/btree_gist/btree_inet.c b/contrib/btree_gist/btree_inet.c index 551368cec2..914f4f6d5e 100644 --- a/contrib/btree_gist/btree_inet.c +++ b/contrib/btree_gist/btree_inet.c @@ -100,7 +100,7 @@ gbt_inet_compress(PG_FUNCTION_ARGS) r->upper = r->lower; gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, sizeof(inetKEY), FALSE); + entry->offset, FALSE); } else retval = entry; diff --git a/contrib/btree_gist/btree_interval.c b/contrib/btree_gist/btree_interval.c index b7776295a0..6f346f94fe 100644 --- a/contrib/btree_gist/btree_interval.c +++ b/contrib/btree_gist/btree_interval.c @@ -129,7 +129,7 @@ gbt_intv_compress(PG_FUNCTION_ARGS) } gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, 2 * INTERVALSIZE, FALSE); + entry->offset, FALSE); } PG_RETURN_POINTER(retval); @@ -153,7 +153,7 @@ gbt_intv_decompress(PG_FUNCTION_ARGS) gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, sizeof(intvKEY), FALSE); + entry->offset, FALSE); } PG_RETURN_POINTER(retval); } diff --git a/contrib/btree_gist/btree_text.c b/contrib/btree_gist/btree_text.c index 6ab77f86ae..fa4eb904f5 100644 --- a/contrib/btree_gist/btree_text.c +++ b/contrib/btree_gist/btree_text.c @@ -115,7 +115,7 @@ gbt_bpchar_compress(PG_FUNCTION_ARGS) gistentryinit(trim, d, entry->rel, entry->page, - entry->offset, VARSIZE(DatumGetPointer(d)), TRUE); + entry->offset, TRUE); retval = gbt_var_compress(&trim, &tinfo); } else diff --git a/contrib/btree_gist/btree_time.c b/contrib/btree_gist/btree_time.c index c6a5697af0..379f1e0a60 100644 --- a/contrib/btree_gist/btree_time.c +++ b/contrib/btree_gist/btree_time.c @@ -137,7 +137,7 @@ gbt_timetz_compress(PG_FUNCTION_ARGS) r->lower = r->upper = tmp; gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, sizeof(timeKEY), FALSE); + entry->offset, FALSE); } else retval = entry; diff --git a/contrib/btree_gist/btree_ts.c b/contrib/btree_gist/btree_ts.c index 9f305119d8..9a7d650527 100644 --- a/contrib/btree_gist/btree_ts.c +++ b/contrib/btree_gist/btree_ts.c @@ -159,7 +159,7 @@ gbt_tstz_compress(PG_FUNCTION_ARGS) r->lower = r->upper = gmt; gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, sizeof(tsKEY), FALSE); + entry->offset, FALSE); } else retval = entry; diff --git a/contrib/btree_gist/btree_utils_num.c b/contrib/btree_gist/btree_utils_num.c index 69292758f5..9798d01563 100644 --- a/contrib/btree_gist/btree_utils_num.c +++ b/contrib/btree_gist/btree_utils_num.c @@ -46,7 +46,7 @@ gbt_num_compress(GISTENTRY *retval, GISTENTRY *entry, const gbtree_ninfo * tinfo memcpy((void *) &r[tinfo->size], leaf, tinfo->size); retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, (2 * tinfo->size), FALSE); + entry->offset, FALSE); } else retval = entry; diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c index 20a1e427f2..810ca78af9 100644 --- a/contrib/btree_gist/btree_utils_var.c +++ b/contrib/btree_gist/btree_utils_var.c @@ -19,7 +19,7 @@ gbt_var_decompress(PG_FUNCTION_ARGS) gistentryinit(*retval, PointerGetDatum(key), entry->rel, entry->page, - entry->offset, VARSIZE(key), FALSE); + entry->offset, FALSE); PG_RETURN_POINTER(retval); } @@ -292,7 +292,7 @@ gbt_var_compress(GISTENTRY *entry, const gbtree_vinfo * tinfo) retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, VARSIZE(r), TRUE); + entry->offset, TRUE); } else retval = entry; diff --git a/contrib/cube/cube.c b/contrib/cube/cube.c index 944eee62b1..87ca2db768 100644 --- a/contrib/cube/cube.c +++ b/contrib/cube/cube.c @@ -1,5 +1,5 @@ /****************************************************************************** - $PostgreSQL: pgsql/contrib/cube/cube.c,v 1.25 2006/05/30 22:12:12 tgl Exp $ + $PostgreSQL: pgsql/contrib/cube/cube.c,v 1.26 2006/06/28 11:59:59 teodor Exp $ This file contains routines that can be bound to a Postgres backend and called by the backend in the process of processing queries. The calling @@ -300,8 +300,8 @@ g_cube_picksplit(GistEntryVector *entryvec, double size_l, size_r; int nbytes; - OffsetNumber seed_1 = 0, - seed_2 = 0; + OffsetNumber seed_1 = 1, + seed_2 = 2; OffsetNumber *left, *right; OffsetNumber maxoff; diff --git a/contrib/intarray/_int_gist.c b/contrib/intarray/_int_gist.c index f82024896f..df5ae03e54 100644 --- a/contrib/intarray/_int_gist.c +++ b/contrib/intarray/_int_gist.c @@ -154,7 +154,7 @@ g_int_compress(PG_FUNCTION_ARGS) retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(r), - entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE); + entry->rel, entry->page, entry->offset, FALSE); PG_RETURN_POINTER(retval); } @@ -201,7 +201,7 @@ g_int_compress(PG_FUNCTION_ARGS) r = resize_intArrayType(r, len); retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(r), - entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE); + entry->rel, entry->page, entry->offset, FALSE); PG_RETURN_POINTER(retval); } else @@ -238,7 +238,7 @@ g_int_decompress(PG_FUNCTION_ARGS) { retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(in), - entry->rel, entry->page, entry->offset, VARSIZE(in), FALSE); + entry->rel, entry->page, entry->offset, FALSE); PG_RETURN_POINTER(retval); } @@ -260,7 +260,7 @@ g_int_decompress(PG_FUNCTION_ARGS) pfree(in); retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(r), - entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE); + entry->rel, entry->page, entry->offset, FALSE); PG_RETURN_POINTER(retval); } diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c index ecd949d87a..626421f83c 100644 --- a/contrib/intarray/_intbig_gist.c +++ b/contrib/intarray/_intbig_gist.c @@ -172,7 +172,7 @@ g_intbig_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, - entry->offset, res->len, FALSE); + entry->offset, FALSE); if (in != (ArrayType *) PG_DETOAST_DATUM(entry->key)) pfree(in); @@ -198,7 +198,7 @@ g_intbig_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, - entry->offset, res->len, FALSE); + entry->offset, FALSE); PG_RETURN_POINTER(retval); } diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c index b95dc33334..285077802b 100644 --- a/contrib/ltree/_ltree_gist.c +++ b/contrib/ltree/_ltree_gist.c @@ -108,7 +108,7 @@ _ltree_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(key), entry->rel, entry->page, - entry->offset, key->len, FALSE); + entry->offset, FALSE); } else if (!LTG_ISALLTRUE(entry->key)) { @@ -130,7 +130,7 @@ _ltree_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(key), entry->rel, entry->page, - entry->offset, key->len, FALSE); + entry->offset, FALSE); } PG_RETURN_POINTER(retval); } diff --git a/contrib/ltree/ltree_gist.c b/contrib/ltree/ltree_gist.c index e210117cbc..f5214b746b 100644 --- a/contrib/ltree/ltree_gist.c +++ b/contrib/ltree/ltree_gist.c @@ -1,7 +1,7 @@ /* * GiST support for ltree * Teodor Sigaev - * $PostgreSQL: pgsql/contrib/ltree/ltree_gist.c,v 1.14 2006/03/11 04:38:29 momjian Exp $ + * $PostgreSQL: pgsql/contrib/ltree/ltree_gist.c,v 1.15 2006/06/28 11:59:59 teodor Exp $ */ #include "ltree.h" @@ -81,7 +81,7 @@ ltree_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(key), entry->rel, entry->page, - entry->offset, key->len, FALSE); + entry->offset, FALSE); } PG_RETURN_POINTER(retval); } @@ -98,7 +98,7 @@ ltree_decompress(PG_FUNCTION_ARGS) gistentryinit(*retval, PointerGetDatum(key), entry->rel, entry->page, - entry->offset, key->len, FALSE); + entry->offset, FALSE); PG_RETURN_POINTER(retval); } PG_RETURN_POINTER(entry); diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index 4c33a45a22..bfb92336d7 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -103,7 +103,7 @@ gtrgm_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, - entry->offset, res->len, FALSE); + entry->offset, FALSE); } else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) @@ -126,7 +126,7 @@ gtrgm_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, - entry->offset, res->len, FALSE); + entry->offset, FALSE); } PG_RETURN_POINTER(retval); } diff --git a/contrib/seg/seg.c b/contrib/seg/seg.c index 47ca1b614a..8f0c83c298 100644 --- a/contrib/seg/seg.c +++ b/contrib/seg/seg.c @@ -316,8 +316,8 @@ gseg_picksplit(GistEntryVector *entryvec, float size_l, size_r; int nbytes; - OffsetNumber seed_1 = 0, - seed_2 = 0; + OffsetNumber seed_1 = 1, + seed_2 = 2; OffsetNumber *left, *right; OffsetNumber maxoff; diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c index 7cc370bf84..5afb1a41b3 100644 --- a/contrib/tsearch2/gistidx.c +++ b/contrib/tsearch2/gistidx.c @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/contrib/tsearch2/gistidx.c,v 1.13 2006/03/11 04:38:30 momjian Exp $ */ +/* $PostgreSQL: pgsql/contrib/tsearch2/gistidx.c,v 1.14 2006/06/28 12:00:06 teodor Exp $ */ #include "postgres.h" @@ -202,7 +202,7 @@ gtsvector_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, - entry->offset, res->len, FALSE); + entry->offset, FALSE); } else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) @@ -225,7 +225,7 @@ gtsvector_compress(PG_FUNCTION_ARGS) retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, - entry->offset, res->len, FALSE); + entry->offset, FALSE); } PG_RETURN_POINTER(retval); } @@ -242,7 +242,7 @@ gtsvector_decompress(PG_FUNCTION_ARGS) gistentryinit(*retval, PointerGetDatum(key), entry->rel, entry->page, - entry->offset, key->len, FALSE); + entry->offset, FALSE); PG_RETURN_POINTER(retval); } diff --git a/contrib/tsearch2/query_gist.c b/contrib/tsearch2/query_gist.c index 5941be3a55..d0a61389d2 100644 --- a/contrib/tsearch2/query_gist.c +++ b/contrib/tsearch2/query_gist.c @@ -164,7 +164,7 @@ gtsq_compress(PG_FUNCTION_ARGS) gistentryinit(*retval, PointerGetDatum(sign), entry->rel, entry->page, - entry->offset, sizeof(TPQTGist), FALSE); + entry->offset, FALSE); } PG_RETURN_POINTER(retval); diff --git a/src/backend/access/gist/Makefile b/src/backend/access/gist/Makefile index 10b091e170..174185c80b 100644 --- a/src/backend/access/gist/Makefile +++ b/src/backend/access/gist/Makefile @@ -4,7 +4,7 @@ # Makefile for access/gist # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/access/gist/Makefile,v 1.15 2005/07/01 19:19:02 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/access/gist/Makefile,v 1.16 2006/06/28 12:00:13 teodor Exp $ # #------------------------------------------------------------------------- @@ -13,7 +13,7 @@ top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global OBJS = gist.o gistutil.o gistxlog.o gistvacuum.o gistget.o gistscan.o \ - gistproc.o + gistproc.o gistsplit.o all: SUBSYS.o diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 326e87e788..39ff702c3d 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.138 2006/05/29 12:50:06 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.139 2006/06/28 12:00:14 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -187,7 +187,7 @@ gistbuildCallback(Relation index, /* form an index tuple and point it at the heap tuple */ itup = gistFormTuple(&buildstate->giststate, index, - values, NULL /* size is currently bogus */, isnull); + values, isnull, true /* size is currently bogus */); itup->t_tid = htup->t_self; /* @@ -233,7 +233,7 @@ gistinsert(PG_FUNCTION_ARGS) initGISTstate(&giststate, r); itup = gistFormTuple(&giststate, r, - values, NULL /* size is currently bogus */, isnull); + values, isnull, true /* size is currently bogus */); itup->t_tid = *ht_ctid; gistdoinsert(r, itup, &giststate); @@ -899,150 +899,6 @@ gistmakedeal(GISTInsertState *state, GISTSTATE *giststate) gistxlogInsertCompletion(state->r->rd_node, &(state->key), 1); } -/* - * simple split page - */ -static void -gistSplitHalf(GIST_SPLITVEC *v, int len) { - int i; - - v->spl_nright = v->spl_nleft = 0; - v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); - v->spl_right= (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); - for(i = 1; i <= len; i++) - if ( ispl_right[ v->spl_nright++ ] = i; - else - v->spl_left[ v->spl_nleft++ ] = i; -} - -/* - * if it was invalid tuple then we need special processing. - * We move all invalid tuples on right page. - * - * if there is no place on left page, gistSplit will be called one more - * time for left page. - * - * Normally, we never exec this code, but after crash replay it's possible - * to get 'invalid' tuples (probability is low enough) - */ -static void -gistSplitByInvalid(GISTSTATE *giststate, GIST_SPLITVEC *v, IndexTuple *itup, int len) { - int i; - static OffsetNumber offInvTuples[ MaxOffsetNumber ]; - int nOffInvTuples = 0; - - for (i = 1; i <= len; i++) - if ( GistTupleIsInvalid(itup[i - 1]) ) - offInvTuples[ nOffInvTuples++ ] = i; - - if ( nOffInvTuples == len ) { - /* corner case, all tuples are invalid */ - v->spl_rightvalid= v->spl_leftvalid = false; - gistSplitHalf( v, len ); - } else { - GistSplitVec gsvp; - - v->spl_right = offInvTuples; - v->spl_nright = nOffInvTuples; - v->spl_rightvalid = false; - - v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); - v->spl_nleft = 0; - for(i = 1; i <= len; i++) - if ( !GistTupleIsInvalid(itup[i - 1]) ) - v->spl_left[ v->spl_nleft++ ] = i; - v->spl_leftvalid = true; - - gsvp.idgrp = NULL; - gsvp.attrsize = v->spl_lattrsize; - gsvp.attr = v->spl_lattr; - gsvp.len = v->spl_nleft; - gsvp.entries = v->spl_left; - gsvp.isnull = v->spl_lisnull; - - gistunionsubkeyvec(giststate, itup, &gsvp, 0); - } -} - -/* - * trys to split page by attno key, in a case of null - * values move its to separate page. - */ -static void -gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate, - GIST_SPLITVEC *v, GistEntryVector *entryvec, int attno) { - int i; - static OffsetNumber offNullTuples[ MaxOffsetNumber ]; - int nOffNullTuples = 0; - - - for (i = 1; i <= len; i++) { - Datum datum; - bool IsNull; - - if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1])) { - gistSplitByInvalid(giststate, v, itup, len); - return; - } - - datum = index_getattr(itup[i - 1], attno+1, giststate->tupdesc, &IsNull); - gistdentryinit(giststate, attno, &(entryvec->vector[i]), - datum, r, page, i, - ATTSIZE(datum, giststate->tupdesc, attno+1, IsNull), - FALSE, IsNull); - if ( IsNull ) - offNullTuples[ nOffNullTuples++ ] = i; - } - - v->spl_leftvalid = v->spl_rightvalid = true; - - if ( nOffNullTuples == len ) { - /* - * Corner case: All keys in attno column are null, we should try to - * by keys in next column. It all keys in all columns - * are NULL just split page half by half - */ - v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE; - if ( attno+1 == r->rd_att->natts ) - gistSplitHalf( v, len ); - else - gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1); - } else if ( nOffNullTuples > 0 ) { - int j=0; - - /* - * We don't want to mix NULLs and not-NULLs keys - * on one page, so move nulls to right page - */ - v->spl_right = offNullTuples; - v->spl_nright = nOffNullTuples; - v->spl_risnull[attno] = TRUE; - - v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); - v->spl_nleft = 0; - for(i = 1; i <= len; i++) - if ( jspl_nright && offNullTuples[j] == i ) - j++; - else - v->spl_left[ v->spl_nleft++ ] = i; - - v->spl_idgrp = NULL; - gistunionsubkey(giststate, itup, v, 0); - } else { - /* - * all keys are not-null - */ - if ( gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno+1 != r->rd_att->natts ) - /* - * Splitting on attno column is not optimized: unions of left and right - * page are the same, we will try to split page by - * following columns - */ - gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1); - } -} - /* * gistSplit -- split a page in the tree and fill struct * used for XLOG and real writes buffers. Function is recursive, ie @@ -1057,7 +913,7 @@ gistSplit(Relation r, { IndexTuple *lvectup, *rvectup; - GIST_SPLITVEC v; + GistSplitVector v; GistEntryVector *entryvec; int i; SplitedPageLayout *res = NULL; @@ -1066,6 +922,8 @@ gistSplit(Relation r, entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY)); entryvec->n = len + 1; + memset( v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts ); + memset( v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts ); gistSplitByKey(r, page, itup, len, giststate, &v, entryvec, 0); @@ -1073,31 +931,31 @@ gistSplit(Relation r, lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1)); rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1)); - for (i = 0; i < v.spl_nleft; i++) - lvectup[i] = itup[v.spl_left[i] - 1]; + for (i = 0; i < v.splitVector.spl_nleft; i++) + lvectup[i] = itup[v.splitVector.spl_left[i] - 1]; - for (i = 0; i < v.spl_nright; i++) - rvectup[i] = itup[v.spl_right[i] - 1]; + for (i = 0; i < v.splitVector.spl_nright; i++) + rvectup[i] = itup[v.splitVector.spl_right[i] - 1]; /* finalyze splitting (may need another split) */ - if (!gistfitpage(rvectup, v.spl_nright)) + if (!gistfitpage(rvectup, v.splitVector.spl_nright)) { - res = gistSplit(r, page, rvectup, v.spl_nright, giststate); + res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate); } else { ROTATEDIST(res); - res->block.num = v.spl_nright; - res->list = gistfillitupvec(rvectup, v.spl_nright, &( res->lenlist ) ); - res->itup = (v.spl_rightvalid) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull) + res->block.num = v.splitVector.spl_nright; + res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &( res->lenlist ) ); + res->itup = (v.spl_rightvalid) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false) : gist_form_invalid_tuple(GIST_ROOT_BLKNO); } - if (!gistfitpage(lvectup, v.spl_nleft)) + if (!gistfitpage(lvectup, v.splitVector.spl_nleft)) { SplitedPageLayout *resptr, *subres; - resptr = subres = gistSplit(r, page, lvectup, v.spl_nleft, giststate); + resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate); /* install on list's tail */ while( resptr->next ) @@ -1109,9 +967,9 @@ gistSplit(Relation r, else { ROTATEDIST(res); - res->block.num = v.spl_nleft; - res->list = gistfillitupvec(lvectup, v.spl_nleft, &( res->lenlist ) ); - res->itup = (v.spl_leftvalid) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull) + res->block.num = v.splitVector.spl_nleft; + res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &( res->lenlist ) ); + res->itup = (v.spl_leftvalid) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false) : gist_form_invalid_tuple(GIST_ROOT_BLKNO); } diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index dcb8f2da54..c69f2887b8 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.57 2006/05/24 11:01:39 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.58 2006/06/28 12:00:14 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -391,7 +391,6 @@ gistindex_keytest(IndexTuple tuple, gistdentryinit(giststate, key->sk_attno - 1, &de, datum, r, p, offset, - IndexTupleSize(tuple) - sizeof(IndexTupleData), FALSE, isNull); /* diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c index 8589ed0a8f..0e3bd53d7d 100644 --- a/src/backend/access/gist/gistproc.c +++ b/src/backend/access/gist/gistproc.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.5 2006/03/05 15:58:20 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.6 2006/06/28 12:00:14 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -112,6 +112,17 @@ gist_box_consistent(PG_FUNCTION_ARGS) strategy)); } +static void +adjustBox( BOX *b, BOX *addon ) { + if (b->high.x < addon->high.x) + b->high.x = addon->high.x; + if (b->low.x > addon->low.x) + b->low.x = addon->low.x; + if (b->high.y < addon->high.y) + b->high.y = addon->high.y; + if (b->low.y > addon->low.y) + b->low.y = addon->low.y; +} /* * The GiST Union method for boxes @@ -136,14 +147,7 @@ gist_box_union(PG_FUNCTION_ARGS) for (i = 1; i < numranges; i++) { cur = DatumGetBoxP(entryvec->vector[i].key); - if (pageunion->high.x < cur->high.x) - pageunion->high.x = cur->high.x; - if (pageunion->low.x > cur->low.x) - pageunion->low.x = cur->low.x; - if (pageunion->high.y < cur->high.y) - pageunion->high.y = cur->high.y; - if (pageunion->low.y > cur->low.y) - pageunion->low.y = cur->low.y; + adjustBox( pageunion, cur ); } *sizep = sizeof(BOX); @@ -206,6 +210,74 @@ compare_KB(const void *a, const void *b) return (sa > sb) ? 1 : -1; } +static void +chooseLR( GIST_SPLITVEC *v, + OffsetNumber *list1, int nlist1, BOX *union1, + OffsetNumber *list2, int nlist2, BOX *union2 ) +{ + bool firstToLeft = true; + + if ( v->spl_ldatum_exists || v->spl_rdatum_exists ) { + if ( v->spl_ldatum_exists && v->spl_rdatum_exists ) { + BOX LRl = *union1, LRr = *union2; + BOX RLl = *union2, RLr = *union1; + double sizeLR, sizeRL; + + adjustBox( &LRl, DatumGetBoxP( v->spl_ldatum ) ); + adjustBox( &LRr, DatumGetBoxP( v->spl_rdatum ) ); + adjustBox( &RLl, DatumGetBoxP( v->spl_ldatum ) ); + adjustBox( &RLr, DatumGetBoxP( v->spl_rdatum ) ); + + sizeLR = size_box( DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&LRl), BoxPGetDatum(&LRr)) ); + sizeRL = size_box( DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&RLl), BoxPGetDatum(&RLr)) ); + + if ( sizeLR > sizeRL ) + firstToLeft = false; + + } else { + float p1, p2; + GISTENTRY oldUnion, addon; + + gistentryinit(oldUnion, ( v->spl_ldatum_exists ) ? v->spl_ldatum : v->spl_rdatum, + NULL, NULL, InvalidOffsetNumber, FALSE); + + gistentryinit(addon, BoxPGetDatum(union1), NULL, NULL, InvalidOffsetNumber, FALSE); + DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&union1), PointerGetDatum(&p1)); + gistentryinit(addon, BoxPGetDatum(union2), NULL, NULL, InvalidOffsetNumber, FALSE); + DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&union2), PointerGetDatum(&p2)); + + if ( (v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2) ) + firstToLeft = false; + } + } + + if ( firstToLeft ) { + v->spl_left = list1; + v->spl_right = list2; + v->spl_nleft = nlist1; + v->spl_nright = nlist2; + if ( v->spl_ldatum_exists ) + adjustBox(union1, DatumGetBoxP( v->spl_ldatum ) ); + v->spl_ldatum = BoxPGetDatum(union1); + if ( v->spl_rdatum_exists ) + adjustBox(union2, DatumGetBoxP( v->spl_rdatum ) ); + v->spl_rdatum = BoxPGetDatum(union2); + } else { + v->spl_left = list2; + v->spl_right = list1; + v->spl_nleft = nlist2; + v->spl_nright = nlist1; + if ( v->spl_ldatum_exists ) + adjustBox(union2, DatumGetBoxP( v->spl_ldatum ) ); + v->spl_ldatum = BoxPGetDatum(union2); + if ( v->spl_rdatum_exists ) + adjustBox(union1, DatumGetBoxP( v->spl_rdatum ) ); + v->spl_rdatum = BoxPGetDatum(union1); + } + + v->spl_ldatum_exists = v->spl_rdatum_exists = false; +} + /* * The GiST PickSplit method * @@ -255,14 +327,7 @@ gist_box_picksplit(PG_FUNCTION_ARGS) )) allisequal = false; - if (pageunion.high.x < cur->high.x) - pageunion.high.x = cur->high.x; - if (pageunion.low.x > cur->low.x) - pageunion.low.x = cur->low.x; - if (pageunion.high.y < cur->high.y) - pageunion.high.y = cur->high.y; - if (pageunion.low.y > cur->low.y) - pageunion.low.y = cur->low.y; + adjustBox( &pageunion, cur ); } nbytes = (maxoff + 2) * sizeof(OffsetNumber); @@ -294,9 +359,17 @@ gist_box_picksplit(PG_FUNCTION_ARGS) v->spl_nright++; } } + + if ( v->spl_ldatum_exists ) + adjustBox( unionL, DatumGetBoxP( v->spl_ldatum ) ); v->spl_ldatum = BoxPGetDatum(unionL); + + if ( v->spl_rdatum_exists ) + adjustBox( unionR, DatumGetBoxP( v->spl_rdatum ) ); v->spl_rdatum = BoxPGetDatum(unionR); + v->spl_ldatum_exists = v->spl_rdatum_exists = false; + PG_RETURN_POINTER(v); } } @@ -399,23 +472,13 @@ gist_box_picksplit(PG_FUNCTION_ARGS) } if (direction == 'x') - { - v->spl_left = listL; - v->spl_right = listR; - v->spl_nleft = posL; - v->spl_nright = posR; - v->spl_ldatum = BoxPGetDatum(unionL); - v->spl_rdatum = BoxPGetDatum(unionR); - } - else - { - v->spl_left = listB; - v->spl_right = listT; - v->spl_nleft = posB; - v->spl_nright = posT; - v->spl_ldatum = BoxPGetDatum(unionB); - v->spl_rdatum = BoxPGetDatum(unionT); - } + chooseLR( v, + listL, posL, unionL, + listR, posR, unionR ); + else + chooseLR( v, + listB, posB, unionB, + listT, posT, unionT ); PG_RETURN_POINTER(v); } @@ -629,14 +692,14 @@ gist_poly_compress(PG_FUNCTION_ARGS) memcpy((void *) r, (void *) &(in->boundbox), sizeof(BOX)); gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, sizeof(BOX), FALSE); + entry->offset, FALSE); } else { gistentryinit(*retval, (Datum) 0, entry->rel, entry->page, - entry->offset, 0, FALSE); + entry->offset, FALSE); } } else @@ -700,14 +763,14 @@ gist_circle_compress(PG_FUNCTION_ARGS) r->low.y = in->center.y - in->radius; gistentryinit(*retval, PointerGetDatum(r), entry->rel, entry->page, - entry->offset, sizeof(BOX), FALSE); + entry->offset, FALSE); } else { gistentryinit(*retval, (Datum) 0, entry->rel, entry->page, - entry->offset, 0, FALSE); + entry->offset, FALSE); } } else diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 987aed89f7..3be4fd31f5 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.15 2006/05/29 12:50:06 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.16 2006/06/28 12:00:14 teodor Exp $ *------------------------------------------------------------------------- */ #include "postgres.h" @@ -21,23 +21,12 @@ #include "miscadmin.h" #include "storage/freespace.h" -/* group flags ( in gistadjsubkey ) */ -#define LEFT_ADDED 0x01 -#define RIGHT_ADDED 0x02 -#define BOTH_ADDED ( LEFT_ADDED | RIGHT_ADDED ) - -static float gistpenalty(GISTSTATE *giststate, int attno, - GISTENTRY *key1, bool isNull1, - GISTENTRY *key2, bool isNull2); - - /* * static *S used for temrorary storage (saves stack and palloc() call) */ -static int attrsizeS[INDEX_MAX_KEYS]; -static Datum attrS[INDEX_MAX_KEYS]; -static bool isnullS[INDEX_MAX_KEYS]; +static Datum attrS[INDEX_MAX_KEYS]; +static bool isnullS[INDEX_MAX_KEYS]; /* * Write itup vector to page, has no control of free space @@ -156,24 +145,31 @@ gistfillitupvec(IndexTuple *vec, int veclen, int *memlen) { * invalid tuple. Resulting Datums aren't compressed. */ -static bool +bool gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey, - Datum *attr, bool *isnull, int *attrsize ) { + Datum *attr, bool *isnull ) { int i; GistEntryVector *evec; + int attrsize; - evec = (GistEntryVector *) palloc(((len == 1) ? 2 : len) * sizeof(GISTENTRY) + GEVHDRSZ); + evec = (GistEntryVector *) palloc( ( len + 2 ) * sizeof(GISTENTRY) + GEVHDRSZ); for (i = startkey; i < giststate->tupdesc->natts; i++) { int j; evec->n = 0; + if ( !isnull[i] ) { + gistentryinit( evec->vector[evec->n], attr[i], + NULL, NULL, (OffsetNumber) 0, + FALSE); + evec->n++; + } for (j = 0; j < len; j++) { Datum datum; bool IsNull; - if (GistTupleIsInvalid(itvec[j])) + if (GistTupleIsInvalid(itvec[j])) return FALSE; /* signals that union with invalid tuple => result is invalid */ datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull); @@ -184,7 +180,6 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke evec->vector + evec->n, datum, NULL, NULL, (OffsetNumber) 0, - ATTSIZE(datum, giststate->tupdesc, i + 1, IsNull), FALSE, IsNull); evec->n++; } @@ -192,7 +187,6 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke /* If this tuple vector was all NULLs, the union is NULL */ if ( evec->n == 0 ) { attr[i] = (Datum) 0; - attrsize[i] = (Datum) 0; isnull[i] = TRUE; } else { if (evec->n == 1) { @@ -203,7 +197,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke /* Make union and store in attr array */ attr[i] = FunctionCall2(&giststate->unionFn[i], PointerGetDatum(evec), - PointerGetDatum(attrsize + i)); + PointerGetDatum(&attrsize)); isnull[i] = FALSE; } @@ -219,20 +213,24 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke IndexTuple gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate) { - if ( !gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS, attrsizeS) ) + memset(isnullS, TRUE, sizeof(bool) * giststate->tupdesc->natts); + + if ( !gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS ) ) return gist_form_invalid_tuple(InvalidBlockNumber); - return gistFormTuple(giststate, r, attrS, attrsizeS, isnullS); + return gistFormTuple(giststate, r, attrS, isnullS, false); } /* * makes union of two key */ -static void +void gistMakeUnionKey( GISTSTATE *giststate, int attno, GISTENTRY *entry1, bool isnull1, GISTENTRY *entry2, bool isnull2, - Datum *dst, int *dstsize, bool *dstisnull ) { + Datum *dst, bool *dstisnull ) { + + int dstsize; static char storage[ 2 * sizeof(GISTENTRY) + GEVHDRSZ ]; GistEntryVector *evec = (GistEntryVector*)storage; @@ -242,7 +240,6 @@ gistMakeUnionKey( GISTSTATE *giststate, int attno, if ( isnull1 && isnull2 ) { *dstisnull = TRUE; *dst = (Datum)0; - *dstsize = 0; } else { if ( isnull1 == FALSE && isnull2 == FALSE ) { evec->vector[0] = *entry1; @@ -258,11 +255,11 @@ gistMakeUnionKey( GISTSTATE *giststate, int attno, *dstisnull = FALSE; *dst = FunctionCall2(&giststate->unionFn[attno], PointerGetDatum(evec), - PointerGetDatum(dstsize)); + PointerGetDatum(&dstsize)); } } -static bool +bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b) { bool result; @@ -272,6 +269,25 @@ gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b) { return result; } +/* + * Decompress all keys in tuple + */ +void +gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, + OffsetNumber o, GISTENTRY *attdata, bool *isnull) +{ + int i; + + for (i = 0; i < r->rd_att->natts; i++) + { + Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]); + + gistdentryinit(giststate, i, &attdata[i], + datum, r, p, o, + FALSE, isnull[i]); + } +} + /* * Forms union of oldtup and addtup, if union == oldtup then return NULL */ @@ -299,7 +315,7 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis gistMakeUnionKey( giststate, i, oldentries + i, oldisnull[i], addentries + i, addisnull[i], - attrS + i, attrsizeS + i, isnullS + i ); + attrS + i, isnullS + i ); if ( neednew ) /* we already need new key, so we can skip check */ @@ -318,244 +334,13 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis if (neednew) { /* need to update key */ - newtup = gistFormTuple(giststate, r, attrS, attrsizeS, isnullS); + newtup = gistFormTuple(giststate, r, attrS, isnullS, false); newtup->t_tid = oldtup->t_tid; } return newtup; } -/* - * Forms unions of subkeys after page split, but - * uses only tuples aren't in groups of equalent tuples - */ -void -gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec, - GistSplitVec *gsvp, int startkey) { - IndexTuple *cleanedItVec; - int i, cleanedLen=0; - - cleanedItVec = (IndexTuple*)palloc(sizeof(IndexTuple) * gsvp->len); - - for(i=0;ilen;i++) { - if ( gsvp->idgrp && gsvp->idgrp[gsvp->entries[i]]) - continue; - - cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1]; - } - - gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, startkey, - gsvp->attr, gsvp->isnull, gsvp->attrsize); - - pfree( cleanedItVec ); -} - -/* - * unions subkeys for after user picksplit over attno-1 column - */ -void -gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITVEC *spl, int attno) -{ - GistSplitVec gsvp; - - gsvp.idgrp = spl->spl_idgrp; - - gsvp.attrsize = spl->spl_lattrsize; - gsvp.attr = spl->spl_lattr; - gsvp.len = spl->spl_nleft; - gsvp.entries = spl->spl_left; - gsvp.isnull = spl->spl_lisnull; - - gistunionsubkeyvec(giststate, itvec, &gsvp, attno); - - gsvp.attrsize = spl->spl_rattrsize; - gsvp.attr = spl->spl_rattr; - gsvp.len = spl->spl_nright; - gsvp.entries = spl->spl_right; - gsvp.isnull = spl->spl_risnull; - - gistunionsubkeyvec(giststate, itvec, &gsvp, attno); -} - -/* - * find group in vector with equal value - */ -static int -gistfindgroup(GISTSTATE *giststate, GISTENTRY *valvec, GIST_SPLITVEC *spl, int attno) -{ - int i; - int curid = 1; - - /* - * attno key is always not null (see gistSplitByKey), so we may not check for - * nulls - */ - for (i = 0; i < spl->spl_nleft; i++) - { - int j; - int len; - - if (spl->spl_idgrp[spl->spl_left[i]]) - continue; - len = 0; - /* find all equal value in right part */ - for (j = 0; j < spl->spl_nright; j++) - { - if (spl->spl_idgrp[spl->spl_right[j]]) - continue; - if (gistKeyIsEQ(giststate, attno, valvec[spl->spl_left[i]].key, valvec[spl->spl_right[j]].key)) - { - spl->spl_idgrp[spl->spl_right[j]] = curid; - len++; - } - } - /* find all other equal value in left part */ - if (len) - { - /* add current val to list of equal values */ - spl->spl_idgrp[spl->spl_left[i]] = curid; - /* searching .. */ - for (j = i + 1; j < spl->spl_nleft; j++) - { - if (spl->spl_idgrp[spl->spl_left[j]]) - continue; - if (gistKeyIsEQ(giststate, attno, valvec[spl->spl_left[i]].key, valvec[spl->spl_left[j]].key)) - { - spl->spl_idgrp[spl->spl_left[j]] = curid; - len++; - } - } - spl->spl_ngrp[curid] = len + 1; - curid++; - } - } - - return curid; -} - -/* - * Insert equivalent tuples to left or right page with minimum - * penalty - */ -static void -gistadjsubkey(Relation r, - IndexTuple *itup, /* contains compressed entry */ - int len, - GIST_SPLITVEC *v, - GISTSTATE *giststate, - int attno) -{ - int curlen; - OffsetNumber *curwpos; - GISTENTRY entry, - identry[INDEX_MAX_KEYS]; - float lpenalty = 0, - rpenalty = 0; - bool isnull[INDEX_MAX_KEYS]; - int i, - j; - - /* clear vectors */ - curlen = v->spl_nleft; - curwpos = v->spl_left; - for (i = 0; i < v->spl_nleft; i++) - { - if (v->spl_idgrp[v->spl_left[i]] == 0) - { - *curwpos = v->spl_left[i]; - curwpos++; - } - else - curlen--; - } - v->spl_nleft = curlen; - - curlen = v->spl_nright; - curwpos = v->spl_right; - for (i = 0; i < v->spl_nright; i++) - { - if (v->spl_idgrp[v->spl_right[i]] == 0) - { - *curwpos = v->spl_right[i]; - curwpos++; - } - else - curlen--; - } - v->spl_nright = curlen; - - /* add equivalent tuple */ - for (i = 0; i < len; i++) - { - if (v->spl_idgrp[i + 1] == 0) /* already inserted */ - continue; - gistDeCompressAtt(giststate, r, itup[i], NULL, (OffsetNumber) 0, - identry, isnull); - - v->spl_ngrp[v->spl_idgrp[i + 1]]--; - if (v->spl_ngrp[v->spl_idgrp[i + 1]] == 0 && - (v->spl_grpflag[v->spl_idgrp[i + 1]] & BOTH_ADDED) != BOTH_ADDED) - { - /* force last in group */ - rpenalty = 1.0; - lpenalty = (v->spl_grpflag[v->spl_idgrp[i + 1]] & LEFT_ADDED) ? 2.0 : 0.0; - } - else - { - /* where? */ - for (j = attno+1; j < r->rd_att->natts; j++) - { - gistentryinit(entry, v->spl_lattr[j], r, NULL, - (OffsetNumber) 0, v->spl_lattrsize[j], FALSE); - lpenalty = gistpenalty(giststate, j, &entry, v->spl_lisnull[j], - &identry[j], isnull[j]); - - gistentryinit(entry, v->spl_rattr[j], r, NULL, - (OffsetNumber) 0, v->spl_rattrsize[j], FALSE); - rpenalty = gistpenalty(giststate, j, &entry, v->spl_risnull[j], - &identry[j], isnull[j]); - - if (lpenalty != rpenalty) - break; - } - } - - /* - * add XXX: refactor this to avoid duplicating code - */ - if (lpenalty < rpenalty) - { - v->spl_grpflag[v->spl_idgrp[i + 1]] |= LEFT_ADDED; - v->spl_left[v->spl_nleft++] = i + 1; - - for (j = attno+1; j < r->rd_att->natts; j++) - { - gistentryinit(entry, v->spl_lattr[j], r, NULL, - (OffsetNumber) 0, v->spl_lattrsize[j], FALSE); - gistMakeUnionKey( giststate, j, - &entry, v->spl_lisnull[j], - identry + j, isnull[j], - v->spl_lattr + j, v->spl_lattrsize + j, v->spl_lisnull + j ); - } - } - else - { - v->spl_grpflag[v->spl_idgrp[i + 1]] |= RIGHT_ADDED; - v->spl_right[v->spl_nright++] = i + 1; - - for (j = attno+1; j < r->rd_att->natts; j++) - { - gistentryinit(entry, v->spl_rattr[j], r, NULL, - (OffsetNumber) 0, v->spl_rattrsize[j], FALSE); - gistMakeUnionKey( giststate, j, - &entry, v->spl_risnull[j], - identry + j, isnull[j], - v->spl_rattr + j, v->spl_rattrsize + j, v->spl_risnull + j ); - } - } - } -} - /* * find entry with lowest penalty */ @@ -580,6 +365,9 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ it, NULL, (OffsetNumber) 0, identry, isnull); + Assert( maxoff >= FirstOffsetNumber ); + Assert( !GistPageIsLeaf(p) ); + for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i)) { int j; @@ -602,7 +390,6 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull); gistdentryinit(giststate, j, &entry, datum, r, p, i, - ATTSIZE(datum, giststate->tupdesc, j + 1, IsNull), FALSE, IsNull); usize = gistpenalty(giststate, j, &entry, IsNull, &identry[j], isnull[j]); @@ -637,23 +424,23 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, - int b, bool l, bool isNull) + bool l, bool isNull) { - if (b && !isNull) + if (!isNull) { GISTENTRY *dep; - gistentryinit(*e, k, r, pg, o, b, l); + gistentryinit(*e, k, r, pg, o, l); dep = (GISTENTRY *) DatumGetPointer(FunctionCall1(&giststate->decompressFn[nkey], PointerGetDatum(e))); /* decompressFn may just return the given pointer */ if (dep != e) gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset, - dep->bytes, dep->leafkey); + dep->leafkey); } else - gistentryinit(*e, (Datum) 0, r, pg, o, 0, l); + gistentryinit(*e, (Datum) 0, r, pg, o, l); } @@ -663,28 +450,28 @@ gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, void gistcentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, - Page pg, OffsetNumber o, int b, bool l, bool isNull) + Page pg, OffsetNumber o, bool l, bool isNull) { if (!isNull) { GISTENTRY *cep; - gistentryinit(*e, k, r, pg, o, b, l); + gistentryinit(*e, k, r, pg, o, l); cep = (GISTENTRY *) DatumGetPointer(FunctionCall1(&giststate->compressFn[nkey], PointerGetDatum(e))); /* compressFn may just return the given pointer */ if (cep != e) gistentryinit(*e, cep->key, cep->rel, cep->page, cep->offset, - cep->bytes, cep->leafkey); + cep->leafkey); } else - gistentryinit(*e, (Datum) 0, r, pg, o, 0, l); + gistentryinit(*e, (Datum) 0, r, pg, o, l); } IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, - Datum attdata[], int datumsize[], bool isnull[]) + Datum attdata[], bool isnull[], bool newValues) { GISTENTRY centry[INDEX_MAX_KEYS]; Datum compatt[INDEX_MAX_KEYS]; @@ -699,8 +486,7 @@ gistFormTuple(GISTSTATE *giststate, Relation r, { gistcentryinit(giststate, i, ¢ry[i], attdata[i], r, NULL, (OffsetNumber) 0, - (datumsize) ? datumsize[i] : -1, - (datumsize) ? FALSE : TRUE, + newValues, FALSE); compatt[i] = centry[i].key; } @@ -711,24 +497,7 @@ gistFormTuple(GISTSTATE *giststate, Relation r, return res; } -void -gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, - OffsetNumber o, GISTENTRY *attdata, bool *isnull) -{ - int i; - - for (i = 0; i < r->rd_att->natts; i++) - { - Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]); - - gistdentryinit(giststate, i, &attdata[i], - datum, r, p, o, - ATTSIZE(datum, giststate->tupdesc, i + 1, isnull[i]), - FALSE, isnull[i]); - } -} - -static float +float gistpenalty(GISTSTATE *giststate, int attno, GISTENTRY *orig, bool isNullOrig, GISTENTRY *add, bool isNullAdd) @@ -748,74 +517,6 @@ gistpenalty(GISTSTATE *giststate, int attno, return penalty; } -/* - * Calls user picksplit method for attno columns to split vector to - * two vectors. May use attno+n columns data to - * get better split. - * Returns TRUE if left and right unions of attno columns are the same, - * so caller may find better split - */ -bool -gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVEC *v, - IndexTuple *itup, int len, GISTSTATE *giststate) -{ - /* - * now let the user-defined picksplit function set up the split vector; in - * entryvec have no null value!! - */ - FunctionCall2(&giststate->picksplitFn[attno], - PointerGetDatum(entryvec), - PointerGetDatum(v)); - - /* compatibility with old code */ - if (v->spl_left[v->spl_nleft - 1] == InvalidOffsetNumber) - v->spl_left[v->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1); - if (v->spl_right[v->spl_nright - 1] == InvalidOffsetNumber) - v->spl_right[v->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1); - - v->spl_lattr[attno] = v->spl_ldatum; - v->spl_rattr[attno] = v->spl_rdatum; - v->spl_lisnull[attno] = false; - v->spl_risnull[attno] = false; - - /* - * if index is multikey, then we must to try get smaller bounding box for - * subkey(s) - */ - if (giststate->tupdesc->natts > 1 && attno+1 != giststate->tupdesc->natts) - { - if ( gistKeyIsEQ(giststate, attno, v->spl_ldatum, v->spl_rdatum) ) { - /* - * Left and right key's unions are equial, so - * we can get better split by following columns. Note, - * uninons for attno columns are already done. - */ - - return true; - } else { - int MaxGrpId; - - v->spl_idgrp = (int *) palloc0(sizeof(int) * entryvec->n); - v->spl_grpflag = (char *) palloc0(sizeof(char) * entryvec->n); - v->spl_ngrp = (int *) palloc(sizeof(int) * entryvec->n); - - MaxGrpId = gistfindgroup(giststate, entryvec->vector, v, attno); - - /* form union of sub keys for each page (l,p) */ - gistunionsubkey(giststate, itup, v, attno + 1); - - /* - * if possible, we insert equivalent tuples with control by penalty - * for a subkey(s) - */ - if (MaxGrpId > 1) - gistadjsubkey(r, itup, len, v, giststate, attno); - } - } - - return false; -} - /* * Initialize a new index page */ diff --git a/src/include/access/gist.h b/src/include/access/gist.h index cfa07f2be5..55d7634a73 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -9,7 +9,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.53 2006/06/25 01:02:12 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/gist.h,v 1.54 2006/06/28 12:00:14 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -75,37 +75,29 @@ typedef GISTPageOpaqueData *GISTPageOpaque; /* * This is the Split Vector to be returned by the PickSplit method. + * PickSplit should check spl_(r|l)datum_exists. If it is 'true', + * that corresponding spl_(r|l)datum already defined and + * PickSplit should use that value. PickSplit should always set + * spl_(r|l)datum_exists to false: GiST will check value to + * control supportng this feature by PickSplit... */ typedef struct GIST_SPLITVEC { OffsetNumber *spl_left; /* array of entries that go left */ int spl_nleft; /* size of this array */ Datum spl_ldatum; /* Union of keys in spl_left */ - Datum spl_lattr[INDEX_MAX_KEYS]; /* Union of subkeys in - * spl_left */ - int spl_lattrsize[INDEX_MAX_KEYS]; - bool spl_lisnull[INDEX_MAX_KEYS]; - bool spl_leftvalid; + bool spl_ldatum_exists; /* true, if spl_ldatum already exists. */ OffsetNumber *spl_right; /* array of entries that go right */ int spl_nright; /* size of the array */ Datum spl_rdatum; /* Union of keys in spl_right */ - Datum spl_rattr[INDEX_MAX_KEYS]; /* Union of subkeys in - * spl_right */ - int spl_rattrsize[INDEX_MAX_KEYS]; - bool spl_risnull[INDEX_MAX_KEYS]; - bool spl_rightvalid; - - int *spl_idgrp; - int *spl_ngrp; /* number in each group */ - char *spl_grpflag; /* flags of each group */ + bool spl_rdatum_exists; /* true, if spl_rdatum already exists. */ } GIST_SPLITVEC; /* * An entry on a GiST node. Contains the key, as well as its own * location (rel,page,offset) which can supply the matching pointer. - * The size of the key is in bytes, and leafkey is a flag to tell us - * if the entry is in a leaf node. + * leafkey is a flag to tell us if the entry is in a leaf node. */ typedef struct GISTENTRY { @@ -113,7 +105,6 @@ typedef struct GISTENTRY Relation rel; Page page; OffsetNumber offset; - int bytes; bool leafkey; } GISTENTRY; @@ -147,8 +138,8 @@ typedef struct /* * macro to initialize a GISTENTRY */ -#define gistentryinit(e, k, r, pg, o, b, l) \ +#define gistentryinit(e, k, r, pg, o, l) \ do { (e).key = (k); (e).rel = (r); (e).page = (pg); \ - (e).offset = (o); (e).bytes = (b); (e).leafkey = (l); } while (0) + (e).offset = (o); (e).leafkey = (l); } while (0) #endif /* GIST_H */ diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index f75682847a..7a55a31216 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.17 2006/05/29 12:50:06 teodor Exp $ + * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.18 2006/06/28 12:00:14 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -180,6 +180,21 @@ typedef struct GISTInsertStack struct GISTInsertStack *next; } GISTInsertStack; +typedef struct GistSplitVector { + GIST_SPLITVEC splitVector; /* to/from PickSplit method */ + + Datum spl_lattr[INDEX_MAX_KEYS]; /* Union of subkeys in spl_left */ + bool spl_lisnull[INDEX_MAX_KEYS]; + bool spl_leftvalid; + + Datum spl_rattr[INDEX_MAX_KEYS]; /* Union of subkeys in spl_right */ + bool spl_risnull[INDEX_MAX_KEYS]; + bool spl_rightvalid; + + bool *spl_equiv; /* equivalent tuples which can be freely + * distributed between left and right pages */ +} GistSplitVector; + #define XLogRecPtrIsInvalid( r ) ( (r).xlogid == 0 && (r).xrecoff == 0 ) typedef struct @@ -206,12 +221,6 @@ typedef struct /* root page of a gist index */ #define GIST_ROOT_BLKNO 0 -#define ATTSIZE(datum, tupdesc, i, isnull) \ - ( \ - (isnull) ? 0 : \ - att_addlength(0, (tupdesc)->attrs[(i)-1]->attlen, (datum)) \ - ) - /* * mark tuples on inner pages during recovery */ @@ -281,7 +290,7 @@ extern IndexTuple gistgetadjusted(Relation r, IndexTuple addtup, GISTSTATE *giststate); extern IndexTuple gistFormTuple(GISTSTATE *giststate, - Relation r, Datum *attdata, int *datumsize, bool *isnull); + Relation r, Datum *attdata, bool *isnull, bool newValues); extern OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, @@ -289,34 +298,34 @@ extern OffsetNumber gistchoose(Relation r, Page p, extern void gistcentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, - OffsetNumber o, int b, bool l, bool isNull); -extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, - IndexTuple tuple, Page p, OffsetNumber o, - GISTENTRY *attdata, bool *isnull); - -typedef struct { - int *attrsize; - Datum *attr; - int len; - OffsetNumber *entries; - bool *isnull; - int *idgrp; -} GistSplitVec; - -extern void gistunionsubkeyvec(GISTSTATE *giststate, - IndexTuple *itvec, GistSplitVec *gsvp, int startkey); -extern void gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, - GIST_SPLITVEC *spl, int attno); + OffsetNumber o, bool l, bool isNull); extern void GISTInitBuffer(Buffer b, uint32 f); extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, - int b, bool l, bool isNull); -bool gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVEC *v, - IndexTuple *itup, int len, GISTSTATE *giststate); + bool l, bool isNull); + +extern float gistpenalty(GISTSTATE *giststate, int attno, + GISTENTRY *key1, bool isNull1, + GISTENTRY *key2, bool isNull2); +extern bool gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey, + Datum *attr, bool *isnull ); +extern bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b); +extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, + OffsetNumber o, GISTENTRY *attdata, bool *isnull); + +extern void gistMakeUnionKey( GISTSTATE *giststate, int attno, + GISTENTRY *entry1, bool isnull1, + GISTENTRY *entry2, bool isnull2, + Datum *dst, bool *dstisnull ); /* gistvacuum.c */ extern Datum gistbulkdelete(PG_FUNCTION_ARGS); extern Datum gistvacuumcleanup(PG_FUNCTION_ARGS); +/* gistsplit.c */ +extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup, + int len, GISTSTATE *giststate, + GistSplitVector *v, GistEntryVector *entryvec, + int attno); #endif /* GIST_PRIVATE_H */ -- 2.40.0