From d4c6da152782b580b24cd8b4054eb1b7fb72c5a0 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Wed, 31 Jan 2007 15:09:45 +0000 Subject: [PATCH] Allow GIN's extractQuery method to signal that nothing can satisfy the query. In this case extractQuery should returns -1 as nentries. This changes prototype of extractQuery method to use int32* instead of uint32* for nentries argument. Based on that gincostestimate may see two corner cases: nothing will be found or seqscan should be used. Per proposal at http://archives.postgresql.org/pgsql-hackers/2007-01/msg01581.php PS tsearch_core patch should be sightly modified to support changes, but I'm waiting a verdict about reviewing of tsearch_core patch. --- contrib/intarray/_int_gin.c | 15 +++- contrib/tsearch2/ginidx.c | 8 +- doc/src/sgml/gin.sgml | 12 ++- src/backend/access/gin/ginarrayproc.c | 19 ++++- src/backend/access/gin/ginbulk.c | 6 +- src/backend/access/gin/ginget.c | 13 ++- src/backend/access/gin/gininsert.c | 6 +- src/backend/access/gin/ginscan.c | 17 +++- src/backend/access/gin/ginutil.c | 6 +- src/backend/utils/adt/selfuncs.c | 116 +++++++++++++++++++++++++- src/include/access/gin.h | 10 ++- 11 files changed, 199 insertions(+), 29 deletions(-) diff --git a/contrib/intarray/_int_gin.c b/contrib/intarray/_int_gin.c index 7bb9599b33..2248428786 100644 --- a/contrib/intarray/_int_gin.c +++ b/contrib/intarray/_int_gin.c @@ -6,7 +6,7 @@ Datum ginint4_queryextract(PG_FUNCTION_ARGS); Datum ginint4_queryextract(PG_FUNCTION_ARGS) { - uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); StrategyNumber strategy = PG_GETARG_UINT16(2); Datum *res = NULL; @@ -57,6 +57,19 @@ ginint4_queryextract(PG_FUNCTION_ARGS) } } + if ( nentries == 0 ) + { + switch( strategy ) + { + case BooleanSearchStrategy: + case RTOverlapStrategyNumber: + *nentries = -1; /* nobody can be found */ + break; + default: /* require fullscan: GIN can't find void arrays */ + break; + } + } + PG_RETURN_POINTER(res); } diff --git a/contrib/tsearch2/ginidx.c b/contrib/tsearch2/ginidx.c index c1bb173e1c..01766923c1 100644 --- a/contrib/tsearch2/ginidx.c +++ b/contrib/tsearch2/ginidx.c @@ -21,7 +21,7 @@ Datum gin_extract_tsvector(PG_FUNCTION_ARGS) { tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); Datum *entries = NULL; *nentries = 0; @@ -30,7 +30,7 @@ gin_extract_tsvector(PG_FUNCTION_ARGS) int i; WordEntry *we = ARRPTR(vector); - *nentries = (uint32) vector->size; + *nentries = (int32) vector->size; entries = (Datum *) palloc(sizeof(Datum) * vector->size); for (i = 0; i < vector->size; i++) @@ -58,7 +58,7 @@ Datum gin_extract_tsquery(PG_FUNCTION_ARGS) { QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); StrategyNumber strategy = DatumGetUInt16(PG_GETARG_DATUM(2)); Datum *entries = NULL; @@ -99,6 +99,8 @@ gin_extract_tsquery(PG_FUNCTION_ARGS) } } + else + *nentries = -1; /* nothing can be found */ PG_FREE_IF_COPY(query, 0); PG_RETURN_POINTER(entries); diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml index c2bbad42bf..32919e5871 100644 --- a/doc/src/sgml/gin.sgml +++ b/doc/src/sgml/gin.sgml @@ -1,4 +1,4 @@ - + GIN Indexes @@ -77,7 +77,7 @@ - Datum* extractValue(Datum inputValue, uint32 *nkeys) + Datum* extractValue(Datum inputValue, int32 *nkeys) Returns an array of keys given a value to be indexed. The @@ -87,7 +87,7 @@ - Datum* extractQuery(Datum query, uint32 *nkeys, + Datum* extractQuery(Datum query, int32 *nkeys, StrategyNumber n) @@ -100,6 +100,12 @@ to consult n to determine the data type of query and the key values that need to be extracted. The number of returned keys must be stored into *nkeys. + If number of keys is equal to zero then extractQuery + should store 0 or -1 into *nkeys. 0 means that any + row matches the query and sequence scan should be + produced. -1 means nothing can satisfy query. + Choice of value should be based on semantics meaning of operation with + given strategy number. diff --git a/src/backend/access/gin/ginarrayproc.c b/src/backend/access/gin/ginarrayproc.c index fd505333bc..a46def63c5 100644 --- a/src/backend/access/gin/ginarrayproc.c +++ b/src/backend/access/gin/ginarrayproc.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.8 2007/01/05 22:19:21 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.9 2007/01/31 15:09:45 teodor Exp $ *------------------------------------------------------------------------- */ #include "postgres.h" @@ -38,7 +38,7 @@ Datum ginarrayextract(PG_FUNCTION_ARGS) { ArrayType *array; - uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); Datum *entries = NULL; int16 elmlen; bool elmbyval; @@ -60,6 +60,21 @@ ginarrayextract(PG_FUNCTION_ARGS) elmlen, elmbyval, elmalign, &entries, NULL, (int *) nentries); + if ( *nentries == 0 && PG_NARGS() == 3 ) + { + switch( PG_GETARG_UINT16(2) ) + { + case GinOverlapStrategy: + *nentries = -1; /* nobody can be found */ + break; + case GinContainsStrategy: + case GinContainedStrategy: + case GinEqualStrategy: + default: /* require fullscan: GIN can't find void arrays */ + break; + } + } + /* we should not free array, entries[i] points into it */ PG_RETURN_POINTER(entries); } diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c index f45d5b6395..d3b6f0bceb 100644 --- a/src/backend/access/gin/ginbulk.c +++ b/src/backend/access/gin/ginbulk.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.7 2007/01/05 22:19:21 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.8 2007/01/31 15:09:45 teodor Exp $ *------------------------------------------------------------------------- */ @@ -191,13 +191,13 @@ ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint * next middle on left part and middle of right part. */ void -ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry) +ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, int32 nentry) { uint32 i, nbit = 0, offset; - if (nentry == 0) + if (nentry <= 0) return; i = nentry - 1; diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 11f034bb21..2366f3f727 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.5 2007/01/05 22:19:21 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.6 2007/01/31 15:09:45 teodor Exp $ *------------------------------------------------------------------------- */ @@ -420,6 +420,7 @@ scanGetItem(IndexScanDesc scan, ItemPointerData *item) } #define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL ) +#define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes == true ) Datum gingetmulti(PG_FUNCTION_ARGS) @@ -432,10 +433,13 @@ gingetmulti(PG_FUNCTION_ARGS) if (GinIsNewKey(scan)) newScanKey(scan); - startScan(scan); - *returned_tids = 0; + if (GinIsVoidRes(scan)) + PG_RETURN_BOOL(false); + + startScan(scan); + do { if (scanGetItem(scan, tids + *returned_tids)) @@ -462,6 +466,9 @@ gingettuple(PG_FUNCTION_ARGS) if (GinIsNewKey(scan)) newScanKey(scan); + if (GinIsVoidRes(scan)) + PG_RETURN_BOOL(false); + startScan(scan); res = scanGetItem(scan, &scan->xs_ctup.t_self); stopScan(scan); diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index a7ded0c47f..3a17233ba8 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.6 2007/01/05 22:19:21 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.7 2007/01/31 15:09:45 teodor Exp $ *------------------------------------------------------------------------- */ @@ -203,7 +203,7 @@ static uint32 ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr) { Datum *entries; - uint32 nentries; + int32 nentries; MemoryContext oldCtx; oldCtx = MemoryContextSwitchTo(buildstate->funcCtx); @@ -356,7 +356,7 @@ static uint32 ginHeapTupleInsert(Relation index, GinState *ginstate, Datum value, ItemPointer item) { Datum *entries; - uint32 i, + int32 i, nentries; entries = extractEntriesSU(ginstate, value, &nentries); diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index bc8c5595f9..22896bc5d7 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.8 2007/01/05 22:19:21 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.9 2007/01/31 15:09:45 teodor Exp $ *------------------------------------------------------------------------- */ @@ -145,10 +145,12 @@ newScanKey(IndexScanDesc scan) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("GIN indexes do not support whole-index scans"))); + so->isVoidRes = false; + for (i = 0; i < scan->numberOfKeys; i++) { Datum *entryValues; - uint32 nEntryValues; + int32 nEntryValues; if (scankey[i].sk_flags & SK_ISNULL) elog(ERROR, "Gin doesn't support NULL as scan key"); @@ -162,6 +164,15 @@ newScanKey(IndexScanDesc scan) UInt16GetDatum(scankey[i].sk_strategy) ) ); + if ( nEntryValues < 0 ) + { + /* + * extractQueryFn signals that nothing will be found, + * so we can just set isVoidRes flag... + */ + so->isVoidRes = true; + break; + } if (entryValues == NULL || nEntryValues == 0) /* full scan... */ continue; @@ -173,7 +184,7 @@ newScanKey(IndexScanDesc scan) so->nkeys = nkeys; - if (so->nkeys == 0) + if (so->nkeys == 0 && !so->isVoidRes) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("GIN index does not support search with void query"))); diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index a14900e56c..e704e8051e 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.9 2007/01/05 22:19:21 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.10 2007/01/31 15:09:45 teodor Exp $ *------------------------------------------------------------------------- */ @@ -148,7 +148,7 @@ cmpEntries(const Datum *a, const Datum *b, cmpEntriesData *arg) } Datum * -extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries, +extractEntriesS(GinState *ginstate, Datum value, int32 *nentries, bool *needUnique) { Datum *entries; @@ -178,7 +178,7 @@ extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries, Datum * -extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries) +extractEntriesSU(GinState *ginstate, Datum value, int32 *nentries) { bool needUnique; Datum *entries = extractEntriesS(ginstate, value, nentries, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 0780e52a89..e035557ff2 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.223 2007/01/28 02:53:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.224 2007/01/31 15:09:45 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -76,6 +76,7 @@ #include #include +#include "access/gin.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" #include "catalog/pg_type.h" @@ -5286,7 +5287,120 @@ gincostestimate(PG_FUNCTION_ARGS) Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + ListCell *l; + int32 nfullscan = 0; + + /* + * GIN doesn't support full index scan. + * If quals require full index scan then we should + * return cost big as possible to forbid full index scan. + */ + + foreach(l, indexQuals) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Expr *clause; + Node *leftop, + *rightop, + *operand; + Oid extractProcOid; + Oid clause_op; + int strategy_op; + Oid lefttype, + righttype; + bool recheck; + int32 nentries; + + /* + * For each clause it's needed to check operand + * for values to search in GIN. So, we should find + * extractQuery method to get values from operand + */ + + Assert(IsA(rinfo, RestrictInfo)); + clause = rinfo->clause; + Assert( IsA(clause, OpExpr) ); + leftop = get_leftop(clause); + rightop = get_rightop(clause); + clause_op = ((OpExpr *) clause)->opno; + + if (match_index_to_operand(leftop, 0 /* GiN has only one column */, index)) + { + operand = rightop; + } + else if (match_index_to_operand(rightop, 0, index)) + { + operand = leftop; + clause_op = get_commutator(clause_op); + } + + if ( IsA(operand, RelabelType) ) + operand = (Node *) ((RelabelType *) operand)->arg; + + /* + * It's impossible to call extractQuery method for not yet + * known operand (taken from table, for example). In this + * case we can't do anything useful... + */ + if ( !IsA(operand, Const) ) + continue; + + if (!op_in_opfamily(clause_op, index->opfamily[0])) + continue; + + /* + * lefttype is a type of index column, righttype is a + * type of operand (query) + */ + get_op_opfamily_properties( clause_op, index->opfamily[0], + &strategy_op, &lefttype, &righttype, &recheck); + + /* + * GIN (as GiST) always has lefttype == righttype in pg_amproc + * and they are equal to type Oid on which index was created/designed + */ + extractProcOid = get_opfamily_proc( index->opfamily[0], + lefttype, lefttype, + GIN_EXTRACTQUERY_PROC ); + + if ( !OidIsValid(extractProcOid) ) + continue; /* should not be */ + + OidFunctionCall3( extractProcOid, + ((Const*)operand)->constvalue, + PointerGetDatum(&nentries), + UInt16GetDatum(strategy_op)); + + if ( nentries == 0 ) + nfullscan++; + else if ( nentries < 0 ) + { + /* + * GIN_EXTRACTQUERY_PROC guarantees that nothing will be found + */ + *indexStartupCost = 0; + *indexTotalCost = 0; + *indexSelectivity = 0; + *indexCorrelation = 0; + PG_RETURN_VOID(); + } + } + + if ( nfullscan == list_length(indexQuals) ) + { + /* + * All quals are void and require full scan. So + * set max possible cost to prevent index scan. + */ + *indexStartupCost = disable_cost; + *indexTotalCost = disable_cost; + *indexSelectivity = 1.0; + *indexCorrelation = 0; + + PG_RETURN_VOID(); + } + genericcostestimate(root, index, indexQuals, outer_rel, 0.0, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); diff --git a/src/include/access/gin.h b/src/include/access/gin.h index c37b367278..80f2374996 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -3,7 +3,7 @@ * header file for postgres inverted index access method implementation. * * Copyright (c) 2006, PostgreSQL Global Development Group - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.9 2006/10/05 17:57:40 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.10 2007/01/31 15:09:45 teodor Exp $ *-------------------------------------------------------------------------- */ @@ -233,8 +233,8 @@ extern void GinInitBuffer(Buffer b, uint32 f); extern void GinInitPage(Page page, uint32 f, Size pageSize); extern int compareEntries(GinState *ginstate, Datum a, Datum b); extern Datum *extractEntriesS(GinState *ginstate, Datum value, - uint32 *nentries, bool *needUnique); -extern Datum *extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries); + int32 *nentries, bool *needUnique); +extern Datum *extractEntriesSU(GinState *ginstate, Datum value, int32 *nentries); extern Page GinPageGetCopyPage(Page page); /* gininsert.c */ @@ -399,6 +399,8 @@ typedef struct GinScanOpaqueData GinScanKey keys; uint32 nkeys; + bool isVoidRes; /* true if ginstate.extractQueryFn + guarantees that nothing will be found */ GinScanKey markPos; } GinScanOpaqueData; @@ -458,7 +460,7 @@ typedef struct extern void ginInitBA(BuildAccumulator *accum); extern void ginInsertRecordBA(BuildAccumulator *accum, - ItemPointer heapptr, Datum *entries, uint32 nentry); + ItemPointer heapptr, Datum *entries, int32 nentry); extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, Datum *entry, uint32 *n); #endif -- 2.40.0