From: Teodor Sigaev <teodor@sigaev.ru>
Date: Wed, 31 Jan 2007 15:09:45 +0000 (+0000)
Subject: Allow GIN's extractQuery method to signal that nothing can satisfy the query.
X-Git-Tag: REL8_3_BETA1~1375
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d4c6da152782b580b24cd8b4054eb1b7fb72c5a0;p=postgresql

Allow GIN's extractQuery method to signal that nothing can satisfy the query.
In this case extractQuery should returns -1 as nentries. This changes
prototype of extractQuery method to use int32* instead of uint32* for
nentries argument.
Based on that gincostestimate may see two corner cases: nothing will be found
or seqscan should be used.

Per proposal at http://archives.postgresql.org/pgsql-hackers/2007-01/msg01581.php

PS tsearch_core patch should be sightly modified to support changes, but I'm
waiting a verdict about reviewing of tsearch_core patch.
---

diff --git a/contrib/intarray/_int_gin.c b/contrib/intarray/_int_gin.c
index 7bb9599b33..2248428786 100644
--- a/contrib/intarray/_int_gin.c
+++ b/contrib/intarray/_int_gin.c
@@ -6,7 +6,7 @@ Datum		ginint4_queryextract(PG_FUNCTION_ARGS);
 Datum
 ginint4_queryextract(PG_FUNCTION_ARGS)
 {
-	uint32	   *nentries = (uint32 *) PG_GETARG_POINTER(1);
+	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 	StrategyNumber strategy = PG_GETARG_UINT16(2);
 	Datum	   *res = NULL;
 
@@ -57,6 +57,19 @@ ginint4_queryextract(PG_FUNCTION_ARGS)
 		}
 	}
 
+	if ( nentries == 0 )
+	{
+		switch( strategy )
+		{
+			case BooleanSearchStrategy:
+			case RTOverlapStrategyNumber:
+					*nentries = -1; /* nobody can be found */
+					break;
+			default:   /* require fullscan: GIN can't find void arrays */
+			break;
+		}
+	}
+
 	PG_RETURN_POINTER(res);
 }
 
diff --git a/contrib/tsearch2/ginidx.c b/contrib/tsearch2/ginidx.c
index c1bb173e1c..01766923c1 100644
--- a/contrib/tsearch2/ginidx.c
+++ b/contrib/tsearch2/ginidx.c
@@ -21,7 +21,7 @@ Datum
 gin_extract_tsvector(PG_FUNCTION_ARGS)
 {
 	tsvector   *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
-	uint32	   *nentries = (uint32 *) PG_GETARG_POINTER(1);
+	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 	Datum	   *entries = NULL;
 
 	*nentries = 0;
@@ -30,7 +30,7 @@ gin_extract_tsvector(PG_FUNCTION_ARGS)
 		int			i;
 		WordEntry  *we = ARRPTR(vector);
 
-		*nentries = (uint32) vector->size;
+		*nentries = (int32) vector->size;
 		entries = (Datum *) palloc(sizeof(Datum) * vector->size);
 
 		for (i = 0; i < vector->size; i++)
@@ -58,7 +58,7 @@ Datum
 gin_extract_tsquery(PG_FUNCTION_ARGS)
 {
 	QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
-	uint32	   *nentries = (uint32 *) PG_GETARG_POINTER(1);
+	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 	StrategyNumber strategy = DatumGetUInt16(PG_GETARG_DATUM(2));
 	Datum	   *entries = NULL;
 
@@ -99,6 +99,8 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
 			}
 
 	}
+	else
+		*nentries = -1; /* nothing can be found */
 
 	PG_FREE_IF_COPY(query, 0);
 	PG_RETURN_POINTER(entries);
diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml
index c2bbad42bf..32919e5871 100644
--- a/doc/src/sgml/gin.sgml
+++ b/doc/src/sgml/gin.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/gin.sgml,v 2.7 2006/12/01 23:46:46 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/gin.sgml,v 2.8 2007/01/31 15:09:45 teodor Exp $ -->
 
 <chapter id="GIN">
 <title>GIN Indexes</title>
@@ -77,7 +77,7 @@
     </varlistentry>
 
     <varlistentry>
-     <term>Datum* extractValue(Datum inputValue, uint32 *nkeys)</term>
+     <term>Datum* extractValue(Datum inputValue, int32 *nkeys)</term>
      <listitem>
       <para>
        Returns an array of keys given a value to be indexed.  The
@@ -87,7 +87,7 @@
     </varlistentry>
 
     <varlistentry>
-     <term>Datum* extractQuery(Datum query, uint32 *nkeys,
+     <term>Datum* extractQuery(Datum query, int32 *nkeys,
         StrategyNumber n)</term>
      <listitem>
       <para>
@@ -100,6 +100,12 @@
        to consult <literal>n</> to determine the data type of
        <literal>query</> and the key values that need to be extracted.
        The number of returned keys must be stored into <literal>*nkeys</>.
+	   If number of keys is equal to zero then <function>extractQuery</> 
+	   should store 0 or -1 into <literal>*nkeys</>. 0 means that any 
+	   row matches the <literal>query</> and sequence scan should be 
+	   produced. -1 means nothing can satisfy <literal>query</>. 
+	   Choice of value should be based on semantics meaning of operation with 
+	   given strategy number.
       </para>
      </listitem>
     </varlistentry>
diff --git a/src/backend/access/gin/ginarrayproc.c b/src/backend/access/gin/ginarrayproc.c
index fd505333bc..a46def63c5 100644
--- a/src/backend/access/gin/ginarrayproc.c
+++ b/src/backend/access/gin/ginarrayproc.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.8 2007/01/05 22:19:21 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.9 2007/01/31 15:09:45 teodor Exp $
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -38,7 +38,7 @@ Datum
 ginarrayextract(PG_FUNCTION_ARGS)
 {
 	ArrayType  *array;
-	uint32	   *nentries = (uint32 *) PG_GETARG_POINTER(1);
+	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 	Datum	   *entries = NULL;
 	int16		elmlen;
 	bool		elmbyval;
@@ -60,6 +60,21 @@ ginarrayextract(PG_FUNCTION_ARGS)
 					  elmlen, elmbyval, elmalign,
 					  &entries, NULL, (int *) nentries);
 
+	if ( *nentries == 0 && PG_NARGS() == 3 )
+	{
+		switch( PG_GETARG_UINT16(2) )
+		{
+			case GinOverlapStrategy:
+					*nentries = -1; /* nobody can be found */
+					break;
+			case GinContainsStrategy:
+			case GinContainedStrategy:
+			case GinEqualStrategy:
+			default:	/* require fullscan: GIN can't find void arrays */
+				break;
+		}
+	}
+
 	/* we should not free array, entries[i] points into it */
 	PG_RETURN_POINTER(entries);
 }
diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c
index f45d5b6395..d3b6f0bceb 100644
--- a/src/backend/access/gin/ginbulk.c
+++ b/src/backend/access/gin/ginbulk.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *			$PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.7 2007/01/05 22:19:21 momjian Exp $
+ *			$PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.8 2007/01/31 15:09:45 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -191,13 +191,13 @@ ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint
  * next middle on left part and middle of right part.
  */
 void
-ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry)
+ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, int32 nentry)
 {
 	uint32		i,
 				nbit = 0,
 				offset;
 
-	if (nentry == 0)
+	if (nentry <= 0)
 		return;
 
 	i = nentry - 1;
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 11f034bb21..2366f3f727 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *			$PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.5 2007/01/05 22:19:21 momjian Exp $
+ *			$PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.6 2007/01/31 15:09:45 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -420,6 +420,7 @@ scanGetItem(IndexScanDesc scan, ItemPointerData *item)
 }
 
 #define GinIsNewKey(s)		( ((GinScanOpaque) scan->opaque)->keys == NULL )
+#define GinIsVoidRes(s)		( ((GinScanOpaque) scan->opaque)->isVoidRes == true )
 
 Datum
 gingetmulti(PG_FUNCTION_ARGS)
@@ -432,10 +433,13 @@ gingetmulti(PG_FUNCTION_ARGS)
 	if (GinIsNewKey(scan))
 		newScanKey(scan);
 
-	startScan(scan);
-
 	*returned_tids = 0;
 
+	if (GinIsVoidRes(scan))
+		PG_RETURN_BOOL(false);
+
+	startScan(scan);
+
 	do
 	{
 		if (scanGetItem(scan, tids + *returned_tids))
@@ -462,6 +466,9 @@ gingettuple(PG_FUNCTION_ARGS)
 	if (GinIsNewKey(scan))
 		newScanKey(scan);
 
+	if (GinIsVoidRes(scan))
+		PG_RETURN_BOOL(false);
+
 	startScan(scan);
 	res = scanGetItem(scan, &scan->xs_ctup.t_self);
 	stopScan(scan);
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index a7ded0c47f..3a17233ba8 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *			$PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.6 2007/01/05 22:19:21 momjian Exp $
+ *			$PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.7 2007/01/31 15:09:45 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -203,7 +203,7 @@ static uint32
 ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr)
 {
 	Datum	   *entries;
-	uint32		nentries;
+	int32		nentries;
 	MemoryContext oldCtx;
 
 	oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
@@ -356,7 +356,7 @@ static uint32
 ginHeapTupleInsert(Relation index, GinState *ginstate, Datum value, ItemPointer item)
 {
 	Datum	   *entries;
-	uint32		i,
+	int32		i,
 				nentries;
 
 	entries = extractEntriesSU(ginstate, value, &nentries);
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index bc8c5595f9..22896bc5d7 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *			$PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.8 2007/01/05 22:19:21 momjian Exp $
+ *			$PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.9 2007/01/31 15:09:45 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -145,10 +145,12 @@ newScanKey(IndexScanDesc scan)
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("GIN indexes do not support whole-index scans")));
 
+	so->isVoidRes = false;
+
 	for (i = 0; i < scan->numberOfKeys; i++)
 	{
 		Datum	   *entryValues;
-		uint32		nEntryValues;
+		int32		nEntryValues;
 
 		if (scankey[i].sk_flags & SK_ISNULL)
 			elog(ERROR, "Gin doesn't support NULL as scan key");
@@ -162,6 +164,15 @@ newScanKey(IndexScanDesc scan)
 									   UInt16GetDatum(scankey[i].sk_strategy)
 															  )
 			);
+		if ( nEntryValues < 0 )
+		{
+			/*
+			 * extractQueryFn signals that nothing will be found,
+			 * so we can just set isVoidRes flag...
+			 */
+			so->isVoidRes = true;	
+			break;
+		}
 		if (entryValues == NULL || nEntryValues == 0)
 			/* full scan... */
 			continue;
@@ -173,7 +184,7 @@ newScanKey(IndexScanDesc scan)
 
 	so->nkeys = nkeys;
 
-	if (so->nkeys == 0)
+	if (so->nkeys == 0 && !so->isVoidRes)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("GIN index does not support search with void query")));
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index a14900e56c..e704e8051e 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *			$PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.9 2007/01/05 22:19:21 momjian Exp $
+ *			$PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.10 2007/01/31 15:09:45 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -148,7 +148,7 @@ cmpEntries(const Datum *a, const Datum *b, cmpEntriesData *arg)
 }
 
 Datum *
-extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries,
+extractEntriesS(GinState *ginstate, Datum value, int32 *nentries,
 				bool *needUnique)
 {
 	Datum	   *entries;
@@ -178,7 +178,7 @@ extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries,
 
 
 Datum *
-extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries)
+extractEntriesSU(GinState *ginstate, Datum value, int32 *nentries)
 {
 	bool		needUnique;
 	Datum	   *entries = extractEntriesS(ginstate, value, nentries,
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 0780e52a89..e035557ff2 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.223 2007/01/28 02:53:34 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.224 2007/01/31 15:09:45 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -76,6 +76,7 @@
 #include <ctype.h>
 #include <math.h>
 
+#include "access/gin.h"
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
@@ -5286,7 +5287,120 @@ gincostestimate(PG_FUNCTION_ARGS)
 	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
 	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
 	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+	ListCell   *l;
+	int32		nfullscan = 0;
+
+	/*
+	 * GIN doesn't support full index scan.
+	 * If quals require full index scan then we should
+	 * return cost big as possible to forbid full index scan.
+	 */
+
+	foreach(l, indexQuals)
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+		Expr	   *clause;
+		Node	   *leftop,
+				   *rightop,
+				   *operand;
+		Oid			extractProcOid;
+		Oid			clause_op;
+		int			strategy_op;
+		Oid		   	lefttype, 
+				   	righttype;
+		bool	    recheck;
+		int32		nentries;
+
+		/*
+		 * For each clause it's needed to check operand
+		 * for values to search in GIN. So, we should find 
+		 * extractQuery method to get values from operand
+		 */
+
+		Assert(IsA(rinfo, RestrictInfo));
+		clause = rinfo->clause;
+		Assert( IsA(clause, OpExpr) );
+		leftop = get_leftop(clause);
+		rightop = get_rightop(clause);
+		clause_op = ((OpExpr *) clause)->opno;
+
+		if (match_index_to_operand(leftop, 0 /* GiN has only one column */, index))
+		{
+			operand = rightop;
+		}
+		else if (match_index_to_operand(rightop, 0, index))
+		{
+			operand = leftop;
+			clause_op = get_commutator(clause_op);
+		}
+
+		if ( IsA(operand, RelabelType) )
+			operand = (Node *) ((RelabelType *) operand)->arg;
+
+		/*
+		 * It's impossible to call extractQuery method for not yet
+		 * known operand (taken from table, for example). In this
+		 * case we can't do anything useful...
+		 */
+		if ( !IsA(operand, Const) )
+			continue;
+
+		if (!op_in_opfamily(clause_op, index->opfamily[0]))
+			continue;
+
+		/*
+		 * lefttype is a type of index column, righttype is a
+		 * type of operand (query)
+		 */
+		get_op_opfamily_properties(	clause_op, index->opfamily[0],
+									&strategy_op, &lefttype, &righttype, &recheck);
+
+		/* 
+		 * GIN (as GiST) always has lefttype == righttype in pg_amproc
+		 * and they are equal to type Oid on which index was created/designed
+		 */
+		extractProcOid = get_opfamily_proc( index->opfamily[0],
+											lefttype, lefttype, 
+											GIN_EXTRACTQUERY_PROC );
+
+		if ( !OidIsValid(extractProcOid) )
+			continue; /* should not be */
+
+		OidFunctionCall3( extractProcOid,
+							((Const*)operand)->constvalue,
+							PointerGetDatum(&nentries),
+							UInt16GetDatum(strategy_op));
+
+		if ( nentries == 0 )
+			nfullscan++;
+		else if ( nentries < 0 )
+		{
+			/*
+			 * GIN_EXTRACTQUERY_PROC guarantees that nothing will be found
+			 */
+			*indexStartupCost = 0;
+			*indexTotalCost = 0;
+			*indexSelectivity = 0;
+			*indexCorrelation = 0;
+			PG_RETURN_VOID();
+		}
+	}
+
+	if ( nfullscan == list_length(indexQuals) )
+	{
+		/*
+		 * All quals are void and require full scan. So
+		 * set max possible cost to prevent index scan.
+		 */
+		*indexStartupCost = disable_cost;
+		*indexTotalCost = disable_cost;
+		*indexSelectivity = 1.0;
+		*indexCorrelation = 0;
+
+		PG_RETURN_VOID();
+	}
 
+	
 	genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
 						indexStartupCost, indexTotalCost,
 						indexSelectivity, indexCorrelation);
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index c37b367278..80f2374996 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -3,7 +3,7 @@
  *	  header file for postgres inverted index access method implementation.
  *
  *	Copyright (c) 2006, PostgreSQL Global Development Group
- *	$PostgreSQL: pgsql/src/include/access/gin.h,v 1.9 2006/10/05 17:57:40 tgl Exp $
+ *	$PostgreSQL: pgsql/src/include/access/gin.h,v 1.10 2007/01/31 15:09:45 teodor Exp $
  *--------------------------------------------------------------------------
  */
 
@@ -233,8 +233,8 @@ extern void GinInitBuffer(Buffer b, uint32 f);
 extern void GinInitPage(Page page, uint32 f, Size pageSize);
 extern int	compareEntries(GinState *ginstate, Datum a, Datum b);
 extern Datum *extractEntriesS(GinState *ginstate, Datum value,
-							  uint32 *nentries, bool *needUnique);
-extern Datum *extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries);
+							  int32 *nentries, bool *needUnique);
+extern Datum *extractEntriesSU(GinState *ginstate, Datum value, int32 *nentries);
 extern Page GinPageGetCopyPage(Page page);
 
 /* gininsert.c */
@@ -399,6 +399,8 @@ typedef struct GinScanOpaqueData
 
 	GinScanKey	keys;
 	uint32		nkeys;
+	bool		isVoidRes; /* true if ginstate.extractQueryFn 
+							  guarantees that nothing will be found */
 
 	GinScanKey	markPos;
 } GinScanOpaqueData;
@@ -458,7 +460,7 @@ typedef struct
 
 extern void ginInitBA(BuildAccumulator *accum);
 extern void ginInsertRecordBA(BuildAccumulator *accum,
-				  ItemPointer heapptr, Datum *entries, uint32 nentry);
+				  ItemPointer heapptr, Datum *entries, int32 nentry);
 extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, Datum *entry, uint32 *n);
 
 #endif