PG_RETURN_VOID();
}
+
+/*
+ * Support routines for gincostestimate
+ */
+
+typedef struct
+{
+ bool haveFullScan;
+ double partialEntries;
+ double exactEntries;
+ double searchEntries;
+ double arrayScans;
+} GinQualCounts;
+
/* Find the index column matching "op"; return its index, or -1 if no match */
static int
find_index_column(Node *op, IndexOptInfo *index)
return -1;
}
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN query, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ */
+static bool
+gincost_pattern(IndexOptInfo *index, int indexcol,
+ Oid clause_op, Datum query,
+ GinQualCounts *counts)
+{
+ Oid extractProcOid;
+ int strategy_op;
+ Oid lefttype,
+ righttype;
+ int32 nentries = 0;
+ bool *partial_matches = NULL;
+ Pointer *extra_data = NULL;
+ bool *nullFlags = NULL;
+ int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
+ int32 i;
+
+ /*
+ * Get the operator's strategy number and declared input data types
+ * within the index opfamily. (We don't need the latter, but we use
+ * get_op_opfamily_properties because it will throw error if it fails
+ * to find a matching pg_amop entry.)
+ */
+ get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
+ &strategy_op, &lefttype, &righttype);
+
+ /*
+ * GIN always uses the "default" support functions, which are those
+ * with lefttype == righttype == the opclass' opcintype (see
+ * IndexSupportInitialize in relcache.c).
+ */
+ extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
+ index->opcintype[indexcol],
+ index->opcintype[indexcol],
+ GIN_EXTRACTQUERY_PROC);
+
+ if (!OidIsValid(extractProcOid))
+ {
+ /* should not happen; throw same error as index_getprocinfo */
+ elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
+ GIN_EXTRACTQUERY_PROC, indexcol + 1,
+ get_rel_name(index->indexoid));
+ }
+
+ OidFunctionCall7(extractProcOid,
+ query,
+ PointerGetDatum(&nentries),
+ UInt16GetDatum(strategy_op),
+ PointerGetDatum(&partial_matches),
+ PointerGetDatum(&extra_data),
+ PointerGetDatum(&nullFlags),
+ PointerGetDatum(&searchMode));
+
+ if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
+ {
+ /* No match is possible */
+ return false;
+ }
+
+ for (i = 0; i < nentries; i++)
+ {
+ /*
+ * For partial match we haven't any information to estimate number of
+ * matched entries in index, so, we just estimate it as 100
+ */
+ if (partial_matches && partial_matches[i])
+ counts->partialEntries += 100;
+ else
+ counts->exactEntries++;
+
+ counts->searchEntries++;
+ }
+
+ if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
+ {
+ /* Treat "include empty" like an exact-match item */
+ counts->exactEntries++;
+ counts->searchEntries++;
+ }
+ else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
+ {
+ /* It's GIN_SEARCH_MODE_ALL */
+ counts->haveFullScan = true;
+ }
+
+ return true;
+}
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN index clause, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ */
+static bool
+gincost_opexpr(IndexOptInfo *index, OpExpr *clause, GinQualCounts *counts)
+{
+ Node *leftop = get_leftop((Expr *) clause);
+ Node *rightop = get_rightop((Expr *) clause);
+ Oid clause_op = clause->opno;
+ int indexcol;
+ Node *operand;
+
+ /* Locate the operand being compared to the index column */
+ if ((indexcol = find_index_column(leftop, index)) >= 0)
+ {
+ operand = rightop;
+ }
+ else if ((indexcol = find_index_column(rightop, index)) >= 0)
+ {
+ operand = leftop;
+ clause_op = get_commutator(clause_op);
+ }
+ else
+ {
+ elog(ERROR, "could not match index to operand");
+ operand = NULL; /* keep compiler quiet */
+ }
+
+ if (IsA(operand, RelabelType))
+ operand = (Node *) ((RelabelType *) operand)->arg;
+
+ /*
+ * It's impossible to call extractQuery method for unknown operand. So
+ * unless operand is a Const we can't do much; just assume there will
+ * be one ordinary search entry from the operand at runtime.
+ */
+ if (!IsA(operand, Const))
+ {
+ counts->exactEntries++;
+ counts->searchEntries++;
+ return true;
+ }
+
+ /* If Const is null, there can be no matches */
+ if (((Const *) operand)->constisnull)
+ return false;
+
+ /* Otherwise, apply extractQuery and get the actual term counts */
+ return gincost_pattern(index, indexcol, clause_op,
+ ((Const *) operand)->constvalue,
+ counts);
+}
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN index clause, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ *
+ * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
+ * each of which involves one value from the RHS array, plus all the
+ * non-array quals (if any). To model this, we average the counts across
+ * the RHS elements, and add the averages to the counts in *counts (which
+ * correspond to per-indexscan costs). We also multiply counts->arrayScans
+ * by N, causing gincostestimate to scale up its estimates accordingly.
+ */
+static bool
+gincost_scalararrayopexpr(IndexOptInfo *index, ScalarArrayOpExpr *clause,
+ double numIndexEntries,
+ GinQualCounts *counts)
+{
+ Node *leftop = (Node *) linitial(clause->args);
+ Node *rightop = (Node *) lsecond(clause->args);
+ Oid clause_op = clause->opno;
+ int indexcol;
+ ArrayType *arrayval;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ int numElems;
+ Datum *elemValues;
+ bool *elemNulls;
+ GinQualCounts arraycounts;
+ int numPossible = 0;
+ int i;
+
+ Assert(clause->useOr);
+
+ /* index column must be on the left */
+ if ((indexcol = find_index_column(leftop, index)) < 0)
+ elog(ERROR, "could not match index to operand");
+
+ if (IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ /*
+ * It's impossible to call extractQuery method for unknown operand. So
+ * unless operand is a Const we can't do much; just assume there will
+ * be one ordinary search entry from each array entry at runtime, and
+ * fall back on a probably-bad estimate of the number of array entries.
+ */
+ if (!IsA(rightop, Const))
+ {
+ counts->exactEntries++;
+ counts->searchEntries++;
+ counts->arrayScans *= estimate_array_length(rightop);
+ return true;
+ }
+
+ /* If Const is null, there can be no matches */
+ if (((Const *) rightop)->constisnull)
+ return false;
+
+ /* Otherwise, extract the array elements and iterate over them */
+ arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
+ get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+ &elmlen, &elmbyval, &elmalign);
+ deconstruct_array(arrayval,
+ ARR_ELEMTYPE(arrayval),
+ elmlen, elmbyval, elmalign,
+ &elemValues, &elemNulls, &numElems);
+
+ memset(&arraycounts, 0, sizeof(arraycounts));
+
+ for (i = 0; i < numElems; i++)
+ {
+ GinQualCounts elemcounts;
+
+ /* NULL can't match anything, so ignore, as the executor will */
+ if (elemNulls[i])
+ continue;
+
+ /* Otherwise, apply extractQuery and get the actual term counts */
+ memset(&elemcounts, 0, sizeof(elemcounts));
+
+ if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
+ &elemcounts))
+ {
+ /* We ignore array elements that are unsatisfiable patterns */
+ numPossible++;
+
+ if (elemcounts.haveFullScan)
+ {
+ /*
+ * Full index scan will be required. We treat this as if
+ * every key in the index had been listed in the query; is
+ * that reasonable?
+ */
+ elemcounts.partialEntries = 0;
+ elemcounts.exactEntries = numIndexEntries;
+ elemcounts.searchEntries = numIndexEntries;
+ }
+ arraycounts.partialEntries += elemcounts.partialEntries;
+ arraycounts.exactEntries += elemcounts.exactEntries;
+ arraycounts.searchEntries += elemcounts.searchEntries;
+ }
+ }
+
+ if (numPossible == 0)
+ {
+ /* No satisfiable patterns in the array */
+ return false;
+ }
+
+ /*
+ * Now add the averages to the global counts. This will give us an
+ * estimate of the average number of terms searched for in each indexscan,
+ * including contributions from both array and non-array quals.
+ */
+ counts->partialEntries += arraycounts.partialEntries / numPossible;
+ counts->exactEntries += arraycounts.exactEntries / numPossible;
+ counts->searchEntries += arraycounts.searchEntries / numPossible;
+
+ counts->arrayScans *= numPossible;
+
+ return true;
+}
+
/*
* GIN has search behavior completely different from other index types
*/
numDataPages,
numPendingPages,
numEntries;
- bool haveFullScan = false;
- double partialEntriesInQuals = 0.0;
- double searchEntriesInQuals = 0.0;
- double exactEntriesInQuals = 0.0;
+ GinQualCounts counts;
+ bool matchPossible;
double entryPagesFetched,
dataPagesFetched,
dataPagesFetchedBySel;
double qual_op_cost,
qual_arg_cost,
spc_random_page_cost,
- num_scans;
+ outer_scans;
QualCost index_qual_cost;
Relation indexRel;
GinStatsData ginStats;
/*
* Examine quals to estimate number of search entries & partial matches
*/
+ memset(&counts, 0, sizeof(counts));
+ counts.arrayScans = 1;
+ matchPossible = true;
+
foreach(l, indexQuals)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
Expr *clause;
- Node *leftop,
- *rightop,
- *operand;
- Oid extractProcOid;
- Oid clause_op;
- int strategy_op;
- Oid lefttype,
- righttype;
- int32 nentries = 0;
- bool *partial_matches = NULL;
- Pointer *extra_data = NULL;
- bool *nullFlags = NULL;
- int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
- int indexcol;
Assert(IsA(rinfo, RestrictInfo));
clause = rinfo->clause;
- Assert(IsA(clause, OpExpr));
- leftop = get_leftop(clause);
- rightop = get_rightop(clause);
- clause_op = ((OpExpr *) clause)->opno;
-
- if ((indexcol = find_index_column(leftop, index)) >= 0)
- {
- operand = rightop;
- }
- else if ((indexcol = find_index_column(rightop, index)) >= 0)
- {
- operand = leftop;
- clause_op = get_commutator(clause_op);
- }
- else
- {
- elog(ERROR, "could not match index to operand");
- operand = NULL; /* keep compiler quiet */
- }
-
- if (IsA(operand, RelabelType))
- operand = (Node *) ((RelabelType *) operand)->arg;
-
- /*
- * It's impossible to call extractQuery method for unknown operand. So
- * unless operand is a Const we can't do much; just assume there will
- * be one ordinary search entry from the operand at runtime.
- */
- if (!IsA(operand, Const))
- {
- searchEntriesInQuals++;
- continue;
- }
-
- /* If Const is null, there can be no matches */
- if (((Const *) operand)->constisnull)
- {
- *indexStartupCost = 0;
- *indexTotalCost = 0;
- *indexSelectivity = 0;
- PG_RETURN_VOID();
- }
-
- /*
- * Get the operator's strategy number and declared input data types
- * within the index opfamily. (We don't need the latter, but we use
- * get_op_opfamily_properties because it will throw error if it fails
- * to find a matching pg_amop entry.)
- */
- get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
- &strategy_op, &lefttype, &righttype);
-
- /*
- * GIN always uses the "default" support functions, which are those
- * with lefttype == righttype == the opclass' opcintype (see
- * IndexSupportInitialize in relcache.c).
- */
- extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
- index->opcintype[indexcol],
- index->opcintype[indexcol],
- GIN_EXTRACTQUERY_PROC);
-
- if (!OidIsValid(extractProcOid))
+ if (IsA(clause, OpExpr))
{
- /* should not happen; throw same error as index_getprocinfo */
- elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
- GIN_EXTRACTQUERY_PROC, indexcol + 1,
- get_rel_name(index->indexoid));
+ matchPossible = gincost_opexpr(index,
+ (OpExpr *) clause,
+ &counts);
+ if (!matchPossible)
+ break;
}
-
- OidFunctionCall7(extractProcOid,
- ((Const *) operand)->constvalue,
- PointerGetDatum(&nentries),
- UInt16GetDatum(strategy_op),
- PointerGetDatum(&partial_matches),
- PointerGetDatum(&extra_data),
- PointerGetDatum(&nullFlags),
- PointerGetDatum(&searchMode));
-
- if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
+ else if (IsA(clause, ScalarArrayOpExpr))
{
- /* No match is possible */
- *indexStartupCost = 0;
- *indexTotalCost = 0;
- *indexSelectivity = 0;
- PG_RETURN_VOID();
+ matchPossible = gincost_scalararrayopexpr(index,
+ (ScalarArrayOpExpr *) clause,
+ numEntries,
+ &counts);
+ if (!matchPossible)
+ break;
}
else
{
- int32 i;
-
- for (i = 0; i < nentries; i++)
- {
- /*
- * For partial match we haven't any information to estimate
- * number of matched entries in index, so, we just estimate it
- * as 100
- */
- if (partial_matches && partial_matches[i])
- partialEntriesInQuals += 100;
- else
- exactEntriesInQuals++;
-
- searchEntriesInQuals++;
- }
+ /* shouldn't be anything else for a GIN index */
+ elog(ERROR, "unsupported GIN indexqual type: %d",
+ (int) nodeTag(clause));
}
+ }
- if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
- {
- /* Treat "include empty" like an exact-match item */
- exactEntriesInQuals++;
- searchEntriesInQuals++;
- }
- else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
- {
- /* It's GIN_SEARCH_MODE_ALL */
- haveFullScan = true;
- }
+ /* Fall out if there were any provably-unsatisfiable quals */
+ if (!matchPossible)
+ {
+ *indexStartupCost = 0;
+ *indexTotalCost = 0;
+ *indexSelectivity = 0;
+ PG_RETURN_VOID();
}
- if (haveFullScan || indexQuals == NIL)
+ if (counts.haveFullScan || indexQuals == NIL)
{
/*
* Full index scan will be required. We treat this as if every key in
* the index had been listed in the query; is that reasonable?
*/
- searchEntriesInQuals = numEntries;
+ counts.partialEntries = 0;
+ counts.exactEntries = numEntries;
+ counts.searchEntries = numEntries;
}
/* Will we have more than one iteration of a nestloop scan? */
if (outer_rel != NULL && outer_rel->rows > 1)
- num_scans = outer_rel->rows;
+ outer_scans = outer_rel->rows;
else
- num_scans = 1;
+ outer_scans = 1;
/*
- * cost to begin scan, first of all, pay attention to pending list.
+ * Compute cost to begin scan, first of all, pay attention to pending list.
*/
entryPagesFetched = numPendingPages;
/*
* Estimate number of entry pages read. We need to do
- * searchEntriesInQuals searches. Use a power function as it should be,
+ * counts.searchEntries searches. Use a power function as it should be,
* but tuples on leaf pages usually is much greater. Here we include all
* searches in entry tree, including search of first entry in partial
* match algorithm
*/
- entryPagesFetched += ceil(searchEntriesInQuals * rint(pow(numEntryPages, 0.15)));
+ entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
/*
* Add an estimate of entry pages read by partial match algorithm. It's a
* scan over leaf pages in entry tree. We haven't any useful stats here,
* so estimate it as proportion.
*/
- entryPagesFetched += ceil(numEntryPages * partialEntriesInQuals / numEntries);
+ entryPagesFetched += ceil(numEntryPages * counts.partialEntries / numEntries);
/*
* Partial match algorithm reads all data pages before doing actual scan,
- * so it's a startup cost. Again, we havn't any useful stats here, so,
+ * so it's a startup cost. Again, we haven't any useful stats here, so,
* estimate it as proportion
*/
- dataPagesFetched = ceil(numDataPages * partialEntriesInQuals / numEntries);
+ dataPagesFetched = ceil(numDataPages * counts.partialEntries / numEntries);
- /* calculate cache effects */
- if (num_scans > 1 || searchEntriesInQuals > 1)
+ /*
+ * Calculate cache effects if more than one scan due to nestloops or array
+ * quals. The result is pro-rated per nestloop scan, but the array qual
+ * factor shouldn't be pro-rated (compare genericcostestimate).
+ */
+ if (outer_scans > 1 || counts.arrayScans > 1)
{
+ entryPagesFetched *= outer_scans * counts.arrayScans;
entryPagesFetched = index_pages_fetched(entryPagesFetched,
(BlockNumber) numEntryPages,
numEntryPages, root);
+ entryPagesFetched /= outer_scans;
+ dataPagesFetched *= outer_scans * counts.arrayScans;
dataPagesFetched = index_pages_fetched(dataPagesFetched,
(BlockNumber) numDataPages,
numDataPages, root);
+ dataPagesFetched /= outer_scans;
}
/*
*/
*indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
- /* cost to scan data pages for each exact (non-partial) matched entry */
- dataPagesFetched = ceil(numDataPages * exactEntriesInQuals / numEntries);
+ /*
+ * Now we compute the number of data pages fetched while the scan proceeds.
+ */
+
+ /* data pages scanned for each exact (non-partial) matched entry */
+ dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
/*
* Estimate number of data pages read, using selectivity estimation and
dataPagesFetched = dataPagesFetchedBySel;
}
- if (num_scans > 1)
+ /* Account for cache effects, the same as above */
+ if (outer_scans > 1 || counts.arrayScans > 1)
+ {
+ dataPagesFetched *= outer_scans * counts.arrayScans;
dataPagesFetched = index_pages_fetched(dataPagesFetched,
(BlockNumber) numDataPages,
numDataPages, root);
+ dataPagesFetched /= outer_scans;
+ }
+
+ /* And apply random_page_cost as the cost per page */
*indexTotalCost = *indexStartupCost +
dataPagesFetched * spc_random_page_cost;