granicus.if.org Git - postgresql/commitdiff
Build out the planner support function infrastructure.
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 9 Feb 2019 23:32:23 +0000 (18:32 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 9 Feb 2019 23:32:23 +0000 (18:32 -0500)
Add support function requests for estimating the selectivity, cost,
and number of result rows (if a SRF) of the target function.

The lack of a way to estimate selectivity of a boolean-returning
function in WHERE has been a recognized deficiency of the planner
since Berkeley days.  This commit finally fixes it.

In addition, non-constant estimates of cost and number of output
rows are now possible.  We still fall back to looking at procost
and prorows if the support function doesn't service the request,
of course.

To make concrete use of the possibility of estimating output rowcount
for SRFs, this commit adds support functions for array_unnest(anyarray)
and the integer variants of generate_series; the lack of plausible
rowcount estimates for those, even when it's obvious to a human,
has been a repeated subject of complaints.  Obviously, much more
could now be done in this line, but I'm mostly just trying to get
the infrastructure in place.

Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us

27 files changed:
contrib/postgres_fdw/postgres_fdw.c
doc/src/sgml/xfunc.sgml
src/backend/optimizer/path/clausesel.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/util/clauses.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/plancat.c
src/backend/utils/adt/arrayfuncs.c
src/backend/utils/adt/int.c
src/backend/utils/adt/int8.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/cache/lsyscache.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.dat
src/include/nodes/nodes.h
src/include/nodes/pathnodes.h
src/include/nodes/supportnodes.h
src/include/optimizer/clauses.h
src/include/optimizer/plancat.h
src/include/utils/lsyscache.h
src/test/regress/expected/misc_functions.out
src/test/regress/expected/subselect.out
src/test/regress/input/create_function_1.source
src/test/regress/output/create_function_1.source
src/test/regress/regress.c
src/test/regress/sql/misc_functions.sql
src/test/regress/sql/subselect.sql

index 994cec50ce88b3a08fa710757e936471ed6e36f7..6b96e7de0a41191712ad9c5953210a8cafcd1395 100644 (file)
@@ -2779,6 +2779,7 @@ estimate_path_cost_size(PlannerInfo *root,
                        startup_cost = ofpinfo->rel_startup_cost;
                        startup_cost += aggcosts.transCost.startup;
                        startup_cost += aggcosts.transCost.per_tuple * input_rows;
+                       startup_cost += aggcosts.finalCost.startup;
                        startup_cost += (cpu_operator_cost * numGroupCols) * input_rows;
 
                        /*-----
@@ -2788,7 +2789,7 @@ estimate_path_cost_size(PlannerInfo *root,
                         *-----
                         */
                        run_cost = ofpinfo->rel_total_cost - ofpinfo->rel_startup_cost;
-                       run_cost += aggcosts.finalCost * numGroups;
+                       run_cost += aggcosts.finalCost.per_tuple * numGroups;
                        run_cost += cpu_tuple_cost * numGroups;
 
                        /* Account for the eval cost of HAVING quals, if any */
index d70aa6eed786e965a465adbb8c3e6ff3307c9576..b486ef391eeb2c6ea8872f5d063e2796898ac1f0 100644 (file)
@@ -3439,4 +3439,25 @@ supportfn(internal) returns internal
     simplify.  Ensure rigorous equivalence between the simplified
     expression and an actual execution of the target function.
    </para>
+
+   <para>
+    For target functions that return boolean, it is often useful to estimate
+    the fraction of rows that will be selected by a WHERE clause using that
+    function.  This can be done by a support function that implements
+    the <literal>SupportRequestSelectivity</literal> request type.
+   </para>
+
+   <para>
+    If the target function's runtime is highly dependent on its inputs,
+    it may be useful to provide a non-constant cost estimate for it.
+    This can be done by a support function that implements
+    the <literal>SupportRequestCost</literal> request type.
+   </para>
+
+   <para>
+    For target functions that return sets, it is often useful to provide
+    a non-constant estimate for the number of rows that will be returned.
+    This can be done by a support function that implements
+    the <literal>SupportRequestRows</literal> request type.
+   </para>
   </sect1>
index abca03b805e0fe8cf408be54273f5d60c29a489c..e8142bddf02dc2696f9378bfcc8df23ec84b88f2 100644 (file)
@@ -762,6 +762,21 @@ clause_selectivity(PlannerInfo *root,
                if (IsA(clause, DistinctExpr))
                        s1 = 1.0 - s1;
        }
+       else if (is_funcclause(clause))
+       {
+               FuncExpr   *funcclause = (FuncExpr *) clause;
+
+               /* Try to get an estimate from the support function, if any */
+               s1 = function_selectivity(root,
+                                                                 funcclause->funcid,
+                                                                 funcclause->args,
+                                                                 funcclause->inputcollid,
+                                                                 treat_as_join_clause(clause, rinfo,
+                                                                                                          varRelid, sjinfo),
+                                                                 varRelid,
+                                                                 jointype,
+                                                                 sjinfo);
+       }
        else if (IsA(clause, ScalarArrayOpExpr))
        {
                /* Use node specific selectivity calculation function */
index 1057ddaa3e5822b93b8ed3ccbd54a3e65f15d8fd..beee50ec1350101f8a4005ed779a95775ec5824f 100644 (file)
@@ -2112,9 +2112,9 @@ cost_agg(Path *path, PlannerInfo *root,
        /*
         * The transCost.per_tuple component of aggcosts should be charged once
         * per input tuple, corresponding to the costs of evaluating the aggregate
-        * transfns and their input expressions (with any startup cost of course
-        * charged but once).  The finalCost component is charged once per output
-        * tuple, corresponding to the costs of evaluating the finalfns.
+        * transfns and their input expressions. The finalCost.per_tuple component
+        * is charged once per output tuple, corresponding to the costs of
+        * evaluating the finalfns.  Startup costs are of course charged but once.
         *
         * If we are grouping, we charge an additional cpu_operator_cost per
         * grouping column per input tuple for grouping comparisons.
@@ -2136,7 +2136,8 @@ cost_agg(Path *path, PlannerInfo *root,
                startup_cost = input_total_cost;
                startup_cost += aggcosts->transCost.startup;
                startup_cost += aggcosts->transCost.per_tuple * input_tuples;
-               startup_cost += aggcosts->finalCost;
+               startup_cost += aggcosts->finalCost.startup;
+               startup_cost += aggcosts->finalCost.per_tuple;
                /* we aren't grouping */
                total_cost = startup_cost + cpu_tuple_cost;
                output_tuples = 1;
@@ -2155,7 +2156,8 @@ cost_agg(Path *path, PlannerInfo *root,
                total_cost += aggcosts->transCost.startup;
                total_cost += aggcosts->transCost.per_tuple * input_tuples;
                total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
-               total_cost += aggcosts->finalCost * numGroups;
+               total_cost += aggcosts->finalCost.startup;
+               total_cost += aggcosts->finalCost.per_tuple * numGroups;
                total_cost += cpu_tuple_cost * numGroups;
                output_tuples = numGroups;
        }
@@ -2168,8 +2170,9 @@ cost_agg(Path *path, PlannerInfo *root,
                startup_cost += aggcosts->transCost.startup;
                startup_cost += aggcosts->transCost.per_tuple * input_tuples;
                startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
+               startup_cost += aggcosts->finalCost.startup;
                total_cost = startup_cost;
-               total_cost += aggcosts->finalCost * numGroups;
+               total_cost += aggcosts->finalCost.per_tuple * numGroups;
                total_cost += cpu_tuple_cost * numGroups;
                output_tuples = numGroups;
        }
@@ -2234,7 +2237,11 @@ cost_windowagg(Path *path, PlannerInfo *root,
                Cost            wfunccost;
                QualCost        argcosts;
 
-               wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost;
+               argcosts.startup = argcosts.per_tuple = 0;
+               add_function_cost(root, wfunc->winfnoid, (Node *) wfunc,
+                                                 &argcosts);
+               startup_cost += argcosts.startup;
+               wfunccost = argcosts.per_tuple;
 
                /* also add the input expressions' cost to per-input-row costs */
                cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
@@ -3864,8 +3871,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
         */
        if (IsA(node, FuncExpr))
        {
-               context->total.per_tuple +=
-                       get_func_cost(((FuncExpr *) node)->funcid) * cpu_operator_cost;
+               add_function_cost(context->root, ((FuncExpr *) node)->funcid, node,
+                                                 &context->total);
        }
        else if (IsA(node, OpExpr) ||
                         IsA(node, DistinctExpr) ||
@@ -3873,8 +3880,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
        {
                /* rely on struct equivalence to treat these all alike */
                set_opfuncid((OpExpr *) node);
-               context->total.per_tuple +=
-                       get_func_cost(((OpExpr *) node)->opfuncid) * cpu_operator_cost;
+               add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node,
+                                                 &context->total);
        }
        else if (IsA(node, ScalarArrayOpExpr))
        {
@@ -3884,10 +3891,15 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
                 */
                ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
                Node       *arraynode = (Node *) lsecond(saop->args);
+               QualCost        sacosts;
 
                set_sa_opfuncid(saop);
-               context->total.per_tuple += get_func_cost(saop->opfuncid) *
-                       cpu_operator_cost * estimate_array_length(arraynode) * 0.5;
+               sacosts.startup = sacosts.per_tuple = 0;
+               add_function_cost(context->root, saop->opfuncid, NULL,
+                                                 &sacosts);
+               context->total.startup += sacosts.startup;
+               context->total.per_tuple += sacosts.per_tuple *
+                       estimate_array_length(arraynode) * 0.5;
        }
        else if (IsA(node, Aggref) ||
                         IsA(node, WindowFunc))
@@ -3913,11 +3925,13 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
                /* check the result type's input function */
                getTypeInputInfo(iocoerce->resulttype,
                                                 &iofunc, &typioparam);
-               context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
+               add_function_cost(context->root, iofunc, NULL,
+                                                 &context->total);
                /* check the input type's output function */
                getTypeOutputInfo(exprType((Node *) iocoerce->arg),
                                                  &iofunc, &typisvarlena);
-               context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
+               add_function_cost(context->root, iofunc, NULL,
+                                                 &context->total);
        }
        else if (IsA(node, ArrayCoerceExpr))
        {
@@ -3941,8 +3955,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
                {
                        Oid                     opid = lfirst_oid(lc);
 
-                       context->total.per_tuple += get_func_cost(get_opcode(opid)) *
-                               cpu_operator_cost;
+                       add_function_cost(context->root, get_opcode(opid), NULL,
+                                                         &context->total);
                }
        }
        else if (IsA(node, MinMaxExpr) ||
@@ -4941,7 +4955,7 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
        foreach(lc, rte->functions)
        {
                RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
-               double          ntup = expression_returns_set_rows(rtfunc->funcexpr);
+               double          ntup = expression_returns_set_rows(root, rtfunc->funcexpr);
 
                if (ntup > rel->tuples)
                        rel->tuples = ntup;
index 002c29a5f5d5c43f7c721eb21f5022038256067d..86e4753a5b380b00cb9b2468db0ed1652f5ebe40 100644 (file)
@@ -36,8 +36,8 @@
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
+#include "optimizer/plancat.h"
 #include "optimizer/planmain.h"
-#include "optimizer/prep.h"
 #include "parser/analyze.h"
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
@@ -343,19 +343,24 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context)
                if (DO_AGGSPLIT_COMBINE(context->aggsplit))
                {
                        /* charge for combining previously aggregated states */
-                       costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
+                       add_function_cost(context->root, aggcombinefn, NULL,
+                                                         &costs->transCost);
                }
                else
-                       costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
+                       add_function_cost(context->root, aggtransfn, NULL,
+                                                         &costs->transCost);
                if (DO_AGGSPLIT_DESERIALIZE(context->aggsplit) &&
                        OidIsValid(aggdeserialfn))
-                       costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
+                       add_function_cost(context->root, aggdeserialfn, NULL,
+                                                         &costs->transCost);
                if (DO_AGGSPLIT_SERIALIZE(context->aggsplit) &&
                        OidIsValid(aggserialfn))
-                       costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
+                       add_function_cost(context->root, aggserialfn, NULL,
+                                                         &costs->finalCost);
                if (!DO_AGGSPLIT_SKIPFINAL(context->aggsplit) &&
                        OidIsValid(aggfinalfn))
-                       costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+                       add_function_cost(context->root, aggfinalfn, NULL,
+                                                         &costs->finalCost);
 
                /*
                 * These costs are incurred only by the initial aggregate node, so we
@@ -392,8 +397,8 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context)
                {
                        cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs,
                                                                context->root);
-                       costs->transCost.startup += argcosts.startup;
-                       costs->finalCost += argcosts.per_tuple;
+                       costs->finalCost.startup += argcosts.startup;
+                       costs->finalCost.per_tuple += argcosts.per_tuple;
                }
 
                /*
@@ -561,7 +566,7 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
  * Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c.
  */
 double
-expression_returns_set_rows(Node *clause)
+expression_returns_set_rows(PlannerInfo *root, Node *clause)
 {
        if (clause == NULL)
                return 1.0;
@@ -570,7 +575,7 @@ expression_returns_set_rows(Node *clause)
                FuncExpr   *expr = (FuncExpr *) clause;
 
                if (expr->funcretset)
-                       return clamp_row_est(get_func_rows(expr->funcid));
+                       return clamp_row_est(get_function_rows(root, expr->funcid, clause));
        }
        if (IsA(clause, OpExpr))
        {
@@ -579,7 +584,7 @@ expression_returns_set_rows(Node *clause)
                if (expr->opretset)
                {
                        set_opfuncid(expr);
-                       return clamp_row_est(get_func_rows(expr->opfuncid));
+                       return clamp_row_est(get_function_rows(root, expr->opfuncid, clause));
                }
        }
        return 1.0;
index a3e64110d36dda913ff9ca7ebe5c7937a544328b..169e51e7921f38e955c36f900e7517275fc9cabf 100644 (file)
@@ -2711,7 +2711,7 @@ create_set_projection_path(PlannerInfo *root,
                Node       *node = (Node *) lfirst(lc);
                double          itemrows;
 
-               itemrows = expression_returns_set_rows(node);
+               itemrows = expression_returns_set_rows(root, node);
                if (tlist_rows < itemrows)
                        tlist_rows = itemrows;
        }
index 3efa1bdc1a4dd783ec90f8f42e4fbdf62e11da76..d6dc83ca8090577cbe83c2f9ae671eb83c72c0b3 100644 (file)
 #include "catalog/heap.h"
 #include "catalog/partition.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_proc.h"
 #include "catalog/pg_statistic_ext.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "nodes/supportnodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
@@ -1772,6 +1774,8 @@ restriction_selectivity(PlannerInfo *root,
  * Returns the selectivity of a specified join operator clause.
  * This code executes registered procedures stored in the
  * operator relation, by calling the function manager.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
  */
 Selectivity
 join_selectivity(PlannerInfo *root,
@@ -1805,6 +1809,184 @@ join_selectivity(PlannerInfo *root,
        return (Selectivity) result;
 }
 
+/*
+ * function_selectivity
+ *
+ * Returns the selectivity of a specified boolean function clause.
+ * This code executes registered procedures stored in the
+ * pg_proc relation, by calling the function manager.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
+ */
+Selectivity
+function_selectivity(PlannerInfo *root,
+                                        Oid funcid,
+                                        List *args,
+                                        Oid inputcollid,
+                                        bool is_join,
+                                        int varRelid,
+                                        JoinType jointype,
+                                        SpecialJoinInfo *sjinfo)
+{
+       RegProcedure prosupport = get_func_support(funcid);
+       SupportRequestSelectivity req;
+       SupportRequestSelectivity *sresult;
+
+       /*
+        * If no support function is provided, use our historical default
+        * estimate, 0.3333333.  This seems a pretty unprincipled choice, but
+        * Postgres has been using that estimate for function calls since 1992.
+        * The hoariness of this behavior suggests that we should not be in too
+        * much hurry to use another value.
+        */
+       if (!prosupport)
+               return (Selectivity) 0.3333333;
+
+       req.type = T_SupportRequestSelectivity;
+       req.root = root;
+       req.funcid = funcid;
+       req.args = args;
+       req.inputcollid = inputcollid;
+       req.is_join = is_join;
+       req.varRelid = varRelid;
+       req.jointype = jointype;
+       req.sjinfo = sjinfo;
+       req.selectivity = -1;           /* to catch failure to set the value */
+
+       sresult = (SupportRequestSelectivity *)
+               DatumGetPointer(OidFunctionCall1(prosupport,
+                                                                                PointerGetDatum(&req)));
+
+       /* If support function fails, use default */
+       if (sresult != &req)
+               return (Selectivity) 0.3333333;
+
+       if (req.selectivity < 0.0 || req.selectivity > 1.0)
+               elog(ERROR, "invalid function selectivity: %f", req.selectivity);
+
+       return (Selectivity) req.selectivity;
+}
+
+/*
+ * add_function_cost
+ *
+ * Get an estimate of the execution cost of a function, and *add* it to
+ * the contents of *cost.  The estimate may include both one-time and
+ * per-tuple components, since QualCost does.
+ *
+ * The funcid must always be supplied.  If it is being called as the
+ * implementation of a specific parsetree node (FuncExpr, OpExpr,
+ * WindowFunc, etc), pass that as "node", else pass NULL.
+ *
+ * In some usages root might be NULL, too.
+ */
+void
+add_function_cost(PlannerInfo *root, Oid funcid, Node *node,
+                                 QualCost *cost)
+{
+       HeapTuple       proctup;
+       Form_pg_proc procform;
+
+       proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+       if (!HeapTupleIsValid(proctup))
+               elog(ERROR, "cache lookup failed for function %u", funcid);
+       procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+       if (OidIsValid(procform->prosupport))
+       {
+               SupportRequestCost req;
+               SupportRequestCost *sresult;
+
+               req.type = T_SupportRequestCost;
+               req.root = root;
+               req.funcid = funcid;
+               req.node = node;
+
+               /* Initialize cost fields so that support function doesn't have to */
+               req.startup = 0;
+               req.per_tuple = 0;
+
+               sresult = (SupportRequestCost *)
+                       DatumGetPointer(OidFunctionCall1(procform->prosupport,
+                                                                                        PointerGetDatum(&req)));
+
+               if (sresult == &req)
+               {
+                       /* Success, so accumulate support function's estimate into *cost */
+                       cost->startup += req.startup;
+                       cost->per_tuple += req.per_tuple;
+                       ReleaseSysCache(proctup);
+                       return;
+               }
+       }
+
+       /* No support function, or it failed, so rely on procost */
+       cost->per_tuple += procform->procost * cpu_operator_cost;
+
+       ReleaseSysCache(proctup);
+}
+
+/*
+ * get_function_rows
+ *
+ * Get an estimate of the number of rows returned by a set-returning function.
+ *
+ * The funcid must always be supplied.  In current usage, the calling node
+ * will always be supplied, and will be either a FuncExpr or OpExpr.
+ * But it's a good idea to not fail if it's NULL.
+ *
+ * In some usages root might be NULL, too.
+ *
+ * Note: this returns the unfiltered result of the support function, if any.
+ * It's usually a good idea to apply clamp_row_est() to the result, but we
+ * leave it to the caller to do so.
+ */
+double
+get_function_rows(PlannerInfo *root, Oid funcid, Node *node)
+{
+       HeapTuple       proctup;
+       Form_pg_proc procform;
+       double          result;
+
+       proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+       if (!HeapTupleIsValid(proctup))
+               elog(ERROR, "cache lookup failed for function %u", funcid);
+       procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+       Assert(procform->proretset);    /* else caller error */
+
+       if (OidIsValid(procform->prosupport))
+       {
+               SupportRequestRows req;
+               SupportRequestRows *sresult;
+
+               req.type = T_SupportRequestRows;
+               req.root = root;
+               req.funcid = funcid;
+               req.node = node;
+
+               req.rows = 0;                   /* just for sanity */
+
+               sresult = (SupportRequestRows *)
+                       DatumGetPointer(OidFunctionCall1(procform->prosupport,
+                                                                                        PointerGetDatum(&req)));
+
+               if (sresult == &req)
+               {
+                       /* Success */
+                       ReleaseSysCache(proctup);
+                       return req.rows;
+               }
+       }
+
+       /* No support function, or it failed, so rely on prorows */
+       result = procform->prorows;
+
+       ReleaseSysCache(proctup);
+
+       return result;
+}
+
 /*
  * has_unique_index
  *
index a785361fd07fa90f913b06c2f90531767d86c29e..5b2917d1594d330950524ce2d88eff27c938c433 100644 (file)
 #include "catalog/pg_type.h"
 #include "funcapi.h"
 #include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
 #include "utils/array.h"
 #include "utils/arrayaccess.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "utils/selfuncs.h"
 #include "utils/typcache.h"
 
 
@@ -6025,6 +6029,36 @@ array_unnest(PG_FUNCTION_ARGS)
        }
 }
 
+/*
+ * Planner support function for array_unnest(anyarray)
+ */
+Datum
+array_unnest_support(PG_FUNCTION_ARGS)
+{
+       Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+       Node       *ret = NULL;
+
+       if (IsA(rawreq, SupportRequestRows))
+       {
+               /* Try to estimate the number of rows returned */
+               SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+               if (is_funcclause(req->node))   /* be paranoid */
+               {
+                       List       *args = ((FuncExpr *) req->node)->args;
+                       Node       *arg1;
+
+                       /* We can use estimated argument values here */
+                       arg1 = estimate_expression_value(req->root, linitial(args));
+
+                       req->rows = estimate_array_length(arg1);
+                       ret = (Node *) req;
+               }
+       }
+
+       PG_RETURN_POINTER(ret);
+}
+
 
 /*
  * array_replace/array_remove support
index ad8e6d02ee497eb84cdf759c19961b180026ea63..04825fc77de1d03637ba705e004bef6e7fed8c2e 100644 (file)
 
 #include <ctype.h>
 #include <limits.h>
+#include <math.h>
 
 #include "catalog/pg_type.h"
 #include "common/int.h"
 #include "funcapi.h"
 #include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 
@@ -1427,3 +1431,73 @@ generate_series_step_int4(PG_FUNCTION_ARGS)
                /* do when there is no more left */
                SRF_RETURN_DONE(funcctx);
 }
+
+/*
+ * Planner support function for generate_series(int4, int4 [, int4])
+ */
+Datum
+generate_series_int4_support(PG_FUNCTION_ARGS)
+{
+       Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+       Node       *ret = NULL;
+
+       if (IsA(rawreq, SupportRequestRows))
+       {
+               /* Try to estimate the number of rows returned */
+               SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+               if (is_funcclause(req->node))   /* be paranoid */
+               {
+                       List       *args = ((FuncExpr *) req->node)->args;
+                       Node       *arg1,
+                                          *arg2,
+                                          *arg3;
+
+                       /* We can use estimated argument values here */
+                       arg1 = estimate_expression_value(req->root, linitial(args));
+                       arg2 = estimate_expression_value(req->root, lsecond(args));
+                       if (list_length(args) >= 3)
+                               arg3 = estimate_expression_value(req->root, lthird(args));
+                       else
+                               arg3 = NULL;
+
+                       /*
+                        * If any argument is constant NULL, we can safely assume that
+                        * zero rows are returned.  Otherwise, if they're all non-NULL
+                        * constants, we can calculate the number of rows that will be
+                        * returned.  Use double arithmetic to avoid overflow hazards.
+                        */
+                       if ((IsA(arg1, Const) &&
+                                ((Const *) arg1)->constisnull) ||
+                               (IsA(arg2, Const) &&
+                                ((Const *) arg2)->constisnull) ||
+                               (arg3 != NULL && IsA(arg3, Const) &&
+                                ((Const *) arg3)->constisnull))
+                       {
+                               req->rows = 0;
+                               ret = (Node *) req;
+                       }
+                       else if (IsA(arg1, Const) &&
+                                        IsA(arg2, Const) &&
+                                        (arg3 == NULL || IsA(arg3, Const)))
+                       {
+                               double          start,
+                                                       finish,
+                                                       step;
+
+                               start = DatumGetInt32(((Const *) arg1)->constvalue);
+                               finish = DatumGetInt32(((Const *) arg2)->constvalue);
+                               step = arg3 ? DatumGetInt32(((Const *) arg3)->constvalue) : 1;
+
+                               /* This equation works for either sign of step */
+                               if (step != 0)
+                               {
+                                       req->rows = floor((finish - start + step) / step);
+                                       ret = (Node *) req;
+                               }
+                       }
+               }
+       }
+
+       PG_RETURN_POINTER(ret);
+}
index d16cc9e574ba1f139a7c4d09770ec0455639caf3..0ff9394a2fb037ff6ac532dbcd149896e04f414b 100644 (file)
@@ -20,6 +20,9 @@
 #include "common/int.h"
 #include "funcapi.h"
 #include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
 #include "utils/int8.h"
 #include "utils/builtins.h"
 
@@ -1373,3 +1376,73 @@ generate_series_step_int8(PG_FUNCTION_ARGS)
                /* do when there is no more left */
                SRF_RETURN_DONE(funcctx);
 }
+
+/*
+ * Planner support function for generate_series(int8, int8 [, int8])
+ *
+ * Only SupportRequestRows is serviced.  If a row count can be derived from
+ * the (estimated) argument values, it is stored into the request's "rows"
+ * field and the request node is returned.  For any other request type, or
+ * when no estimate can be made, NULL is returned.
+ */
+Datum
+generate_series_int8_support(PG_FUNCTION_ARGS)
+{
+       Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+       Node       *ret = NULL;
+
+       if (IsA(rawreq, SupportRequestRows))
+       {
+               /* Try to estimate the number of rows returned */
+               SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+               if (is_funcclause(req->node))   /* be paranoid */
+               {
+                       List       *args = ((FuncExpr *) req->node)->args;
+                       Node       *arg1,
+                                          *arg2,
+                                          *arg3;
+
+                       /* We can use estimated argument values here */
+                       arg1 = estimate_expression_value(req->root, linitial(args));
+                       arg2 = estimate_expression_value(req->root, lsecond(args));
+                       if (list_length(args) >= 3)
+                               arg3 = estimate_expression_value(req->root, lthird(args));
+                       else
+                               arg3 = NULL;    /* two-argument form: step defaults to 1 */
+
+                       /*
+                        * If any argument is constant NULL, we can safely assume that
+                        * zero rows are returned.  Otherwise, if they're all non-NULL
+                        * constants, we can calculate the number of rows that will be
+                        * returned.  Use double arithmetic to avoid overflow hazards.
+                        * If some argument is not a constant, we make no estimate.
+                        */
+                       if ((IsA(arg1, Const) &&
+                                ((Const *) arg1)->constisnull) ||
+                               (IsA(arg2, Const) &&
+                                ((Const *) arg2)->constisnull) ||
+                               (arg3 != NULL && IsA(arg3, Const) &&
+                                ((Const *) arg3)->constisnull))
+                       {
+                               req->rows = 0;
+                               ret = (Node *) req;
+                       }
+                       else if (IsA(arg1, Const) &&
+                                        IsA(arg2, Const) &&
+                                        (arg3 == NULL || IsA(arg3, Const)))
+                       {
+                               double          start,
+                                                       finish,
+                                                       step;
+
+                               start = DatumGetInt64(((Const *) arg1)->constvalue);
+                               finish = DatumGetInt64(((Const *) arg2)->constvalue);
+                               step = arg3 ? DatumGetInt64(((Const *) arg3)->constvalue) : 1;
+
+                               /*
+                                * This equation works for either sign of step.  A zero
+                                * step is left unestimated (ret stays NULL).
+                                */
+                               if (step != 0)
+                               {
+                                       double          nrows = floor((finish - start + step) / step);
+
+                                       /*
+                                        * The quotient is negative when finish lies on the
+                                        * wrong side of start for this step's sign, i.e. the
+                                        * series is empty.  Report zero rows in that case
+                                        * rather than a negative row count.
+                                        */
+                                       req->rows = (nrows > 0) ? nrows : 0;
+                                       ret = (Node *) req;
+                               }
+                       }
+               }
+       }
+
+       PG_RETURN_POINTER(ret);
+}
index 74fafc64f3ec16d49c08aca3a3fed48d242fe0d8..1ef6faecd1eed7ea0c0f245dd9d7305f0790ea14 100644 (file)
@@ -1577,17 +1577,6 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
                selec = var_eq_const(&vardata, BooleanEqualOperator,
                                                         BoolGetDatum(true), false, true, false);
        }
-       else if (is_funcclause(arg))
-       {
-               /*
-                * If we have no stats and it's a function call, estimate 0.3333333.
-                * This seems a pretty unprincipled choice, but Postgres has been
-                * using that estimate for function calls since 1992.  The hoariness
-                * of this behavior suggests that we should not be in too much hurry
-                * to use another value.
-                */
-               selec = 0.3333333;
-       }
        else
        {
                /* Otherwise, the default estimate is 0.5 */
@@ -3502,7 +3491,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                 * pointless to worry too much about this without much better
                 * estimates for SRF output rowcounts than we have today.)
                 */
-               this_srf_multiplier = expression_returns_set_rows(groupexpr);
+               this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
                if (srf_multiplier < this_srf_multiplier)
                        srf_multiplier = this_srf_multiplier;
 
index fba0ee8b847b7ae1d7ec527f1dd6e46e004bbab6..e88c45d268a5cf933f0d59ccdc513c66f250f515 100644 (file)
@@ -1605,41 +1605,28 @@ get_func_leakproof(Oid funcid)
 }
 
 /*
- * get_func_cost
- *             Given procedure id, return the function's procost field.
- */
-float4
-get_func_cost(Oid funcid)
-{
-       HeapTuple       tp;
-       float4          result;
-
-       tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
-       if (!HeapTupleIsValid(tp))
-               elog(ERROR, "cache lookup failed for function %u", funcid);
-
-       result = ((Form_pg_proc) GETSTRUCT(tp))->procost;
-       ReleaseSysCache(tp);
-       return result;
-}
-
-/*
- * get_func_rows
- *             Given procedure id, return the function's prorows field.
+ * get_func_support
+ *
+ *             Returns the support function OID associated with a given function,
+ *             or InvalidOid if there is none (or if the function isn't found).
  */
-float4
-get_func_rows(Oid funcid)
+RegProcedure
+get_func_support(Oid funcid)
 {
        HeapTuple       tp;
-       float4          result;
 
        tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
-       if (!HeapTupleIsValid(tp))
-               elog(ERROR, "cache lookup failed for function %u", funcid);
+       if (HeapTupleIsValid(tp))
+       {
+               Form_pg_proc functup = (Form_pg_proc) GETSTRUCT(tp);
+               RegProcedure result;
 
-       result = ((Form_pg_proc) GETSTRUCT(tp))->prorows;
-       ReleaseSysCache(tp);
-       return result;
+               result = functup->prosupport;
+               ReleaseSysCache(tp);
+               return result;
+       }
+       else
+               return (RegProcedure) InvalidOid;
 }
 
 /*                             ---------- RELATION CACHE ----------                                     */
index 9233fb934e8a0dc27c8d0599631173985e00cf76..968a6800ddc51e9084b8f5d8051ea64d5868af7b 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201902091
+#define CATALOG_VERSION_NO     201902092
 
 #endif
index 1f5352ce5439e904aad86849c5734a768843cd5c..50b742c06e60b213d0e1bb7f1cfa4295c1853d1d 100644 (file)
   proargtypes => 'anyelement _int4 _int4',
   prosrc => 'array_fill_with_lower_bounds' },
 { oid => '2331', descr => 'expand array to set of rows',
-  proname => 'unnest', prorows => '100', proretset => 't',
-  prorettype => 'anyelement', proargtypes => 'anyarray',
+  proname => 'unnest', prorows => '100', prosupport => 'array_unnest_support',
+  proretset => 't', prorettype => 'anyelement', proargtypes => 'anyarray',
   prosrc => 'array_unnest' },
+{ oid => '3996', descr => 'planner support for array_unnest',
+  proname => 'array_unnest_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'array_unnest_support' },
 { oid => '3167',
   descr => 'remove any occurrences of an element from an array',
   proname => 'array_remove', proisstrict => 'f', prorettype => 'anyarray',
 
 # non-persistent series generator
 { oid => '1066', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_int4_support', proretset => 't',
   prorettype => 'int4', proargtypes => 'int4 int4 int4',
   prosrc => 'generate_series_step_int4' },
 { oid => '1067', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_int4_support', proretset => 't',
   prorettype => 'int4', proargtypes => 'int4 int4',
   prosrc => 'generate_series_int4' },
+{ oid => '3994', descr => 'planner support for generate_series',
+  proname => 'generate_series_int4_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'generate_series_int4_support' },
 { oid => '1068', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_int8_support', proretset => 't',
   prorettype => 'int8', proargtypes => 'int8 int8 int8',
   prosrc => 'generate_series_step_int8' },
 { oid => '1069', descr => 'non-persistent series generator',
-  proname => 'generate_series', prorows => '1000', proretset => 't',
+  proname => 'generate_series', prorows => '1000',
+  prosupport => 'generate_series_int8_support', proretset => 't',
   prorettype => 'int8', proargtypes => 'int8 int8',
   prosrc => 'generate_series_int8' },
+{ oid => '3995', descr => 'planner support for generate_series',
+  proname => 'generate_series_int8_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'generate_series_int8_support' },
 { oid => '3259', descr => 'non-persistent series generator',
   proname => 'generate_series', prorows => '1000', proretset => 't',
   prorettype => 'numeric', proargtypes => 'numeric numeric numeric',
index 0d2d1889e9d5baa427ecd13b73ff7e242624e9f8..453079a9e268caf7043cf4ae4689a2bbd18a3418 100644 (file)
@@ -507,7 +507,10 @@ typedef enum NodeTag
        T_TsmRoutine,                           /* in access/tsmapi.h */
        T_ForeignKeyCacheInfo,          /* in utils/rel.h */
        T_CallContext,                          /* in nodes/parsenodes.h */
-       T_SupportRequestSimplify        /* in nodes/supportnodes.h */
+       T_SupportRequestSimplify,       /* in nodes/supportnodes.h */
+       T_SupportRequestSelectivity,    /* in nodes/supportnodes.h */
+       T_SupportRequestCost,           /* in nodes/supportnodes.h */
+       T_SupportRequestRows            /* in nodes/supportnodes.h */
 } NodeTag;
 
 /*
index 0b780b07c1266a318dff8943079530d3ebabd41e..c23c4304f375660596e487668ff824c724f78b27 100644 (file)
@@ -61,7 +61,7 @@ typedef struct AggClauseCosts
        bool            hasNonPartial;  /* does any agg not support partial mode? */
        bool            hasNonSerial;   /* is any partial agg non-serializable? */
        QualCost        transCost;              /* total per-input-row execution costs */
-       Cost            finalCost;              /* total per-aggregated-row costs */
+       QualCost        finalCost;              /* total per-aggregated-row costs */
        Size            transitionSpace;        /* space for pass-by-ref transition data */
 } AggClauseCosts;
 
index 1f7d02b5ee2637419882580ae76d207477875761..1a3a36ba99ca0b50af474969e656c1ef972541a4 100644 (file)
@@ -36,6 +36,7 @@
 #include "nodes/primnodes.h"
 
 struct PlannerInfo;                            /* avoid including relation.h here */
+struct SpecialJoinInfo;
 
 
 /*
@@ -67,4 +68,103 @@ typedef struct SupportRequestSimplify
        FuncExpr   *fcall;                      /* Function call to be simplified */
 } SupportRequestSimplify;
 
+/*
+ * The Selectivity request allows the support function to provide a
+ * selectivity estimate for a function appearing at top level of a WHERE
+ * clause (so it applies only to functions returning boolean).
+ *
+ * The input arguments are the same as are supplied to operator restriction
+ * and join estimators, except that we unify those two APIs into just one
+ * request type.  See clause_selectivity() for the details.
+ *
+ * If an estimate can be made, store it into the "selectivity" field and
+ * return the address of the SupportRequestSelectivity node; the estimate
+ * must be between 0 and 1 inclusive.  Return NULL if no estimate can be
+ * made (in which case the planner will fall back to a default estimate,
+ * traditionally 1/3).
+ *
+ * If the target function is being used as the implementation of an operator,
+ * the support function will not be used for this purpose; the operator's
+ * restriction or join estimator is consulted instead.
+ */
+typedef struct SupportRequestSelectivity
+{
+       NodeTag         type;
+
+       /* Input fields: */
+       struct PlannerInfo *root;       /* Planner's infrastructure */
+       Oid                     funcid;                 /* function we are inquiring about */
+       List       *args;                       /* pre-simplified arguments to function */
+       Oid                     inputcollid;    /* function's input collation */
+       bool            is_join;                /* true if join case, false if restriction */
+       int                     varRelid;               /* if restriction, RTI of target relation */
+       JoinType        jointype;               /* if join, outer join type */
+       struct SpecialJoinInfo *sjinfo; /* if outer join, info about join */
+
+       /* Output fields: */
+       Selectivity selectivity;        /* returned estimate, between 0 and 1 */
+} SupportRequestSelectivity;
+
+/*
+ * The Cost request allows the support function to provide an execution
+ * cost estimate for its target function.  The cost estimate can include
+ * both a one-time (query startup) component and a per-execution component.
+ * The estimate should *not* include the costs of evaluating the target
+ * function's arguments, only the target function itself.
+ *
+ * The "node" argument is normally the parse node that is invoking the
+ * target function.  This is a FuncExpr in the simplest case, but it could
+ * also be an OpExpr, DistinctExpr, NullIfExpr, or WindowFunc, or possibly
+ * other cases in future.  NULL is passed if the function cannot presume
+ * its arguments to be equivalent to what the calling node presents as
+ * arguments; that happens for, e.g., aggregate support functions and
+ * per-column comparison operators used by RowExprs.
+ *
+ * If an estimate can be made, store it into the cost fields and return the
+ * address of the SupportRequestCost node.  Return NULL if no estimate can be
+ * made, in which case the planner will rely on the target function's procost
+ * field.  (Note: while procost is automatically scaled by cpu_operator_cost,
+ * this is not the case for the outputs of the Cost request; the support
+ * function must scale its results appropriately on its own.)
+ */
+typedef struct SupportRequestCost
+{
+       NodeTag         type;
+
+       /* Input fields: */
+       struct PlannerInfo *root;       /* Planner's infrastructure (could be NULL) */
+       Oid                     funcid;                 /* function we are inquiring about */
+       Node       *node;                       /* parse node invoking function, or NULL */
+
+       /* Output fields (not automatically scaled by cpu_operator_cost): */
+       Cost            startup;                /* one-time (query startup) cost */
+       Cost            per_tuple;              /* cost per execution of the function */
+} SupportRequestCost;
+
+/*
+ * The Rows request allows the support function to provide an output rowcount
+ * estimate for its target function (so it applies only to set-returning
+ * functions).
+ *
+ * The "node" argument is the parse node that is invoking the target function;
+ * currently this will always be a FuncExpr or OpExpr.
+ *
+ * If an estimate can be made, store it into the rows field and return the
+ * address of the SupportRequestRows node.  Return NULL if no estimate can be
+ * made, in which case the planner will rely on the target function's prorows
+ * field.
+ */
+typedef struct SupportRequestRows
+{
+       NodeTag         type;
+
+       /* Input fields: */
+       struct PlannerInfo *root;       /* Planner's infrastructure (could be NULL) */
+       Oid                     funcid;                 /* function we are inquiring about */
+       Node       *node;                       /* invoking parse node (FuncExpr or OpExpr) */
+
+       /* Output fields: */
+       double          rows;                   /* estimated output rowcount, replaces prorows */
+} SupportRequestRows;
+
 #endif                                                 /* SUPPORTNODES_H */
index 95a78cfa393be43847dc160ba1cd9d88c6ede39c..5e10fb1d5072ab0406b749df8f7555f3a6fc0809 100644 (file)
@@ -31,7 +31,7 @@ extern void get_agg_clause_costs(PlannerInfo *root, Node *clause,
 extern bool contain_window_function(Node *clause);
 extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);
 
-extern double expression_returns_set_rows(Node *clause);
+extern double expression_returns_set_rows(PlannerInfo *root, Node *clause);
 
 extern bool contain_subplans(Node *clause);
 
index 40f70f9f2b1faafeb0a5ff34646750d550f358fe..c337f047cb753b2dfbc57378f46b7958aeaa3dc6 100644 (file)
@@ -55,6 +55,20 @@ extern Selectivity join_selectivity(PlannerInfo *root,
                                 JoinType jointype,
                                 SpecialJoinInfo *sjinfo);
 
+extern Selectivity function_selectivity(PlannerInfo *root,
+                                        Oid funcid,
+                                        List *args,
+                                        Oid inputcollid,
+                                        bool is_join,
+                                        int varRelid,
+                                        JoinType jointype,
+                                        SpecialJoinInfo *sjinfo);
+
+extern void add_function_cost(PlannerInfo *root, Oid funcid, Node *node,
+                                 QualCost *cost);
+
+extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
+
 extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
 
 #endif                                                 /* PLANCAT_H */
index ceec85db9254b1bf942b8a2d5362e150cf0d96e4..16b0b1d2dccaee89002974f42744514c9d3d5f31 100644 (file)
@@ -120,8 +120,7 @@ extern char func_volatile(Oid funcid);
 extern char func_parallel(Oid funcid);
 extern char get_func_prokind(Oid funcid);
 extern bool get_func_leakproof(Oid funcid);
-extern float4 get_func_cost(Oid funcid);
-extern float4 get_func_rows(Oid funcid);
+extern RegProcedure get_func_support(Oid funcid);
 extern Oid     get_relname_relid(const char *relname, Oid relnamespace);
 extern char *get_rel_name(Oid relid);
 extern Oid     get_rel_namespace(Oid relid);
index 130a0e4be3abf5d2ca3f7500fbb5b33305804f91..0879c885eb33e13f42612ac0fc0a22a675a5c429 100644 (file)
@@ -133,3 +133,63 @@ ERROR:  function num_nulls() does not exist
 LINE 1: SELECT num_nulls();
                ^
 HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
+--
+-- Test adding a support function to a subject function
+--
+CREATE FUNCTION my_int_eq(int, int) RETURNS bool
+  LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
+  AS $$int4eq$$;
+-- By default, planner does not think that's selective
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
+WHERE my_int_eq(a.unique2, 42);
+                  QUERY PLAN                  
+----------------------------------------------
+ Hash Join
+   Hash Cond: (b.unique1 = a.unique1)
+   ->  Seq Scan on tenk1 b
+   ->  Hash
+         ->  Seq Scan on tenk1 a
+               Filter: my_int_eq(unique2, 42)
+(6 rows)
+
+-- With support function that knows it's int4eq, we get a different plan
+ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func;
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
+WHERE my_int_eq(a.unique2, 42);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on tenk1 a
+         Filter: my_int_eq(unique2, 42)
+   ->  Index Scan using tenk1_unique1 on tenk1 b
+         Index Cond: (unique1 = a.unique1)
+(5 rows)
+
+-- Also test non-default rowcount estimate
+CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
+  LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
+  AS $$generate_series_int4$$
+  SUPPORT test_support_func;
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
+               QUERY PLAN               
+----------------------------------------
+ Hash Join
+   Hash Cond: (g.g = a.unique1)
+   ->  Function Scan on my_gen_series g
+   ->  Hash
+         ->  Seq Scan on tenk1 a
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop
+   ->  Function Scan on my_gen_series g
+   ->  Index Scan using tenk1_unique1 on tenk1 a
+         Index Cond: (unique1 = g.g)
+(4 rows)
+
index a54b4a5a7c8154d49e09186f2ab5e7d1963fd4f0..6e238e88b37956e0688bae07c1c51f9fb3d6ade3 100644 (file)
@@ -904,7 +904,7 @@ select * from int4_tbl where
 --
 explain (verbose, costs off)
 select * from int4_tbl o where (f1, f1) in
-  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+  (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
                             QUERY PLAN                             
 -------------------------------------------------------------------
  Nested Loop Semi Join
@@ -918,9 +918,9 @@ select * from int4_tbl o where (f1, f1) in
                Output: "ANY_subquery".f1, "ANY_subquery".g
                Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
                ->  Result
-                     Output: i.f1, ((generate_series(1, 2)) / 10)
+                     Output: i.f1, ((generate_series(1, 50)) / 10)
                      ->  ProjectSet
-                           Output: generate_series(1, 2), i.f1
+                           Output: generate_series(1, 50), i.f1
                            ->  HashAggregate
                                  Output: i.f1
                                  Group Key: i.f1
@@ -929,7 +929,7 @@ select * from int4_tbl o where (f1, f1) in
 (19 rows)
 
 select * from int4_tbl o where (f1, f1) in
-  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+  (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
  f1 
 ----
   0
index 26e2227d3afed45cd28622b424a2b77643d90375..223454a5eabc6fcfcf960a1654e86906d5325fe5 100644 (file)
@@ -68,6 +68,11 @@ CREATE FUNCTION test_fdw_handler()
     AS '@libdir@/regress@DLSUFFIX@', 'test_fdw_handler'
     LANGUAGE C;
 
+CREATE FUNCTION test_support_func(internal)
+    RETURNS internal
+    AS '@libdir@/regress@DLSUFFIX@', 'test_support_func'
+    LANGUAGE C STRICT;
+
 -- Things that shouldn't work:
 
 CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
index 8c50d9b30992b347f49165d41c9c5b5012fd1afc..5f43e8de81ffea50061742695eefa83ba1fcbb4e 100644 (file)
@@ -60,6 +60,10 @@ CREATE FUNCTION test_fdw_handler()
     RETURNS fdw_handler
     AS '@libdir@/regress@DLSUFFIX@', 'test_fdw_handler'
     LANGUAGE C;
+CREATE FUNCTION test_support_func(internal)
+    RETURNS internal
+    AS '@libdir@/regress@DLSUFFIX@', 'test_support_func'
+    LANGUAGE C STRICT;
 -- Things that shouldn't work:
 CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
     AS 'SELECT ''not an integer'';';
index 70727286ca5616f2efd214233ec951fc2515432b..ad3e8038993e3f8c6baa016520db412755ff7979 100644 (file)
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "access/xact.h"
+#include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "commands/sequence.h"
 #include "commands/trigger.h"
 #include "executor/executor.h"
 #include "executor/spi.h"
 #include "miscadmin.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/plancat.h"
 #include "port/atomics.h"
 #include "utils/builtins.h"
 #include "utils/geo_decls.h"
@@ -863,3 +867,76 @@ test_fdw_handler(PG_FUNCTION_ARGS)
        elog(ERROR, "test_fdw_handler is not implemented");
        PG_RETURN_NULL();
 }
+
+/*
+ * Planner support function used by the regression tests.
+ *
+ * Selectivity requests are answered as though the target function were
+ * int4eq, Cost requests get a fixed generic estimate, and Rows requests are
+ * answered as though the target function were generate_series_int4.  The
+ * request node is returned if an estimate was stored into it; otherwise
+ * (unknown request type, or no estimate possible) NULL is returned.
+ */
+PG_FUNCTION_INFO_V1(test_support_func);
+Datum
+test_support_func(PG_FUNCTION_ARGS)
+{
+       Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+       Node       *ret = NULL;
+
+       if (IsA(rawreq, SupportRequestSelectivity))
+       {
+               /*
+                * Assume that the target is int4eq; that's safe as long as we don't
+                * attach this to any other boolean-returning function.
+                */
+               SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
+               Selectivity s1;
+
+               /* Delegate to the estimator the "=" operator itself would use */
+               if (req->is_join)
+                       s1 = join_selectivity(req->root, Int4EqualOperator,
+                                                                 req->args,
+                                                                 req->inputcollid,
+                                                                 req->jointype,
+                                                                 req->sjinfo);
+               else
+                       s1 = restriction_selectivity(req->root, Int4EqualOperator,
+                                                                                req->args,
+                                                                                req->inputcollid,
+                                                                                req->varRelid);
+
+               req->selectivity = s1;
+               ret = (Node *) req;
+       }
+
+       if (IsA(rawreq, SupportRequestCost))
+       {
+               /* Provide some generic estimate */
+               SupportRequestCost *req = (SupportRequestCost *) rawreq;
+
+               /* Cost outputs are not auto-scaled, so apply cpu_operator_cost here */
+               req->startup = 0;
+               req->per_tuple = 2 * cpu_operator_cost;
+               ret = (Node *) req;
+       }
+
+       if (IsA(rawreq, SupportRequestRows))
+       {
+               /*
+                * Assume that the target is generate_series_int4; that's safe as long
+                * as we don't attach this to any other set-returning function.
+                */
+               SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+               if (req->node && IsA(req->node, FuncExpr))      /* be paranoid */
+               {
+                       List       *args = ((FuncExpr *) req->node)->args;
+                       Node       *arg1 = linitial(args);
+                       Node       *arg2 = lsecond(args);
+
+                       /* Only estimate when both bounds are non-NULL constants */
+                       if (IsA(arg1, Const) &&
+                               !((Const *) arg1)->constisnull &&
+                               IsA(arg2, Const) &&
+                               !((Const *) arg2)->constisnull)
+                       {
+                               /*
+                                * Do the arithmetic in double: the difference of two
+                                * int32 values can overflow int32.
+                                */
+                               double          val1 = DatumGetInt32(((Const *) arg1)->constvalue);
+                               double          val2 = DatumGetInt32(((Const *) arg2)->constvalue);
+                               double          nrows = val2 - val1 + 1;
+
+                               /* An empty range (val2 < val1) yields no rows, not negative */
+                               req->rows = (nrows > 0) ? nrows : 0;
+                               ret = (Node *) req;
+                       }
+               }
+       }
+
+       PG_RETURN_POINTER(ret);
+}
index 1a20c1f76527f9b06ea86f839bc04fa4382a9cec..7a71f7659ce85499c2c0914ca97e1ef1a83e6c64 100644 (file)
@@ -29,3 +29,35 @@ SELECT num_nulls(VARIADIC '{}'::int[]);
 -- should fail, one or more arguments is required
 SELECT num_nonnulls();
 SELECT num_nulls();
+
+--
+-- Test adding a support function to a subject function
+--
+
+CREATE FUNCTION my_int_eq(int, int) RETURNS bool
+  LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
+  AS $$int4eq$$;
+
+-- By default, planner does not think that's selective
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
+WHERE my_int_eq(a.unique2, 42);
+
+-- With support function that knows it's int4eq, we get a different plan
+ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
+WHERE my_int_eq(a.unique2, 42);
+
+-- Also test non-default rowcount estimate
+CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
+  LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
+  AS $$generate_series_int4$$
+  SUPPORT test_support_func;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
index 843f511b3dc92201f26b2f4386a64601680df948..ccbe8a1df5ddf4fb1c262680b71d2969888f67e7 100644 (file)
@@ -498,9 +498,9 @@ select * from int4_tbl where
 --
 explain (verbose, costs off)
 select * from int4_tbl o where (f1, f1) in
-  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+  (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
 select * from int4_tbl o where (f1, f1) in
-  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+  (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
 
 --
 -- check for over-optimization of whole-row Var referencing an Append plan