Allow FDWs and custom scan providers to replace joins with scans.
author Robert Haas <rhaas@postgresql.org>
Fri, 1 May 2015 12:50:35 +0000 (08:50 -0400)
committer Robert Haas <rhaas@postgresql.org>
Fri, 1 May 2015 12:50:35 +0000 (08:50 -0400)
Foreign data wrappers can use this capability for so-called "join
pushdown"; that is, instead of executing two separate foreign scans
and then joining the results locally, they can generate a path which
performs the join on the remote server and then is scanned locally.
This commit does not extend postgres_fdw to take advantage of this
capability; it just provides the infrastructure.

Custom scan providers can use this in a similar way.  Previously,
it was only possible for a custom scan provider to scan a single
relation.  Now, it can scan an entire join tree, provided of course
that it knows how to produce the same results that the join would
have produced if executed normally.

KaiGai Kohei, reviewed by Shigeru Hanada, Ashutosh Bapat, and me.

20 files changed:
doc/src/sgml/custom-scan.sgml
doc/src/sgml/fdwhandler.sgml
src/backend/commands/explain.c
src/backend/executor/execScan.c
src/backend/executor/nodeCustom.c
src/backend/executor/nodeForeignscan.c
src/backend/foreign/foreign.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/setrefs.c
src/backend/optimizer/util/plancat.c
src/backend/optimizer/util/relnode.c
src/backend/utils/adt/ruleutils.c
src/include/foreign/fdwapi.h
src/include/nodes/plannodes.h
src/include/nodes/relation.h
src/include/optimizer/paths.h
src/include/optimizer/planmain.h

index 8a4a3dfcfeb7a1cf5d4f665b2907793b7046d1d3..9fd1db6fde48643feeaa92847f4c8c9150790965 100644 (file)
@@ -81,6 +81,28 @@ typedef struct CustomPath
     detailed below.
   </para>
 
+  <para>
+   A custom scan provider can also add join paths; in this case, the scan
+   must produce the same output as would normally be produced by the join
+   it replaces.  To do this, the join provider should set the following hook.
+   This hook may be invoked repeatedly for the same pair of relations, with
+   different combinations of inner and outer relations; it is the
+   responsibility of the hook to minimize duplicated work.
+<programlisting>
+typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root,
+                                             RelOptInfo *joinrel,
+                                             RelOptInfo *outerrel,
+                                             RelOptInfo *innerrel,
+                                             List *restrictlist,
+                                             JoinType jointype,
+                                             SpecialJoinInfo *sjinfo,
+                                             SemiAntiJoinFactors *semifactors,
+                                             Relids param_source_rels,
+                                             Relids extra_lateral_rels);
+extern PGDLLIMPORT set_join_pathlist_hook_type set_join_pathlist_hook;
+</programlisting>
+  </para>
+
   <sect2 id="custom-scan-path-callbacks">
   <title>Custom Path Callbacks</title>
 
@@ -124,7 +146,9 @@ typedef struct CustomScan
     Scan      scan;
     uint32    flags;
     List     *custom_exprs;
+    List     *custom_ps_tlist;
     List     *custom_private;
+    List     *custom_relids;
     const CustomScanMethods *methods;
 } CustomScan;
 </programlisting>
@@ -141,11 +165,27 @@ typedef struct CustomScan
     is only used by the custom scan provider itself.  Plan trees must be able
     to be duplicated using <function>copyObject</>, so all the data stored
     within these two fields must consist of nodes that function can handle.
+    <literal>custom_relids</> is set by the core code to the set of relations
+    which this scan node must handle; except when this scan is replacing a
+    join, it will have only one member.
     <structfield>methods</> must point to a (usually statically allocated)
     object implementing the required custom scan methods, which are further
     detailed below.
   </para>
 
+  <para>
+   When a <structname>CustomScan</> scans a single relation,
+   <structfield>scan.scanrelid</> should be the range table index of the table
+   to be scanned, and <structfield>custom_ps_tlist</> should be
+   <literal>NULL</>.  When it replaces a join, <structfield>scan.scanrelid</>
+   should be zero, and <structfield>custom_ps_tlist</> should be a list of
+   <structname>TargetEntry</> nodes.  This is necessary because, when a join
+   is replaced, the target list cannot be constructed from the table
+   definition.  At execution time, this list will be used to initialize the
+   tuple descriptor of the <structname>TupleTableSlot</>.  It will also be
+   used by <command>EXPLAIN</>, when deparsing.
+  </para>
+
   <sect2 id="custom-scan-plan-callbacks">
    <title>Custom Scan Callbacks</title>
    <para>
index 5af41318e5c17efe66e321af9514933b8d0686c0..04f3c224331216ad47578eca5eb3ba8378d05e68 100644 (file)
@@ -598,6 +598,42 @@ IsForeignRelUpdatable (Relation rel);
 
    </sect2>
 
+   <sect2>
+    <title>FDW Routines For Remote Joins</title>
+    <para>
+<programlisting>
+void
+GetForeignJoinPaths(PlannerInfo *root,
+                    RelOptInfo *joinrel,
+                    RelOptInfo *outerrel,
+                    RelOptInfo *innerrel,
+                    List *restrictlist,
+                    JoinType jointype,
+                    SpecialJoinInfo *sjinfo,
+                    SemiAntiJoinFactors *semifactors,
+                    Relids param_source_rels,
+                    Relids extra_lateral_rels);
+</programlisting>
+     Create possible access paths for a join of two foreign tables managed
+     by the same foreign data wrapper.
+     This optional function is called during query planning.
+    </para>
+    <para>
+     This function allows the FDW to add <structname>ForeignScan</> paths for the
+     supplied <literal>joinrel</>.  Typically, the FDW will send the whole
+     join to the remote server as a single query, as performing the join
+     remotely rather than locally is typically much more efficient.
+    </para>
+    <para>
+     Since we cannot construct the slot descriptor for a remote join from
+     the catalogs, the FDW should set the <structfield>scanrelid</> of the
+     <structname>ForeignScan</> to zero and <structfield>fdw_ps_tlist</>
+     to an appropriate list of <structname>TargetEntry</> nodes.
+     Junk entries will be ignored, but can be present for the benefit of
+     deparsing performed by <command>EXPLAIN</>.
+    </para>
+   </sect2>
+
    <sect2 id="fdw-callbacks-explain">
     <title>FDW Routines for <command>EXPLAIN</></title>
 
index 315a52849c9494afd3d554639e85b7f765a4d28a..f4cc90183a49ea5642c200f304ae4cd98a12beb7 100644 (file)
@@ -730,11 +730,17 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
                case T_ValuesScan:
                case T_CteScan:
                case T_WorkTableScan:
-               case T_ForeignScan:
-               case T_CustomScan:
                        *rels_used = bms_add_member(*rels_used,
                                                                                ((Scan *) plan)->scanrelid);
                        break;
+               case T_ForeignScan:
+                       *rels_used = bms_add_members(*rels_used,
+                                                                                ((ForeignScan *) plan)->fdw_relids);
+                       break;
+               case T_CustomScan:
+                       *rels_used = bms_add_members(*rels_used,
+                                                                                ((CustomScan *) plan)->custom_relids);
+                       break;
                case T_ModifyTable:
                        *rels_used = bms_add_member(*rels_used,
                                                                        ((ModifyTable *) plan)->nominalRelation);
@@ -1072,9 +1078,12 @@ ExplainNode(PlanState *planstate, List *ancestors,
                case T_ValuesScan:
                case T_CteScan:
                case T_WorkTableScan:
+                       ExplainScanTarget((Scan *) plan, es);
+                       break;
                case T_ForeignScan:
                case T_CustomScan:
-                       ExplainScanTarget((Scan *) plan, es);
+                       if (((Scan *) plan)->scanrelid > 0)
+                               ExplainScanTarget((Scan *) plan, es);
                        break;
                case T_IndexScan:
                        {
index 3f0d809387581f69a8e31d14450d536d0f69fe48..fa475014f134d527498c5481573c788678a4f0b5 100644 (file)
@@ -251,6 +251,12 @@ ExecAssignScanProjectionInfo(ScanState *node)
        /* Vars in an index-only scan's tlist should be INDEX_VAR */
        if (IsA(scan, IndexOnlyScan))
                varno = INDEX_VAR;
+       /* A foreign or custom scan on a pseudo relation should also use INDEX_VAR */
+       else if (scan->scanrelid == 0)
+       {
+               Assert(IsA(scan, ForeignScan) || IsA(scan, CustomScan));
+               varno = INDEX_VAR;
+       }
        else
                varno = scan->scanrelid;
 
index b07932b32e56b6edff356e164034ba0c5240e0dd..db1b4f2ffa4dbaffcff031277815d5b77c31014b 100644 (file)
@@ -23,7 +23,7 @@ CustomScanState *
 ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
 {
        CustomScanState    *css;
-       Relation                        scan_rel;
+       Index                           scan_relid = cscan->scan.scanrelid;
 
        /* populate a CustomScanState according to the CustomScan */
        css = (CustomScanState *) cscan->methods->CreateCustomScanState(cscan);
@@ -48,12 +48,26 @@ ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
        ExecInitScanTupleSlot(estate, &css->ss);
        ExecInitResultTupleSlot(estate, &css->ss.ps);
 
-       /* initialize scan relation */
-       scan_rel = ExecOpenScanRelation(estate, cscan->scan.scanrelid, eflags);
-       css->ss.ss_currentRelation = scan_rel;
-       css->ss.ss_currentScanDesc = NULL;      /* set by provider */
-       ExecAssignScanType(&css->ss, RelationGetDescr(scan_rel));
-
+       /*
+        * open the base relation and acquire an appropriate lock on it;
+        * also, get and assign the scan type
+        */
+       if (scan_relid > 0)
+       {
+               Relation                scan_rel;
+
+               scan_rel = ExecOpenScanRelation(estate, scan_relid, eflags);
+               css->ss.ss_currentRelation = scan_rel;
+               css->ss.ss_currentScanDesc = NULL;      /* set by provider */
+               ExecAssignScanType(&css->ss, RelationGetDescr(scan_rel));
+       }
+       else
+       {
+               TupleDesc       ps_tupdesc;
+
+               ps_tupdesc = ExecCleanTypeFromTL(cscan->custom_ps_tlist, false);
+               ExecAssignScanType(&css->ss, ps_tupdesc);
+       }
        css->ss.ps.ps_TupFromTlist = false;
 
        /*
@@ -89,11 +103,11 @@ ExecEndCustomScan(CustomScanState *node)
 
        /* Clean out the tuple table */
        ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
-       if (node->ss.ss_ScanTupleSlot)
-               ExecClearTuple(node->ss.ss_ScanTupleSlot);
+       ExecClearTuple(node->ss.ss_ScanTupleSlot);
 
        /* Close the heap relation */
-       ExecCloseScanRelation(node->ss.ss_currentRelation);
+       if (node->ss.ss_currentRelation)
+               ExecCloseScanRelation(node->ss.ss_currentRelation);
 }
 
 void
index 7399053ae7d280d0821e4a0ac132dc6c0705486f..fa553ace5d687960f4aefc5124834397041018ca 100644 (file)
@@ -102,7 +102,7 @@ ForeignScanState *
 ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
 {
        ForeignScanState *scanstate;
-       Relation        currentRelation;
+       Index           scanrelid = node->scan.scanrelid;
        FdwRoutine *fdwroutine;
 
        /* check for unsupported flags */
@@ -141,16 +141,24 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
        ExecInitScanTupleSlot(estate, &scanstate->ss);
 
        /*
-        * open the base relation and acquire appropriate lock on it.
+        * open the base relation and acquire an appropriate lock on it;
+        * also, get and assign the scan type
         */
-       currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
-       scanstate->ss.ss_currentRelation = currentRelation;
+       if (scanrelid > 0)
+       {
+               Relation        currentRelation;
 
-       /*
-        * get the scan type from the relation descriptor.  (XXX at some point we
-        * might want to let the FDW editorialize on the scan tupdesc.)
-        */
-       ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
+               currentRelation = ExecOpenScanRelation(estate, scanrelid, eflags);
+               scanstate->ss.ss_currentRelation = currentRelation;
+               ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
+       }
+       else
+       {
+               TupleDesc       ps_tupdesc;
+
+               ps_tupdesc = ExecCleanTypeFromTL(node->fdw_ps_tlist, false);
+               ExecAssignScanType(&scanstate->ss, ps_tupdesc);
+       }
 
        /*
         * Initialize result tuple type and projection info.
@@ -161,7 +169,7 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
        /*
         * Acquire function pointers from the FDW's handler, and init fdw_state.
         */
-       fdwroutine = GetFdwRoutineForRelation(currentRelation, true);
+       fdwroutine = GetFdwRoutine(node->fdw_handler);
        scanstate->fdwroutine = fdwroutine;
        scanstate->fdw_state = NULL;
 
@@ -193,7 +201,8 @@ ExecEndForeignScan(ForeignScanState *node)
        ExecClearTuple(node->ss.ss_ScanTupleSlot);
 
        /* close the relation. */
-       ExecCloseScanRelation(node->ss.ss_currentRelation);
+       if (node->ss.ss_currentRelation)
+               ExecCloseScanRelation(node->ss.ss_currentRelation);
 }
 
 /* ----------------------------------------------------------------
index cbe8b78be9e2d3c1f9a3b72bd03ea9fd84f12982..cdbd550fd4363a17d394df04018b1724682a9175 100644 (file)
@@ -304,11 +304,11 @@ GetFdwRoutine(Oid fdwhandler)
 
 
 /*
- * GetFdwRoutineByRelId - look up the handler of the foreign-data wrapper
- * for the given foreign table, and retrieve its FdwRoutine struct.
+ * GetFdwHandlerByRelId - look up the handler of the foreign-data wrapper
+ * for the given foreign table
  */
-FdwRoutine *
-GetFdwRoutineByRelId(Oid relid)
+Oid
+GetFdwHandlerByRelId(Oid relid)
 {
        HeapTuple       tp;
        Form_pg_foreign_data_wrapper fdwform;
@@ -350,7 +350,18 @@ GetFdwRoutineByRelId(Oid relid)
 
        ReleaseSysCache(tp);
 
-       /* And finally, call the handler function. */
+       return fdwhandler;
+}
+
+/*
+ * GetFdwRoutineByRelId - look up the handler of the foreign-data wrapper
+ * for the given foreign table, and retrieve its FdwRoutine struct.
+ */
+FdwRoutine *
+GetFdwRoutineByRelId(Oid relid)
+{
+       Oid                     fdwhandler = GetFdwHandlerByRelId(relid);
+
        return GetFdwRoutine(fdwhandler);
 }
 
index 1685efe254ba5ff56c3ae7df3c7144e440278021..805045d15e62e9aceb1b49c4154940116217d1a4 100644 (file)
@@ -592,8 +592,11 @@ _copyForeignScan(const ForeignScan *from)
        /*
         * copy remainder of node
         */
+       COPY_SCALAR_FIELD(fdw_handler);
        COPY_NODE_FIELD(fdw_exprs);
+       COPY_NODE_FIELD(fdw_ps_tlist);
        COPY_NODE_FIELD(fdw_private);
+       COPY_BITMAPSET_FIELD(fdw_relids);
        COPY_SCALAR_FIELD(fsSystemCol);
 
        return newnode;
@@ -617,7 +620,9 @@ _copyCustomScan(const CustomScan *from)
         */
        COPY_SCALAR_FIELD(flags);
        COPY_NODE_FIELD(custom_exprs);
+       COPY_NODE_FIELD(custom_ps_tlist);
        COPY_NODE_FIELD(custom_private);
+       COPY_BITMAPSET_FIELD(custom_relids);
 
        /*
         * NOTE: The method field of CustomScan is required to be a pointer to a
index e0dca56ea6cfaefc1ad8ee4d7d8036895a1e8f2f..f9f948e39bbe9b590a9583f5b14c775455990cd9 100644 (file)
@@ -558,8 +558,11 @@ _outForeignScan(StringInfo str, const ForeignScan *node)
 
        _outScanInfo(str, (const Scan *) node);
 
+       WRITE_OID_FIELD(fdw_handler);
        WRITE_NODE_FIELD(fdw_exprs);
+       WRITE_NODE_FIELD(fdw_ps_tlist);
        WRITE_NODE_FIELD(fdw_private);
+       WRITE_BITMAPSET_FIELD(fdw_relids);
        WRITE_BOOL_FIELD(fsSystemCol);
 }
 
@@ -572,7 +575,9 @@ _outCustomScan(StringInfo str, const CustomScan *node)
 
        WRITE_UINT_FIELD(flags);
        WRITE_NODE_FIELD(custom_exprs);
+       WRITE_NODE_FIELD(custom_ps_tlist);
        WRITE_NODE_FIELD(custom_private);
+       WRITE_BITMAPSET_FIELD(custom_relids);
        appendStringInfoString(str, " :methods ");
        _outToken(str, node->methods->CustomName);
        if (node->methods->TextOutCustomScan)
index 1da953f6d335d89729f48db3423473016333eb56..dabef3c3c7fe4ff5df5b3c3c67c15cdd13186d1e 100644 (file)
 #include <math.h>
 
 #include "executor/executor.h"
+#include "foreign/fdwapi.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 
+/* Hook for plugins to get control in add_paths_to_joinrel() */
+set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
 
 #define PATH_PARAM_BY_REL(path, rel)  \
        ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
@@ -260,6 +263,27 @@ add_paths_to_joinrel(PlannerInfo *root,
                                                         restrictlist, jointype,
                                                         sjinfo, &semifactors,
                                                         param_source_rels, extra_lateral_rels);
+
+       /*
+        * 5. If both inner and outer relations are managed by the same FDW,
+        * give it a chance to push down joins.
+        */
+       if (joinrel->fdwroutine &&
+               joinrel->fdwroutine->GetForeignJoinPaths)
+               joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
+                                                                                                outerrel, innerrel,
+                                                                                                restrictlist, jointype, sjinfo,
+                                                                                                &semifactors,
+                                                                                                param_source_rels,
+                                                                                                extra_lateral_rels);
+       /*
+        * 6. Finally, give extensions a chance to manipulate the path list.
+        */
+       if (set_join_pathlist_hook)
+               set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
+                                                          restrictlist, jointype,
+                                                          sjinfo, &semifactors,
+                                                          param_source_rels, extra_lateral_rels);
 }
 
 /*
index cb69c03df000887276d52c37e64fa15e2bad9bf7..eeb2a4176431b59a839cabd406161ce8960da630 100644 (file)
@@ -44,7 +44,6 @@
 #include "utils/lsyscache.h"
 
 
-static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path);
 static Plan *create_scan_plan(PlannerInfo *root, Path *best_path);
 static List *build_path_tlist(PlannerInfo *root, Path *path);
 static bool use_physical_tlist(PlannerInfo *root, RelOptInfo *rel);
@@ -220,7 +219,7 @@ create_plan(PlannerInfo *root, Path *best_path)
  * create_plan_recurse
  *       Recursive guts of create_plan().
  */
-static Plan *
+Plan *
 create_plan_recurse(PlannerInfo *root, Path *best_path)
 {
        Plan       *plan;
@@ -1961,16 +1960,25 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
        ForeignScan *scan_plan;
        RelOptInfo *rel = best_path->path.parent;
        Index           scan_relid = rel->relid;
-       RangeTblEntry *rte;
+       Oid                     rel_oid = InvalidOid;
        Bitmapset  *attrs_used = NULL;
        ListCell   *lc;
        int                     i;
 
-       /* it should be a base rel... */
-       Assert(scan_relid > 0);
-       Assert(rel->rtekind == RTE_RELATION);
-       rte = planner_rt_fetch(scan_relid, root);
-       Assert(rte->rtekind == RTE_RELATION);
+       /*
+        * If we're scanning a base relation, look up the OID.
+        * (We can skip this if scanning a join relation.)
+        */
+       if (scan_relid > 0)
+       {
+               RangeTblEntry *rte;
+
+               Assert(rel->rtekind == RTE_RELATION);
+               rte = planner_rt_fetch(scan_relid, root);
+               Assert(rte->rtekind == RTE_RELATION);
+               rel_oid = rte->relid;
+       }
+       Assert(rel->fdwroutine != NULL);
 
        /*
         * Sort clauses into best execution order.  We do this first since the FDW
@@ -1985,13 +1993,39 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
         * has selected some join clauses for remote use but also wants them
         * rechecked locally).
         */
-       scan_plan = rel->fdwroutine->GetForeignPlan(root, rel, rte->relid,
+       scan_plan = rel->fdwroutine->GetForeignPlan(root, rel, rel_oid,
                                                                                                best_path,
                                                                                                tlist, scan_clauses);
+       /*
+        * Sanity check.  There may be resjunk entries in fdw_ps_tlist that
+        * are included only to help EXPLAIN deparse plans properly. We require
+        * that these are at the end, so that when the executor builds the scan
+        * descriptor based on the non-junk entries, it gets the attribute
+        * numbers correct.
+        */
+       if (scan_plan->scan.scanrelid == 0)
+       {
+               bool    found_resjunk = false;
+
+               foreach (lc, scan_plan->fdw_ps_tlist)
+               {
+                       TargetEntry        *tle = lfirst(lc);
+
+                       if (tle->resjunk)
+                               found_resjunk = true;
+                       else if (found_resjunk)
+                               elog(ERROR, "junk TLE should not apper prior to valid one");
+               }
+       }
+       /* Set the relids that are represented by this foreign scan for Explain */
+       scan_plan->fdw_relids = best_path->path.parent->relids;
 
        /* Copy cost data from Path to Plan; no need to make FDW do this */
        copy_path_costsize(&scan_plan->scan.plan, &best_path->path);
 
+       /* Track FDW handler OID; no need to make FDW do this */
+       scan_plan->fdw_handler = rel->fdw_handler;
+
        /*
         * Replace any outer-relation variables with nestloop params in the qual
         * and fdw_exprs expressions.  We do this last so that the FDW doesn't
@@ -2053,12 +2087,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
 {
        CustomScan *cplan;
        RelOptInfo *rel = best_path->path.parent;
-
-       /*
-        * Right now, all we can support is CustomScan node which is associated
-        * with a particular base relation to be scanned.
-        */
-       Assert(rel && rel->reloptkind == RELOPT_BASEREL);
+       ListCell   *lc;
 
        /*
         * Sort clauses into the best execution order, although custom-scan
@@ -2077,6 +2106,30 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
                                                                                                                          scan_clauses);
        Assert(IsA(cplan, CustomScan));
 
+       /*
+        * Sanity check.  There may be resjunk entries in custom_ps_tlist that
+        * are included only to help EXPLAIN deparse plans properly. We require
+        * that these are at the end, so that when the executor builds the scan
+        * descriptor based on the non-junk entries, it gets the attribute
+        * numbers correct.
+        */
+       if (cplan->scan.scanrelid == 0)
+       {
+               bool    found_resjunk = false;
+
+               foreach (lc, cplan->custom_ps_tlist)
+               {
+                       TargetEntry        *tle = lfirst(lc);
+
+                       if (tle->resjunk)
+                               found_resjunk = true;
+                       else if (found_resjunk)
+                               elog(ERROR, "junk TLE should not apper prior to valid one");
+               }
+       }
+       /* Set the relids that are represented by this custom scan for Explain */
+       cplan->custom_relids = best_path->path.parent->relids;
+
        /*
         * Copy cost data from Path to Plan; no need to make custom-plan providers
         * do this
index 94b12ab8ca1132867cd3a1b3f830eb52942c2d22..69ed2a574e52cacd793e847c8b4230f3c51f302e 100644 (file)
@@ -86,6 +86,12 @@ static void flatten_unplanned_rtes(PlannerGlobal *glob, RangeTblEntry *rte);
 static bool flatten_rtes_walker(Node *node, PlannerGlobal *glob);
 static void add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte);
 static Plan *set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset);
+static void set_foreignscan_references(PlannerInfo *root,
+                                                                          ForeignScan *fscan,
+                                                                          int rtoffset);
+static void set_customscan_references(PlannerInfo *root,
+                                                                         CustomScan *cscan,
+                                                                         int rtoffset);
 static Plan *set_indexonlyscan_references(PlannerInfo *root,
                                                         IndexOnlyScan *plan,
                                                         int rtoffset);
@@ -565,31 +571,11 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
                        }
                        break;
                case T_ForeignScan:
-                       {
-                               ForeignScan *splan = (ForeignScan *) plan;
-
-                               splan->scan.scanrelid += rtoffset;
-                               splan->scan.plan.targetlist =
-                                       fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
-                               splan->scan.plan.qual =
-                                       fix_scan_list(root, splan->scan.plan.qual, rtoffset);
-                               splan->fdw_exprs =
-                                       fix_scan_list(root, splan->fdw_exprs, rtoffset);
-                       }
+                       set_foreignscan_references(root, (ForeignScan *) plan, rtoffset);
                        break;
 
                case T_CustomScan:
-                       {
-                               CustomScan *splan = (CustomScan *) plan;
-
-                               splan->scan.scanrelid += rtoffset;
-                               splan->scan.plan.targetlist =
-                                       fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
-                               splan->scan.plan.qual =
-                                       fix_scan_list(root, splan->scan.plan.qual, rtoffset);
-                               splan->custom_exprs =
-                                       fix_scan_list(root, splan->custom_exprs, rtoffset);
-                       }
+                       set_customscan_references(root, (CustomScan *) plan, rtoffset);
                        break;
 
                case T_NestLoop:
@@ -876,6 +862,121 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
        return plan;
 }
 
+/*
+ * set_foreignscan_references
+ *     Do set_plan_references processing on a ForeignScan
+ */
+static void
+set_foreignscan_references(PlannerInfo *root,
+                                                  ForeignScan *fscan,
+                                                  int rtoffset)
+{
+       if (rtoffset > 0)
+       {
+               Bitmapset  *tempset = NULL;
+               int                     x = -1;
+
+               while ((x = bms_next_member(fscan->fdw_relids, x)) >= 0)
+                       tempset = bms_add_member(tempset, x + rtoffset);
+               fscan->fdw_relids = tempset;
+       }
+
+       if (fscan->scan.scanrelid == 0)
+       {
+               indexed_tlist *pscan_itlist = build_tlist_index(fscan->fdw_ps_tlist);
+
+               fscan->scan.plan.targetlist = (List *)
+                       fix_upper_expr(root,
+                                                  (Node *) fscan->scan.plan.targetlist,
+                                                  pscan_itlist,
+                                                  INDEX_VAR,
+                                                  rtoffset);
+               fscan->scan.plan.qual = (List *)
+                       fix_upper_expr(root,
+                                                  (Node *) fscan->scan.plan.qual,
+                                                  pscan_itlist,
+                                                  INDEX_VAR,
+                                                  rtoffset);
+               fscan->fdw_exprs = (List *)
+                       fix_upper_expr(root,
+                                                  (Node *) fscan->fdw_exprs,
+                                                  pscan_itlist,
+                                                  INDEX_VAR,
+                                                  rtoffset);
+               fscan->fdw_ps_tlist =
+                       fix_scan_list(root, fscan->fdw_ps_tlist, rtoffset);
+               pfree(pscan_itlist);
+       }
+       else
+       {
+               fscan->scan.scanrelid += rtoffset;
+               fscan->scan.plan.targetlist =
+                       fix_scan_list(root, fscan->scan.plan.targetlist, rtoffset);
+               fscan->scan.plan.qual =
+                       fix_scan_list(root, fscan->scan.plan.qual, rtoffset);
+               fscan->fdw_exprs =
+                       fix_scan_list(root, fscan->fdw_exprs, rtoffset);
+       }
+}
+
+/*
+ * set_customscan_references
+ *     Do set_plan_references processing on a CustomScan
+ */
+static void
+set_customscan_references(PlannerInfo *root,
+                                                 CustomScan *cscan,
+                                                 int rtoffset)
+{
+       if (rtoffset > 0)
+       {
+               Bitmapset  *tempset = NULL;
+               int                     x = -1;
+
+               while ((x = bms_next_member(cscan->custom_relids, x)) >= 0)
+                       tempset = bms_add_member(tempset, x + rtoffset);
+               cscan->custom_relids = tempset;
+       }
+
+       if (cscan->scan.scanrelid == 0)
+       {
+               indexed_tlist *pscan_itlist =
+                       build_tlist_index(cscan->custom_ps_tlist);
+
+               cscan->scan.plan.targetlist = (List *)
+                       fix_upper_expr(root,
+                                                  (Node *) cscan->scan.plan.targetlist,
+                                                  pscan_itlist,
+                                                  INDEX_VAR,
+                                                  rtoffset);
+               cscan->scan.plan.qual = (List *)
+                       fix_upper_expr(root,
+                                                  (Node *) cscan->scan.plan.qual,
+                                                  pscan_itlist,
+                                                  INDEX_VAR,
+                                                  rtoffset);
+               cscan->custom_exprs = (List *)
+                       fix_upper_expr(root,
+                                                  (Node *) cscan->custom_exprs,
+                                                  pscan_itlist,
+                                                  INDEX_VAR,
+                                                  rtoffset);
+               cscan->custom_ps_tlist =
+                       fix_scan_list(root, cscan->custom_ps_tlist, rtoffset);
+               pfree(pscan_itlist);
+       }
+       else
+       {
+               cscan->scan.scanrelid += rtoffset;
+               cscan->scan.plan.targetlist =
+                       fix_scan_list(root, cscan->scan.plan.targetlist, rtoffset);
+               cscan->scan.plan.qual =
+                       fix_scan_list(root, cscan->scan.plan.qual, rtoffset);
+               cscan->custom_exprs =
+                       fix_scan_list(root, cscan->custom_exprs, rtoffset);
+       }
+}
+
 /*
  * set_indexonlyscan_references
  *             Do set_plan_references processing on an IndexOnlyScan
index 8abed2ae0dada01bf56fd080e041f1b481ee612f..068ab39dd437e6bd531d4ed6e2a938ce5159a71c 100644 (file)
@@ -379,10 +379,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
        /* Grab the fdwroutine info using the relcache, while we have it */
        if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+       {
+               rel->fdw_handler = GetFdwHandlerByRelId(RelationGetRelid(relation));
                rel->fdwroutine = GetFdwRoutineForRelation(relation, true);
+       }
        else
+       {
+               rel->fdw_handler = InvalidOid;
                rel->fdwroutine = NULL;
-
+       }
        heap_close(relation, NoLock);
 
        /*
index 8cfbea04e8b7afb871847bbfd7c793b5d0ff847b..56235663d7f99c6193f8ea128f15abbe4374eca2 100644 (file)
@@ -14,6 +14,7 @@
  */
 #include "postgres.h"
 
+#include "foreign/fdwapi.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
@@ -122,6 +123,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
        rel->subroot = NULL;
        rel->subplan_params = NIL;
        rel->fdwroutine = NULL;
+       rel->fdw_handler = InvalidOid;
        rel->fdw_private = NULL;
        rel->baserestrictinfo = NIL;
        rel->baserestrictcost.startup = 0;
@@ -426,6 +428,18 @@ build_join_rel(PlannerInfo *root,
        set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
                                                           sjinfo, restrictlist);
 
+       /*
+        * Set the FDW handler and routine if the outer and inner relations
+        * are both managed by the same FDW handler.
+        */
+       if (OidIsValid(outer_rel->fdw_handler) &&
+               OidIsValid(inner_rel->fdw_handler) &&
+               outer_rel->fdw_handler == inner_rel->fdw_handler)
+       {
+               joinrel->fdw_handler = outer_rel->fdw_handler;
+               joinrel->fdwroutine = GetFdwRoutine(joinrel->fdw_handler);
+       }
+
        /*
         * Add the joinrel to the query's joinrel list, and store it into the
         * auxiliary hashtable if there is one.  NB: GEQO requires us to append
index 5ffb712472ba489acd789fe8be824521e3a78395..29d1210e05cb71e63afb5c984c4620e206ba251f 100644 (file)
@@ -3862,6 +3862,10 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps)
        /* index_tlist is set only if it's an IndexOnlyScan */
        if (IsA(ps->plan, IndexOnlyScan))
                dpns->index_tlist = ((IndexOnlyScan *) ps->plan)->indextlist;
+       else if (IsA(ps->plan, ForeignScan))
+               dpns->index_tlist = ((ForeignScan *) ps->plan)->fdw_ps_tlist;
+       else if (IsA(ps->plan, CustomScan))
+               dpns->index_tlist = ((CustomScan *) ps->plan)->custom_ps_tlist;
        else
                dpns->index_tlist = NIL;
 }
index 1d768412af2f7b7f97ef63d55b86ef70df4f9ee0..c683d9259e445aadd7e90145db693ca51bfad566 100644 (file)
@@ -82,6 +82,17 @@ typedef void (*EndForeignModify_function) (EState *estate,
 
 typedef int (*IsForeignRelUpdatable_function) (Relation rel);
 
+typedef void (*GetForeignJoinPaths_function) (PlannerInfo *root,
+                                                                                         RelOptInfo *joinrel,
+                                                                                         RelOptInfo *outerrel,
+                                                                                         RelOptInfo *innerrel,
+                                                                                         List *restrictlist,
+                                                                                         JoinType jointype,
+                                                                                         SpecialJoinInfo *sjinfo,
+                                                                                         SemiAntiJoinFactors *semifactors,
+                                                                                         Relids param_source_rels,
+                                                                                         Relids extra_lateral_rels);
+
 typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
                                                                                                        struct ExplainState *es);
 
@@ -150,10 +161,14 @@ typedef struct FdwRoutine
 
        /* Support functions for IMPORT FOREIGN SCHEMA */
        ImportForeignSchema_function ImportForeignSchema;
+
+       /* Support functions for join push-down */
+       GetForeignJoinPaths_function GetForeignJoinPaths;
 } FdwRoutine;
 
 
 /* Functions in foreign/foreign.c */
+extern Oid GetFdwHandlerByRelId(Oid relid);
 extern FdwRoutine *GetFdwRoutine(Oid fdwhandler);
 extern FdwRoutine *GetFdwRoutineByRelId(Oid relid);
 extern FdwRoutine *GetFdwRoutineForRelation(Relation relation, bool makecopy);
index 21cbfa8cf0febf77d67e6b82b85a07aaa8cf746d..baeba2d330feb6afd51bb404d10a3bafe8049015 100644 (file)
@@ -471,7 +471,11 @@ typedef struct WorkTableScan
  * fdw_exprs and fdw_private are both under the control of the foreign-data
  * wrapper, but fdw_exprs is presumed to contain expression trees and will
  * be post-processed accordingly by the planner; fdw_private won't be.
- * Note that everything in both lists must be copiable by copyObject().
+ * An optional fdw_ps_tlist is used to map a reference to an attribute of
+ * the underlying relation(s) onto a pair of INDEX_VAR and an alternative
+ * varattno.  When fdw_ps_tlist is used, this represents a remote join, and
+ * the FDW is responsible for setting this field to an appropriate value.
+ * Note that everything in the above lists must be copiable by copyObject().
  * One way to store an arbitrary blob of bytes is to represent it as a bytea
  * Const.  Usually, though, you'll be better off choosing a representation
  * that can be dumped usefully by nodeToString().
@@ -480,18 +484,22 @@ typedef struct WorkTableScan
 typedef struct ForeignScan
 {
        Scan            scan;
+       Oid                     fdw_handler;    /* OID of FDW handler */
        List       *fdw_exprs;          /* expressions that FDW may evaluate */
+       List       *fdw_ps_tlist;       /* tlist, if replacing a join */
        List       *fdw_private;        /* private data for FDW */
+       Bitmapset  *fdw_relids;         /* RTIs generated by this scan */
        bool            fsSystemCol;    /* true if any "system column" is needed */
 } ForeignScan;
 
 /* ----------------
  *        CustomScan node
  *
- * The comments for ForeignScan's fdw_exprs and fdw_private fields apply
- * equally to custom_exprs and custom_private.  Note that since Plan trees
- * can be copied, custom scan providers *must* fit all plan data they need
- * into those fields; embedding CustomScan in a larger struct will not work.
+ * The comments for ForeignScan's fdw_exprs, fdw_ps_tlist and fdw_private
+ * fields apply equally to custom_exprs, custom_ps_tlist and custom_private.
+ * Note that since Plan trees can be copied, custom scan providers *must*
+ * fit all plan data they need into those fields; embedding CustomScan in
+ * a larger struct will not work.
  * ----------------
  */
 struct CustomScan;
@@ -512,7 +520,9 @@ typedef struct CustomScan
        Scan            scan;
        uint32          flags;                  /* mask of CUSTOMPATH_* flags, see relation.h */
        List       *custom_exprs;       /* expressions that custom code may evaluate */
+       List       *custom_ps_tlist;/* tlist, if replacing a join */
        List       *custom_private; /* private data for custom code */
+       Bitmapset  *custom_relids;      /* RTIs generated by this scan */
        const CustomScanMethods *methods;
 } CustomScan;
 
index 401a686664de329781234282a11100ccb7da4f3b..1713d298de256deb47c0bda3a8659de1283e0b66 100644 (file)
@@ -366,6 +366,7 @@ typedef struct PlannerInfo
  *             subroot - PlannerInfo for subquery (NULL if it's not a subquery)
  *             subplan_params - list of PlannerParamItems to be passed to subquery
  *             fdwroutine - function hooks for FDW, if foreign table (else NULL)
+ *             fdw_handler - OID of FDW handler, if foreign table (else InvalidOid)
  *             fdw_private - private state for FDW, if foreign table (else NULL)
  *
  *             Note: for a subquery, tuples, subplan, subroot are not set immediately
@@ -461,6 +462,7 @@ typedef struct RelOptInfo
        List       *subplan_params; /* if subquery */
        /* use "struct FdwRoutine" to avoid including fdwapi.h here */
        struct FdwRoutine *fdwroutine;          /* if foreign table */
+       Oid                     fdw_handler;    /* if foreign table */
        void       *fdw_private;        /* if foreign table */
 
        /* used by various scans and joins: */
index 6cad92e4430ebc57669bc5bb1ebef9177e6a20e8..c42c69d7460751bb73edf96f015cc2f54fabfff2 100644 (file)
@@ -30,6 +30,19 @@ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
                                                                                                                RangeTblEntry *rte);
 extern PGDLLIMPORT set_rel_pathlist_hook_type set_rel_pathlist_hook;
 
+/* Hook for plugins to get control in add_paths_to_joinrel() */
+typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root,
+                                                                                        RelOptInfo *joinrel,
+                                                                                        RelOptInfo *outerrel,
+                                                                                        RelOptInfo *innerrel,
+                                                                                        List *restrictlist,
+                                                                                        JoinType jointype,
+                                                                                        SpecialJoinInfo *sjinfo,
+                                                                                        SemiAntiJoinFactors *semifactors,
+                                                                                        Relids param_source_rels,
+                                                                                        Relids extra_lateral_rels);
+extern PGDLLIMPORT set_join_pathlist_hook_type set_join_pathlist_hook;
+
 /* Hook for plugins to replace standard_join_search() */
 typedef RelOptInfo *(*join_search_hook_type) (PlannerInfo *root,
                                                                                                                  int levels_needed,
index fa72918d1bbadad9e2865814f58abd08bbe0c579..0c8cbcded96a393108a28e1ec31bd747782768df 100644 (file)
@@ -41,6 +41,7 @@ extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist,
  * prototypes for plan/createplan.c
  */
 extern Plan *create_plan(PlannerInfo *root, Path *best_path);
+extern Plan *create_plan_recurse(PlannerInfo *root, Path *best_path);
 extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
                                  Index scanrelid, Plan *subplan);
 extern ForeignScan *make_foreignscan(List *qptlist, List *qpqual,