e->nnodes++;
/* Call estimators for parallel-aware nodes. */
- switch (nodeTag(planstate))
+ if (planstate->plan->parallel_aware)
{
- case T_SeqScanState:
- ExecSeqScanEstimate((SeqScanState *) planstate,
- e->pcxt);
- break;
- default:
- break;
+ switch (nodeTag(planstate))
+ {
+ case T_SeqScanState:
+ ExecSeqScanEstimate((SeqScanState *) planstate,
+ e->pcxt);
+ break;
+ default:
+ break;
+ }
}
return planstate_tree_walker(planstate, ExecParallelEstimate, e);
}
/*
- * Ordinary plan nodes won't do anything here, but parallel-aware plan nodes
- * may need to initialize shared state in the DSM before parallel workers
- * are available. They can allocate the space they previous estimated using
- * shm_toc_allocate, and add the keys they previously estimated using
- * shm_toc_insert, in each case targeting pcxt->toc.
+ * Initialize the dynamic shared memory segment that will be used to control
+ * parallel execution.
*/
static bool
ExecParallelInitializeDSM(PlanState *planstate,
/* Count this node. */
d->nnodes++;
- /* Call initializers for parallel-aware plan nodes. */
- switch (nodeTag(planstate))
+ /*
+ * Call initializers for parallel-aware plan nodes.
+ *
+ * Ordinary plan nodes won't do anything here, but parallel-aware plan
+ * nodes may need to initialize shared state in the DSM before parallel
+ * workers are available. They can allocate the space they previously
+ * estimated using shm_toc_allocate, and add the keys they previously
+ * estimated using shm_toc_insert, in each case targeting pcxt->toc.
+ */
+ if (planstate->plan->parallel_aware)
{
- case T_SeqScanState:
- ExecSeqScanInitializeDSM((SeqScanState *) planstate,
- d->pcxt);
- break;
- default:
- break;
+ switch (nodeTag(planstate))
+ {
+ case T_SeqScanState:
+ ExecSeqScanInitializeDSM((SeqScanState *) planstate,
+ d->pcxt);
+ break;
+ default:
+ break;
+ }
}
return planstate_tree_walker(planstate, ExecParallelInitializeDSM, d);
return false;
/* Call initializers for parallel-aware plan nodes. */
- switch (nodeTag(planstate))
+ if (planstate->plan->parallel_aware)
{
- case T_SeqScanState:
- ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc);
- break;
- default:
- break;
+ switch (nodeTag(planstate))
+ {
+ case T_SeqScanState:
+ ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc);
+ break;
+ default:
+ break;
+ }
}
return planstate_tree_walker(planstate, ExecParallelInitializeWorker, toc);
else
_outBitmapset(str, NULL);
WRITE_BOOL_FIELD(parallel_aware);
+ WRITE_BOOL_FIELD(parallel_safe);
+ WRITE_INT_FIELD(parallel_degree);
WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(total_cost, "%.2f");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
- WRITE_INT_FIELD(num_workers);
WRITE_BOOL_FIELD(single_copy);
}
WRITE_NODE_FIELD(reltargetlist);
WRITE_NODE_FIELD(pathlist);
WRITE_NODE_FIELD(ppilist);
+ WRITE_NODE_FIELD(partial_pathlist);
WRITE_NODE_FIELD(cheapest_startup_path);
WRITE_NODE_FIELD(cheapest_total_path);
WRITE_NODE_FIELD(cheapest_unique_path);
pullup restriction?)
--- bjm & tgl
+Parallel Query and Partial Paths
+--------------------------------
+
+Parallel query involves dividing up the work that needs to be performed
+either by an entire query or some portion of the query in such a way that
+some of that work can be done by one or more worker processes, which are
+called parallel workers. Parallel workers are a subtype of dynamic
+background workers; see src/backend/access/transam/README.parallel for a
+fuller description. Academic literature on parallel query suggests
+that parallel execution strategies can be divided into essentially two
+categories: pipelined parallelism, where the execution of the query is
+divided into multiple stages and each stage is handled by a separate
+process; and partitioning parallelism, where the data is split between
+multiple processes and each process handles a subset of it. The
+literature, however, suggests that gains from pipeline parallelism are
+often very limited due to the difficulty of avoiding pipeline stalls.
+Consequently, we do not currently attempt to generate query plans that
+use this technique.
+
+Instead, we focus on partitioning parallelism, which does not require
+that the underlying table be partitioned. It only requires that (1)
+there be some method of dividing the data from at least one of the base
+tables involved in the relation across multiple processes, (2) each
+process be able to handle its own portion of the data, and (3) the
+results be collected. Requirements (2) and (3) are satisfied by the
+executor node Gather, which launches any number of worker processes and
+executes its single child plan in all of them (and perhaps in the leader
+also, if the children aren't generating enough data to keep the leader
+busy). Requirement (1) is handled by the SeqScan node: when invoked
+with parallel_aware = true, this node will, in effect, partition the
+table on a block by block basis, returning a subset of the tuples from
+the relation in each worker where that SeqScan is executed. A similar
+scheme could be (and probably should be) implemented for bitmap heap
+scans.
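+
+Schematically (this is an illustrative plan shape, not the output of
+any particular query), such a plan looks like:
+
+    Gather
+      ->  Parallel Seq Scan on big_table
+
+Each worker in which the Parallel Seq Scan runs (and possibly the
+leader as well) scans a disjoint subset of big_table's blocks, and the
+Gather node collects the resulting tuples into a single stream.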
+
+Just as we do for non-parallel access methods, we build Paths to
+represent access strategies that can be used in a parallel plan. These
+are, in essence, the same strategies that are available in the
+non-parallel plan, but there is an important difference: a path that
+will run beneath a Gather node returns only a subset of the query
+results in each worker, not all of them. To form a path that can
+actually be executed, the (rather large) cost of the Gather node must be
+accounted for. For this reason among others, paths intended to run
+beneath a Gather node - which we call "partial" paths since they return
+only a subset of the results in each worker - must be kept separate from
+ordinary paths (see RelOptInfo's partial_pathlist and the function
+add_partial_path).
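+
+As a concrete illustration, a partial path planned for three workers is
+costed as though each copy returns roughly one-third of the rows; only
+once the Gather node's cost of starting workers and collecting their
+tuples has been added do we have a path whose cost can fairly be
+compared against that of an ordinary, non-partial path.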
+
+One of the keys to making parallel query effective is to run as much of
+the query in parallel as possible. Therefore, we expect it to generally
+be desirable to postpone the Gather stage until as near to the top of the
+plan as possible. Expanding the range of cases in which more work can be
+pushed below the Gather (and costing them accurately) is likely to keep us
+busy for a long time to come.
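+
+For example, other things being equal, a plan like
+
+    Gather
+      ->  Hash Join
+            ->  Parallel Seq Scan on big_table
+            ->  Hash
+                  ->  Seq Scan on small_table
+
+is preferable to one that performs the join above the Gather, because
+the join work is then divided among all the participating processes
+rather than being done only in the leader. (Again, this is a schematic
+plan shape, not output from any particular query.)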
Index rti, RangeTblEntry *rte);
static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
+static void create_parallel_paths(PlannerInfo *root, RelOptInfo *rel);
static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static bool function_rte_parallel_ok(RangeTblEntry *rte);
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Relids required_outer;
- int parallel_threshold = 1000;
/*
* We don't support pushing join clauses into the quals of a seqscan, but
/* Consider sequential scan */
add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
- /* Consider parallel sequential scan */
- if (rel->consider_parallel && rel->pages > parallel_threshold &&
- required_outer == NULL)
- {
- Path *path;
- int parallel_degree = 1;
-
- /*
- * Limit the degree of parallelism logarithmically based on the size
- * of the relation. This probably needs to be a good deal more
- * sophisticated, but we need something here for now.
- */
- while (rel->pages > parallel_threshold * 3 &&
- parallel_degree < max_parallel_degree)
- {
- parallel_degree++;
- parallel_threshold *= 3;
- if (parallel_threshold >= PG_INT32_MAX / 3)
- break;
- }
-
- /*
- * Ideally we should consider postponing the gather operation until
- * much later, after we've pushed joins and so on atop the parallel
- * sequential scan path. But we don't have the infrastructure for
- * that yet, so just do this for now.
- */
- path = create_seqscan_path(root, rel, required_outer, parallel_degree);
- path = (Path *)
- create_gather_path(root, rel, path, required_outer,
- parallel_degree);
- add_path(rel, path);
- }
+ /* If appropriate, consider parallel sequential scan */
+ if (rel->consider_parallel && required_outer == NULL)
+ create_parallel_paths(root, rel);
/* Consider index scans */
create_index_paths(root, rel);
create_tidscan_paths(root, rel);
}
+/*
+ * create_parallel_paths
+ * Build parallel access paths for a plain relation
+ */
+static void
+create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ int parallel_threshold = 1000;
+ int parallel_degree = 1;
+
+ /*
+ * If this relation is too small to be worth a parallel scan, just
+ * return without doing anything ... unless it's an inheritance child.
+ * In that case, we want to generate a parallel path here anyway. It
+ * might not be worthwhile just for this relation, but when combined
+ * with all of its inheritance siblings it may well pay off.
+ */
+ if (rel->pages < parallel_threshold && rel->reloptkind == RELOPT_BASEREL)
+ return;
+
+ /*
+ * Limit the degree of parallelism logarithmically based on the size of
+ * the relation. This probably needs to be a good deal more
+ * sophisticated, but we need something here for now.
+ */
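+ /*
+ * As a worked illustration of the loop below (assuming the 1000-page
+ * threshold above and a sufficiently large max_parallel_degree): up to
+ * 3000 pages gets one worker, up to 9000 pages gets two, up to 27000
+ * pages gets three, and so on, with the threshold tripling for each
+ * additional worker.
+ */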
+ while (rel->pages > parallel_threshold * 3 &&
+ parallel_degree < max_parallel_degree)
+ {
+ parallel_degree++;
+ parallel_threshold *= 3;
+ if (parallel_threshold >= PG_INT32_MAX / 3)
+ break;
+ }
+
+ /* Add an unordered partial path based on a parallel sequential scan. */
+ add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_degree));
+
+ /*
+ * If this is a baserel, consider gathering any partial paths we may have
+ * just created. If we gathered an inheritance child, we could end up
+ * with a very large number of gather nodes, each trying to grab its own
+ * pool of workers, so don't do this in that case. Instead, we'll
+ * consider gathering partial paths for the appendrel.
+ */
+ if (rel->reloptkind == RELOPT_BASEREL)
+ generate_gather_paths(root, rel);
+}
+
/*
* set_tablesample_rel_size
* Set size estimates for a sampled relation
List *live_childrels = NIL;
List *subpaths = NIL;
bool subpaths_valid = true;
+ List *partial_subpaths = NIL;
+ bool partial_subpaths_valid = true;
List *all_child_pathkeys = NIL;
List *all_child_outers = NIL;
ListCell *l;
else
subpaths_valid = false;
+ /* Same idea, but for a partial plan. */
+ if (childrel->partial_pathlist != NIL)
+ partial_subpaths = accumulate_append_subpath(partial_subpaths,
+ linitial(childrel->partial_pathlist));
+ else
+ partial_subpaths_valid = false;
+
/*
* Collect lists of all the available path orderings and
* parameterizations for all the children. We use these as a
* if we have zero or one live subpath due to constraint exclusion.)
*/
if (subpaths_valid)
- add_path(rel, (Path *) create_append_path(rel, subpaths, NULL));
+ add_path(rel, (Path *) create_append_path(rel, subpaths, NULL, 0));
+
+ /*
+ * Consider an append of unordered, unparameterized partial paths.
+ */
+ if (partial_subpaths_valid)
+ {
+ AppendPath *appendpath;
+ ListCell *lc;
+ int parallel_degree = 0;
+
+ /*
+ * Decide what parallel degree to request for this append path. For
+ * now, we just use the maximum parallel degree of any member. It
+ * might be useful to use a higher number if the Append node were
+ * smart enough to spread out the workers, but it currently isn't.
+ */
+ foreach(lc, partial_subpaths)
+ {
+ Path *path = lfirst(lc);
+
+ parallel_degree = Max(parallel_degree, path->parallel_degree);
+ }
+ Assert(parallel_degree > 0);
+
+ /* Generate a partial append path. */
+ appendpath = create_append_path(rel, partial_subpaths, NULL,
+ parallel_degree);
+ add_partial_path(rel, (Path *) appendpath);
+
+ /* Consider gathering it. */
+ generate_gather_paths(root, rel);
+ }
/*
* Also build unparameterized MergeAppend paths based on the collected
if (subpaths_valid)
add_path(rel, (Path *)
- create_append_path(rel, subpaths, required_outer));
+ create_append_path(rel, subpaths, required_outer, 0));
}
}
/* Discard any pre-existing paths; no further need for them */
rel->pathlist = NIL;
+ rel->partial_pathlist = NIL;
- add_path(rel, (Path *) create_append_path(rel, NIL, NULL));
+ add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0));
/*
* We set the cheapest path immediately, to ensure that IS_DUMMY_REL()
add_path(rel, create_worktablescan_path(root, rel, required_outer));
}
+/*
+ * generate_gather_paths
+ * Generate parallel access paths for a relation by pushing a Gather on
+ * top of a partial path.
+ */
+void
+generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ Path *cheapest_partial_path;
+ Path *simple_gather_path;
+
+ /* If there are no partial paths, there's nothing to do here. */
+ if (rel->partial_pathlist == NIL)
+ return;
+
+ /*
+ * The output of Gather is currently always unsorted, so there's only one
+ * partial path of interest: the cheapest one.
+ *
+ * Eventually, we should have a Gather Merge operation that can merge
+ * multiple tuple streams together while preserving their ordering. We
+ * could usefully generate such a path from each partial path that has
+ * non-NIL pathkeys.
+ */
+ cheapest_partial_path = linitial(rel->partial_pathlist);
+ simple_gather_path = (Path *)
+ create_gather_path(root, rel, cheapest_partial_path, NULL);
+ add_path(rel, simple_gather_path);
+}
+
/*
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.
*/
void
cost_seqscan(Path *path, PlannerInfo *root,
- RelOptInfo *baserel, ParamPathInfo *param_info,
- int nworkers)
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
- Cost run_cost = 0;
+ Cost cpu_run_cost;
+ Cost disk_run_cost;
double spc_seq_page_cost;
QualCost qpqual_cost;
Cost cpu_per_tuple;
/*
* disk costs
*/
- run_cost += spc_seq_page_cost * baserel->pages;
+ disk_run_cost = spc_seq_page_cost * baserel->pages;
/* CPU costs */
get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
startup_cost += qpqual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
- run_cost += cpu_per_tuple * baserel->tuples;
+ cpu_run_cost = cpu_per_tuple * baserel->tuples;
- /*
- * Primitive parallel cost model. Assume the leader will do half as much
- * work as a regular worker, because it will also need to read the tuples
- * returned by the workers when they percolate up to the gather node. This
- * is almost certainly not exactly the right way to model this, so this
- * will probably need to be changed at some point...
- */
- if (nworkers > 0)
- run_cost = run_cost / (nworkers + 0.5);
+ /* Adjust costing for parallelism, if used. */
+ if (path->parallel_degree > 0)
+ {
+ double parallel_divisor = path->parallel_degree;
+ double leader_contribution;
+
+ /*
+ * Early experience with parallel query suggests that when there is
+ * only one worker, the leader often makes a very substantial
+ * contribution to executing the parallel portion of the plan, but as
+ * more workers are added, it does less and less, because it's busy
+ * reading tuples from the workers and doing whatever non-parallel
+ * post-processing is needed. By the time we reach 4 workers, the
+ * leader no longer makes a meaningful contribution. Thus, for now,
+ * estimate that the leader spends 30% of its time servicing each
+ * worker, and the remainder executing the parallel plan.
+ */
+ leader_contribution = 1.0 - (0.3 * path->parallel_degree);
+ if (leader_contribution > 0)
+ parallel_divisor += leader_contribution;
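+
+ /*
+ * A worked example of the arithmetic above: with one worker the
+ * divisor is 1 + 0.7 = 1.7; with two, 2 + 0.4 = 2.4; with three,
+ * 3 + 0.1 = 3.1. With four or more workers the leader is assumed
+ * to contribute nothing, so the divisor is just the worker count.
+ */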
+
+ /*
+ * In the case of a parallel plan, the row count needs to represent
+ * the number of tuples processed per worker. Otherwise, higher-level
+ * plan nodes that appear below the gather will be costed incorrectly,
+ * because they'll anticipate receiving more rows than any given copy
+ * will actually get.
+ */
+ path->rows /= parallel_divisor;
+
+ /* The CPU cost is divided among all the workers. */
+ cpu_run_cost /= parallel_divisor;
+
+ /*
+ * It may be possible to amortize some of the I/O cost, but probably
+ * not very much, because most operating systems already do aggressive
+ * prefetching. For now, we assume that the disk run cost can't be
+ * amortized at all.
+ */
+ }
path->startup_cost = startup_cost;
- path->total_cost = startup_cost + run_cost;
+ path->total_cost = startup_cost + cpu_run_cost + disk_run_cost;
}
/*
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
+static void consider_parallel_nestloop(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra);
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
jointype, &extra);
/*
- * 6. Finally, give extensions a chance to manipulate the path list.
+ * 6. Consider gathering partial paths.
+ */
+ generate_gather_paths(root, joinrel);
+
+ /*
+ * 7. Finally, give extensions a chance to manipulate the path list.
*/
if (set_join_pathlist_hook)
set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
}
}
+/*
+ * try_partial_nestloop_path
+ * Consider a partial nestloop join path; if it appears useful, push it into
+ * the joinrel's partial_pathlist via add_partial_path().
+ */
+static void
+try_partial_nestloop_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *pathkeys,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ JoinCostWorkspace workspace;
+
+ /*
+ * If the inner path is parameterized, the parameterization must be fully
+ * satisfied by the proposed outer path. Parameterized partial paths are
+ * not supported. The caller should already have verified that no
+ * extra_lateral_rels are required here.
+ */
+ Assert(bms_is_empty(joinrel->lateral_relids));
+ if (inner_path->param_info != NULL)
+ {
+ Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
+
+ if (!bms_is_subset(inner_paramrels, outer_path->parent->relids))
+ return;
+ }
+
+ /*
+ * Before creating a path, get a quick lower bound on what it is likely
+ * to cost. Bail out right away if it looks terrible.
+ */
+ initial_cost_nestloop(root, &workspace, jointype,
+ outer_path, inner_path,
+ extra->sjinfo, &extra->semifactors);
+ if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
+ return;
+
+ /* Might be good enough to be worth trying, so let's try it. */
+ add_partial_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra->sjinfo,
+ &extra->semifactors,
+ outer_path,
+ inner_path,
+ extra->restrictlist,
+ pathkeys,
+ NULL));
+}
+
/*
* try_mergejoin_path
* Consider a merge join path; if it appears useful, push it into
}
}
+/*
+ * try_partial_hashjoin_path
+ * Consider a partial hashjoin path; if it appears useful, push it into
+ * the joinrel's partial_pathlist via add_partial_path().
+ */
+static void
+try_partial_hashjoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *hashclauses,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ JoinCostWorkspace workspace;
+
+ /*
+ * The inner path must be unparameterized: unlike a nestloop, a hash
+ * join cannot satisfy an inner parameterization from its own outer
+ * path, and parameterized partial paths are not supported in any case.
+ * The caller should already have verified that no extra_lateral_rels
+ * are required here.
+ */
+ Assert(bms_is_empty(joinrel->lateral_relids));
+ if (inner_path->param_info != NULL)
+ {
+ Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
+
+ if (!bms_is_empty(inner_paramrels))
+ return;
+ }
+
+ /*
+ * Before creating a path, get a quick lower bound on what it is likely
+ * to cost. Bail out right away if it looks terrible.
+ */
+ initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
+ outer_path, inner_path,
+ extra->sjinfo, &extra->semifactors);
+ if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
+ return;
+
+ /* Might be good enough to be worth trying, so let's try it. */
+ add_partial_path(joinrel, (Path *)
+ create_hashjoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra->sjinfo,
+ &extra->semifactors,
+ outer_path,
+ inner_path,
+ extra->restrictlist,
+ NULL,
+ hashclauses));
+}
+
/*
* clause_sides_match_join
* Determine whether a join clause is of the right form to use in this join.
break;
}
}
+
+ /*
+ * If the joinrel is parallel-safe and the join type supports nested loops,
+ * we may be able to consider a partial nestloop plan. However, we can't
+ * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
+ * therefore we won't be able to properly guarantee uniqueness. Nor can
+ * we handle extra_lateral_rels, since partial paths must not be
+ * parameterized.
+ */
+ if (joinrel->consider_parallel && nestjoinOK &&
+ save_jointype != JOIN_UNIQUE_OUTER &&
+ bms_is_empty(joinrel->lateral_relids))
+ consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
+ save_jointype, extra);
+}
+
+/*
+ * consider_parallel_nestloop
+ * Try to build partial paths for a joinrel by joining a partial path for the
+ * outer relation to a complete path for the inner relation.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'jointype' is the type of join to do
+ * 'extra' contains additional input values
+ */
+static void
+consider_parallel_nestloop(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ ListCell *lc1;
+
+ foreach(lc1, outerrel->partial_pathlist)
+ {
+ Path *outerpath = (Path *) lfirst(lc1);
+ List *pathkeys;
+ ListCell *lc2;
+
+ /* Figure out what useful ordering any paths we create will have. */
+ pathkeys = build_join_pathkeys(root, joinrel, jointype,
+ outerpath->pathkeys);
+
+ /*
+ * Try the cheapest parameterized paths; only those which will
+ * produce an unparameterized path when joined to this outerrel
+ * will survive try_partial_nestloop_path. The cheapest
+ * unparameterized path is also in this list.
+ */
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc2);
+
+ /* Can't join to an inner path that is not parallel-safe */
+ if (!innerpath->parallel_safe)
+ continue;
+
+ /*
+ * Like match_unsorted_outer, we only consider a single nestloop
+ * path when the jointype is JOIN_UNIQUE_INNER. But we have to scan
+ * cheapest_parameterized_paths to find the one we want to consider,
+ * because cheapest_total_path might not be parallel-safe.
+ */
+ if (jointype == JOIN_UNIQUE_INNER)
+ {
+ if (!bms_is_empty(PATH_REQ_OUTER(innerpath)))
+ continue;
+ innerpath = (Path *) create_unique_path(root, innerrel,
+ innerpath, extra->sjinfo);
+ }
+
+ try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
+ pathkeys, jointype, extra);
+ }
+ }
}
/*
}
}
}
+
+ /*
+ * If the joinrel is parallel-safe, we may be able to consider a
+ * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
+ * because the outer path will be partial, and therefore we won't be
+ * able to properly guarantee uniqueness. Also, the resulting path
+ * must not be parameterized.
+ */
+ if (joinrel->consider_parallel && jointype != JOIN_UNIQUE_OUTER &&
+ outerrel->partial_pathlist != NIL &&
+ bms_is_empty(joinrel->lateral_relids))
+ {
+ Path *cheapest_partial_outer;
+ Path *cheapest_safe_inner = NULL;
+
+ cheapest_partial_outer =
+ (Path *) linitial(outerrel->partial_pathlist);
+
+ /*
+ * Normally, given that the joinrel is parallel-safe, the cheapest
+ * total inner path will also be parallel-safe, but if not, we'll
+ * have to search cheapest_parameterized_paths for the cheapest
+ * unparameterized inner path.
+ */
+ if (cheapest_total_inner->parallel_safe)
+ cheapest_safe_inner = cheapest_total_inner;
+ else
+ {
+ ListCell *lc;
+
+ foreach(lc, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc);
+
+ if (innerpath->parallel_safe &&
+ bms_is_empty(PATH_REQ_OUTER(innerpath)))
+ {
+ cheapest_safe_inner = innerpath;
+ break;
+ }
+ }
+ }
+
+ if (cheapest_safe_inner != NULL)
+ try_partial_hashjoin_path(root, joinrel,
+ cheapest_partial_outer,
+ cheapest_safe_inner,
+ hashclauses, jointype, extra);
+ }
}
}
/* Evict any previously chosen paths */
rel->pathlist = NIL;
+ rel->partial_pathlist = NIL;
/* Set up the dummy path */
- add_path(rel, (Path *) create_append_path(rel, NIL, NULL));
+ add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0));
/* Set or update cheapest_total_path and related fields */
set_cheapest(rel);
gather_plan = make_gather(subplan->targetlist,
NIL,
- best_path->num_workers,
+ best_path->path.parallel_degree,
best_path->single_copy,
subplan);
/* The only path for it is a trivial Result path */
add_path(final_rel, (Path *)
- create_result_path((List *) parse->jointree->quals));
+ create_result_path(final_rel,
+ (List *) parse->jointree->quals));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(final_rel);
* The cheapest_parameterized_paths list collects all parameterized paths
* that have survived the add_path() tournament for this relation. (Since
* add_path ignores pathkeys for a parameterized path, these will be paths
- * that have best cost or best row count for their parameterization.)
+ * that have best cost or best row count for their parameterization. We
+ * may also have both a parallel-safe and a non-parallel-safe path for
+ * the same parameterization in some cases, but this should be
+ * relatively rare since, most typically, all paths for the same relation
+ * will be parallel-safe or none of them will.)
+ *
* cheapest_parameterized_paths always includes the cheapest-total
* unparameterized path, too, if there is one; the users of that list find
* it more convenient if that's included.
* A path is worthy if it has a better sort order (better pathkeys) or
* cheaper cost (on either dimension), or generates fewer rows, than any
* existing path that has the same or superset parameterization rels.
+ * We also consider parallel-safe paths more worthy than others.
*
* We also remove from the rel's pathlist any old paths that are dominated
* by new_path --- that is, new_path is cheaper, at least as well ordered,
- * generates no more rows, and requires no outer rels not required by the
- * old path.
+ * generates no more rows, requires no outer rels not required by the old
+ * path, and is no less parallel-safe.
*
* In most cases, a path with a superset parameterization will generate
* fewer rows (since it has more join clauses to apply), so that those two
{
if ((outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET1) &&
- new_path->rows <= old_path->rows)
+ new_path->rows <= old_path->rows &&
+ new_path->parallel_safe >= old_path->parallel_safe)
remove_old = true; /* new dominates old */
}
else if (keyscmp == PATHKEYS_BETTER2)
{
if ((outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET2) &&
- new_path->rows >= old_path->rows)
+ new_path->rows >= old_path->rows &&
+ new_path->parallel_safe <= old_path->parallel_safe)
accept_new = false; /* old dominates new */
}
else /* keyscmp == PATHKEYS_EQUAL */
/*
* Same pathkeys and outer rels, and fuzzily
* the same cost, so keep just one; to decide
- * which, first check rows and then do a fuzzy
- * cost comparison with very small fuzz limit.
- * (We used to do an exact cost comparison,
- * but that results in annoying
- * platform-specific plan variations due to
- * roundoff in the cost estimates.) If things
- * are still tied, arbitrarily keep only the
- * old path. Notice that we will keep only
- * the old path even if the less-fuzzy
- * comparison decides the startup and total
- * costs compare differently.
+ * which, first check parallel-safety, then
+ * rows, then do a fuzzy cost comparison with
+ * very small fuzz limit. (We used to do an
+ * exact cost comparison, but that results in
+ * annoying platform-specific plan variations
+ * due to roundoff in the cost estimates.) If
+ * things are still tied, arbitrarily keep
+ * only the old path. Notice that we will
+ * keep only the old path even if the
+ * less-fuzzy comparison decides the startup
+ * and total costs compare differently.
*/
- if (new_path->rows < old_path->rows)
+ if (new_path->parallel_safe >
+ old_path->parallel_safe)
+ remove_old = true; /* new dominates old */
+ else if (new_path->parallel_safe <
+ old_path->parallel_safe)
+ accept_new = false; /* old dominates new */
+ else if (new_path->rows < old_path->rows)
remove_old = true; /* new dominates old */
else if (new_path->rows > old_path->rows)
accept_new = false; /* old dominates new */
* dominates new */
}
else if (outercmp == BMS_SUBSET1 &&
- new_path->rows <= old_path->rows)
+ new_path->rows <= old_path->rows &&
+ new_path->parallel_safe >= old_path->parallel_safe)
remove_old = true; /* new dominates old */
else if (outercmp == BMS_SUBSET2 &&
- new_path->rows >= old_path->rows)
+ new_path->rows >= old_path->rows &&
+ new_path->parallel_safe <= old_path->parallel_safe)
accept_new = false; /* old dominates new */
/* else different parameterizations, keep both */
}
PATH_REQ_OUTER(old_path));
if ((outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET1) &&
- new_path->rows <= old_path->rows)
+ new_path->rows <= old_path->rows &&
+ new_path->parallel_safe >= old_path->parallel_safe)
remove_old = true; /* new dominates old */
}
break;
PATH_REQ_OUTER(old_path));
if ((outercmp == BMS_EQUAL ||
outercmp == BMS_SUBSET2) &&
- new_path->rows >= old_path->rows)
+ new_path->rows >= old_path->rows &&
+ new_path->parallel_safe <= old_path->parallel_safe)
accept_new = false; /* old dominates new */
}
break;
return true;
}
+/*
+ * add_partial_path
+ * Like add_path, our goal here is to consider whether a path is worthy
+ * of being kept around, but the considerations here are a bit different.
+ * A partial path is one which can be executed in any number of workers in
+ * parallel such that each worker will generate a subset of the path's
+ * overall result.
+ *
+ * We don't generate parameterized partial paths for several reasons. Most
+ * importantly, they're not safe to execute, because there's nothing to
+ * make sure that a parallel scan within the parameterized portion of the
+ * plan is running with the same value in every worker at the same time.
+ * Fortunately, it seems unlikely to be worthwhile anyway, because having
+ * each worker scan the entire outer relation and a subset of the inner
+ * relation will generally be a terrible plan. The inner (parameterized)
+ * side of the plan will be small anyway. There could be rare cases where
+ * this wins big - e.g. if join order constraints put a 1-row relation on
+ * the outer side of the topmost join with a parameterized plan on the inner
+ * side - but we'll have to be content not to handle such cases until somebody
+ * builds an executor infrastructure that can cope with them.
+ *
+ * Because we don't consider parameterized paths here, we also don't
+ * need to consider the row counts as a measure of quality: every path will
+ * produce the same number of rows. Neither do we need to consider startup
+ * costs: parallelism is only used for plans that will be run to completion.
+ * Therefore, this routine is much simpler than add_path: it needs to
+ * consider only pathkeys and total cost.
+ */
+void
+add_partial_path(RelOptInfo *parent_rel, Path *new_path)
+{
+ bool accept_new = true; /* unless we find a superior old path */
+ ListCell *insert_after = NULL; /* where to insert new item */
+ ListCell *p1;
+ ListCell *p1_prev;
+ ListCell *p1_next;
+
+ /* Check for query cancel. */
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * As in add_path, throw out any paths which are dominated by the new
+ * path, but throw out the new path if some existing path dominates it.
+ */
+ p1_prev = NULL;
+ for (p1 = list_head(parent_rel->partial_pathlist); p1 != NULL;
+ p1 = p1_next)
+ {
+ Path *old_path = (Path *) lfirst(p1);
+ bool remove_old = false; /* unless new proves superior */
+ PathKeysComparison keyscmp;
+
+ p1_next = lnext(p1);
+
+ /* Compare pathkeys. */
+ keyscmp = compare_pathkeys(new_path->pathkeys, old_path->pathkeys);
+
+ /* Unless pathkeys are incomparable, keep just one of the two paths. */
+ if (keyscmp != PATHKEYS_DIFFERENT)
+ {
+ if (new_path->total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
+ {
+ /* New path costs more; keep it only if pathkeys are better. */
+ if (keyscmp != PATHKEYS_BETTER1)
+ accept_new = false;
+ }
+ else if (old_path->total_cost > new_path->total_cost
+ * STD_FUZZ_FACTOR)
+ {
+ /* Old path costs more; keep it only if pathkeys are better. */
+ if (keyscmp != PATHKEYS_BETTER2)
+ remove_old = true;
+ }
+ else if (keyscmp == PATHKEYS_BETTER1)
+ {
+ /* Costs are about the same, new path has better pathkeys. */
+ remove_old = true;
+ }
+ else if (keyscmp == PATHKEYS_BETTER2)
+ {
+ /* Costs are about the same, old path has better pathkeys. */
+ accept_new = false;
+ }
+ else if (old_path->total_cost > new_path->total_cost * 1.0000000001)
+ {
+ /* Pathkeys are the same, and the old path costs more. */
+ remove_old = true;
+ }
+ else
+ {
+ /*
+ * Pathkeys are the same, and new path isn't materially
+ * cheaper.
+ */
+ accept_new = false;
+ }
+ }
+
+ /*
+ * Remove current element from partial_pathlist if dominated by new.
+ */
+ if (remove_old)
+ {
+ parent_rel->partial_pathlist =
+ list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
+ /* add_path has a special case for IndexPath; we don't need it */
+ Assert(!IsA(old_path, IndexPath));
+ pfree(old_path);
+ /* p1_prev does not advance */
+ }
+ else
+ {
+ /* new belongs after this old path if it has cost >= old's */
+ if (new_path->total_cost >= old_path->total_cost)
+ insert_after = p1;
+ /* p1_prev advances */
+ p1_prev = p1;
+ }
+
+ /*
+ * If we found an old path that dominates new_path, we can quit
+ * scanning the partial_pathlist; we will not add new_path, and we
+ * assume new_path cannot dominate any later path.
+ */
+ if (!accept_new)
+ break;
+ }
+
+ if (accept_new)
+ {
+ /* Accept the new path: insert it at proper place */
+ if (insert_after)
+ lappend_cell(parent_rel->partial_pathlist, insert_after, new_path);
+ else
+ parent_rel->partial_pathlist =
+ lcons(new_path, parent_rel->partial_pathlist);
+ }
+ else
+ {
+ /* add_path has a special case for IndexPath; we don't need it */
+ Assert(!IsA(new_path, IndexPath));
+ /* Reject and recycle the new path */
+ pfree(new_path);
+ }
+}
+
+/*
+ * add_partial_path_precheck
+ * Check whether a proposed new partial path could possibly get accepted.
+ *
+ * Unlike add_path_precheck, we can ignore startup cost and parameterization,
+ * since they don't matter for partial paths (see add_partial_path). But
+ * we do want to make sure we don't add a partial path if there's already
+ * a complete path that dominates it, since in that case the proposed path
+ * is surely a loser.
+ */
+bool
+add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost,
+ List *pathkeys)
+{
+ ListCell *p1;
+
+ /*
+ * Our goal here is twofold. First, we want to find out whether this path
+ * is clearly inferior to some existing partial path. If so, we want to
+ * reject it immediately. Second, we want to find out whether this path
+ * is clearly superior to some existing partial path -- at least, modulo
+ * final cost computations. If so, we definitely want to consider it.
+ *
+ * Unlike add_path(), we always compare pathkeys here. This is because we
+ * expect partial_pathlist to be very short, and getting a definitive
+ * answer at this stage avoids the need to call add_path_precheck.
+ */
+ foreach(p1, parent_rel->partial_pathlist)
+ {
+ Path *old_path = (Path *) lfirst(p1);
+ PathKeysComparison keyscmp;
+
+ keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
+ if (keyscmp != PATHKEYS_DIFFERENT)
+ {
+ if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
+ keyscmp != PATHKEYS_BETTER1)
+ return false;
+ if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
+ keyscmp != PATHKEYS_BETTER2)
+ return true;
+ }
+ }
+
+ /*
+ * This path is neither clearly inferior to an existing partial path nor
+ * clearly good enough that it might replace one. Compare it to
+ * non-parallel plans. If it loses even before accounting for the cost of
+ * the Gather node, we should definitely reject it.
+ *
+ * Note that we pass the total_cost to add_path_precheck twice. This is
+ * because it's never advantageous to consider the startup cost of a
+ * partial path; the resulting plans, if run in parallel, will be run to
+ * completion.
+ */
+ if (!add_path_precheck(parent_rel, total_cost, total_cost, pathkeys,
+ NULL))
+ return false;
+
+ return true;
+}
+
/*****************************************************************************
* PATH NODE CREATION ROUTINES
*/
Path *
create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
- Relids required_outer, int nworkers)
+ Relids required_outer, int parallel_degree)
{
Path *pathnode = makeNode(Path);
pathnode->parent = rel;
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
- pathnode->parallel_aware = nworkers > 0 ? true : false;
+ pathnode->parallel_aware = parallel_degree > 0 ? true : false;
+ pathnode->parallel_safe = rel->consider_parallel;
+ pathnode->parallel_degree = parallel_degree;
pathnode->pathkeys = NIL; /* seqscan has unordered result */
- cost_seqscan(pathnode, root, rel, pathnode->param_info, nworkers);
+ cost_seqscan(pathnode, root, rel, pathnode->param_info);
return pathnode;
}
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
+ pathnode->parallel_safe = rel->consider_parallel;
+ pathnode->parallel_degree = 0;
pathnode->pathkeys = NIL; /* samplescan has unordered result */
cost_samplescan(pathnode, root, rel, pathnode->param_info);
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_degree = 0;
pathnode->path.pathkeys = pathkeys;
/* Convert clauses to indexquals the executor can handle */
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = bitmapqual->parallel_safe;
+ pathnode->path.parallel_degree = 0;
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapqual = bitmapqual;
pathnode->path.pathtype = T_BitmapAnd;
pathnode->path.parent = rel;
pathnode->path.param_info = NULL; /* not used in bitmap trees */
+
+ /*
+ * Currently, a BitmapHeapPath, BitmapAndPath, or BitmapOrPath will be
+ * parallel-safe if and only if rel->consider_parallel is set. So, we can
+ * set the flag for this path based only on the relation-level flag,
+ * without actually iterating over the list of children.
+ */
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_degree = 0;
+
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapquals = bitmapquals;
pathnode->path.pathtype = T_BitmapOr;
pathnode->path.parent = rel;
pathnode->path.param_info = NULL; /* not used in bitmap trees */
+
+ /*
+ * Currently, a BitmapHeapPath, BitmapAndPath, or BitmapOrPath will be
+ * parallel-safe if and only if rel->consider_parallel is set. So, we can
+ * set the flag for this path based only on the relation-level flag,
+ * without actually iterating over the list of children.
+ */
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_degree = 0;
+
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapquals = bitmapquals;
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_degree = 0;
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->tidquals = tidquals;
* Note that we must handle subpaths = NIL, representing a dummy access path.
*/
AppendPath *
-create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer)
+create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer,
+ int parallel_degree)
{
AppendPath *pathnode = makeNode(AppendPath);
ListCell *l;
pathnode->path.param_info = get_appendrel_parampathinfo(rel,
required_outer);
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_degree = parallel_degree;
pathnode->path.pathkeys = NIL; /* result is always considered
* unsorted */
pathnode->subpaths = subpaths;
if (l == list_head(subpaths)) /* first node? */
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost += subpath->total_cost;
+ pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
+ subpath->parallel_safe;
/* All child paths must have same parameterization */
Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
pathnode->path.param_info = get_appendrel_parampathinfo(rel,
required_outer);
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_degree = 0;
pathnode->path.pathkeys = pathkeys;
pathnode->subpaths = subpaths;
Path *subpath = (Path *) lfirst(l);
pathnode->path.rows += subpath->rows;
+ pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
+ subpath->parallel_safe;
if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
{
* This is only used for the case of a query with an empty jointree.
*/
ResultPath *
-create_result_path(List *quals)
+create_result_path(RelOptInfo *rel, List *quals)
{
ResultPath *pathnode = makeNode(ResultPath);
pathnode->path.parent = NULL;
pathnode->path.param_info = NULL; /* there are no other rels... */
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_degree = 0;
pathnode->path.pathkeys = NIL;
pathnode->quals = quals;
pathnode->path.parent = rel;
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = subpath->parallel_safe;
+ pathnode->path.parallel_degree = 0;
pathnode->path.pathkeys = subpath->pathkeys;
pathnode->subpath = subpath;
pathnode->path.parent = rel;
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = subpath->parallel_safe;
+ pathnode->path.parallel_degree = 0;
/*
* Assume the output is unsorted, since we don't necessarily have pathkeys
*/
GatherPath *
create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
- Relids required_outer, int nworkers)
+ Relids required_outer)
{
GatherPath *pathnode = makeNode(GatherPath);
+ Assert(subpath->parallel_safe);
+
pathnode->path.pathtype = T_Gather;
pathnode->path.parent = rel;
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = false;
+ pathnode->path.parallel_degree = subpath->parallel_degree;
pathnode->path.pathkeys = NIL; /* Gather has unordered result */
pathnode->subpath = subpath;
- pathnode->num_workers = nworkers;
+ pathnode->single_copy = false;
+
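+ /*
+ * A subpath with parallel_degree zero isn't itself parallel, so ask
+ * for one worker to run a single copy of it. Since only one process
+ * then produces tuples, the subpath's ordering survives the Gather,
+ * and we can keep its pathkeys.
+ */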
+ if (pathnode->path.parallel_degree == 0)
+ {
+ pathnode->path.parallel_degree = 1;
+ pathnode->path.pathkeys = subpath->pathkeys;
+ pathnode->single_copy = true;
+ }
cost_gather(pathnode, root, rel, pathnode->path.param_info);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
+ pathnode->parallel_safe = rel->consider_parallel;
+ pathnode->parallel_degree = 0;
pathnode->pathkeys = pathkeys;
cost_subqueryscan(pathnode, root, rel, pathnode->param_info);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
+ pathnode->parallel_safe = rel->consider_parallel;
+ pathnode->parallel_degree = 0;
pathnode->pathkeys = pathkeys;
cost_functionscan(pathnode, root, rel, pathnode->param_info);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
+ pathnode->parallel_safe = rel->consider_parallel;
+ pathnode->parallel_degree = 0;
pathnode->pathkeys = NIL; /* result is always unordered */
cost_valuesscan(pathnode, root, rel, pathnode->param_info);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
+ pathnode->parallel_safe = rel->consider_parallel;
+ pathnode->parallel_degree = 0;
pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
cost_ctescan(pathnode, root, rel, pathnode->param_info);
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->parallel_aware = false;
+ pathnode->parallel_safe = rel->consider_parallel;
+ pathnode->parallel_degree = 0;
pathnode->pathkeys = NIL; /* result is always unordered */
/* Cost is the same as for a regular CTE scan */
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_degree = 0;
pathnode->path.rows = rows;
pathnode->path.startup_cost = startup_cost;
pathnode->path.total_cost = total_cost;
required_outer,
&restrict_clauses);
pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = joinrel->consider_parallel &&
+ outer_path->parallel_safe && inner_path->parallel_safe;
+ /* This is a foolish way to estimate parallel_degree, but for now... */
+ pathnode->path.parallel_degree = outer_path->parallel_degree;
pathnode->path.pathkeys = pathkeys;
pathnode->jointype = jointype;
pathnode->outerjoinpath = outer_path;
required_outer,
&restrict_clauses);
pathnode->jpath.path.parallel_aware = false;
+ pathnode->jpath.path.parallel_safe = joinrel->consider_parallel &&
+ outer_path->parallel_safe && inner_path->parallel_safe;
+ pathnode->jpath.path.parallel_degree = 0;
pathnode->jpath.path.pathkeys = pathkeys;
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
required_outer,
&restrict_clauses);
pathnode->jpath.path.parallel_aware = false;
+ pathnode->jpath.path.parallel_safe = joinrel->consider_parallel &&
+ outer_path->parallel_safe && inner_path->parallel_safe;
+ /* This is a foolish way to estimate parallel_degree, but for now... */
+ pathnode->jpath.path.parallel_degree = outer_path->parallel_degree;
/*
* A hashjoin never has pathkeys, since its output ordering is
rel->reltargetlist = NIL;
rel->pathlist = NIL;
rel->ppilist = NIL;
+ rel->partial_pathlist = NIL;
rel->cheapest_startup_path = NULL;
rel->cheapest_total_path = NULL;
rel->cheapest_unique_path = NULL;
joinrel->reltargetlist = NIL;
joinrel->pathlist = NIL;
joinrel->ppilist = NIL;
+ joinrel->partial_pathlist = NIL;
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
joinrel->cheapest_unique_path = NULL;
List *reltargetlist; /* Vars to be output by scan of relation */
List *pathlist; /* Path structures */
List *ppilist; /* ParamPathInfos used in pathlist */
+ List *partial_pathlist; /* partial Paths */
struct Path *cheapest_startup_path;
struct Path *cheapest_total_path;
struct Path *cheapest_unique_path;
RelOptInfo *parent; /* the relation this path can build */
ParamPathInfo *param_info; /* parameterization info, or NULL if none */
bool parallel_aware; /* engage parallel-aware logic? */
+ bool parallel_safe; /* OK to use as part of parallel plan? */
+ int parallel_degree; /* desired parallel degree; 0 = not parallel */
/* estimated size/costs for path (see costsize.c for more info) */
double rows; /* estimated number of result tuples */
{
Path path;
Path *subpath; /* path for each worker */
- int num_workers; /* number of workers sought to help */
bool single_copy; /* path must not be executed >1x */
} GatherPath;
extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
- ParamPathInfo *param_info, int nworkers);
+ ParamPathInfo *param_info);
extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info);
extern void cost_index(IndexPath *path, PlannerInfo *root,
extern bool add_path_precheck(RelOptInfo *parent_rel,
Cost startup_cost, Cost total_cost,
List *pathkeys, Relids required_outer);
+extern void add_partial_path(RelOptInfo *parent_rel, Path *new_path);
+extern bool add_partial_path_precheck(RelOptInfo *parent_rel,
+ Cost total_cost, List *pathkeys);
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
- Relids required_outer, int nworkers);
+ Relids required_outer, int parallel_degree);
extern Path *create_samplescan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer);
extern IndexPath *create_index_path(PlannerInfo *root,
extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
List *tidquals, Relids required_outer);
extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths,
- Relids required_outer);
+ Relids required_outer, int parallel_degree);
extern MergeAppendPath *create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
List *pathkeys,
Relids required_outer);
-extern ResultPath *create_result_path(List *quals);
+extern ResultPath *create_result_path(RelOptInfo *rel, List *quals);
extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath, SpecialJoinInfo *sjinfo);
extern GatherPath *create_gather_path(PlannerInfo *root,
- RelOptInfo *rel, Path *subpath, Relids required_outer,
- int nworkers);
+ RelOptInfo *rel, Path *subpath, Relids required_outer);
extern Path *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
List *pathkeys, Relids required_outer);
extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
List *initial_rels);
+extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
+
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
#endif