]> granicus.if.org Git - postgresql/commitdiff
Add optimizer and executor support for parallel index scans.
author Robert Haas <rhaas@postgresql.org>
Wed, 15 Feb 2017 18:53:24 +0000 (13:53 -0500)
committer Robert Haas <rhaas@postgresql.org>
Wed, 15 Feb 2017 18:53:24 +0000 (13:53 -0500)
In combination with 569174f1be92be93f5366212cc46960d28a5c5cd, which
taught the btree AM how to perform parallel index scans, this allows
parallel index scan plans on btree indexes.  This infrastructure
should be general enough to support parallel index scans for other
index AMs as well, if someone updates them to support parallel
scans.

Amit Kapila, reviewed and tested by Anastasia Lubennikova, Tushar
Ahuja, Haribabu Kommi, and me.

29 files changed:
contrib/bloom/blcost.c
contrib/bloom/bloom.h
contrib/bloom/blutils.c
doc/src/sgml/indexam.sgml
src/backend/access/brin/brin.c
src/backend/access/gin/ginutil.c
src/backend/access/gist/gist.c
src/backend/access/hash/hash.c
src/backend/access/nbtree/nbtree.c
src/backend/access/spgist/spgutils.c
src/backend/executor/execParallel.c
src/backend/executor/nodeIndexscan.c
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/plancat.c
src/backend/utils/adt/selfuncs.c
src/include/access/amapi.h
src/include/executor/nodeIndexscan.h
src/include/nodes/execnodes.h
src/include/nodes/relation.h
src/include/optimizer/cost.h
src/include/optimizer/pathnode.h
src/include/optimizer/paths.h
src/include/utils/index_selfuncs.h
src/test/regress/expected/select_parallel.out
src/test/regress/sql/select_parallel.sql

index 98a2228edf6a0db8c7341ab9c143b95049041382..ba39f627fd26d75ba2e71c098bb904c738c02145 100644 (file)
@@ -24,7 +24,8 @@
 void
 blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                           Cost *indexStartupCost, Cost *indexTotalCost,
-                          Selectivity *indexSelectivity, double *indexCorrelation)
+                          Selectivity *indexSelectivity, double *indexCorrelation,
+                          double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *qinfos;
@@ -45,4 +46,5 @@ blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
index 39d8d05c5d472214bd44c54a941f8cfaf989188f..0cfe49aad8228cc60d0e4086759c2467a900d3d1 100644 (file)
@@ -208,6 +208,6 @@ extern bytea *bloptions(Datum reloptions, bool validate);
 extern void blcostestimate(PlannerInfo *root, IndexPath *path,
                           double loop_count, Cost *indexStartupCost,
                           Cost *indexTotalCost, Selectivity *indexSelectivity,
-                          double *indexCorrelation);
+                          double *indexCorrelation, double *indexPages);
 
 #endif
index 858798db85c6693e290dab64b01f1805a3db5ad3..f2eda67e0aeb87c5e61cdb93617e3d86938276b6 100644 (file)
@@ -119,6 +119,7 @@ blhandler(PG_FUNCTION_ARGS)
        amroutine->amstorage = false;
        amroutine->amclusterable = false;
        amroutine->ampredlocks = false;
+       amroutine->amcanparallel = false;
        amroutine->amkeytype = InvalidOid;
 
        amroutine->ambuild = blbuild;
index 9afd7f64179629ab1b901ea082398951e94fdbbe..401b11598eb37528a1a87c19222bacbf0305f714 100644 (file)
@@ -110,6 +110,8 @@ typedef struct IndexAmRoutine
     bool        amclusterable;
     /* does AM handle predicate locks? */
     bool        ampredlocks;
+    /* does AM support parallel scan? */
+    bool        amcanparallel;
     /* type of data stored in index, or InvalidOid if variable */
     Oid         amkeytype;
 
index 4ff046b4b01e689f61f085c169f150dc402f3725..b22563bf7c4cacde9e38be1021acc1c2a4160476 100644 (file)
@@ -93,6 +93,7 @@ brinhandler(PG_FUNCTION_ARGS)
        amroutine->amstorage = true;
        amroutine->amclusterable = false;
        amroutine->ampredlocks = false;
+       amroutine->amcanparallel = false;
        amroutine->amkeytype = InvalidOid;
 
        amroutine->ambuild = brinbuild;
index a98d4fc3973818a2186dfe25a822a8defedb446b..d03d59da6a73f9ae5cc667082da723f8042360ee 100644 (file)
@@ -50,6 +50,7 @@ ginhandler(PG_FUNCTION_ARGS)
        amroutine->amstorage = true;
        amroutine->amclusterable = false;
        amroutine->ampredlocks = false;
+       amroutine->amcanparallel = false;
        amroutine->amkeytype = InvalidOid;
 
        amroutine->ambuild = ginbuild;
index 96ead531ea3ba345eeaae797889780d169274182..6593771361c31fe4970eeafecec0e5069c422fb1 100644 (file)
@@ -71,6 +71,7 @@ gisthandler(PG_FUNCTION_ARGS)
        amroutine->amstorage = true;
        amroutine->amclusterable = true;
        amroutine->ampredlocks = false;
+       amroutine->amcanparallel = false;
        amroutine->amkeytype = InvalidOid;
 
        amroutine->ambuild = gistbuild;
index bca77a80c3b4592b988c3adb8019576b1525429f..24510e78f5fd39df131ece29eb2b2b8ed723acbe 100644 (file)
@@ -67,6 +67,7 @@ hashhandler(PG_FUNCTION_ARGS)
        amroutine->amstorage = false;
        amroutine->amclusterable = false;
        amroutine->ampredlocks = false;
+       amroutine->amcanparallel = false;
        amroutine->amkeytype = INT4OID;
 
        amroutine->ambuild = hashbuild;
index cbc575d5cf2e8b1149f542067e812d8590b0a0c9..775f2ff1f8c343232681aab6a79f7364e09a1684 100644 (file)
@@ -140,6 +140,7 @@ bthandler(PG_FUNCTION_ARGS)
        amroutine->amstorage = false;
        amroutine->amclusterable = true;
        amroutine->ampredlocks = true;
+       amroutine->amcanparallel = true;
        amroutine->amkeytype = InvalidOid;
 
        amroutine->ambuild = btbuild;
index 78846bec6665ab3ad52bb5116ab17a80f7f8b72f..e57ac49c6b4f2461294b8e60b94b00d70d5a26b2 100644 (file)
@@ -49,6 +49,7 @@ spghandler(PG_FUNCTION_ARGS)
        amroutine->amstorage = false;
        amroutine->amclusterable = false;
        amroutine->ampredlocks = false;
+       amroutine->amcanparallel = false;
        amroutine->amkeytype = InvalidOid;
 
        amroutine->ambuild = spgbuild;
index 784dbaf590ef8496fd7dde6d9511317630e1570e..98d4f1eca76cd09d85550f4d23882296d49c87d7 100644 (file)
@@ -28,6 +28,7 @@
 #include "executor/nodeCustom.h"
 #include "executor/nodeForeignscan.h"
 #include "executor/nodeSeqscan.h"
+#include "executor/nodeIndexscan.h"
 #include "executor/tqueue.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/planmain.h"
@@ -197,6 +198,10 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
                                ExecSeqScanEstimate((SeqScanState *) planstate,
                                                                        e->pcxt);
                                break;
+                       case T_IndexScanState:
+                               ExecIndexScanEstimate((IndexScanState *) planstate,
+                                                                         e->pcxt);
+                               break;
                        case T_ForeignScanState:
                                ExecForeignScanEstimate((ForeignScanState *) planstate,
                                                                                e->pcxt);
@@ -249,6 +254,10 @@ ExecParallelInitializeDSM(PlanState *planstate,
                                ExecSeqScanInitializeDSM((SeqScanState *) planstate,
                                                                                 d->pcxt);
                                break;
+                       case T_IndexScanState:
+                               ExecIndexScanInitializeDSM((IndexScanState *) planstate,
+                                                                                  d->pcxt);
+                               break;
                        case T_ForeignScanState:
                                ExecForeignScanInitializeDSM((ForeignScanState *) planstate,
                                                                                         d->pcxt);
@@ -725,6 +734,9 @@ ExecParallelInitializeWorker(PlanState *planstate, shm_toc *toc)
                        case T_SeqScanState:
                                ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc);
                                break;
+                       case T_IndexScanState:
+                               ExecIndexScanInitializeWorker((IndexScanState *) planstate, toc);
+                               break;
                        case T_ForeignScanState:
                                ExecForeignScanInitializeWorker((ForeignScanState *) planstate,
                                                                                                toc);
index 5734550d2c78c7030d8a20a2bfc672794b36a86f..0a9dfdbaf3054c1ea2b5680fa2d8d2fba2d4a315 100644 (file)
@@ -22,6 +22,9 @@
  *             ExecEndIndexScan                releases all storage.
  *             ExecIndexMarkPos                marks scan position.
  *             ExecIndexRestrPos               restores scan position.
+ *             ExecIndexScanEstimate   estimates DSM space needed for parallel index scan
+ *             ExecIndexScanInitializeDSM initialize DSM for parallel indexscan
+ *             ExecIndexScanInitializeWorker attach to DSM info in parallel worker
  */
 #include "postgres.h"
 
@@ -514,6 +517,18 @@ ExecIndexScan(IndexScanState *node)
 void
 ExecReScanIndexScan(IndexScanState *node)
 {
+       bool            reset_parallel_scan = true;
+
+       /*
+        * If we are here to just update the scan keys, then don't reset parallel
+        * scan.  We don't want each of the participating processes in the parallel
+        * scan to update the shared parallel scan state at the start of the scan.
+        * It is quite possible that one of the participants has already begun
+        * scanning the index when another has yet to start it.
+        */
+       if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
+               reset_parallel_scan = false;
+
        /*
         * If we are doing runtime key calculations (ie, any of the index key
         * values weren't simple Consts), compute the new key values.  But first,
@@ -539,10 +554,21 @@ ExecReScanIndexScan(IndexScanState *node)
                        reorderqueue_pop(node);
        }
 
-       /* reset index scan */
-       index_rescan(node->iss_ScanDesc,
-                                node->iss_ScanKeys, node->iss_NumScanKeys,
-                                node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+       /*
+        * Reset (parallel) index scan.  For parallel-aware nodes, the scan
+        * descriptor is initialized during actual execution of node and we can
+        * reach here before that (e.g., during execution of a nested loop join).  So,
+        * avoid updating the scan descriptor at that time.
+        */
+       if (node->iss_ScanDesc)
+       {
+               index_rescan(node->iss_ScanDesc,
+                                        node->iss_ScanKeys, node->iss_NumScanKeys,
+                                        node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+
+               if (reset_parallel_scan && node->iss_ScanDesc->parallel_scan)
+                       index_parallelrescan(node->iss_ScanDesc);
+       }
        node->iss_ReachedEnd = false;
 
        ExecScanReScan(&node->ss);
@@ -1013,22 +1039,29 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
        }
 
        /*
-        * Initialize scan descriptor.
+        * for parallel-aware node, we initialize the scan descriptor after
+        * initializing the shared memory for parallel execution.
         */
-       indexstate->iss_ScanDesc = index_beginscan(currentRelation,
-                                                                                          indexstate->iss_RelationDesc,
-                                                                                          estate->es_snapshot,
-                                                                                          indexstate->iss_NumScanKeys,
+       if (!node->scan.plan.parallel_aware)
+       {
+               /*
+                * Initialize scan descriptor.
+                */
+               indexstate->iss_ScanDesc = index_beginscan(currentRelation,
+                                                                                               indexstate->iss_RelationDesc,
+                                                                                                  estate->es_snapshot,
+                                                                                                indexstate->iss_NumScanKeys,
                                                                                         indexstate->iss_NumOrderByKeys);
 
-       /*
-        * If no run-time keys to calculate, go ahead and pass the scankeys to the
-        * index AM.
-        */
-       if (indexstate->iss_NumRuntimeKeys == 0)
-               index_rescan(indexstate->iss_ScanDesc,
-                                        indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
+               /*
+                * If no run-time keys to calculate, go ahead and pass the scankeys to
+                * the index AM.
+                */
+               if (indexstate->iss_NumRuntimeKeys == 0)
+                       index_rescan(indexstate->iss_ScanDesc,
+                                          indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
                                indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys);
+       }
 
        /*
         * all done.
@@ -1590,3 +1623,91 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
        else if (n_array_keys != 0)
                elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
 }
+
+/* ----------------------------------------------------------------
+ *                                             Parallel Scan Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ *             ExecIndexScanEstimate
+ *
+ *             estimates the space required to serialize indexscan node.
+ * ----------------------------------------------------------------
+ */
+void
+ExecIndexScanEstimate(IndexScanState *node,
+                                         ParallelContext *pcxt)
+{
+       EState     *estate = node->ss.ps.state;
+
+       node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc,
+                                                                                                        estate->es_snapshot);
+       shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen);
+       shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/* ----------------------------------------------------------------
+ *             ExecIndexScanInitializeDSM
+ *
+ *             Set up a parallel index scan descriptor.
+ * ----------------------------------------------------------------
+ */
+void
+ExecIndexScanInitializeDSM(IndexScanState *node,
+                                                  ParallelContext *pcxt)
+{
+       EState     *estate = node->ss.ps.state;
+       ParallelIndexScanDesc piscan;
+
+       piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen);
+       index_parallelscan_initialize(node->ss.ss_currentRelation,
+                                                                 node->iss_RelationDesc,
+                                                                 estate->es_snapshot,
+                                                                 piscan);
+       shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
+       node->iss_ScanDesc =
+               index_beginscan_parallel(node->ss.ss_currentRelation,
+                                                                node->iss_RelationDesc,
+                                                                node->iss_NumScanKeys,
+                                                                node->iss_NumOrderByKeys,
+                                                                piscan);
+
+       /*
+        * If no run-time keys to calculate, go ahead and pass the scankeys to the
+        * index AM.
+        */
+       if (node->iss_NumRuntimeKeys == 0)
+               index_rescan(node->iss_ScanDesc,
+                                        node->iss_ScanKeys, node->iss_NumScanKeys,
+                                        node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+}
+
+/* ----------------------------------------------------------------
+ *             ExecIndexScanInitializeWorker
+ *
+ *             Copy relevant information from TOC into planstate.
+ * ----------------------------------------------------------------
+ */
+void
+ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
+{
+       ParallelIndexScanDesc piscan;
+
+       piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+       node->iss_ScanDesc =
+               index_beginscan_parallel(node->ss.ss_currentRelation,
+                                                                node->iss_RelationDesc,
+                                                                node->iss_NumScanKeys,
+                                                                node->iss_NumOrderByKeys,
+                                                                piscan);
+
+       /*
+        * If no run-time keys to calculate, go ahead and pass the scankeys to the
+        * index AM.
+        */
+       if (node->iss_NumRuntimeKeys == 0)
+               index_rescan(node->iss_ScanDesc,
+                                        node->iss_ScanKeys, node->iss_NumScanKeys,
+                                        node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+}
index 85505c57d3611be8fe8373d5d89cdb3b66d98622..eeacf815e3517dc1e15f8898bc6ae101ddfca018 100644 (file)
@@ -127,8 +127,6 @@ static void subquery_push_qual(Query *subquery,
 static void recurse_push_qual(Node *setOp, Query *topquery,
                                  RangeTblEntry *rte, Index rti, Node *qual);
 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
-static int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
-                                               BlockNumber index_pages);
 
 
 /*
@@ -2885,7 +2883,7 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
  * "heap_pages" is the number of pages from the table that we expect to scan.
  * "index_pages" is the number of pages from the index that we expect to scan.
  */
-static int
+int
 compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
                                                BlockNumber index_pages)
 {
index a43daa744c7c1d2ae7560dea42f5334b89f5b204..d01630f8dba7f2e1a0cc6235153b3cdc50f8d649 100644 (file)
@@ -391,7 +391,8 @@ cost_gather(GatherPath *path, PlannerInfo *root,
  * we have to fetch from the table, so they don't reduce the scan cost.
  */
 void
-cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
+cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
+                  bool partial_path)
 {
        IndexOptInfo *index = path->indexinfo;
        RelOptInfo *baserel = index->rel;
@@ -400,6 +401,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
        List       *qpquals;
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
+       Cost            cpu_run_cost = 0;
        Cost            indexStartupCost;
        Cost            indexTotalCost;
        Selectivity indexSelectivity;
@@ -413,6 +415,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
        Cost            cpu_per_tuple;
        double          tuples_fetched;
        double          pages_fetched;
+       double          rand_heap_pages;
+       double          index_pages;
 
        /* Should only be applied to base relations */
        Assert(IsA(baserel, RelOptInfo) &&
@@ -459,7 +463,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
        amcostestimate = (amcostestimate_function) index->amcostestimate;
        amcostestimate(root, path, loop_count,
                                   &indexStartupCost, &indexTotalCost,
-                                  &indexSelectivity, &indexCorrelation);
+                                  &indexSelectivity, &indexCorrelation,
+                                  &index_pages);
 
        /*
         * Save amcostestimate's results for possible use in bitmap scan planning.
@@ -526,6 +531,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
                if (indexonly)
                        pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
 
+               rand_heap_pages = pages_fetched;
+
                max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
 
                /*
@@ -564,6 +571,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
                if (indexonly)
                        pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
 
+               rand_heap_pages = pages_fetched;
+
                /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
                max_IO_cost = pages_fetched * spc_random_page_cost;
 
@@ -583,6 +592,29 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
                        min_IO_cost = 0;
        }
 
+       if (partial_path)
+       {
+               /*
+                * Estimate the number of parallel workers required to scan index. Use
+                * the number of heap pages computed considering heap fetches won't be
+                * sequential as for parallel scans the pages are accessed in random
+                * order.
+                */
+               path->path.parallel_workers = compute_parallel_worker(baserel,
+                                                                                          (BlockNumber) rand_heap_pages,
+                                                                                                 (BlockNumber) index_pages);
+
+               /*
+                * Fall out if workers can't be assigned for parallel scan, because in
+                * such a case this path will be rejected.  So there is no benefit in
+                * doing extra computation.
+                */
+               if (path->path.parallel_workers <= 0)
+                       return;
+
+               path->path.parallel_aware = true;
+       }
+
        /*
         * Now interpolate based on estimated index order correlation to get total
         * disk I/O cost for main table accesses.
@@ -602,11 +634,24 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
        startup_cost += qpqual_cost.startup;
        cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
 
-       run_cost += cpu_per_tuple * tuples_fetched;
+       cpu_run_cost += cpu_per_tuple * tuples_fetched;
 
        /* tlist eval costs are paid per output row, not per tuple scanned */
        startup_cost += path->path.pathtarget->cost.startup;
-       run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+       cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+
+       /* Adjust costing for parallelism, if used. */
+       if (path->path.parallel_workers > 0)
+       {
+               double          parallel_divisor = get_parallel_divisor(&path->path);
+
+               path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
+
+               /* The CPU cost is divided among all the workers. */
+               cpu_run_cost /= parallel_divisor;
+       }
+
+       run_cost += cpu_run_cost;
 
        path->path.startup_cost = startup_cost;
        path->path.total_cost = startup_cost + run_cost;
index 528346898812c7be78174a347bce05c0b1a1e012..56eccafd7b0745d8a60eddf974ed7bcc6033fba4 100644 (file)
@@ -813,7 +813,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 /*
  * build_index_paths
  *       Given an index and a set of index clauses for it, construct zero
- *       or more IndexPaths.
+ *       or more IndexPaths. It also constructs zero or more partial IndexPaths.
  *
  * We return a list of paths because (1) this routine checks some cases
  * that should cause us to not generate any IndexPath, and (2) in some
@@ -1042,8 +1042,41 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
                                                                  NoMovementScanDirection,
                                                                  index_only_scan,
                                                                  outer_relids,
-                                                                 loop_count);
+                                                                 loop_count,
+                                                                 false);
                result = lappend(result, ipath);
+
+               /*
+                * If appropriate, consider parallel index scan.  We don't allow
+                * parallel index scan for bitmap or index only scans.
+                */
+               if (index->amcanparallel && !index_only_scan &&
+                       rel->consider_parallel && outer_relids == NULL &&
+                       scantype != ST_BITMAPSCAN)
+               {
+                       ipath = create_index_path(root, index,
+                                                                         index_clauses,
+                                                                         clause_columns,
+                                                                         orderbyclauses,
+                                                                         orderbyclausecols,
+                                                                         useful_pathkeys,
+                                                                         index_is_ordered ?
+                                                                         ForwardScanDirection :
+                                                                         NoMovementScanDirection,
+                                                                         index_only_scan,
+                                                                         outer_relids,
+                                                                         loop_count,
+                                                                         true);
+
+                       /*
+                        * if, after costing the path, we find that it's not worth
+                        * using parallel workers, just free it.
+                        */
+                       if (ipath->path.parallel_workers > 0)
+                               add_partial_path(rel, (Path *) ipath);
+                       else
+                               pfree(ipath);
+               }
        }
 
        /*
@@ -1066,8 +1099,36 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
                                                                          BackwardScanDirection,
                                                                          index_only_scan,
                                                                          outer_relids,
-                                                                         loop_count);
+                                                                         loop_count,
+                                                                         false);
                        result = lappend(result, ipath);
+
+                       /* If appropriate, consider parallel index scan */
+                       if (index->amcanparallel && !index_only_scan &&
+                               rel->consider_parallel && outer_relids == NULL &&
+                               scantype != ST_BITMAPSCAN)
+                       {
+                               ipath = create_index_path(root, index,
+                                                                                 index_clauses,
+                                                                                 clause_columns,
+                                                                                 NIL,
+                                                                                 NIL,
+                                                                                 useful_pathkeys,
+                                                                                 BackwardScanDirection,
+                                                                                 index_only_scan,
+                                                                                 outer_relids,
+                                                                                 loop_count,
+                                                                                 true);
+
+                               /*
+                                * if, after costing the path, we find that it's not worth
+                                * using parallel workers, just free it.
+                                */
+                               if (ipath->path.parallel_workers > 0)
+                                       add_partial_path(rel, (Path *) ipath);
+                               else
+                                       pfree(ipath);
+                       }
                }
        }
 
index abb4f12cea15c3308922de87a98ec8efa1b48afa..3d33d469713cab64ccc8421453ae29ebab2d6a94 100644 (file)
@@ -5333,7 +5333,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
        indexScanPath = create_index_path(root, indexInfo,
                                                                          NIL, NIL, NIL, NIL, NIL,
                                                                          ForwardScanDirection, false,
-                                                                         NULL, 1.0);
+                                                                         NULL, 1.0, false);
 
        return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
 }
index f440875ceb1d8db9223dbe7d273131909d0a953a..324829690d28521e34d15292d124a16e6f3b961d 100644 (file)
@@ -744,10 +744,9 @@ add_path_precheck(RelOptInfo *parent_rel,
  *       As with add_path, we pfree paths that are found to be dominated by
  *       another partial path; this requires that there be no other references to
  *       such paths yet.  Hence, GatherPaths must not be created for a rel until
- *       we're done creating all partial paths for it.  We do not currently build
- *       partial indexscan paths, so there is no need for an exception for
- *       IndexPaths here; for safety, we instead Assert that a path to be freed
- *       isn't an IndexPath.
+ *       we're done creating all partial paths for it.  Unlike add_path, we don't
+ *       make an exception for IndexPaths, as partial index paths won't be
+ *       referenced by partial BitmapHeapPaths.
  */
 void
 add_partial_path(RelOptInfo *parent_rel, Path *new_path)
@@ -826,8 +825,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
                {
                        parent_rel->partial_pathlist =
                                list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
-                       /* we should not see IndexPaths here, so always safe to delete */
-                       Assert(!IsA(old_path, IndexPath));
                        pfree(old_path);
                        /* p1_prev does not advance */
                }
@@ -860,8 +857,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
        }
        else
        {
-               /* we should not see IndexPaths here, so always safe to delete */
-               Assert(!IsA(new_path, IndexPath));
                /* Reject and recycle the new path */
                pfree(new_path);
        }
@@ -1005,6 +1000,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
  * 'required_outer' is the set of outer relids for a parameterized path.
  * 'loop_count' is the number of repetitions of the indexscan to factor into
  *             estimates of caching behavior.
+ * 'partial_path' is true if constructing a parallel index scan path.
  *
  * Returns the new path node.
  */
@@ -1019,7 +1015,8 @@ create_index_path(PlannerInfo *root,
                                  ScanDirection indexscandir,
                                  bool indexonly,
                                  Relids required_outer,
-                                 double loop_count)
+                                 double loop_count,
+                                 bool partial_path)
 {
        IndexPath  *pathnode = makeNode(IndexPath);
        RelOptInfo *rel = index->rel;
@@ -1049,7 +1046,7 @@ create_index_path(PlannerInfo *root,
        pathnode->indexorderbycols = indexorderbycols;
        pathnode->indexscandir = indexscandir;
 
-       cost_index(pathnode, root, loop_count);
+       cost_index(pathnode, root, loop_count, partial_path);
 
        return pathnode;
 }
@@ -3247,7 +3244,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
                                memcpy(newpath, ipath, sizeof(IndexPath));
                                newpath->path.param_info =
                                        get_baserel_parampathinfo(root, rel, required_outer);
-                               cost_index(newpath, root, loop_count);
+                               cost_index(newpath, root, loop_count, false);
                                return (Path *) newpath;
                        }
                case T_BitmapHeapScan:
index 7836e6b3f85a67953849e5fcce4837de70fdcd30..4ed27054a11d49bd6e29ed09d5a2b606f211ad88 100644 (file)
@@ -241,6 +241,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
                        info->amoptionalkey = amroutine->amoptionalkey;
                        info->amsearcharray = amroutine->amsearcharray;
                        info->amsearchnulls = amroutine->amsearchnulls;
+                       info->amcanparallel = amroutine->amcanparallel;
                        info->amhasgettuple = (amroutine->amgettuple != NULL);
                        info->amhasgetbitmap = (amroutine->amgetbitmap != NULL);
                        info->amcostestimate = amroutine->amcostestimate;
index fa32e9eabe1104edf2bb78766628f6d8da687fd0..d14f0f97a8a523acf5c3f578e1b4d67f6a5de875 100644 (file)
@@ -6471,7 +6471,8 @@ add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
 void
 btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                           Cost *indexStartupCost, Cost *indexTotalCost,
-                          Selectivity *indexSelectivity, double *indexCorrelation)
+                          Selectivity *indexSelectivity, double *indexCorrelation,
+                          double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *qinfos;
@@ -6761,12 +6762,14 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
 void
 hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                 Cost *indexStartupCost, Cost *indexTotalCost,
-                                Selectivity *indexSelectivity, double *indexCorrelation)
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
 {
        List       *qinfos;
        GenericCosts costs;
@@ -6807,12 +6810,14 @@ hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
 void
 gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                 Cost *indexStartupCost, Cost *indexTotalCost,
-                                Selectivity *indexSelectivity, double *indexCorrelation)
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *qinfos;
@@ -6866,12 +6871,14 @@ gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
 void
 spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                Cost *indexStartupCost, Cost *indexTotalCost,
-                               Selectivity *indexSelectivity, double *indexCorrelation)
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *qinfos;
@@ -6925,6 +6932,7 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
 
@@ -7222,7 +7230,8 @@ gincost_scalararrayopexpr(PlannerInfo *root,
 void
 gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                Cost *indexStartupCost, Cost *indexTotalCost,
-                               Selectivity *indexSelectivity, double *indexCorrelation)
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *indexQuals = path->indexquals;
@@ -7537,6 +7546,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexStartupCost += qual_arg_cost;
        *indexTotalCost += qual_arg_cost;
        *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+       *indexPages = dataPagesFetched;
 }
 
 /*
@@ -7545,7 +7555,8 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 void
 brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                 Cost *indexStartupCost, Cost *indexTotalCost,
-                                Selectivity *indexSelectivity, double *indexCorrelation)
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *indexQuals = path->indexquals;
@@ -7597,6 +7608,7 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexStartupCost += qual_arg_cost;
        *indexTotalCost += qual_arg_cost;
        *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+       *indexPages = index->pages;
 
        /* XXX what about pages_per_range? */
 }
index b0730bfefae0728dcc92a513c1f9494b06781e13..f919cf8b873e4a29f105447640d782fc9944a8ea 100644 (file)
@@ -95,7 +95,8 @@ typedef void (*amcostestimate_function) (struct PlannerInfo *root,
                                                                                                         Cost *indexStartupCost,
                                                                                                         Cost *indexTotalCost,
                                                                                           Selectivity *indexSelectivity,
-                                                                                                  double *indexCorrelation);
+                                                                                                        double *indexCorrelation,
+                                                                                                        double *indexPages);
 
 /* parse index reloptions */
 typedef bytea *(*amoptions_function) (Datum reloptions,
@@ -188,6 +189,8 @@ typedef struct IndexAmRoutine
        bool            amclusterable;
        /* does AM handle predicate locks? */
        bool            ampredlocks;
+       /* does AM support parallel scan? */
+       bool            amcanparallel;
        /* type of data stored in index, or InvalidOid if variable */
        Oid                     amkeytype;
 
index 46d6f45e837578b000fa92d2188282264936b8bc..ea3f3a5cc4d1e17c2c34d05c6e9ab9cd5964f70b 100644 (file)
@@ -14,6 +14,7 @@
 #ifndef NODEINDEXSCAN_H
 #define NODEINDEXSCAN_H
 
+#include "access/parallel.h"
 #include "nodes/execnodes.h"
 
 extern IndexScanState *ExecInitIndexScan(IndexScan *node, EState *estate, int eflags);
@@ -22,6 +23,9 @@ extern void ExecEndIndexScan(IndexScanState *node);
 extern void ExecIndexMarkPos(IndexScanState *node);
 extern void ExecIndexRestrPos(IndexScanState *node);
 extern void ExecReScanIndexScan(IndexScanState *node);
+extern void ExecIndexScanEstimate(IndexScanState *node, ParallelContext *pcxt);
+extern void ExecIndexScanInitializeDSM(IndexScanState *node, ParallelContext *pcxt);
+extern void ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc);
 
 /*
  * These routines are exported to share code with nodeIndexonlyscan.c and
index 42c6c58ff9c9b934c38e863bf5ffd828b8df0960..9f41babf3534fea130676202d5749b49959fedfa 100644 (file)
@@ -1363,6 +1363,7 @@ typedef struct
  *             SortSupport                for reordering ORDER BY exprs
  *             OrderByTypByVals   is the datatype of order by expression pass-by-value?
  *             OrderByTypLens     typlens of the datatypes of order by expressions
+ *             PscanLen                   size of parallel index scan descriptor
  * ----------------
  */
 typedef struct IndexScanState
@@ -1389,6 +1390,7 @@ typedef struct IndexScanState
        SortSupport iss_SortSupport;
        bool       *iss_OrderByTypByVals;
        int16      *iss_OrderByTypLens;
+       Size            iss_PscanLen;
 } IndexScanState;
 
 /* ----------------
index 643be54d405a6992b31282bb8e47bbf2039503ed..f7ac6f600fe8da0fc3ca861ca50dd3f9eb0f8c4e 100644 (file)
@@ -629,6 +629,7 @@ typedef struct IndexOptInfo
        bool            amsearchnulls;  /* can AM search for NULL/NOT NULL entries? */
        bool            amhasgettuple;  /* does AM have amgettuple interface? */
        bool            amhasgetbitmap; /* does AM have amgetbitmap interface? */
+       bool            amcanparallel;  /* does AM support parallel scan? */
        /* Rather than include amapi.h here, we declare amcostestimate like this */
        void            (*amcostestimate) ();   /* AM's cost estimator */
 } IndexOptInfo;
index 0e68264a41f1d0ab9596681022a14d206642a8ad..72200fa5310e1205445387f7739520f3820971dc 100644 (file)
@@ -76,7 +76,7 @@ extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
                                ParamPathInfo *param_info);
 extern void cost_index(IndexPath *path, PlannerInfo *root,
-                  double loop_count);
+                  double loop_count, bool partial_path);
 extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
                                          ParamPathInfo *param_info,
                                          Path *bitmapqual, double loop_count);
index 7b413176219390ffede6740fa14518b84a1b235f..53cad247dc4707a42ced7aa5d23860c3ad57b9a1 100644 (file)
@@ -47,7 +47,8 @@ extern IndexPath *create_index_path(PlannerInfo *root,
                                  ScanDirection indexscandir,
                                  bool indexonly,
                                  Relids required_outer,
-                                 double loop_count);
+                                 double loop_count,
+                                 bool partial_path);
 extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
                                                RelOptInfo *rel,
                                                Path *bitmapqual,
index 81e7a4274da59b19183187d575852c1647dbe436..ebda308c4166da190976ed881a00676cc386279d 100644 (file)
@@ -54,6 +54,8 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
                                         List *initial_rels);
 
 extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
+extern int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
+                                               BlockNumber index_pages);
 
 #ifdef OPTIMIZER_DEBUG
 extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
index d3172420f995d19b8ae983a3fd1d84c76bdad7f9..17d165ca6514a00f96a400b6bbf7c00edea3848b 100644 (file)
@@ -28,41 +28,47 @@ extern void brincostestimate(struct PlannerInfo *root,
                                 Cost *indexStartupCost,
                                 Cost *indexTotalCost,
                                 Selectivity *indexSelectivity,
-                                double *indexCorrelation);
+                                double *indexCorrelation,
+                                double *indexPages);
 extern void btcostestimate(struct PlannerInfo *root,
                           struct IndexPath *path,
                           double loop_count,
                           Cost *indexStartupCost,
                           Cost *indexTotalCost,
                           Selectivity *indexSelectivity,
-                          double *indexCorrelation);
+                          double *indexCorrelation,
+                          double *indexPages);
 extern void hashcostestimate(struct PlannerInfo *root,
                                 struct IndexPath *path,
                                 double loop_count,
                                 Cost *indexStartupCost,
                                 Cost *indexTotalCost,
                                 Selectivity *indexSelectivity,
-                                double *indexCorrelation);
+                                double *indexCorrelation,
+                                double *indexPages);
 extern void gistcostestimate(struct PlannerInfo *root,
                                 struct IndexPath *path,
                                 double loop_count,
                                 Cost *indexStartupCost,
                                 Cost *indexTotalCost,
                                 Selectivity *indexSelectivity,
-                                double *indexCorrelation);
+                                double *indexCorrelation,
+                                double *indexPages);
 extern void spgcostestimate(struct PlannerInfo *root,
                                struct IndexPath *path,
                                double loop_count,
                                Cost *indexStartupCost,
                                Cost *indexTotalCost,
                                Selectivity *indexSelectivity,
-                               double *indexCorrelation);
+                               double *indexCorrelation,
+                               double *indexPages);
 extern void gincostestimate(struct PlannerInfo *root,
                                struct IndexPath *path,
                                double loop_count,
                                Cost *indexStartupCost,
                                Cost *indexTotalCost,
                                Selectivity *indexSelectivity,
-                               double *indexCorrelation);
+                               double *indexCorrelation,
+                               double *indexPages);
 
 #endif   /* INDEX_SELFUNCS_H */
index 3692d4f1b8135dfbfa9fb949ae92cefb0106d8db..48fb80e90c874ae03528fd17d81e65f1c422d380 100644 (file)
@@ -125,6 +125,29 @@ select count(*) from tenk1 where (two, four) not in
 (1 row)
 
 alter table tenk2 reset (parallel_workers);
+-- test parallel index scans.
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+explain (costs off)
+       select  count((unique1)) from tenk1 where hundred > 1;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Finalize Aggregate
+   ->  Gather
+         Workers Planned: 4
+         ->  Partial Aggregate
+               ->  Parallel Index Scan using tenk1_hundred on tenk1
+                     Index Cond: (hundred > 1)
+(6 rows)
+
+select  count((unique1)) from tenk1 where hundred > 1;
+ count 
+-------
+  9800
+(1 row)
+
+reset enable_seqscan;
+reset enable_bitmapscan;
 set force_parallel_mode=1;
 explain (costs off)
   select stringu1::int2 from tenk1 where unique1 = 1;
index f4f9dd5ab67200379eeb6e2675e9342a41416fbe..f5bc4d18733eb1c9a2504ab86672c5e0f131fd65 100644 (file)
@@ -48,6 +48,17 @@ select count(*) from tenk1 where (two, four) not in
        (select hundred, thousand from tenk2 where thousand > 100);
 alter table tenk2 reset (parallel_workers);
 
+-- test parallel index scans.
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+
+explain (costs off)
+       select  count((unique1)) from tenk1 where hundred > 1;
+select  count((unique1)) from tenk1 where hundred > 1;
+
+reset enable_seqscan;
+reset enable_bitmapscan;
+
 set force_parallel_mode=1;
 
 explain (costs off)