static void
show_sort_info(SortState *sortstate, ExplainState *es)
{
- if (es->analyze && sortstate->sort_Done &&
- sortstate->tuplesortstate != NULL)
+ if (!es->analyze)
+ return;
+
+ if (sortstate->sort_Done && sortstate->tuplesortstate != NULL)
{
Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate;
+ TuplesortInstrumentation stats;
const char *sortMethod;
const char *spaceType;
long spaceUsed;
- tuplesort_get_stats(state, &sortMethod, &spaceType, &spaceUsed);
+ tuplesort_get_stats(state, &stats);
+ sortMethod = tuplesort_method_name(stats.sortMethod);
+ spaceType = tuplesort_space_type_name(stats.spaceType);
+ spaceUsed = stats.spaceUsed;
if (es->format == EXPLAIN_FORMAT_TEXT)
{
appendStringInfoSpaces(es->str, es->indent * 2);
appendStringInfo(es->str, "Sort Method: %s %s: %ldkB\n",
sortMethod, spaceType, spaceUsed);
}
else
{
ExplainPropertyText("Sort Method", sortMethod, es);
ExplainPropertyLong("Sort Space Used", spaceUsed, es);
ExplainPropertyText("Sort Space Type", spaceType, es);
}
}
+
+ if (sortstate->shared_info != NULL)
+ {
+ int n;
+ bool opened_group = false;
+
+ for (n = 0; n < sortstate->shared_info->num_workers; n++)
+ {
+ TuplesortInstrumentation *sinstrument;
+ const char *sortMethod;
+ const char *spaceType;
+ long spaceUsed;
+
+ sinstrument = &sortstate->shared_info->sinstrument[n];
+ if (sinstrument->sortMethod == SORT_TYPE_STILL_IN_PROGRESS)
+ continue; /* ignore any unfilled slots */
+ sortMethod = tuplesort_method_name(sinstrument->sortMethod);
+ spaceType = tuplesort_space_type_name(sinstrument->spaceType);
+ spaceUsed = sinstrument->spaceUsed;
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ appendStringInfoSpaces(es->str, es->indent * 2);
+ appendStringInfo(es->str,
+ "Worker %d: Sort Method: %s %s: %ldkB\n",
+ n, sortMethod, spaceType, spaceUsed);
+ }
+ else
+ {
+ if (!opened_group)
+ {
+ ExplainOpenGroup("Workers", "Workers", false, es);
+ opened_group = true;
+ }
+ ExplainOpenGroup("Worker", NULL, true, es);
+ ExplainPropertyInteger("Worker Number", n, es);
+ ExplainPropertyText("Sort Method", sortMethod, es);
+ ExplainPropertyLong("Sort Space Used", spaceUsed, es);
+ ExplainPropertyText("Sort Space Type", spaceType, es);
+ ExplainCloseGroup("Worker", NULL, true, es);
+ }
+ }
+ if (opened_group)
+ ExplainCloseGroup("Workers", "Workers", false, es);
+ }
}
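
(Illustrative output, not part of the patch: with the worker loop above, EXPLAIN (ANALYZE) of a parallel sort in text format would add one line per worker built from that format string, e.g.

    Sort Method: external merge Disk: 4592kB
    Worker 0: Sort Method: external merge Disk: 4112kB
    Worker 1: Sort Method: quicksort Memory: 3256kB

The kB figures are invented. Non-text formats instead get a "Workers" group containing one "Worker" sub-group per worker, as in the else branch.)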
/*
#include "executor/nodeBitmapHeapscan.h"
#include "executor/nodeCustom.h"
#include "executor/nodeForeignscan.h"
-#include "executor/nodeSeqscan.h"
#include "executor/nodeIndexscan.h"
#include "executor/nodeIndexonlyscan.h"
+#include "executor/nodeSeqscan.h"
+#include "executor/nodeSort.h"
#include "executor/tqueue.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/planmain.h"
}
/*
- * Ordinary plan nodes won't do anything here, but parallel-aware plan nodes
- * may need some state which is shared across all parallel workers. Before
- * we size the DSM, give them a chance to call shm_toc_estimate_chunk or
- * shm_toc_estimate_keys on &pcxt->estimator.
+ * Parallel-aware plan nodes (and occasionally others) may need some state
+ * which is shared across all parallel workers. Before we size the DSM, give
+ * them a chance to call shm_toc_estimate_chunk or shm_toc_estimate_keys on
+ * &pcxt->estimator.
*
* While we're at it, count the number of PlanState nodes in the tree, so
* we know how many SharedPlanStateInstrumentation structures we need.
/* Count this node. */
e->nnodes++;
- /* Call estimators for parallel-aware nodes. */
- if (planstate->plan->parallel_aware)
+ switch (nodeTag(planstate))
{
- switch (nodeTag(planstate))
- {
- case T_SeqScanState:
+ case T_SeqScanState:
+ if (planstate->plan->parallel_aware)
ExecSeqScanEstimate((SeqScanState *) planstate,
e->pcxt);
- break;
- case T_IndexScanState:
+ break;
+ case T_IndexScanState:
+ if (planstate->plan->parallel_aware)
ExecIndexScanEstimate((IndexScanState *) planstate,
e->pcxt);
- break;
- case T_IndexOnlyScanState:
+ break;
+ case T_IndexOnlyScanState:
+ if (planstate->plan->parallel_aware)
ExecIndexOnlyScanEstimate((IndexOnlyScanState *) planstate,
e->pcxt);
- break;
- case T_ForeignScanState:
+ break;
+ case T_ForeignScanState:
+ if (planstate->plan->parallel_aware)
ExecForeignScanEstimate((ForeignScanState *) planstate,
e->pcxt);
- break;
- case T_CustomScanState:
+ break;
+ case T_CustomScanState:
+ if (planstate->plan->parallel_aware)
ExecCustomScanEstimate((CustomScanState *) planstate,
e->pcxt);
- break;
- case T_BitmapHeapScanState:
+ break;
+ case T_BitmapHeapScanState:
+ if (planstate->plan->parallel_aware)
ExecBitmapHeapEstimate((BitmapHeapScanState *) planstate,
e->pcxt);
- break;
- default:
- break;
- }
+ break;
+ case T_SortState:
+ /* even when not parallel-aware */
+ ExecSortEstimate((SortState *) planstate, e->pcxt);
+ break;
+ default:
+ break;
}
return planstate_tree_walker(planstate, ExecParallelEstimate, e);
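
(Note on the restructuring above: previously the entire switch sat behind a planstate->plan->parallel_aware check, so only parallel-aware scan nodes could reserve DSM space. The check now lives inside each scan case, and the new T_SortState case runs unconditionally, since a Sort under Gather is never parallel-aware yet every worker executes its own copy and needs a slot in which to report its tuplesort statistics. The same reshuffling is applied to ExecParallelInitializeDSM and ExecParallelInitializeWorker below.)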
d->nnodes++;
/*
- * Call initializers for parallel-aware plan nodes.
+ * Call initializers for DSM-using plan nodes.
*
- * Ordinary plan nodes won't do anything here, but parallel-aware plan
- * nodes may need to initialize shared state in the DSM before parallel
- * workers are available. They can allocate the space they previously
+ * Most plan nodes won't do anything here, but plan nodes that allocated
+ * DSM may need to initialize shared state in the DSM before parallel
+ * workers are launched. They can allocate the space they previously
* estimated using shm_toc_allocate, and add the keys they previously
* estimated using shm_toc_insert, in each case targeting pcxt->toc.
*/
- if (planstate->plan->parallel_aware)
+ switch (nodeTag(planstate))
{
- switch (nodeTag(planstate))
- {
- case T_SeqScanState:
+ case T_SeqScanState:
+ if (planstate->plan->parallel_aware)
ExecSeqScanInitializeDSM((SeqScanState *) planstate,
d->pcxt);
- break;
- case T_IndexScanState:
+ break;
+ case T_IndexScanState:
+ if (planstate->plan->parallel_aware)
ExecIndexScanInitializeDSM((IndexScanState *) planstate,
d->pcxt);
- break;
- case T_IndexOnlyScanState:
+ break;
+ case T_IndexOnlyScanState:
+ if (planstate->plan->parallel_aware)
ExecIndexOnlyScanInitializeDSM((IndexOnlyScanState *) planstate,
d->pcxt);
- break;
- case T_ForeignScanState:
+ break;
+ case T_ForeignScanState:
+ if (planstate->plan->parallel_aware)
ExecForeignScanInitializeDSM((ForeignScanState *) planstate,
d->pcxt);
- break;
- case T_CustomScanState:
+ break;
+ case T_CustomScanState:
+ if (planstate->plan->parallel_aware)
ExecCustomScanInitializeDSM((CustomScanState *) planstate,
d->pcxt);
- break;
- case T_BitmapHeapScanState:
+ break;
+ case T_BitmapHeapScanState:
+ if (planstate->plan->parallel_aware)
ExecBitmapHeapInitializeDSM((BitmapHeapScanState *) planstate,
d->pcxt);
- break;
-
- default:
- break;
- }
+ break;
+ case T_SortState:
+ /* even when not parallel-aware */
+ ExecSortInitializeDSM((SortState *) planstate, d->pcxt);
+ break;
+ default:
+ break;
}
return planstate_tree_walker(planstate, ExecParallelInitializeDSM, d);
planstate->worker_instrument->num_workers = instrumentation->num_workers;
memcpy(&planstate->worker_instrument->instrument, instrument, ibytes);
+ /*
+ * Perform any node-type-specific work that needs to be done. Currently,
+ * only Sort nodes need to do anything here.
+ */
+ if (IsA(planstate, SortState))
+ ExecSortRetrieveInstrumentation((SortState *) planstate);
+
return planstate_tree_walker(planstate, ExecParallelRetrieveInstrumentation,
instrumentation);
}
if (planstate == NULL)
return false;
- /* Call initializers for parallel-aware plan nodes. */
- if (planstate->plan->parallel_aware)
+ switch (nodeTag(planstate))
{
- switch (nodeTag(planstate))
- {
- case T_SeqScanState:
+ case T_SeqScanState:
+ if (planstate->plan->parallel_aware)
ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc);
- break;
- case T_IndexScanState:
+ break;
+ case T_IndexScanState:
+ if (planstate->plan->parallel_aware)
ExecIndexScanInitializeWorker((IndexScanState *) planstate, toc);
- break;
- case T_IndexOnlyScanState:
+ break;
+ case T_IndexOnlyScanState:
+ if (planstate->plan->parallel_aware)
ExecIndexOnlyScanInitializeWorker((IndexOnlyScanState *) planstate, toc);
- break;
- case T_ForeignScanState:
+ break;
+ case T_ForeignScanState:
+ if (planstate->plan->parallel_aware)
ExecForeignScanInitializeWorker((ForeignScanState *) planstate,
toc);
- break;
- case T_CustomScanState:
+ break;
+ case T_CustomScanState:
+ if (planstate->plan->parallel_aware)
ExecCustomScanInitializeWorker((CustomScanState *) planstate,
toc);
- break;
- case T_BitmapHeapScanState:
+ break;
+ case T_BitmapHeapScanState:
+ if (planstate->plan->parallel_aware)
ExecBitmapHeapInitializeWorker(
(BitmapHeapScanState *) planstate, toc);
- break;
- default:
- break;
- }
+ break;
+ case T_SortState:
+ /* even when not parallel-aware */
+ ExecSortInitializeWorker((SortState *) planstate, toc);
+ break;
+ default:
+ break;
}
return planstate_tree_walker(planstate, ExecParallelInitializeWorker, toc);
#include "postgres.h"
+#include "access/parallel.h"
#include "executor/execdebug.h"
#include "executor/nodeSort.h"
#include "miscadmin.h"
node->sort_Done = true;
node->bounded_Done = node->bounded;
node->bound_Done = node->bound;
+ if (node->shared_info && node->am_worker)
+ {
+ TuplesortInstrumentation *si;
+
+ Assert(IsParallelWorker());
+ Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
+ si = &node->shared_info->sinstrument[ParallelWorkerNumber];
+ tuplesort_get_stats(tuplesortstate, si);
+ }
SO1_printf("ExecSort: %s\n", "sorting done");
}
else
tuplesort_rescan((Tuplesortstate *) node->tuplesortstate);
}
+
+/* ----------------------------------------------------------------
+ * Parallel Query Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ * ExecSortEstimate
+ *
+ * Estimate space required to propagate sort statistics.
+ * ----------------------------------------------------------------
+ */
+void
+ExecSortEstimate(SortState *node, ParallelContext *pcxt)
+{
+ Size size;
+
+ /* don't need this if not instrumenting or no workers */
+ if (!node->ss.ps.instrument || pcxt->nworkers == 0)
+ return;
+
+ size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation));
+ size = add_size(size, offsetof(SharedSortInfo, sinstrument));
+ shm_toc_estimate_chunk(&pcxt->estimator, size);
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
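
(For scale, assuming two planned workers, the reserved chunk is just the flexible-array struct:

    size = offsetof(SharedSortInfo, sinstrument)
         + 2 * sizeof(TuplesortInstrumentation);    /* one slot per planned worker */

mul_size() and add_size() are the overflow-checked forms of the arithmetic; ExecSortInitializeDSM below allocates the same amount with plain operators once the estimate has been accepted.)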
+
+/* ----------------------------------------------------------------
+ * ExecSortInitializeDSM
+ *
+ * Initialize DSM space for sort statistics.
+ * ----------------------------------------------------------------
+ */
+void
+ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
+{
+ Size size;
+
+ /* don't need this if not instrumenting or no workers */
+ if (!node->ss.ps.instrument || pcxt->nworkers == 0)
+ return;
+
+ size = offsetof(SharedSortInfo, sinstrument)
+ + pcxt->nworkers * sizeof(TuplesortInstrumentation);
+ node->shared_info = shm_toc_allocate(pcxt->toc, size);
+ /* ensure any unfilled slots will contain zeroes */
+ memset(node->shared_info, 0, size);
+ node->shared_info->num_workers = pcxt->nworkers;
+ shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
+ node->shared_info);
+}
+
+/* ----------------------------------------------------------------
+ * ExecSortInitializeWorker
+ *
+ * Attach worker to DSM space for sort statistics.
+ * ----------------------------------------------------------------
+ */
+void
+ExecSortInitializeWorker(SortState *node, shm_toc *toc)
+{
+ node->shared_info =
+ shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, true);
+ node->am_worker = true;
+}
+
+/* ----------------------------------------------------------------
+ * ExecSortRetrieveInstrumentation
+ *
+ * Transfer sort statistics from DSM to private memory.
+ * ----------------------------------------------------------------
+ */
+void
+ExecSortRetrieveInstrumentation(SortState *node)
+{
+ Size size;
+ SharedSortInfo *si;
+
+ if (node->shared_info == NULL)
+ return;
+
+ size = offsetof(SharedSortInfo, sinstrument)
+ + node->shared_info->num_workers * sizeof(TuplesortInstrumentation);
+ si = palloc(size);
+ memcpy(si, node->shared_info, size);
+ node->shared_info = si;
+}
*
* This can be called after tuplesort_performsort() finishes to obtain
* printable summary information about how the sort was performed.
- * spaceUsed is measured in kilobytes.
*/
void
tuplesort_get_stats(Tuplesortstate *state,
- const char **sortMethod,
- const char **spaceType,
- long *spaceUsed)
+ TuplesortInstrumentation *stats)
{
/*
* Note: it might seem we should provide both memory and disk usage for a
*/
if (state->tapeset)
{
- *spaceType = "Disk";
- *spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024);
+ stats->spaceType = SORT_SPACE_TYPE_DISK;
+ stats->spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024);
}
else
{
- *spaceType = "Memory";
- *spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
+ stats->spaceType = SORT_SPACE_TYPE_MEMORY;
+ stats->spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
}
switch (state->status)
{
case TSS_SORTEDINMEM:
if (state->boundUsed)
- *sortMethod = "top-N heapsort";
+ stats->sortMethod = SORT_TYPE_TOP_N_HEAPSORT;
else
- *sortMethod = "quicksort";
+ stats->sortMethod = SORT_TYPE_QUICKSORT;
break;
case TSS_SORTEDONTAPE:
- *sortMethod = "external sort";
+ stats->sortMethod = SORT_TYPE_EXTERNAL_SORT;
break;
case TSS_FINALMERGE:
- *sortMethod = "external merge";
+ stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE;
break;
default:
- *sortMethod = "still in progress";
+ stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS;
break;
}
}
+/*
+ * Convert TuplesortMethod to a string.
+ */
+const char *
+tuplesort_method_name(TuplesortMethod m)
+{
+ switch (m)
+ {
+ case SORT_TYPE_STILL_IN_PROGRESS:
+ return "still in progress";
+ case SORT_TYPE_TOP_N_HEAPSORT:
+ return "top-N heapsort";
+ case SORT_TYPE_QUICKSORT:
+ return "quicksort";
+ case SORT_TYPE_EXTERNAL_SORT:
+ return "external sort";
+ case SORT_TYPE_EXTERNAL_MERGE:
+ return "external merge";
+ }
+
+ return "unknown";
+}
+
+/*
+ * Convert TuplesortSpaceType to a string.
+ */
+const char *
+tuplesort_space_type_name(TuplesortSpaceType t)
+{
+ Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY);
+ return t == SORT_SPACE_TYPE_DISK ? "Disk" : "Memory";
+}
+
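
(Sketch of the new calling convention, with a hypothetical caller; explain.c and nodeSort.c above are the real ones:

    TuplesortInstrumentation stats;

    tuplesort_performsort(state);
    tuplesort_get_stats(state, &stats);
    elog(LOG, "sort finished: %s, %s: %ldkB",
         tuplesort_method_name(stats.sortMethod),
         tuplesort_space_type_name(stats.spaceType),
         stats.spaceUsed);

Because the struct holds enum codes and a kB count rather than string pointers, it can be copied directly into and out of shared memory.)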
/*
* Heap manipulation routines, per Knuth's Algorithm 5.2.3H.
#ifndef NODESORT_H
#define NODESORT_H
+#include "access/parallel.h"
#include "nodes/execnodes.h"
extern SortState *ExecInitSort(Sort *node, EState *estate, int eflags);
extern void ExecSortRestrPos(SortState *node);
extern void ExecReScanSort(SortState *node);
+/* parallel instrumentation support */
+extern void ExecSortEstimate(SortState *node, ParallelContext *pcxt);
+extern void ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt);
+extern void ExecSortInitializeWorker(SortState *node, shm_toc *toc);
+extern void ExecSortRetrieveInstrumentation(SortState *node);
+
#endif /* NODESORT_H */
Tuplestorestate *tuplestorestate;
} MaterialState;
+/* ----------------
+ * Shared memory container for per-worker sort information
+ * ----------------
+ */
+typedef struct SharedSortInfo
+{
+ int num_workers;
+ TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedSortInfo;
+
/* ----------------
* SortState information
* ----------------
bool bounded_Done; /* value of bounded we did the sort with */
int64 bound_Done; /* value of bound we did the sort with */
void *tuplesortstate; /* private state of tuplesort.c */
+ bool am_worker; /* are we a worker? */
+ SharedSortInfo *shared_info; /* one entry per worker */
} SortState;
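
(Usage note, not from the patch: in a worker, shared_info points straight at the DSM chunk and the worker fills sinstrument[ParallelWorkerNumber] when its sort finishes; in the leader, ExecSortRetrieveInstrumentation replaces the pointer with a palloc'd copy so EXPLAIN can still read the figures after the parallel context and its DSM segment are gone.)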
/* ---------------------
*/
typedef struct Tuplesortstate Tuplesortstate;
+/*
+ * Data structures for reporting sort statistics. Note that
+ * TuplesortInstrumentation can't contain any pointers because we
+ * sometimes put it in shared memory.
+ */
+typedef enum
+{
+ SORT_TYPE_STILL_IN_PROGRESS = 0,
+ SORT_TYPE_TOP_N_HEAPSORT,
+ SORT_TYPE_QUICKSORT,
+ SORT_TYPE_EXTERNAL_SORT,
+ SORT_TYPE_EXTERNAL_MERGE
+} TuplesortMethod;
+
+typedef enum
+{
+ SORT_SPACE_TYPE_DISK,
+ SORT_SPACE_TYPE_MEMORY
+} TuplesortSpaceType;
+
+typedef struct TuplesortInstrumentation
+{
+ TuplesortMethod sortMethod; /* sort algorithm used */
+ TuplesortSpaceType spaceType; /* type of space spaceUsed represents */
+ long spaceUsed; /* space consumption, in kB */
+} TuplesortInstrumentation;
+
+
/*
* We provide multiple interfaces to what is essentially the same code,
* since different callers have different data to be sorted and want to
extern void tuplesort_end(Tuplesortstate *state);
extern void tuplesort_get_stats(Tuplesortstate *state,
- const char **sortMethod,
- const char **spaceType,
- long *spaceUsed);
+ TuplesortInstrumentation *stats);
+extern const char *tuplesort_method_name(TuplesortMethod m);
+extern const char *tuplesort_space_type_name(TuplesortSpaceType t);
extern int tuplesort_merge_order(int64 allowedMem);
-- ANALYZE shows that a top-N sort was used. We must suppress or filter away
-- all the non-invariant parts of the EXPLAIN ANALYZE output.
--
-create temp table sq_limit (pk int primary key, c1 int, c2 int);
+create table sq_limit (pk int primary key, c1 int, c2 int);
insert into sq_limit values
(1, 1, 1),
(2, 2, 2),
select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3
loop
ln := regexp_replace(ln, 'Memory: \S*', 'Memory: xxx');
+ -- this case might occur if force_parallel_mode is on:
+ ln := regexp_replace(ln, 'Worker 0: Sort Method', 'Sort Method');
return next ln;
end loop;
end;
(3 rows)
drop function explain_sq_limit();
+drop table sq_limit;
-- ANALYZE shows that a top-N sort was used. We must suppress or filter away
-- all the non-invariant parts of the EXPLAIN ANALYZE output.
--
-create temp table sq_limit (pk int primary key, c1 int, c2 int);
+create table sq_limit (pk int primary key, c1 int, c2 int);
insert into sq_limit values
(1, 1, 1),
(2, 2, 2),
select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3
loop
ln := regexp_replace(ln, 'Memory: \S*', 'Memory: xxx');
+ -- this case might occur if force_parallel_mode is on:
+ ln := regexp_replace(ln, 'Worker 0: Sort Method', 'Sort Method');
return next ln;
end loop;
end;
select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3;
drop function explain_sq_limit();
+
+drop table sq_limit;
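
(Test note: sq_limit is changed from a temp table to a regular one so that force_parallel_mode can wrap the query in a Gather, since queries touching temporary tables are not run in parallel workers. When that happens the sort runs in a worker and its line is prefixed with "Worker 0:", which the extra regexp_replace strips so the expected output stays stable; the explicit drop table at the end is needed now that the table no longer disappears with the session.)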