*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeLimit.c,v 1.29 2007/01/05 22:19:28 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeLimit.c,v 1.30 2007/05/04 01:13:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* Reset position to start-of-scan */
node->position = 0;
node->subSlot = NULL;
+
+ /*
+ * If we have a COUNT, and our input is a Sort node, notify it that it can
+ * use bounded sort.
+ *
+ * This is a bit of a kluge, but we don't have any more-abstract way of
+ * communicating between the two nodes; and it doesn't seem worth trying
+ * to invent one without more examples of special communication needs.
+ *
+ * Note: it is the responsibility of nodeSort.c to react properly to
+ * changes of these parameters. If we ever do redesign this, it'd be
+ * a good idea to integrate this signaling with the parameter-change
+ * mechanism.
+ */
+ if (IsA(outerPlanState(node), SortState))
+ {
+ SortState *sortState = (SortState *) outerPlanState(node);
+ int64 tuples_needed = node->count + node->offset;
+
+ /* negative test checks for overflow */
+ if (node->noCount || tuples_needed < 0)
+ {
+ /* make sure flag gets reset if needed upon rescan */
+ sortState->bounded = false;
+ }
+ else
+ {
+ sortState->bounded = true;
+ sortState->bound = tuples_needed;
+ }
+ }
}
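
The negative test above is the whole overflow defense: count and offset are both int64, so a huge LIMIT/OFFSET pair can wrap their sum negative, in which case the sort is simply left unbounded. A minimal standalone sketch of the same decision logic follows (hypothetical names; not the executor code; like the original it relies on wraparound behavior, which is technically undefined for signed C arithmetic):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Decide whether a sort feeding LIMIT count OFFSET offset can be bounded. */
static bool
choose_sort_bound(bool noCount, int64_t count, int64_t offset,
				  int64_t *bound)
{
	int64_t		tuples_needed = count + offset;

	/* LIMIT ALL, or the sum wrapped negative: leave the sort unbounded */
	if (noCount || tuples_needed < 0)
		return false;
	*bound = tuples_needed;
	return true;
}

int
main(void)
{
	int64_t		bound;

	/* e.g. LIMIT 100 OFFSET 10: only the first 110 tuples matter */
	if (choose_sort_bound(false, 100, 10, &bound))
		printf("bounded sort, keep first %lld tuples\n", (long long) bound);
	return 0;
}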
/* ----------------------------------------------------------------
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeSort.c,v 1.60 2007/01/09 02:14:11 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeSort.c,v 1.61 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
plannode->nullsFirst,
work_mem,
node->randomAccess);
+ if (node->bounded)
+ tuplesort_set_bound(tuplesortstate, node->bound);
node->tuplesortstate = (void *) tuplesortstate;
/*
* finally set the sorted flag to true
*/
node->sort_Done = true;
+ node->bounded_Done = node->bounded;
+ node->bound_Done = node->bound;
SO1_printf("ExecSort: %s\n", "sorting done");
}
EXEC_FLAG_BACKWARD |
EXEC_FLAG_MARK)) != 0;
+ sortstate->bounded = false;
sortstate->sort_Done = false;
sortstate->tuplesortstate = NULL;
/*
* If subnode is to be rescanned then we forget previous sort results; we
- * have to re-read the subplan and re-sort.
+ * have to re-read the subplan and re-sort. Also must re-sort if the
+ * bounded-sort parameters changed or we didn't select randomAccess.
*
* Otherwise we can just rewind and rescan the sorted output.
*/
if (((PlanState *) node)->lefttree->chgParam != NULL ||
+ node->bounded != node->bounded_Done ||
+ node->bound != node->bound_Done ||
!node->randomAccess)
{
node->sort_Done = false;
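Concretely: suppose an upper Limit node installs bound = 30 on the first scan but, after a parameter change, needs bound = 50 on a rescan. The 30-tuple result of the earlier bounded sort cannot answer the larger request, so bounded/bound are compared against bounded_Done/bound_Done, and any mismatch forces a fresh sort instead of a rewind.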
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.181 2007/04/21 21:01:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.182 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* disk traffic = 2 * relsize * ceil(logM(p / (2*work_mem)))
* cpu = comparison_cost * t * log2(t)
*
+ * If the sort is bounded (i.e., only the first k result tuples are needed)
+ * and k tuples can fit into work_mem, we use a heap method that keeps only
+ * k tuples in the heap; this will require about t*log2(k) tuple comparisons.
+ *
* The disk traffic is assumed to be 3/4ths sequential and 1/4th random
* accesses (XXX can't we refine that guess?)
*
* 'input_cost' is the total cost for reading the input data
* 'tuples' is the number of tuples in the relation
* 'width' is the average tuple width in bytes
+ * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
*
* NOTE: some callers currently pass NIL for pathkeys because they
* can't conveniently supply the sort keys. Since this routine doesn't
*/
void
cost_sort(Path *path, PlannerInfo *root,
- List *pathkeys, Cost input_cost, double tuples, int width)
+ List *pathkeys, Cost input_cost, double tuples, int width,
+ double limit_tuples)
{
Cost startup_cost = input_cost;
Cost run_cost = 0;
- double nbytes = relation_byte_size(tuples, width);
+ double input_bytes = relation_byte_size(tuples, width);
+ double output_bytes;
+ double output_tuples;
long work_mem_bytes = work_mem * 1024L;
if (!enable_sort)
if (tuples < 2.0)
tuples = 2.0;
- /*
- * CPU costs
- *
- * Assume about two operator evals per tuple comparison and N log2 N
- * comparisons
- */
- startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
+ /* Do we have a useful LIMIT? */
+ if (limit_tuples > 0 && limit_tuples < tuples)
+ {
+ output_tuples = limit_tuples;
+ output_bytes = relation_byte_size(output_tuples, width);
+ }
+ else
+ {
+ output_tuples = tuples;
+ output_bytes = input_bytes;
+ }
- /* disk costs */
- if (nbytes > work_mem_bytes)
+ if (output_bytes > work_mem_bytes)
{
- double npages = ceil(nbytes / BLCKSZ);
- double nruns = (nbytes / work_mem_bytes) * 0.5;
+ /*
+ * We'll have to use a disk-based sort of all the tuples
+ */
+ double npages = ceil(input_bytes / BLCKSZ);
+ double nruns = (input_bytes / work_mem_bytes) * 0.5;
double mergeorder = tuplesort_merge_order(work_mem_bytes);
double log_runs;
double npageaccesses;
+ /*
+ * CPU costs
+ *
+ * Assume about two operator evals per tuple comparison and N log2 N
+ * comparisons
+ */
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
+
+ /* Disk costs */
+
/* Compute logM(r) as log(r) / log(M) */
if (nruns > mergeorder)
log_runs = ceil(log(nruns) / log(mergeorder));
startup_cost += npageaccesses *
(seq_page_cost * 0.75 + random_page_cost * 0.25);
}
+ else if (tuples > 2 * output_tuples || input_bytes > work_mem_bytes)
+ {
+ /*
+ * We'll use a bounded heap-sort keeping just K tuples in memory,
+ * for a total number of tuple comparisons of N log2 K; but the
+ * constant factor is a bit higher than for quicksort. Tweak it
+ * so that the cost curve is continuous at the crossover point.
+ */
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(2.0 * output_tuples);
+ }
+ else
+ {
+ /* We'll use plain quicksort on all the input tuples */
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
+ }
/*
* Also charge a small amount (arbitrarily set equal to operator cost) per
- * extracted tuple.
+ * extracted tuple. Note it's correct to use tuples not output_tuples
+ * here --- the upper LIMIT will pro-rate the run cost so we'd be double
+ * counting the LIMIT otherwise.
*/
run_cost += cpu_operator_cost * tuples;
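
A quick way to convince yourself of the continuity claim in the heapsort branch: that branch is taken only when tuples > 2 * output_tuples, and at the boundary tuples = 2K its N*log2(2K) coincides with the quicksort branch's N*log2(N). A throwaway standalone check (the LOG2 macro is copied from costsize.c; the default cpu_operator_cost of 0.0025 is an assumption):

#include <math.h>
#include <stdio.h>

#define LOG2(x)  (log(x) / 0.693147180559945)	/* as in costsize.c */

static const double cpu_operator_cost = 0.0025; /* assumed default */

int
main(void)
{
	double		tuples = 2000.0;		/* N */
	double		output_tuples = 1000.0; /* K == N/2, the branch boundary */
	double		heap_cost;
	double		quick_cost;

	/* bounded heapsort branch: 2c * N * log2(2K) */
	heap_cost = 2.0 * cpu_operator_cost * tuples * LOG2(2.0 * output_tuples);
	/* plain quicksort branch: 2c * N * log2(N) */
	quick_cost = 2.0 * cpu_operator_cost * tuples * LOG2(tuples);

	printf("heap = %g, quick = %g\n", heap_cost, quick_cost);	/* equal */
	return 0;
}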
outersortkeys,
outer_path->total_cost,
outer_path_rows,
- outer_path->parent->width);
+ outer_path->parent->width,
+ -1.0);
startup_cost += sort_path.startup_cost;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* outerscansel;
innersortkeys,
inner_path->total_cost,
inner_path_rows,
- inner_path->parent->width);
+ inner_path->parent->width,
+ -1.0);
startup_cost += sort_path.startup_cost;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* innerscansel * rescanratio;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.229 2007/04/21 21:01:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.230 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
Plan *lefttree, Plan *righttree,
JoinType jointype);
static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
- AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst);
+ AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst,
+ double limit_tuples);
/*
outer_plan = (Plan *)
make_sort_from_pathkeys(root,
outer_plan,
- best_path->outersortkeys);
+ best_path->outersortkeys,
+ -1.0);
outerpathkeys = best_path->outersortkeys;
}
else
inner_plan = (Plan *)
make_sort_from_pathkeys(root,
inner_plan,
- best_path->innersortkeys);
+ best_path->innersortkeys,
+ -1.0);
innerpathkeys = best_path->innersortkeys;
}
else
* make_sort --- basic routine to build a Sort plan node
*
* Caller must have built the sortColIdx, sortOperators, and nullsFirst
- * arrays already.
+ * arrays already. limit_tuples is as for cost_sort (in particular, pass
+ * -1 if no limit)
*/
static Sort *
make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
- AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst)
+ AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst,
+ double limit_tuples)
{
Sort *node = makeNode(Sort);
Plan *plan = &node->plan;
cost_sort(&sort_path, root, NIL,
lefttree->total_cost,
lefttree->plan_rows,
- lefttree->plan_width);
+ lefttree->plan_width,
+ limit_tuples);
plan->startup_cost = sort_path.startup_cost;
plan->total_cost = sort_path.total_cost;
plan->targetlist = lefttree->targetlist;
*
* 'lefttree' is the node which yields input tuples
* 'pathkeys' is the list of pathkeys by which the result is to be sorted
+ * 'limit_tuples' is the bound on the number of output tuples;
+ * -1 if no bound
*
* We must convert the pathkey information into arrays of sort key column
* numbers and sort operator OIDs.
* adding a Result node just to do the projection.
*/
Sort *
-make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys)
+make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys,
+ double limit_tuples)
{
List *tlist = lefttree->targetlist;
ListCell *i;
Assert(numsortkeys > 0);
return make_sort(root, lefttree, numsortkeys,
- sortColIdx, sortOperators, nullsFirst);
+ sortColIdx, sortOperators, nullsFirst, limit_tuples);
}
/*
Assert(numsortkeys > 0);
return make_sort(root, lefttree, numsortkeys,
- sortColIdx, sortOperators, nullsFirst);
+ sortColIdx, sortOperators, nullsFirst, -1.0);
}
/*
Assert(numsortkeys > 0);
return make_sort(root, lefttree, numsortkeys,
- sortColIdx, sortOperators, nullsFirst);
+ sortColIdx, sortOperators, nullsFirst, -1.0);
}
Material *
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.100 2007/04/21 21:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.101 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* tlist is the target list the query should produce
* (this is NOT necessarily root->parse->targetList!)
* tuple_fraction is the fraction of tuples we expect will be retrieved
+ * limit_tuples is a hard limit on number of tuples to retrieve,
+ * or -1 if no limit
*
* Output parameters:
* *cheapest_path receives the overall-cheapest path for the query
* from the plan to be retrieved
* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
* expected to be retrieved (ie, a LIMIT specification)
+ * Note that a nonzero tuple_fraction could come from outer context; it is
+ * therefore not redundant with limit_tuples. We use limit_tuples to determine
+ * whether a bounded sort can be used at runtime.
*/
void
-query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
+query_planner(PlannerInfo *root, List *tlist,
+ double tuple_fraction, double limit_tuples,
Path **cheapest_path, Path **sorted_path,
double *num_groups)
{
/* Figure cost for sorting */
cost_sort(&sort_path, root, root->query_pathkeys,
cheapestpath->total_cost,
- final_rel->rows, final_rel->width);
+ final_rel->rows, final_rel->width,
+ limit_tuples);
}
if (compare_fractional_path_costs(sortedpath, &sort_path,
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.218 2007/04/27 22:05:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.219 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
double tuple_fraction,
int64 *offset_est, int64 *count_est);
static Oid *extract_grouping_ops(List *groupClause);
-static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
+static bool choose_hashed_grouping(PlannerInfo *root,
+ double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path,
Oid *groupOperators, double dNumGroups,
AggClauseCounts *agg_counts);
List *tlist = parse->targetList;
int64 offset_est = 0;
int64 count_est = 0;
+ double limit_tuples = -1.0;
Plan *result_plan;
List *current_pathkeys;
List *sort_pathkeys;
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
if (parse->limitCount || parse->limitOffset)
+ {
tuple_fraction = preprocess_limit(root, tuple_fraction,
&offset_est, &count_est);
+ /*
+ * If we have a known LIMIT, and don't have an unknown OFFSET,
+ * we can estimate the effects of using a bounded sort.
+ */
+ if (count_est > 0 && offset_est >= 0)
+ limit_tuples = (double) count_est + (double) offset_est;
+ }
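Concretely: LIMIT 10 OFFSET 20 yields limit_tuples = 30, so the planner can cost a sort that keeps only 30 tuples. The guard requires a LIMIT whose value is known at plan time (count_est > 0) together with an OFFSET that is either absent or likewise known (offset_est >= 0); otherwise limit_tuples stays at -1 and sorts are costed as unbounded.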
if (parse->setOperations)
{
* estimate the number of groups in the query, and canonicalize all
* the pathkeys.
*/
- query_planner(root, sub_tlist, tuple_fraction,
+ query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
&cheapest_path, &sorted_path, &dNumGroups);
group_pathkeys = root->group_pathkeys;
{
groupOperators = extract_grouping_ops(parse->groupClause);
use_hashed_grouping =
- choose_hashed_grouping(root, tuple_fraction,
+ choose_hashed_grouping(root, tuple_fraction, limit_tuples,
cheapest_path, sorted_path,
groupOperators, dNumGroups,
&agg_counts);
{
result_plan = (Plan *) make_sort_from_pathkeys(root,
result_plan,
- sort_pathkeys);
+ sort_pathkeys,
+ limit_tuples);
current_pathkeys = sort_pathkeys;
}
}
* choose_hashed_grouping - should we use hashed grouping?
*/
static bool
-choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
+choose_hashed_grouping(PlannerInfo *root,
+ double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path,
Oid *groupOperators, double dNumGroups,
AggClauseCounts *agg_counts)
/* Result of hashed agg is always unsorted */
if (root->sort_pathkeys)
cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
- dNumGroups, cheapest_path_width);
+ dNumGroups, cheapest_path_width, limit_tuples);
if (sorted_path)
{
if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
{
cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
- cheapest_path_rows, cheapest_path_width);
+ cheapest_path_rows, cheapest_path_width, -1.0);
current_pathkeys = root->group_pathkeys;
}
if (root->sort_pathkeys &&
!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
- dNumGroups, cheapest_path_width);
+ dNumGroups, cheapest_path_width, limit_tuples);
/*
* Now make the decision using the top-level tuple fraction. First we
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.139 2007/04/21 21:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.140 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
cost_sort(&sort_path, root, NIL,
subpath->total_cost,
rel->rows,
- rel->width);
+ rel->width,
+ -1.0);
/*
* Charge one cpu_operator_cost per comparison per input tuple. We assume
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.389 2007/04/26 16:13:12 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.390 2007/05/04 01:13:44 tgl Exp $
*
*--------------------------------------------------------------------
*/
#ifdef TRACE_SORT
extern bool trace_sort;
#endif
+#ifdef DEBUG_BOUNDED_SORT
+extern bool optimize_bounded_sort;
+#endif
#ifdef USE_SSL
extern char *SSLCipherSuites;
},
#endif
+#ifdef DEBUG_BOUNDED_SORT
+ /* this is undocumented because not exposed in a standard build */
+ {
+ {
+ "optimize_bounded_sort", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enable bounded sorting using heap sort."),
+ NULL,
+ GUC_NOT_IN_SAMPLE
+ },
+ &optimize_bounded_sort,
+ true, NULL, NULL
+ },
+#endif
+
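In a build compiled with DEBUG_BOUNDED_SORT, the new setting can be toggled per session (SET optimize_bounded_sort = off) to make tuplesort_set_bound() ignore its hint, which is handy for comparing bounded and unbounded behavior on the same query; in a standard build the variable does not exist at all.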
#ifdef WAL_DEBUG
{
{"wal_debug", PGC_SUSET, DEVELOPER_OPTIONS,
&server_version_num,
PG_VERSION_NUM, PG_VERSION_NUM, PG_VERSION_NUM, NULL, NULL
},
{
{"log_temp_files", PGC_USERSET, LOGGING_WHAT,
gettext_noop("Log the use of temporary files larger than this number of kilobytes."),
PGC_S_DEFAULT))
elog(FATAL, "failed to initialize %s to %d",
conf->gen.name, conf->boot_val);
*conf->variable = conf->reset_val = conf->boot_val;
break;
}
case PGC_REAL:
PGC_S_DEFAULT))
elog(FATAL, "failed to initialize %s to %g",
conf->gen.name, conf->boot_val);
*conf->variable = conf->reset_val = conf->boot_val;
break;
}
case PGC_STRING:
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.74 2007/01/10 18:06:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.75 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include <limits.h>
+
#include "access/heapam.h"
#include "access/nbtree.h"
#include "catalog/pg_amop.h"
#include "utils/tuplesort.h"
-/* GUC variable */
+/* GUC variables */
#ifdef TRACE_SORT
bool trace_sort = false;
#endif
+#ifdef DEBUG_BOUNDED_SORT
+bool optimize_bounded_sort = true;
+#endif
/*
typedef enum
{
TSS_INITIAL, /* Loading tuples; still within memory limit */
+ TSS_BOUNDED, /* Loading tuples into bounded-size heap */
TSS_BUILDRUNS, /* Loading tuples; writing to tape */
TSS_SORTEDINMEM, /* Sort completed entirely in memory */
TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */
TupSortStatus status; /* enumerated value as shown above */
int nKeys; /* number of columns in sort key */
bool randomAccess; /* did caller request random access? */
+ bool bounded; /* did caller specify a maximum number of
+ * tuples to return? */
+ int bound; /* if bounded, the maximum number of tuples */
long availMem; /* remaining memory available, in bytes */
long allowedMem; /* total memory allowed, in bytes */
int maxTapes; /* number of tapes (Knuth's T) */
void (*readtup) (Tuplesortstate *state, SortTuple *stup,
int tapenum, unsigned int len);
+ /*
+ * Function to reverse the sort direction from its current state.
+ * (We could dispense with this if we wanted to enforce that all variants
+ * represent the sort key information alike.)
+ */
+ void (*reversedirection) (Tuplesortstate *state);
+
/*
* This array holds the tuples now in sort memory. If we are in state
* INITIAL, the tuples are in no particular order; if we are in state
#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup))
#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup))
#define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len))
+#define REVERSEDIRECTION(state) ((*(state)->reversedirection) (state))
#define LACKMEM(state) ((state)->availMem < 0)
#define USEMEM(state,amt) ((state)->availMem -= (amt))
#define FREEMEM(state,amt) ((state)->availMem += (amt))
static void mergepreread(Tuplesortstate *state);
static void mergeprereadone(Tuplesortstate *state, int srcTape);
static void dumptuples(Tuplesortstate *state, bool alltuples);
+static void make_bounded_heap(Tuplesortstate *state);
+static void sort_bounded_heap(Tuplesortstate *state);
static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple,
int tupleindex, bool checkIndex);
static void tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex);
SortTuple *stup);
static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
int tapenum, unsigned int len);
+static void reversedirection_heap(Tuplesortstate *state);
static int comparetup_index(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup);
SortTuple *stup);
static void readtup_index(Tuplesortstate *state, SortTuple *stup,
int tapenum, unsigned int len);
+static void reversedirection_index(Tuplesortstate *state);
static int comparetup_datum(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup);
SortTuple *stup);
static void readtup_datum(Tuplesortstate *state, SortTuple *stup,
int tapenum, unsigned int len);
+static void reversedirection_datum(Tuplesortstate *state);
+static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup);
/*
state->copytup = copytup_heap;
state->writetup = writetup_heap;
state->readtup = readtup_heap;
+ state->reversedirection = reversedirection_heap;
state->tupDesc = tupDesc; /* assume we need not copy tupDesc */
state->scanKeys = (ScanKey) palloc0(nkeys * sizeof(ScanKeyData));
state->copytup = copytup_index;
state->writetup = writetup_index;
state->readtup = readtup_index;
+ state->reversedirection = reversedirection_index;
state->indexRel = indexRel;
/* see comments below about btree dependence of this code... */
state->copytup = copytup_datum;
state->writetup = writetup_datum;
state->readtup = readtup_datum;
+ state->reversedirection = reversedirection_datum;
state->datumType = datumType;
return state;
}
+/*
+ * tuplesort_set_bound
+ *
+ * Advise tuplesort that at most the first N result tuples are required.
+ *
+ * Must be called before inserting any tuples. (Actually, we could allow it
+ * as long as the sort hasn't spilled to disk, but there seems no need for
+ * delayed calls at the moment.)
+ *
+ * This is a hint only. The tuplesort may still return more tuples than
+ * requested.
+ */
+void
+tuplesort_set_bound(Tuplesortstate *state, int64 bound)
+{
+ /* Assert we're called before loading any tuples */
+ Assert(state->status == TSS_INITIAL);
+ Assert(state->memtupcount == 0);
+ Assert(!state->bounded);
+
+#ifdef DEBUG_BOUNDED_SORT
+ /* Honor GUC setting that disables the feature (for easy testing) */
+ if (!optimize_bounded_sort)
+ return;
+#endif
+
+ /* We want to be able to compute bound * 2, so limit the setting */
+ if (bound > (int64) (INT_MAX/2))
+ return;
+
+ state->bounded = true;
+ state->bound = (int) bound;
+}
+
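Note the two silent fallbacks above: the DEBUG_BOUNDED_SORT check and the INT_MAX/2 guard both return without setting state->bounded, which is why the bound is documented as a hint. A caller that passes an enormous bound simply gets an ordinary full sort, and the "retrieved too many tuples" check further down only fires when the bound was actually accepted.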
/*
* tuplesort_end
*
}
state->memtuples[state->memtupcount++] = *tuple;
+ /*
+ * Check if it's time to switch over to a bounded heapsort.
+ * We do so if the input tuple count exceeds twice the desired
+ * tuple count (this is a heuristic for where heapsort becomes
+ * cheaper than a quicksort), or if we've just filled workMem
+ * and have enough tuples to meet the bound.
+ *
+ * Note that once we enter TSS_BOUNDED state we will always try
+ * to complete the sort that way. In the worst case, if later
+ * input tuples are larger than earlier ones, this might cause
+ * us to exceed workMem significantly.
+ */
+ if (state->bounded &&
+ (state->memtupcount > state->bound * 2 ||
+ (state->memtupcount > state->bound && LACKMEM(state))))
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG, "switching to bounded heapsort at %d tuples: %s",
+ state->memtupcount,
+ pg_rusage_show(&state->ru_start));
+#endif
+ make_bounded_heap(state);
+ return;
+ }
+
/*
* Done if we still fit in available memory and have array slots.
*/
*/
dumptuples(state, false);
break;
+
+ case TSS_BOUNDED:
+ /*
+ * We don't want to grow the array here, so check whether the
+ * new tuple can be discarded before putting it in. This should
+ * be a good speed optimization, too, since when there are many
+ * more input tuples than the bound, most input tuples can be
+ * discarded with just this one comparison. Note that because
+ * we currently have the sort direction reversed, we must check
+ * for <= not >=.
+ */
+ if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0)
+ {
+ /* new tuple <= top of the heap, so we can discard it */
+ free_sort_tuple(state, tuple);
+ }
+ else
+ {
+ /* discard top of heap, sift up, insert new tuple */
+ free_sort_tuple(state, &state->memtuples[0]);
+ tuplesort_heap_siftup(state, false);
+ tuplesort_heap_insert(state, tuple, 0, false);
+ }
+ break;
+
case TSS_BUILDRUNS:
/*
*/
dumptuples(state, false);
break;
+
default:
elog(ERROR, "invalid tuplesort state");
break;
state->markpos_eof = false;
state->status = TSS_SORTEDINMEM;
break;
+
+ case TSS_BOUNDED:
+
+ /*
+ * We were able to accumulate all the tuples required for output
+ * in memory, using a heap to eliminate excess tuples. Now we have
+ * to transform the heap to a properly-sorted array.
+ */
+ if (state->memtupcount > 1)
+ sort_bounded_heap(state);
+ state->current = 0;
+ state->eof_reached = false;
+ state->markpos_offset = 0;
+ state->markpos_eof = false;
+ state->status = TSS_SORTEDINMEM;
+ break;
+
case TSS_BUILDRUNS:
/*
state->markpos_offset = 0;
state->markpos_eof = false;
break;
+
default:
elog(ERROR, "invalid tuplesort state");
break;
return true;
}
state->eof_reached = true;
+
+ /*
+ * Complain if caller tries to retrieve more tuples than
+ * originally asked for in a bounded sort. This is because
+ * returning EOF here might be the wrong thing.
+ */
+ if (state->bounded && state->current >= state->bound)
+ elog(ERROR, "retrieved too many tuples in a bounded sort");
+
return false;
}
else
((tup1)->tupindex) - ((tup2)->tupindex) : \
COMPARETUP(state, tup1, tup2))
+/*
+ * Convert the existing unordered array of SortTuples to a bounded heap,
+ * discarding all but the smallest "state->bound" tuples.
+ *
+ * When working with a bounded heap, we want to keep the largest entry
+ * at the root (array entry zero), instead of the smallest as in the normal
+ * sort case. This allows us to discard the largest entry cheaply.
+ * Therefore, we temporarily reverse the sort direction.
+ *
+ * We assume that all entries in a bounded heap will always have tupindex
+ * zero; it therefore doesn't matter that HEAPCOMPARE() doesn't reverse
+ * the direction of comparison for tupindexes.
+ */
+static void
+make_bounded_heap(Tuplesortstate *state)
+{
+ int tupcount = state->memtupcount;
+ int i;
+
+ Assert(state->status == TSS_INITIAL);
+ Assert(state->bounded);
+ Assert(tupcount >= state->bound);
+
+ /* Reverse sort direction so largest entry will be at root */
+ REVERSEDIRECTION(state);
+
+ state->memtupcount = 0; /* make the heap empty */
+ for (i = 0; i < tupcount; i++)
+ {
+ if (state->memtupcount >= state->bound &&
+ COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0)
+ {
+ /* New tuple would just get thrown out, so skip it */
+ free_sort_tuple(state, &state->memtuples[i]);
+ }
+ else
+ {
+ /* Insert next tuple into heap */
+ /* Must copy source tuple to avoid possible overwrite */
+ SortTuple stup = state->memtuples[i];
+
+ tuplesort_heap_insert(state, &stup, 0, false);
+
+ /* If heap too full, discard largest entry */
+ if (state->memtupcount > state->bound)
+ {
+ free_sort_tuple(state, &state->memtuples[0]);
+ tuplesort_heap_siftup(state, false);
+ }
+ }
+ }
+
+ Assert(state->memtupcount == state->bound);
+ state->status = TSS_BOUNDED;
+}
+
+/*
+ * Convert the bounded heap to a properly-sorted array
+ */
+static void
+sort_bounded_heap(Tuplesortstate *state)
+{
+ int tupcount = state->memtupcount;
+
+ Assert(state->status == TSS_BOUNDED);
+ Assert(state->bounded);
+ Assert(tupcount == state->bound);
+
+ /*
+ * We can unheapify in place because each sift-up will remove the largest
+ * entry, which we can promptly store in the newly freed slot at the end.
+ * Once we're down to a single-entry heap, we're done.
+ */
+ while (state->memtupcount > 1)
+ {
+ SortTuple stup = state->memtuples[0];
+
+ /* this sifts-up the next-largest entry and decreases memtupcount */
+ tuplesort_heap_siftup(state, false);
+ state->memtuples[state->memtupcount] = stup;
+ }
+ state->memtupcount = tupcount;
+
+ /*
+ * Reverse sort direction back to the original state. This is not
+ * actually necessary but seems like a good idea for tidiness.
+ */
+ REVERSEDIRECTION(state);
+
+ state->status = TSS_SORTEDINMEM;
+}
+
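Taken together with the TSS_BOUNDED case in puttuple and the switchover heuristic (flip to the heap once the input exceeds twice the bound, or once workMem fills), these two routines implement classic top-K selection. The toy version below, for plain ints, bakes the reversed comparison into a max-heap of the K smallest values; it is illustrative only and skips SortTuple, memory accounting, and the run-time direction switch:

#include <stdio.h>

#define K 5

static int	heap[K];		/* max-heap of the K smallest values seen */
static int	heapcount = 0;

/* Restore the max-heap property by sifting the root down (heap size n). */
static void
siftdown(int n)
{
	int			i = 0;

	for (;;)
	{
		int			largest = i;
		int			l = 2 * i + 1;
		int			r = 2 * i + 2;
		int			tmp;

		if (l < n && heap[l] > heap[largest])
			largest = l;
		if (r < n && heap[r] > heap[largest])
			largest = r;
		if (largest == i)
			break;
		tmp = heap[i];
		heap[i] = heap[largest];
		heap[largest] = tmp;
		i = largest;
	}
}

/* Bubble a newly appended entry up to its place. */
static void
siftup(int i)
{
	while (i > 0 && heap[(i - 1) / 2] < heap[i])
	{
		int			p = (i - 1) / 2;
		int			tmp = heap[i];

		heap[i] = heap[p];
		heap[p] = tmp;
		i = p;
	}
}

/* Analogue of puttuple in TSS_BOUNDED state. */
static void
put_value(int v)
{
	if (heapcount < K)
	{
		heap[heapcount] = v;
		siftup(heapcount++);
	}
	else if (v < heap[0])
	{
		/* beats the worst retained value: replace the root */
		heap[0] = v;
		siftdown(heapcount);
	}
	/* else: one comparison was enough to discard v, as in TSS_BOUNDED */
}

/*
 * Analogue of sort_bounded_heap: repeatedly move the root (the current
 * maximum) into the slot freed at the end of the shrinking heap, leaving
 * the array sorted ascending.
 */
static void
unheapify(void)
{
	int			n = heapcount;

	while (n > 1)
	{
		int			top = heap[0];

		heap[0] = heap[--n];
		siftdown(n);
		heap[n] = top;
	}
}

int
main(void)
{
	int			input[] = {42, 7, 19, 3, 88, 23, 5, 61, 11, 2};
	int			i;

	for (i = 0; i < 10; i++)
		put_value(input[i]);
	unheapify();
	for (i = 0; i < K; i++)
		printf("%d ", heap[i]);	/* prints: 2 3 5 7 11 */
	printf("\n");
	return 0;
}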
/*
* Insert a new tuple into an empty or existing heap, maintaining the
* heap invariant. Caller is responsible for ensuring there's room.
&stup->isnull1);
}
+static void
+reversedirection_heap(Tuplesortstate *state)
+{
+ ScanKey scanKey = state->scanKeys;
+ int nkey;
+
+ for (nkey = 0; nkey < state->nKeys; nkey++, scanKey++)
+ {
+ scanKey->sk_flags ^= (SK_BT_DESC | SK_BT_NULLS_FIRST);
+ }
+}
+
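One property the bounded-heap code relies on: XOR-ing both strategy bits flips ASC/DESC and NULLS FIRST/LAST together, and a second application of the same XOR (done at the end of sort_bounded_heap()) restores the keys exactly. A trivial standalone demonstration with stand-in flag values (not the real SK_BT_* bits):

#include <assert.h>

#define DEMO_DESC		 0x1	/* stand-in for SK_BT_DESC */
#define DEMO_NULLS_FIRST 0x2	/* stand-in for SK_BT_NULLS_FIRST */

int
main(void)
{
	int			flags = DEMO_DESC;	/* key sorted DESC NULLS LAST */

	flags ^= (DEMO_DESC | DEMO_NULLS_FIRST);
	assert(flags == DEMO_NULLS_FIRST);	/* reversed: ASC NULLS FIRST */

	flags ^= (DEMO_DESC | DEMO_NULLS_FIRST);
	assert(flags == DEMO_DESC); /* second toggle restores the original */
	return 0;
}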
/*
* Routines specialized for IndexTuple case
&stup->isnull1);
}
+static void
+reversedirection_index(Tuplesortstate *state)
+{
+ ScanKey scanKey = state->indexScanKey;
+ int nkey;
+
+ for (nkey = 0; nkey < state->nKeys; nkey++, scanKey++)
+ {
+ scanKey->sk_flags ^= (SK_BT_DESC | SK_BT_NULLS_FIRST);
+ }
+}
+
/*
* Routines specialized for DatumTuple case
sizeof(tuplen)) != sizeof(tuplen))
elog(ERROR, "unexpected end of data");
}
+
+static void
+reversedirection_datum(Tuplesortstate *state)
+{
+ state->sortFnFlags ^= (SK_BT_DESC | SK_BT_NULLS_FIRST);
+}
+
+/*
+ * Convenience routine to free a tuple previously loaded into sort memory
+ */
+static void
+free_sort_tuple(Tuplesortstate *state, SortTuple *stup)
+{
+ FREEMEM(state, GetMemoryChunkSpace(stup->tuple));
+ pfree(stup->tuple);
+}
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.172 2007/04/26 23:24:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.173 2007/05/04 01:13:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
{
ScanState ss; /* its first field is NodeTag */
bool randomAccess; /* need random access to sort output? */
+ bool bounded; /* is the result set bounded? */
+ int64 bound; /* if bounded, how many tuples are needed */
bool sort_Done; /* sort completed yet? */
+ bool bounded_Done; /* value of bounded we did the sort with */
+ int64 bound_Done; /* value of bound we did the sort with */
void *tuplesortstate; /* private state of tuplesort.c */
} SortState;
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.85 2007/02/22 22:00:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.86 2007/05/04 01:13:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void cost_valuesscan(Path *path, PlannerInfo *root,
RelOptInfo *baserel);
extern void cost_sort(Path *path, PlannerInfo *root,
- List *pathkeys, Cost input_cost, double tuples, int width);
+ List *pathkeys, Cost input_cost, double tuples, int width,
+ double limit_tuples);
extern void cost_material(Path *path,
Cost input_cost, double tuples, int width);
extern void cost_agg(Path *path, PlannerInfo *root,
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.100 2007/02/22 22:00:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.101 2007/05/04 01:13:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* prototypes for plan/planmain.c
*/
extern void query_planner(PlannerInfo *root, List *tlist,
- double tuple_fraction,
+ double tuple_fraction, double limit_tuples,
Path **cheapest_path, Path **sorted_path,
double *num_groups);
Index scanrelid, Plan *subplan, List *subrtable);
extern Append *make_append(List *appendplans, bool isTarget, List *tlist);
extern Sort *make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree,
- List *pathkeys);
+ List *pathkeys, double limit_tuples);
extern Sort *make_sort_from_sortclauses(PlannerInfo *root, List *sortcls,
Plan *lefttree);
extern Sort *make_sort_from_groupcols(PlannerInfo *root, List *groupcls,
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/tuplesort.h,v 1.25 2007/01/09 02:14:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/tuplesort.h,v 1.26 2007/05/04 01:13:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
Oid sortOperator, bool nullsFirstFlag,
int workMem, bool randomAccess);
+extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);
+
extern void tuplesort_puttupleslot(Tuplesortstate *state,
TupleTableSlot *slot);
extern void tuplesort_putindextuple(Tuplesortstate *state, IndexTuple tuple);