1 /*-------------------------------------------------------------------------
4 * Routines to manipulate pathlists and create path nodes
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $
13 *-------------------------------------------------------------------------
19 #include "catalog/pg_operator.h"
20 #include "executor/executor.h"
21 #include "miscadmin.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/pathnode.h"
24 #include "optimizer/paths.h"
25 #include "optimizer/tlist.h"
26 #include "parser/parse_expr.h"
27 #include "parser/parse_oper.h"
28 #include "parser/parsetree.h"
29 #include "utils/selfuncs.h"
30 #include "utils/lsyscache.h"
31 #include "utils/syscache.h"
34 static List *translate_sub_tlist(List *tlist, int relid);
35 static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
36 static Oid distinct_col_search(int colno, List *colnos, List *opids);
37 static bool hash_safe_operators(List *opids);
40 /*****************************************************************************
41 * MISC. PATH UTILITIES
42 *****************************************************************************/
46 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
47 * or more expensive than path2 for the specified criterion.
50 compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
52 if (criterion == STARTUP_COST)
54 if (path1->startup_cost < path2->startup_cost)
56 if (path1->startup_cost > path2->startup_cost)
60 * If paths have the same startup cost (not at all unlikely), order
63 if (path1->total_cost < path2->total_cost)
65 if (path1->total_cost > path2->total_cost)
70 if (path1->total_cost < path2->total_cost)
72 if (path1->total_cost > path2->total_cost)
76 * If paths have the same total cost, order them by startup cost.
78 if (path1->startup_cost < path2->startup_cost)
80 if (path1->startup_cost > path2->startup_cost)
87 * compare_fuzzy_path_costs
88 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
89 * or more expensive than path2 for the specified criterion.
91 * This differs from compare_path_costs in that we consider the costs the
92 * same if they agree to within a "fuzz factor". This is used by add_path
93 * to avoid keeping both of a pair of paths that really have insignificantly
97 compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
100 * We use a fuzz factor of 1% of the smaller cost.
102 * XXX does this percentage need to be user-configurable?
104 if (criterion == STARTUP_COST)
106 if (path1->startup_cost > path2->startup_cost * 1.01)
108 if (path2->startup_cost > path1->startup_cost * 1.01)
112 * If paths have the same startup cost (not at all unlikely), order
113 * them by total cost.
115 if (path1->total_cost > path2->total_cost * 1.01)
117 if (path2->total_cost > path1->total_cost * 1.01)
122 if (path1->total_cost > path2->total_cost * 1.01)
124 if (path2->total_cost > path1->total_cost * 1.01)
128 * If paths have the same total cost, order them by startup cost.
130 if (path1->startup_cost > path2->startup_cost * 1.01)
132 if (path2->startup_cost > path1->startup_cost * 1.01)
139 * compare_path_fractional_costs
140 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
141 * or more expensive than path2 for fetching the specified fraction
142 * of the total tuples.
144 * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
145 * path with the cheaper total_cost.
148 compare_fractional_path_costs(Path *path1, Path *path2,
154 if (fraction <= 0.0 || fraction >= 1.0)
155 return compare_path_costs(path1, path2, TOTAL_COST);
156 cost1 = path1->startup_cost +
157 fraction * (path1->total_cost - path1->startup_cost);
158 cost2 = path2->startup_cost +
159 fraction * (path2->total_cost - path2->startup_cost);
/*
 * set_cheapest
 *	  Find the minimum-cost paths among a relation's paths and cache them
 *	  in the rel's cheapest-path fields.
 *
 * NOTE(review): this excerpt has interior lines elided (comment openers,
 * braces, the empty-pathlist check before the elog, and some declarations);
 * the annotations below describe only the code that is visible here.
 */
169 * Find the minimum-cost paths from among a relation's paths,
170 * and save them in the rel's cheapest-path fields.
172 * This is normally called only after we've finished constructing the path
173 * list for the rel node.
175 * If we find two paths of identical costs, try to keep the better-sorted one.
176 * The paths might have unrelated sort orderings, in which case we can only
177 * guess which might be better to keep, but if one is superior then we
178 * definitely should keep it.
181 set_cheapest(RelOptInfo *parent_rel)
183 List *pathlist = parent_rel->pathlist;
185 Path *cheapest_startup_path;
186 Path *cheapest_total_path;
188 Assert(IsA(parent_rel, RelOptInfo));
/* presumably guarded by an (elided) empty-pathlist test — TODO confirm */
191 elog(ERROR, "could not devise a query plan for the given query");
/* seed both "cheapest" candidates with the first path in the list */
193 cheapest_startup_path = cheapest_total_path = (Path *) linitial(pathlist);
/* scan remaining paths, starting from the second list cell */
195 for_each_cell(p, lnext(list_head(pathlist)))
197 Path *path = (Path *) lfirst(p);
/* on a cost tie, prefer the path whose sort order is strictly better */
200 cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
203 compare_pathkeys(cheapest_startup_path->pathkeys,
204 path->pathkeys) == PATHKEYS_BETTER2))
205 cheapest_startup_path = path;
207 cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
210 compare_pathkeys(cheapest_total_path->pathkeys,
211 path->pathkeys) == PATHKEYS_BETTER2))
212 cheapest_total_path = path;
/* publish the winners on the rel; unique path is computed lazily */
215 parent_rel->cheapest_startup_path = cheapest_startup_path;
216 parent_rel->cheapest_total_path = cheapest_total_path;
217 parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
/*
 * add_path
 *	  Consider a new path for parent_rel; keep it only if it is not dominated
 *	  (on cost or sort order) by an existing path, and discard old paths that
 *	  the new one dominates.
 *
 * NOTE(review): this excerpt has interior lines elided (braces, several
 * loop/switch statements, the final pfree calls); annotations below cover
 * only the visible code.
 */
222 * Consider a potential implementation path for the specified parent rel,
223 * and add it to the rel's pathlist if it is worthy of consideration.
224 * A path is worthy if it has either a better sort order (better pathkeys)
225 * or cheaper cost (on either dimension) than any of the existing old paths.
227 * We also remove from the rel's pathlist any old paths that are dominated
228 * by new_path --- that is, new_path is both cheaper and at least as well
231 * The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
232 * at the front. No code depends on that for correctness; it's simply
233 * a speed hack within this routine. Doing it that way makes it more
234 * likely that we will reject an inferior path after a few comparisons,
235 * rather than many comparisons.
237 * NOTE: discarded Path objects are immediately pfree'd to reduce planner
238 * memory consumption. We dare not try to free the substructure of a Path,
239 * since much of it may be shared with other Paths or the query tree itself;
240 * but just recycling discarded Path nodes is a very useful savings in
241 * a large join tree. We can recycle the List nodes of pathlist, too.
243 * BUT: we do not pfree IndexPath objects, since they may be referenced as
244 * children of BitmapHeapPaths as well as being paths in their own right.
246 * 'parent_rel' is the relation entry to which the path corresponds.
247 * 'new_path' is a potential path for parent_rel.
249 * Returns nothing, but modifies parent_rel->pathlist.
252 add_path(RelOptInfo *parent_rel, Path *new_path)
254 bool accept_new = true; /* unless we find a superior old path */
255 ListCell *insert_after = NULL; /* where to insert new item */
256 ListCell *p1_prev = NULL;
260 * This is a convenient place to check for query cancel --- no part of the
261 * planner goes very long without calling add_path().
263 CHECK_FOR_INTERRUPTS();
266 * Loop to check proposed new path against old paths. Note it is possible
267 * for more than one old path to be tossed out because new_path dominates
270 p1 = list_head(parent_rel->pathlist); /* cannot use foreach here */
273 Path *old_path = (Path *) lfirst(p1);
274 bool remove_old = false; /* unless new proves superior */
278 * As of Postgres 8.0, we use fuzzy cost comparison to avoid wasting
279 * cycles keeping paths that are really not significantly different in
282 costcmp = compare_fuzzy_path_costs(new_path, old_path, TOTAL_COST);
285 * If the two paths compare differently for startup and total cost,
286 * then we want to keep both, and we can skip the (much slower)
287 * comparison of pathkeys. If they compare the same, proceed with the
288 * pathkeys comparison. Note: this test relies on the fact that
289 * compare_fuzzy_path_costs will only return 0 if both costs are
290 * effectively equal (and, therefore, there's no need to call it twice
294 costcmp == compare_fuzzy_path_costs(new_path, old_path,
297 switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
/* PATHKEYS_EQUAL branch: decide purely on cost */
301 remove_old = true; /* new dominates old */
302 else if (costcmp > 0)
303 accept_new = false; /* old dominates new */
307 * Same pathkeys, and fuzzily the same cost, so keep
308 * just one --- but we'll do an exact cost comparison
311 if (compare_path_costs(new_path, old_path,
313 remove_old = true; /* new dominates old */
315 accept_new = false; /* old equals or dominates new */
318 case PATHKEYS_BETTER1:
320 remove_old = true; /* new dominates old */
322 case PATHKEYS_BETTER2:
324 accept_new = false; /* old dominates new */
326 case PATHKEYS_DIFFERENT:
327 /* keep both paths, since they have different ordering */
333 * Remove current element from pathlist if dominated by new.
337 parent_rel->pathlist = list_delete_cell(parent_rel->pathlist,
341 * Delete the data pointed-to by the deleted cell, if possible
/* IndexPaths may also be children of BitmapHeapPaths, so never pfree them */
343 if (!IsA(old_path, IndexPath))
345 /* Advance list pointer */
349 p1 = list_head(parent_rel->pathlist);
353 /* new belongs after this old path if it has cost >= old's */
356 /* Advance list pointers */
362 * If we found an old path that dominates new_path, we can quit
363 * scanning the pathlist; we will not add new_path, and we assume
364 * new_path cannot dominate any other elements of the pathlist.
372 /* Accept the new path: insert it at proper place in pathlist */
374 lappend_cell(parent_rel->pathlist, insert_after, new_path);
376 parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
380 /* Reject and recycle the new path */
381 if (!IsA(new_path, IndexPath))
387 /*****************************************************************************
388 * PATH NODE CREATION ROUTINES
389 *****************************************************************************/
392 * create_seqscan_path
393 * Creates a path corresponding to a sequential scan, returning the
397 create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
399 Path *pathnode = makeNode(Path);
401 pathnode->pathtype = T_SeqScan;
402 pathnode->parent = rel;
403 pathnode->pathkeys = NIL; /* seqscan has unordered result */
405 cost_seqscan(pathnode, root, rel);
/*
 * create_index_path
 *	  Creates a path node for an index scan.
 *
 * NOTE(review): this excerpt is missing lines (the full parameter list,
 * braces, local declarations for indexquals/allclauses, and the closing
 * return); annotations below cover only the visible code.
 */
412 * Creates a path node for an index scan.
414 * 'index' is a usable index.
415 * 'clause_groups' is a list of lists of RestrictInfo nodes
416 * to be used as index qual conditions in the scan.
417 * 'pathkeys' describes the ordering of the path.
418 * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
419 * for an ordered index, or NoMovementScanDirection for
420 * an unordered index.
421 * 'outer_rel' is the outer relation if this is a join inner indexscan path.
422 * (pathkeys and indexscandir are ignored if so.) NULL if not.
424 * Returns the new path node.
427 create_index_path(PlannerInfo *root,
431 ScanDirection indexscandir,
432 RelOptInfo *outer_rel)
434 IndexPath *pathnode = makeNode(IndexPath);
435 RelOptInfo *rel = index->rel;
440 * For a join inner scan, there's no point in marking the path with any
441 * pathkeys, since it will only ever be used as the inner path of a
442 * nestloop, and so its ordering does not matter. For the same reason we
443 * don't really care what order it's scanned in. (We could expect the
444 * caller to supply the correct values, but it's easier to force it here.)
446 if (outer_rel != NULL)
449 indexscandir = NoMovementScanDirection;
452 pathnode->path.pathtype = T_IndexScan;
453 pathnode->path.parent = rel;
454 pathnode->path.pathkeys = pathkeys;
456 /* Convert clauses to indexquals the executor can handle */
457 indexquals = expand_indexqual_conditions(index, clause_groups);
459 /* Flatten the clause-groups list to produce indexclauses list */
460 allclauses = flatten_clausegroups_list(clause_groups);
462 /* Fill in the pathnode */
463 pathnode->indexinfo = index;
464 pathnode->indexclauses = allclauses;
465 pathnode->indexquals = indexquals;
467 pathnode->isjoininner = (outer_rel != NULL);
468 pathnode->indexscandir = indexscandir;
/* join-inner case: row estimate must include join clause selectivity */
470 if (outer_rel != NULL)
473 * We must compute the estimated number of output rows for the
474 * indexscan. This is less than rel->rows because of the additional
475 * selectivity of the join clauses. Since clause_groups may contain
476 * both restriction and join clauses, we have to do a set union to get
477 * the full set of clauses that must be considered to compute the
478 * correct selectivity. (Without the union operation, we might have
479 * some restriction clauses appearing twice, which'd mislead
480 * clauselist_selectivity into double-counting their selectivity.
481 * However, since RestrictInfo nodes aren't copied when linking them
482 * into different lists, it should be sufficient to use pointer
483 * comparison to remove duplicates.)
485 * Always assume the join type is JOIN_INNER; even if some of the join
486 * clauses come from other contexts, that's not our problem.
488 allclauses = list_union_ptr(rel->baserestrictinfo, allclauses);
489 pathnode->rows = rel->tuples *
490 clauselist_selectivity(root,
492 rel->relid, /* do not use 0! */
494 /* Like costsize.c, force estimate to be at least one row */
495 pathnode->rows = clamp_row_est(pathnode->rows);
500 * The number of rows is the same as the parent rel's estimate, since
501 * this isn't a join inner indexscan.
503 pathnode->rows = rel->rows;
/* fills in startup_cost/total_cost on the path */
506 cost_index(pathnode, root, index, indexquals, outer_rel);
/*
 * create_bitmap_heap_path
 *	  Creates a path node for a bitmap heap scan.
 *
 * NOTE(review): this excerpt is missing lines (the rel/bitmapqual
 * parameters, braces, the indexTotalCost declaration, and the closing
 * return); annotations below cover only the visible code.
 */
512 * create_bitmap_heap_path
513 * Creates a path node for a bitmap scan.
515 * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
517 * If this is a join inner indexscan path, 'outer_rel' is the outer relation,
518 * and all the component IndexPaths should have been costed accordingly.
521 create_bitmap_heap_path(PlannerInfo *root,
524 RelOptInfo *outer_rel)
526 BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
528 pathnode->path.pathtype = T_BitmapHeapScan;
529 pathnode->path.parent = rel;
530 pathnode->path.pathkeys = NIL; /* always unordered */
532 pathnode->bitmapqual = bitmapqual;
533 pathnode->isjoininner = (outer_rel != NULL);
/* join-inner case: derive row estimate from the bitmap's selectivity */
535 if (pathnode->isjoininner)
538 * We must compute the estimated number of output rows for the
539 * indexscan. This is less than rel->rows because of the additional
540 * selectivity of the join clauses. We make use of the selectivity
541 * estimated for the bitmap to do this; this isn't really quite right
542 * since there may be restriction conditions not included in the
546 Selectivity indexSelectivity;
548 cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
549 pathnode->rows = rel->tuples * indexSelectivity;
/* never estimate more rows than the rel itself is expected to emit */
550 if (pathnode->rows > rel->rows)
551 pathnode->rows = rel->rows;
552 /* Like costsize.c, force estimate to be at least one row */
553 pathnode->rows = clamp_row_est(pathnode->rows);
558 * The number of rows is the same as the parent rel's estimate, since
559 * this isn't a join inner indexscan.
561 pathnode->rows = rel->rows;
564 cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, outer_rel);
570 * create_bitmap_and_path
571 * Creates a path node representing a BitmapAnd.
574 create_bitmap_and_path(PlannerInfo *root,
578 BitmapAndPath *pathnode = makeNode(BitmapAndPath);
580 pathnode->path.pathtype = T_BitmapAnd;
581 pathnode->path.parent = rel;
582 pathnode->path.pathkeys = NIL; /* always unordered */
584 pathnode->bitmapquals = bitmapquals;
586 /* this sets bitmapselectivity as well as the regular cost fields: */
587 cost_bitmap_and_node(pathnode, root);
593 * create_bitmap_or_path
594 * Creates a path node representing a BitmapOr.
597 create_bitmap_or_path(PlannerInfo *root,
601 BitmapOrPath *pathnode = makeNode(BitmapOrPath);
603 pathnode->path.pathtype = T_BitmapOr;
604 pathnode->path.parent = rel;
605 pathnode->path.pathkeys = NIL; /* always unordered */
607 pathnode->bitmapquals = bitmapquals;
609 /* this sets bitmapselectivity as well as the regular cost fields: */
610 cost_bitmap_or_node(pathnode, root);
616 * create_tidscan_path
617 * Creates a path corresponding to a scan by TID, returning the pathnode.
620 create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals)
622 TidPath *pathnode = makeNode(TidPath);
624 pathnode->path.pathtype = T_TidScan;
625 pathnode->path.parent = rel;
626 pathnode->path.pathkeys = NIL;
628 pathnode->tidquals = tidquals;
630 cost_tidscan(&pathnode->path, root, rel, tidquals);
637 * Creates a path corresponding to an Append plan, returning the
641 create_append_path(RelOptInfo *rel, List *subpaths)
643 AppendPath *pathnode = makeNode(AppendPath);
646 pathnode->path.pathtype = T_Append;
647 pathnode->path.parent = rel;
648 pathnode->path.pathkeys = NIL; /* result is always considered
650 pathnode->subpaths = subpaths;
652 pathnode->path.startup_cost = 0;
653 pathnode->path.total_cost = 0;
656 Path *subpath = (Path *) lfirst(l);
658 if (l == list_head(subpaths)) /* first node? */
659 pathnode->path.startup_cost = subpath->startup_cost;
660 pathnode->path.total_cost += subpath->total_cost;
668 * Creates a path representing a Result-and-nothing-else plan.
669 * This is only used for the case of a query with an empty jointree.
672 create_result_path(List *quals)
674 ResultPath *pathnode = makeNode(ResultPath);
676 pathnode->path.pathtype = T_Result;
677 pathnode->path.parent = NULL;
678 pathnode->path.pathkeys = NIL;
679 pathnode->quals = quals;
681 /* Ideally should define cost_result(), but I'm too lazy */
682 pathnode->path.startup_cost = 0;
683 pathnode->path.total_cost = cpu_tuple_cost;
686 * In theory we should include the qual eval cost as well, but at present
687 * that doesn't accomplish much except duplicate work that will be done
688 * again in make_result; since this is only used for degenerate cases,
689 * nothing interesting will be done with the path cost values...
696 * create_material_path
697 * Creates a path corresponding to a Material plan, returning the
701 create_material_path(RelOptInfo *rel, Path *subpath)
703 MaterialPath *pathnode = makeNode(MaterialPath);
705 pathnode->path.pathtype = T_Material;
706 pathnode->path.parent = rel;
708 pathnode->path.pathkeys = subpath->pathkeys;
710 pathnode->subpath = subpath;
712 cost_material(&pathnode->path,
/*
 * create_unique_path
 *	  Creates (and caches on the rel) a path that eliminates duplicate rows
 *	  from the given subpath, choosing between sort-based and hash-based
 *	  unique-ification, or a no-op when the subquery is provably distinct.
 *
 * NOTE(review): this excerpt is missing many lines (braces, several
 * declarations such as in_operators/l/numCols, cost_sort/cost_agg argument
 * lists, and return statements); annotations below cover only the visible
 * code.
 */
722 * Creates a path representing elimination of distinct rows from the
725 * If used at all, this is likely to be called repeatedly on the same rel;
726 * and the input subpath should always be the same (the cheapest_total path
727 * for the rel). So we cache the result.
730 create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
732 UniquePath *pathnode;
733 Path sort_path; /* dummy for result of cost_sort */
734 Path agg_path; /* dummy for result of cost_agg */
735 MemoryContext oldcontext;
736 List *sub_targetlist;
741 /* Caller made a mistake if subpath isn't cheapest_total */
742 Assert(subpath == rel->cheapest_total_path);
744 /* If result already cached, return it */
745 if (rel->cheapest_unique_path)
746 return (UniquePath *) rel->cheapest_unique_path;
749 * We must ensure path struct is allocated in main planning context;
750 * otherwise GEQO memory management causes trouble. (Compare
751 * best_inner_indexscan().)
753 oldcontext = MemoryContextSwitchTo(root->planner_cxt);
755 pathnode = makeNode(UniquePath);
757 /* There is no substructure to allocate, so can switch back right away */
758 MemoryContextSwitchTo(oldcontext);
760 pathnode->path.pathtype = T_Unique;
761 pathnode->path.parent = rel;
764 * Treat the output as always unsorted, since we don't necessarily have
765 * pathkeys to represent it.
767 pathnode->path.pathkeys = NIL;
769 pathnode->subpath = subpath;
772 * Try to identify the targetlist that will actually be unique-ified. In
773 * current usage, this routine is only used for sub-selects of IN clauses,
774 * so we should be able to find the tlist in in_info_list. Get the IN
775 * clause's operators, too, because they determine what "unique" means.
777 sub_targetlist = NIL;
779 foreach(l, root->in_info_list)
781 InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
/* match the IN-clause whose RHS rels are exactly this rel's relids */
783 if (bms_equal(ininfo->righthand, rel->relids))
785 sub_targetlist = ininfo->sub_targetlist;
786 in_operators = ininfo->in_operators;
792 * If the input is a subquery whose output must be unique already, then we
793 * don't need to do anything. The test for uniqueness has to consider
794 * exactly which columns we are extracting; for example "SELECT DISTINCT
795 * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
796 * this optimization unless we found our own targetlist above, and it
797 * consists only of simple Vars referencing subquery outputs. (Possibly
798 * we could do something with expressions in the subquery outputs, too,
799 * but for now keep it simple.)
801 if (sub_targetlist && rel->rtekind == RTE_SUBQUERY)
803 RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
804 List *sub_tlist_colnos;
806 sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
808 if (sub_tlist_colnos &&
809 query_is_distinct_for(rte->subquery,
810 sub_tlist_colnos, in_operators))
/* subquery output already distinct: pass rows through unchanged */
812 pathnode->umethod = UNIQUE_PATH_NOOP;
813 pathnode->rows = rel->rows;
814 pathnode->path.startup_cost = subpath->startup_cost;
815 pathnode->path.total_cost = subpath->total_cost;
816 pathnode->path.pathkeys = subpath->pathkeys;
818 rel->cheapest_unique_path = (Path *) pathnode;
825 * If we know the targetlist, try to estimate number of result rows;
830 pathnode->rows = estimate_num_groups(root, sub_targetlist, rel->rows);
831 numCols = list_length(sub_targetlist);
835 pathnode->rows = rel->rows;
836 numCols = list_length(rel->reltargetlist);
840 * Estimate cost for sort+unique implementation
842 cost_sort(&sort_path, root, NIL,
849 * Charge one cpu_operator_cost per comparison per input tuple. We assume
850 * all columns get compared at most of the tuples. (XXX probably this is
851 * an overestimate.) This should agree with make_unique.
853 sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
856 * Is it safe to use a hashed implementation? If so, estimate and compare
857 * costs. We only try this if we know the IN operators, else we can't
858 * check their hashability.
860 pathnode->umethod = UNIQUE_PATH_SORT;
861 if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
864 * Estimate the overhead per hashtable entry at 64 bytes (same as in
867 int hashentrysize = rel->width + 64;
/* only consider hashing if the hash table is expected to fit in work_mem */
869 if (hashentrysize * pathnode->rows <= work_mem * 1024L)
871 cost_agg(&agg_path, root,
873 numCols, pathnode->rows,
874 subpath->startup_cost,
877 if (agg_path.total_cost < sort_path.total_cost)
878 pathnode->umethod = UNIQUE_PATH_HASH;
/* copy the winning implementation's costs onto the UniquePath */
882 if (pathnode->umethod == UNIQUE_PATH_HASH)
884 pathnode->path.startup_cost = agg_path.startup_cost;
885 pathnode->path.total_cost = agg_path.total_cost;
889 pathnode->path.startup_cost = sort_path.startup_cost;
890 pathnode->path.total_cost = sort_path.total_cost;
893 rel->cheapest_unique_path = (Path *) pathnode;
899 * translate_sub_tlist - get subquery column numbers represented by tlist
901 * The given targetlist usually contains only Vars referencing the given relid.
902 * Extract their varattnos (ie, the column numbers of the subquery) and return
903 * as an integer List.
905 * If any of the tlist items is not a simple Var, we cannot determine whether
906 * the subquery's uniqueness condition (if any) matches ours, so punt and
910 translate_sub_tlist(List *tlist, int relid)
917 Var *var = (Var *) lfirst(l);
919 if (!var || !IsA(var, Var) ||
921 return NIL; /* punt */
923 result = lappend_int(result, var->varattno);
/*
 * query_is_distinct_for
 *	  Determine whether the given subquery provably never returns duplicate
 *	  rows for the listed output columns, under the semantics of the given
 *	  equality operators.
 *
 * NOTE(review): this excerpt is missing lines (braces, the ListCell/Oid
 * declarations, `return true;` statements after the "had matches for all?"
 * tests, and the final `return false;`); annotations below cover only the
 * visible code.
 */
929 * query_is_distinct_for - does query never return duplicates of the
932 * colnos is an integer list of output column numbers (resno's). We are
933 * interested in whether rows consisting of just these columns are certain
934 * to be distinct. "Distinctness" is defined according to whether the
935 * corresponding upper-level equality operators listed in opids would think
936 * the values are distinct. (Note: the opids entries could be cross-type
937 * operators, and thus not exactly the equality operators that the subquery
938 * would use itself. We use equality_ops_are_compatible() to check
939 * compatibility. That looks at btree or hash opfamily membership, and so
940 * should give trustworthy answers for all operators that we might need
941 * to deal with here.)
944 query_is_distinct_for(Query *query, List *colnos, List *opids)
949 Assert(list_length(colnos) == list_length(opids));
952 * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
953 * columns in the DISTINCT clause appear in colnos and operator semantics
956 if (query->distinctClause)
958 foreach(l, query->distinctClause)
960 SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
961 TargetEntry *tle = get_sortgroupclause_tle(sgc,
964 opid = distinct_col_search(tle->resno, colnos, opids);
965 if (!OidIsValid(opid) ||
966 !equality_ops_are_compatible(opid, sgc->eqop))
967 break; /* exit early if no match */
/* l == NULL means the foreach ran to completion without breaking */
969 if (l == NULL) /* had matches for all? */
974 * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
975 * appear in colnos and operator semantics match.
977 if (query->groupClause)
979 foreach(l, query->groupClause)
981 SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
982 TargetEntry *tle = get_sortgroupclause_tle(sgc,
985 opid = distinct_col_search(tle->resno, colnos, opids);
986 if (!OidIsValid(opid) ||
987 !equality_ops_are_compatible(opid, sgc->eqop))
988 break; /* exit early if no match */
990 if (l == NULL) /* had matches for all? */
996 * If we have no GROUP BY, but do have aggregates or HAVING, then the
997 * result is at most one row so it's surely unique, for any operators.
999 if (query->hasAggs || query->havingQual)
1004 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1007 * XXX this code knows that prepunion.c will adopt the default sort/group
1008 * operators for each column datatype to determine uniqueness. It'd
1009 * probably be better if these operators were chosen at parse time and
1010 * stored into the parsetree, instead of leaving bits of the planner to
1013 if (query->setOperations)
1015 SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
1017 Assert(IsA(topop, SetOperationStmt));
1018 Assert(topop->op != SETOP_NONE);
/* presumably guarded by an (elided) check that topop->all is false — TODO confirm */
1022 /* We're good if all the nonjunk output columns are in colnos */
1023 foreach(l, query->targetList)
1025 TargetEntry *tle = (TargetEntry *) lfirst(l);
1029 continue; /* ignore resjunk columns */
1031 opid = distinct_col_search(tle->resno, colnos, opids);
1032 if (!OidIsValid(opid))
1033 break; /* exit early if no match */
1034 /* check for compatible semantics */
1035 get_sort_group_operators(exprType((Node *) tle->expr),
1036 false, false, false,
1037 NULL, &tle_eq_opr, NULL);
1038 if (!OidIsValid(tle_eq_opr) ||
1039 !equality_ops_are_compatible(opid, tle_eq_opr))
1040 break; /* exit early if no match */
1042 if (l == NULL) /* had matches for all? */
1048 * XXX Are there any other cases in which we can easily see the result
1056 * distinct_col_search - subroutine for query_is_distinct_for
1058 * If colno is in colnos, return the corresponding element of opids,
1059 * else return InvalidOid. (We expect colnos does not contain duplicates,
1060 * so the result is well-defined.)
1063 distinct_col_search(int colno, List *colnos, List *opids)
1068 forboth(lc1, colnos, lc2, opids)
1070 if (colno == lfirst_int(lc1))
1071 return lfirst_oid(lc2);
1077 * hash_safe_operators - can all the specified IN operators be hashed?
1079 * We assume hashed aggregation will work if each IN operator is marked
1080 * hashjoinable. If the IN operators are cross-type, this could conceivably
1081 * fail: the aggregation will need a hashable equality operator for the RHS
1082 * datatype --- but it's pretty hard to conceive of a hash opfamily that has
1083 * cross-type hashing without support for hashing the individual types, so
1084 * we don't expend cycles here to support the case. We could check
1085 * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
1086 * former is a significantly more expensive test.
1089 hash_safe_operators(List *opids)
1095 if (!op_hashjoinable(lfirst_oid(lc)))
1102 * create_subqueryscan_path
1103 * Creates a path corresponding to a sequential scan of a subquery,
1104 * returning the pathnode.
1107 create_subqueryscan_path(RelOptInfo *rel, List *pathkeys)
1109 Path *pathnode = makeNode(Path);
1111 pathnode->pathtype = T_SubqueryScan;
1112 pathnode->parent = rel;
1113 pathnode->pathkeys = pathkeys;
1115 cost_subqueryscan(pathnode, rel);
1121 * create_functionscan_path
1122 * Creates a path corresponding to a sequential scan of a function,
1123 * returning the pathnode.
1126 create_functionscan_path(PlannerInfo *root, RelOptInfo *rel)
1128 Path *pathnode = makeNode(Path);
1130 pathnode->pathtype = T_FunctionScan;
1131 pathnode->parent = rel;
1132 pathnode->pathkeys = NIL; /* for now, assume unordered result */
1134 cost_functionscan(pathnode, root, rel);
1140 * create_valuesscan_path
1141 * Creates a path corresponding to a scan of a VALUES list,
1142 * returning the pathnode.
1145 create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel)
1147 Path *pathnode = makeNode(Path);
1149 pathnode->pathtype = T_ValuesScan;
1150 pathnode->parent = rel;
1151 pathnode->pathkeys = NIL; /* result is always unordered */
1153 cost_valuesscan(pathnode, root, rel);
1159 * create_nestloop_path
1160 * Creates a pathnode corresponding to a nestloop join between two
1163 * 'joinrel' is the join relation.
1164 * 'jointype' is the type of join required
1165 * 'outer_path' is the outer path
1166 * 'inner_path' is the inner path
1167 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
1168 * 'pathkeys' are the path keys of the new join path
1170 * Returns the resulting path node.
1173 create_nestloop_path(PlannerInfo *root,
1174 RelOptInfo *joinrel,
1178 List *restrict_clauses,
1181 NestPath *pathnode = makeNode(NestPath);
1183 pathnode->path.pathtype = T_NestLoop;
1184 pathnode->path.parent = joinrel;
1185 pathnode->jointype = jointype;
1186 pathnode->outerjoinpath = outer_path;
1187 pathnode->innerjoinpath = inner_path;
1188 pathnode->joinrestrictinfo = restrict_clauses;
1189 pathnode->path.pathkeys = pathkeys;
1191 cost_nestloop(pathnode, root);
/*
 * create_mergejoin_path
 *	  Creates a pathnode corresponding to a mergejoin between two relations,
 *	  discarding explicit sort keys the inputs already satisfy and inserting
 *	  a Material node on the inner side when mark/restore is not supported.
 *
 * NOTE(review): this excerpt is missing lines (several parameter lines such
 * as jointype/outer_path/inner_path/pathkeys/mergeclauses, braces, and the
 * closing return); annotations below cover only the visible code.
 */
1197 * create_mergejoin_path
1198 * Creates a pathnode corresponding to a mergejoin join between
1201 * 'joinrel' is the join relation
1202 * 'jointype' is the type of join required
1203 * 'outer_path' is the outer path
1204 * 'inner_path' is the inner path
1205 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
1206 * 'pathkeys' are the path keys of the new join path
1207 * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
1208 * (this should be a subset of the restrict_clauses list)
1209 * 'outersortkeys' are the sort varkeys for the outer relation
1210 * 'innersortkeys' are the sort varkeys for the inner relation
1213 create_mergejoin_path(PlannerInfo *root,
1214 RelOptInfo *joinrel,
1218 List *restrict_clauses,
1221 List *outersortkeys,
1222 List *innersortkeys)
1224 MergePath *pathnode = makeNode(MergePath);
1227 * If the given paths are already well enough ordered, we can skip doing
1230 if (outersortkeys &&
1231 pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
1232 outersortkeys = NIL;
1233 if (innersortkeys &&
1234 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1235 innersortkeys = NIL;
1238 * If we are not sorting the inner path, we may need a materialize node to
1239 * ensure it can be marked/restored. (Sort does support mark/restore, so
1240 * no materialize is needed in that case.)
1242 * Since the inner side must be ordered, and only Sorts and IndexScans can
1243 * create order to begin with, you might think there's no problem --- but
1244 * you'd be wrong. Nestloop and merge joins can *preserve* the order of
1245 * their inputs, so they can be selected as the input of a mergejoin, and
1246 * they don't support mark/restore at present.
1248 if (innersortkeys == NIL &&
1249 !ExecSupportsMarkRestore(inner_path->pathtype))
1250 inner_path = (Path *)
1251 create_material_path(inner_path->parent, inner_path);
/* fill in the MergePath fields */
1253 pathnode->jpath.path.pathtype = T_MergeJoin;
1254 pathnode->jpath.path.parent = joinrel;
1255 pathnode->jpath.jointype = jointype;
1256 pathnode->jpath.outerjoinpath = outer_path;
1257 pathnode->jpath.innerjoinpath = inner_path;
1258 pathnode->jpath.joinrestrictinfo = restrict_clauses;
1259 pathnode->jpath.path.pathkeys = pathkeys;
1260 pathnode->path_mergeclauses = mergeclauses;
/* NIL sortkeys here mean "input already ordered, no explicit sort needed" */
1261 pathnode->outersortkeys = outersortkeys;
1262 pathnode->innersortkeys = innersortkeys;
1264 cost_mergejoin(pathnode, root);
/*
 * create_hashjoin_path
 *	  Creates a pathnode corresponding to a hash join between two relations.
 *
 * NOTE(review): this excerpt is missing lines (the jointype/outer_path/
 * inner_path/hashclauses parameter lines and braces), and the function's
 * closing return runs past the end of the visible text; annotations below
 * cover only the visible code.
 */
1270 * create_hashjoin_path
1271 * Creates a pathnode corresponding to a hash join between two relations.
1273 * 'joinrel' is the join relation
1274 * 'jointype' is the type of join required
1275 * 'outer_path' is the cheapest outer path
1276 * 'inner_path' is the cheapest inner path
1277 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
1278 * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
1279 * (this should be a subset of the restrict_clauses list)
1282 create_hashjoin_path(PlannerInfo *root,
1283 RelOptInfo *joinrel,
1287 List *restrict_clauses,
1290 HashPath *pathnode = makeNode(HashPath);
/* fill in the join fields shared with other join path types */
1292 pathnode->jpath.path.pathtype = T_HashJoin;
1293 pathnode->jpath.path.parent = joinrel;
1294 pathnode->jpath.jointype = jointype;
1295 pathnode->jpath.outerjoinpath = outer_path;
1296 pathnode->jpath.innerjoinpath = inner_path;
1297 pathnode->jpath.joinrestrictinfo = restrict_clauses;
1298 /* A hashjoin never has pathkeys, since its ordering is unpredictable */
1299 pathnode->jpath.path.pathkeys = NIL;
1300 pathnode->path_hashclauses = hashclauses;
/* fills in startup_cost/total_cost */
1302 cost_hashjoin(pathnode, root);