1 /*-------------------------------------------------------------------------
4 * routines for accessing the system catalogs
7 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
12 * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.135 2007/05/25 17:54:25 tgl Exp $
14 *-------------------------------------------------------------------------
20 #include "access/genam.h"
21 #include "access/heapam.h"
22 #include "catalog/pg_inherits.h"
23 #include "nodes/makefuncs.h"
24 #include "optimizer/clauses.h"
25 #include "optimizer/plancat.h"
26 #include "optimizer/predtest.h"
27 #include "optimizer/prep.h"
28 #include "parser/parse_expr.h"
29 #include "parser/parse_relation.h"
30 #include "parser/parsetree.h"
31 #include "rewrite/rewriteManip.h"
32 #include "utils/fmgroids.h"
33 #include "utils/lsyscache.h"
34 #include "utils/relcache.h"
35 #include "utils/syscache.h"
36 #include "catalog/catalog.h"
37 #include "miscadmin.h"
41 bool constraint_exclusion = false;
43 /* Hook for plugins to get control in get_relation_info() */
44 get_relation_info_hook_type get_relation_info_hook = NULL;
47 static void estimate_rel_size(Relation rel, int32 *attr_widths,
48 BlockNumber *pages, double *tuples);
49 static List *get_relation_constraints(Oid relationObjectId, RelOptInfo *rel);
54 * Retrieves catalog information for a given relation.
56 * Given the Oid of the relation, return the following info into fields
57 * of the RelOptInfo struct:
59 * min_attr lowest valid AttrNumber
60 * max_attr highest valid AttrNumber
61 * indexlist list of IndexOptInfos for relation's indexes
62 * pages number of pages
63 * tuples number of tuples
65 * Also, initialize the attr_needed[] and attr_widths[] arrays. In most
66 * cases these are left as zeroes, but sometimes we need to compute attr
67 * widths here, and we may as well cache the results for costsize.c.
69 * If inhparent is true, all we need to do is set up the attr arrays:
70 * the RelOptInfo actually represents the appendrel formed by an inheritance
71 * tree, and so the parent rel's physical size and index information isn't
75 get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
78 Index varno = rel->relid;
81 List *indexinfos = NIL;
84 * We need not lock the relation since it was already locked, either by
85 * the rewriter or when expand_inherited_rtentry() added it to the query's
88 relation = heap_open(relationObjectId, NoLock);
90 rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1;
91 rel->max_attr = RelationGetNumberOfAttributes(relation);
93 Assert(rel->max_attr >= rel->min_attr);
94 rel->attr_needed = (Relids *)
95 palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids));
96 rel->attr_widths = (int32 *)
97 palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32));
100 * Estimate relation size --- unless it's an inheritance parent, in which
101 * case the size will be computed later in set_append_rel_pathlist, and we
102 * must leave it zero for now to avoid bollixing the total_table_pages
106 estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
107 &rel->pages, &rel->tuples);
110 * Make list of indexes. Ignore indexes on system catalogs if told to.
111 * Don't bother with indexes for an inheritance parent, either.
114 (IgnoreSystemIndexes && IsSystemClass(relation->rd_rel)))
117 hasindex = relation->rd_rel->relhasindex;
125 indexoidlist = RelationGetIndexList(relation);
128 * For each index, we get the same type of lock that the executor will
129 * need, and do not release it. This saves a couple of trips to the
130 * shared lock manager while not creating any real loss of
131 * concurrency, because no schema changes could be happening on the
132 * index while we hold lock on the parent rel, and neither lock type
133 * blocks any other kind of index operation.
135 if (rel->relid == root->parse->resultRelation)
136 lmode = RowExclusiveLock;
138 lmode = AccessShareLock;
140 foreach(l, indexoidlist)
142 Oid indexoid = lfirst_oid(l);
143 Relation indexRelation;
150 * Extract info from the relation descriptor for the index.
152 indexRelation = index_open(indexoid, lmode);
153 index = indexRelation->rd_index;
156 * Ignore invalid indexes, since they can't safely be used for
157 * queries. Note that this is OK because the data structure we
158 * are constructing is only used by the planner --- the executor
159 * still needs to insert into "invalid" indexes!
161 if (!index->indisvalid)
163 index_close(indexRelation, NoLock);
167 info = makeNode(IndexOptInfo);
169 info->indexoid = index->indexrelid;
171 info->ncolumns = ncolumns = index->indnatts;
174 * Need to make opfamily array large enough to put a terminating
177 info->indexkeys = (int *) palloc(sizeof(int) * ncolumns);
178 info->opfamily = (Oid *) palloc0(sizeof(Oid) * (ncolumns + 1));
179 /* initialize these to zeroes in case index is unordered */
180 info->fwdsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns);
181 info->revsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns);
182 info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns);
184 for (i = 0; i < ncolumns; i++)
186 info->opfamily[i] = indexRelation->rd_opfamily[i];
187 info->indexkeys[i] = index->indkey.values[i];
190 info->relam = indexRelation->rd_rel->relam;
191 info->amcostestimate = indexRelation->rd_am->amcostestimate;
192 info->amoptionalkey = indexRelation->rd_am->amoptionalkey;
193 info->amsearchnulls = indexRelation->rd_am->amsearchnulls;
196 * Fetch the ordering operators associated with the index, if any.
197 * We expect that all ordering-capable indexes use btree's
198 * strategy numbers for the ordering operators.
200 if (indexRelation->rd_am->amcanorder)
202 int nstrat = indexRelation->rd_am->amstrategies;
204 for (i = 0; i < ncolumns; i++)
206 int16 opt = indexRelation->rd_indoption[i];
210 if (opt & INDOPTION_DESC)
212 fwdstrat = BTGreaterStrategyNumber;
213 revstrat = BTLessStrategyNumber;
217 fwdstrat = BTLessStrategyNumber;
218 revstrat = BTGreaterStrategyNumber;
221 * Index AM must have a fixed set of strategies for it
222 * to make sense to specify amcanorder, so we
223 * need not allow the case amstrategies == 0.
227 Assert(fwdstrat <= nstrat);
228 info->fwdsortop[i] = indexRelation->rd_operator[i * nstrat + fwdstrat - 1];
232 Assert(revstrat <= nstrat);
233 info->revsortop[i] = indexRelation->rd_operator[i * nstrat + revstrat - 1];
235 info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
240 * Fetch the index expressions and predicate, if any. We must
241 * modify the copies we obtain from the relcache to have the
242 * correct varno for the parent relation, so that they match up
243 * correctly against qual clauses.
245 info->indexprs = RelationGetIndexExpressions(indexRelation);
246 info->indpred = RelationGetIndexPredicate(indexRelation);
247 if (info->indexprs && varno != 1)
248 ChangeVarNodes((Node *) info->indexprs, 1, varno, 0);
249 if (info->indpred && varno != 1)
250 ChangeVarNodes((Node *) info->indpred, 1, varno, 0);
251 info->predOK = false; /* set later in indxpath.c */
252 info->unique = index->indisunique;
255 * Estimate the index size. If it's not a partial index, we lock
256 * the number-of-tuples estimate to equal the parent table; if it
257 * is partial then we have to use the same methods as we would for
258 * a table, except we can be sure that the index is not larger
261 if (info->indpred == NIL)
263 info->pages = RelationGetNumberOfBlocks(indexRelation);
264 info->tuples = rel->tuples;
268 estimate_rel_size(indexRelation, NULL,
269 &info->pages, &info->tuples);
270 if (info->tuples > rel->tuples)
271 info->tuples = rel->tuples;
274 index_close(indexRelation, NoLock);
276 indexinfos = lcons(info, indexinfos);
279 list_free(indexoidlist);
282 rel->indexlist = indexinfos;
284 heap_close(relation, NoLock);
287 * Allow a plugin to editorialize on the info we obtained from the
288 * catalogs. Actions might include altering the assumed relation size,
289 * removing an index, or adding a hypothetical index to the indexlist.
291 if (get_relation_info_hook)
292 (*get_relation_info_hook) (root, relationObjectId, inhparent, rel);
296 * estimate_rel_size - estimate # pages and # tuples in a table or index
298 * If attr_widths isn't NULL, it points to the zero-index entry of the
299 * relation's attr_width[] cache; we fill this in if we have need to compute
300 * the attribute widths for estimation purposes.
303 estimate_rel_size(Relation rel, int32 *attr_widths,
304 BlockNumber *pages, double *tuples)
306 BlockNumber curpages;
307 BlockNumber relpages;
311 switch (rel->rd_rel->relkind)
313 case RELKIND_RELATION:
315 case RELKIND_TOASTVALUE:
316 /* it has storage, ok to call the smgr */
317 curpages = RelationGetNumberOfBlocks(rel);
320 * HACK: if the relation has never yet been vacuumed, use a
321 * minimum estimate of 10 pages. This emulates a desirable aspect
322 * of pre-8.0 behavior, which is that we wouldn't assume a newly
323 * created relation is really small, which saves us from making
324 * really bad plans during initial data loading. (The plans are
325 * not wrong when they are made, but if they are cached and used
326 * again after the table has grown a lot, they are bad.) It would
327 * be better to force replanning if the table size has changed a
328 * lot since the plan was made ... but we don't currently have any
329 * infrastructure for redoing cached plans at all, so we have to
330 * kluge things here instead.
332 * We approximate "never vacuumed" by "has relpages = 0", which
333 * means this will also fire on genuinely empty relations. Not
334 * great, but fortunately that's a seldom-seen case in the real
335 * world, and it shouldn't degrade the quality of the plan too
336 * much anyway to err in this direction.
338 if (curpages < 10 && rel->rd_rel->relpages == 0)
341 /* report estimated # pages */
343 /* quick exit if rel is clearly empty */
349 /* coerce values in pg_class to more desirable types */
350 relpages = (BlockNumber) rel->rd_rel->relpages;
351 reltuples = (double) rel->rd_rel->reltuples;
354 * If it's an index, discount the metapage. This is a kluge
355 * because it assumes more than it ought to about index contents;
356 * it's reasonably OK for btrees but a bit suspect otherwise.
358 if (rel->rd_rel->relkind == RELKIND_INDEX &&
364 /* estimate number of tuples from previous tuple density */
366 density = reltuples / (double) relpages;
370 * When we have no data because the relation was truncated,
371 * estimate tuple width from attribute datatypes. We assume
372 * here that the pages are completely full, which is OK for
373 * tables (since they've presumably not been VACUUMed yet) but
374 * is probably an overestimate for indexes. Fortunately
375 * get_relation_info() can clamp the overestimate to the
376 * parent table's size.
378 * Note: this code intentionally disregards alignment
379 * considerations, because (a) that would be gilding the lily
380 * considering how crude the estimate is, and (b) it creates
381 * platform dependencies in the default plans which are kind
382 * of a headache for regression testing.
384 int32 tuple_width = 0;
387 for (i = 1; i <= RelationGetNumberOfAttributes(rel); i++)
389 Form_pg_attribute att = rel->rd_att->attrs[i - 1];
392 if (att->attisdropped)
394 /* This should match set_rel_width() in costsize.c */
395 item_width = get_attavgwidth(RelationGetRelid(rel), i);
398 item_width = get_typavgwidth(att->atttypid,
400 Assert(item_width > 0);
402 if (attr_widths != NULL)
403 attr_widths[i] = item_width;
404 tuple_width += item_width;
406 tuple_width += sizeof(HeapTupleHeaderData);
407 tuple_width += sizeof(ItemPointerData);
408 /* note: integer division is intentional here */
409 density = (BLCKSZ - sizeof(PageHeaderData)) / tuple_width;
411 *tuples = rint(density * (double) curpages);
413 case RELKIND_SEQUENCE:
414 /* Sequences always have a known size */
419 /* else it has no disk storage; probably shouldn't get here? */
428 * get_relation_constraints
430 * Retrieve the CHECK constraint expressions of the given relation.
432 * Returns a List (possibly empty) of constraint expressions. Each one
433 * has been canonicalized, and its Vars are changed to have the varno
434 * indicated by rel->relid. This allows the expressions to be easily
435 * compared to expressions taken from WHERE.
437 * Note: at present this is invoked at most once per relation per planner
438 * run, and in many cases it won't be invoked at all, so there seems no
439 * point in caching the data in RelOptInfo.
442 get_relation_constraints(Oid relationObjectId, RelOptInfo *rel)
445 Index varno = rel->relid;
450 * We assume the relation has already been safely locked.
452 relation = heap_open(relationObjectId, NoLock);
454 constr = relation->rd_att->constr;
457 int num_check = constr->num_check;
460 for (i = 0; i < num_check; i++)
464 cexpr = stringToNode(constr->check[i].ccbin);
467 * Run each expression through const-simplification and
468 * canonicalization. This is not just an optimization, but is
469 * necessary, because we will be comparing it to
470 * similarly-processed qual clauses, and may fail to detect valid
471 * matches without this. This must match the processing done to
472 * qual clauses in preprocess_expression()! (We can skip the
473 * stuff involving subqueries, however, since we don't allow any
474 * in check constraints.)
476 cexpr = eval_const_expressions(cexpr);
478 cexpr = (Node *) canonicalize_qual((Expr *) cexpr);
481 * Also mark any coercion format fields as "don't care", so that
482 * we can match to both explicit and implicit coercions.
484 set_coercionform_dontcare(cexpr);
486 /* Fix Vars to have the desired varno */
488 ChangeVarNodes(cexpr, 1, varno, 0);
491 * Finally, convert to implicit-AND format (that is, a List) and
492 * append the resulting item(s) to our output list.
494 result = list_concat(result,
495 make_ands_implicit((Expr *) cexpr));
499 heap_close(relation, NoLock);
506 * relation_excluded_by_constraints
508 * Detect whether the relation need not be scanned because it has either
509 * self-inconsistent restrictions, or restrictions inconsistent with the
510 * relation's CHECK constraints.
512 * Note: this examines only rel->relid and rel->baserestrictinfo; therefore
513 * it can be called before filling in other fields of the RelOptInfo.
516 relation_excluded_by_constraints(RelOptInfo *rel, RangeTblEntry *rte)
518 List *safe_restrictions;
519 List *constraint_pred;
520 List *safe_constraints;
523 /* Skip the test if constraint exclusion is disabled */
524 if (!constraint_exclusion)
528 * Check for self-contradictory restriction clauses. We dare not make
529 * deductions with non-immutable functions, but any immutable clauses that
530 * are self-contradictory allow us to conclude the scan is unnecessary.
532 * Note: strip off RestrictInfo because predicate_refuted_by() isn't
533 * expecting to see any in its predicate argument.
535 safe_restrictions = NIL;
536 foreach(lc, rel->baserestrictinfo)
538 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
540 if (!contain_mutable_functions((Node *) rinfo->clause))
541 safe_restrictions = lappend(safe_restrictions, rinfo->clause);
544 if (predicate_refuted_by(safe_restrictions, safe_restrictions))
547 /* Only plain relations have constraints */
548 if (rte->rtekind != RTE_RELATION || rte->inh)
551 /* OK to fetch the constraint expressions */
552 constraint_pred = get_relation_constraints(rte->relid, rel);
555 * We do not currently enforce that CHECK constraints contain only
556 * immutable functions, so it's necessary to check here. We daren't draw
557 * conclusions from plan-time evaluation of non-immutable functions. Since
558 * they're ANDed, we can just ignore any mutable constraints in the list,
559 * and reason about the rest.
561 safe_constraints = NIL;
562 foreach(lc, constraint_pred)
564 Node *pred = (Node *) lfirst(lc);
566 if (!contain_mutable_functions(pred))
567 safe_constraints = lappend(safe_constraints, pred);
571 * The constraints are effectively ANDed together, so we can just try to
572 * refute the entire collection at once. This may allow us to make proofs
573 * that would fail if we took them individually.
575 * Note: we use rel->baserestrictinfo, not safe_restrictions as might seem
576 * an obvious optimization. Some of the clauses might be OR clauses that
577 * have volatile and nonvolatile subclauses, and it's OK to make
578 * deductions with the nonvolatile parts.
580 if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo))
588 * build_physical_tlist
590 * Build a targetlist consisting of exactly the relation's user attributes,
591 * in order. The executor can special-case such tlists to avoid a projection
592 * step at runtime, so we use such tlists preferentially for scan nodes.
594 * Exception: if there are any dropped columns, we punt and return NIL.
595 * Ideally we would like to handle the dropped-column case too. However this
596 * creates problems for ExecTypeFromTL, which may be asked to build a tupdesc
597 * for a tlist that includes vars of no-longer-existent types. In theory we
598 * could dig out the required info from the pg_attribute entries of the
599 * relation, but that data is not readily available to ExecTypeFromTL.
600 * For now, we don't apply the physical-tlist optimization when there are
603 * We also support building a "physical" tlist for subqueries, functions,
604 * and values lists, since the same optimization can occur in SubqueryScan,
605 * FunctionScan, and ValuesScan nodes.
608 build_physical_tlist(PlannerInfo *root, RelOptInfo *rel)
611 Index varno = rel->relid;
612 RangeTblEntry *rte = planner_rt_fetch(varno, root);
621 switch (rte->rtekind)
624 /* Assume we already have adequate lock */
625 relation = heap_open(rte->relid, NoLock);
627 numattrs = RelationGetNumberOfAttributes(relation);
628 for (attrno = 1; attrno <= numattrs; attrno++)
630 Form_pg_attribute att_tup = relation->rd_att->attrs[attrno - 1];
632 if (att_tup->attisdropped)
634 /* found a dropped col, so punt */
645 tlist = lappend(tlist,
646 makeTargetEntry((Expr *) var,
652 heap_close(relation, NoLock);
656 subquery = rte->subquery;
657 foreach(l, subquery->targetList)
659 TargetEntry *tle = (TargetEntry *) lfirst(l);
662 * A resjunk column of the subquery can be reflected as
663 * resjunk in the physical tlist; we need not punt.
667 exprType((Node *) tle->expr),
668 exprTypmod((Node *) tle->expr),
671 tlist = lappend(tlist,
672 makeTargetEntry((Expr *) var,
680 expandRTE(rte, varno, 0, true /* include dropped */ ,
684 var = (Var *) lfirst(l);
687 * A non-Var in expandRTE's output means a dropped column;
696 tlist = lappend(tlist,
697 makeTargetEntry((Expr *) var,
705 expandRTE(rte, varno, 0, false /* dropped not applicable */ ,
709 var = (Var *) lfirst(l);
711 tlist = lappend(tlist,
712 makeTargetEntry((Expr *) var,
721 elog(ERROR, "unsupported RTE kind %d in build_physical_tlist",
730 * restriction_selectivity
732 * Returns the selectivity of a specified restriction operator clause.
733 * This code executes registered procedures stored in the
734 * operator relation, by calling the function manager.
736 * See clause_selectivity() for the meaning of the additional parameters.
739 restriction_selectivity(PlannerInfo *root,
744 RegProcedure oprrest = get_oprrest(operator);
748 * if the oprrest procedure is missing for whatever reason, use a
752 return (Selectivity) 0.5;
754 result = DatumGetFloat8(OidFunctionCall4(oprrest,
755 PointerGetDatum(root),
756 ObjectIdGetDatum(operator),
757 PointerGetDatum(args),
758 Int32GetDatum(varRelid)));
760 if (result < 0.0 || result > 1.0)
761 elog(ERROR, "invalid restriction selectivity: %f", result);
763 return (Selectivity) result;
769 * Returns the selectivity of a specified join operator clause.
770 * This code executes registered procedures stored in the
771 * operator relation, by calling the function manager.
774 join_selectivity(PlannerInfo *root,
779 RegProcedure oprjoin = get_oprjoin(operator);
783 * if the oprjoin procedure is missing for whatever reason, use a
787 return (Selectivity) 0.5;
789 result = DatumGetFloat8(OidFunctionCall4(oprjoin,
790 PointerGetDatum(root),
791 ObjectIdGetDatum(operator),
792 PointerGetDatum(args),
793 Int16GetDatum(jointype)));
795 if (result < 0.0 || result > 1.0)
796 elog(ERROR, "invalid join selectivity: %f", result);
798 return (Selectivity) result;
802 * find_inheritance_children
804 * Returns a list containing the OIDs of all relations which
805 * inherit *directly* from the relation with OID 'inhparent'.
807 * XXX might be a good idea to create an index on pg_inherits' inhparent
808 * field, so that we can use an indexscan instead of sequential scan here.
809 * However, in typical databases pg_inherits won't have enough entries to
810 * justify an indexscan...
813 find_inheritance_children(Oid inhparent)
818 HeapTuple inheritsTuple;
823 * Can skip the scan if pg_class shows the relation has never had a
826 if (!has_subclass(inhparent))
830 Anum_pg_inherits_inhparent,
831 BTEqualStrategyNumber, F_OIDEQ,
832 ObjectIdGetDatum(inhparent));
833 relation = heap_open(InheritsRelationId, AccessShareLock);
834 scan = heap_beginscan(relation, SnapshotNow, 1, key);
835 while ((inheritsTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
837 inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid;
838 list = lappend_oid(list, inhrelid);
841 heap_close(relation, AccessShareLock);
848 * In the current implementation, has_subclass returns whether a
849 * particular class *might* have a subclass. It will not return the
850 * correct result if a class had a subclass which was later dropped.
851 * This is because relhassubclass in pg_class is not updated when a
852 * subclass is dropped, primarily because of concurrency concerns.
854 * Currently has_subclass is only used as an efficiency hack to skip
855 * unnecessary inheritance searches, so this is OK.
858 has_subclass(Oid relationId)
863 tuple = SearchSysCache(RELOID,
864 ObjectIdGetDatum(relationId),
866 if (!HeapTupleIsValid(tuple))
867 elog(ERROR, "cache lookup failed for relation %u", relationId);
869 result = ((Form_pg_class) GETSTRUCT(tuple))->relhassubclass;
870 ReleaseSysCache(tuple);
877 * Detect whether there is a unique index on the specified attribute
878 * of the specified relation, thus allowing us to conclude that all
879 * the (non-null) values of the attribute are distinct.
882 has_unique_index(RelOptInfo *rel, AttrNumber attno)
886 foreach(ilist, rel->indexlist)
888 IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
891 * Note: ignore partial indexes, since they don't allow us to conclude
892 * that all attr values are distinct. We don't take any interest in
893 * expressional indexes either. Also, a multicolumn unique index
894 * doesn't allow us to conclude that just the specified attr is
898 index->ncolumns == 1 &&
899 index->indexkeys[0] == attno &&
900 index->indpred == NIL)