1 /*-------------------------------------------------------------------------
4 * routines for accessing the system catalogs
7 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
12 * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.134 2007/04/21 21:01:45 tgl Exp $
14 *-------------------------------------------------------------------------
20 #include "access/genam.h"
21 #include "access/heapam.h"
22 #include "catalog/pg_inherits.h"
23 #include "nodes/makefuncs.h"
24 #include "optimizer/clauses.h"
25 #include "optimizer/plancat.h"
26 #include "optimizer/predtest.h"
27 #include "optimizer/prep.h"
28 #include "parser/parse_expr.h"
29 #include "parser/parse_relation.h"
30 #include "parser/parsetree.h"
31 #include "rewrite/rewriteManip.h"
32 #include "utils/fmgroids.h"
33 #include "utils/lsyscache.h"
34 #include "utils/relcache.h"
35 #include "utils/syscache.h"
36 #include "catalog/catalog.h"
37 #include "miscadmin.h"
41 bool constraint_exclusion = false;
44 static void estimate_rel_size(Relation rel, int32 *attr_widths,
45 BlockNumber *pages, double *tuples);
46 static List *get_relation_constraints(Oid relationObjectId, RelOptInfo *rel);
51 * Retrieves catalog information for a given relation.
53 * Given the Oid of the relation, return the following info into fields
54 * of the RelOptInfo struct:
56 * min_attr lowest valid AttrNumber
57 * max_attr highest valid AttrNumber
58 * indexlist list of IndexOptInfos for relation's indexes
59 * pages number of pages
60 * tuples number of tuples
62 * Also, initialize the attr_needed[] and attr_widths[] arrays. In most
63 * cases these are left as zeroes, but sometimes we need to compute attr
64 * widths here, and we may as well cache the results for costsize.c.
66 * If inhparent is true, all we need to do is set up the attr arrays:
67 * the RelOptInfo actually represents the appendrel formed by an inheritance
68 * tree, and so the parent rel's physical size and index information isn't
72 get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
75 Index varno = rel->relid;
78 List *indexinfos = NIL;
81 * We need not lock the relation since it was already locked, either by
82 * the rewriter or when expand_inherited_rtentry() added it to the query's
85 relation = heap_open(relationObjectId, NoLock);
87 rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1;
88 rel->max_attr = RelationGetNumberOfAttributes(relation);
90 Assert(rel->max_attr >= rel->min_attr);
91 rel->attr_needed = (Relids *)
92 palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids));
93 rel->attr_widths = (int32 *)
94 palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32));
97 * Estimate relation size --- unless it's an inheritance parent, in which
98 * case the size will be computed later in set_append_rel_pathlist, and we
99 * must leave it zero for now to avoid bollixing the total_table_pages
103 estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
104 &rel->pages, &rel->tuples);
107 * Make list of indexes. Ignore indexes on system catalogs if told to.
108 * Don't bother with indexes for an inheritance parent, either.
111 (IgnoreSystemIndexes && IsSystemClass(relation->rd_rel)))
114 hasindex = relation->rd_rel->relhasindex;
122 indexoidlist = RelationGetIndexList(relation);
125 * For each index, we get the same type of lock that the executor will
126 * need, and do not release it. This saves a couple of trips to the
127 * shared lock manager while not creating any real loss of
128 * concurrency, because no schema changes could be happening on the
129 * index while we hold lock on the parent rel, and neither lock type
130 * blocks any other kind of index operation.
132 if (rel->relid == root->parse->resultRelation)
133 lmode = RowExclusiveLock;
135 lmode = AccessShareLock;
137 foreach(l, indexoidlist)
139 Oid indexoid = lfirst_oid(l);
140 Relation indexRelation;
147 * Extract info from the relation descriptor for the index.
149 indexRelation = index_open(indexoid, lmode);
150 index = indexRelation->rd_index;
153 * Ignore invalid indexes, since they can't safely be used for
154 * queries. Note that this is OK because the data structure we
155 * are constructing is only used by the planner --- the executor
156 * still needs to insert into "invalid" indexes!
158 if (!index->indisvalid)
160 index_close(indexRelation, NoLock);
164 info = makeNode(IndexOptInfo);
166 info->indexoid = index->indexrelid;
168 info->ncolumns = ncolumns = index->indnatts;
171 * Need to make opfamily array large enough to put a terminating
174 info->indexkeys = (int *) palloc(sizeof(int) * ncolumns);
175 info->opfamily = (Oid *) palloc0(sizeof(Oid) * (ncolumns + 1));
176 /* initialize these to zeroes in case index is unordered */
177 info->fwdsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns);
178 info->revsortop = (Oid *) palloc0(sizeof(Oid) * ncolumns);
179 info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns);
181 for (i = 0; i < ncolumns; i++)
183 info->opfamily[i] = indexRelation->rd_opfamily[i];
184 info->indexkeys[i] = index->indkey.values[i];
187 info->relam = indexRelation->rd_rel->relam;
188 info->amcostestimate = indexRelation->rd_am->amcostestimate;
189 info->amoptionalkey = indexRelation->rd_am->amoptionalkey;
190 info->amsearchnulls = indexRelation->rd_am->amsearchnulls;
193 * Fetch the ordering operators associated with the index, if any.
194 * We expect that all ordering-capable indexes use btree's
195 * strategy numbers for the ordering operators.
197 if (indexRelation->rd_am->amcanorder)
199 int nstrat = indexRelation->rd_am->amstrategies;
201 for (i = 0; i < ncolumns; i++)
203 int16 opt = indexRelation->rd_indoption[i];
207 if (opt & INDOPTION_DESC)
209 fwdstrat = BTGreaterStrategyNumber;
210 revstrat = BTLessStrategyNumber;
214 fwdstrat = BTLessStrategyNumber;
215 revstrat = BTGreaterStrategyNumber;
218 * Index AM must have a fixed set of strategies for it
219 * to make sense to specify amcanorder, so we
220 * need not allow the case amstrategies == 0.
224 Assert(fwdstrat <= nstrat);
225 info->fwdsortop[i] = indexRelation->rd_operator[i * nstrat + fwdstrat - 1];
229 Assert(revstrat <= nstrat);
230 info->revsortop[i] = indexRelation->rd_operator[i * nstrat + revstrat - 1];
232 info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
237 * Fetch the index expressions and predicate, if any. We must
238 * modify the copies we obtain from the relcache to have the
239 * correct varno for the parent relation, so that they match up
240 * correctly against qual clauses.
242 info->indexprs = RelationGetIndexExpressions(indexRelation);
243 info->indpred = RelationGetIndexPredicate(indexRelation);
244 if (info->indexprs && varno != 1)
245 ChangeVarNodes((Node *) info->indexprs, 1, varno, 0);
246 if (info->indpred && varno != 1)
247 ChangeVarNodes((Node *) info->indpred, 1, varno, 0);
248 info->predOK = false; /* set later in indxpath.c */
249 info->unique = index->indisunique;
252 * Estimate the index size. If it's not a partial index, we lock
253 * the number-of-tuples estimate to equal the parent table; if it
254 * is partial then we have to use the same methods as we would for
255 * a table, except we can be sure that the index is not larger
258 if (info->indpred == NIL)
260 info->pages = RelationGetNumberOfBlocks(indexRelation);
261 info->tuples = rel->tuples;
265 estimate_rel_size(indexRelation, NULL,
266 &info->pages, &info->tuples);
267 if (info->tuples > rel->tuples)
268 info->tuples = rel->tuples;
271 index_close(indexRelation, NoLock);
273 indexinfos = lcons(info, indexinfos);
276 list_free(indexoidlist);
279 rel->indexlist = indexinfos;
281 heap_close(relation, NoLock);
285 * estimate_rel_size - estimate # pages and # tuples in a table or index
287 * If attr_widths isn't NULL, it points to the zero-index entry of the
288 * relation's attr_width[] cache; we fill this in if we have need to compute
289 * the attribute widths for estimation purposes.
292 estimate_rel_size(Relation rel, int32 *attr_widths,
293 BlockNumber *pages, double *tuples)
295 BlockNumber curpages;
296 BlockNumber relpages;
300 switch (rel->rd_rel->relkind)
302 case RELKIND_RELATION:
304 case RELKIND_TOASTVALUE:
305 /* it has storage, ok to call the smgr */
306 curpages = RelationGetNumberOfBlocks(rel);
309 * HACK: if the relation has never yet been vacuumed, use a
310 * minimum estimate of 10 pages. This emulates a desirable aspect
311 * of pre-8.0 behavior, which is that we wouldn't assume a newly
312 * created relation is really small, which saves us from making
313 * really bad plans during initial data loading. (The plans are
314 * not wrong when they are made, but if they are cached and used
315 * again after the table has grown a lot, they are bad.) It would
316 * be better to force replanning if the table size has changed a
317 * lot since the plan was made ... but we don't currently have any
318 * infrastructure for redoing cached plans at all, so we have to
319 * kluge things here instead.
321 * We approximate "never vacuumed" by "has relpages = 0", which
322 * means this will also fire on genuinely empty relations. Not
323 * great, but fortunately that's a seldom-seen case in the real
324 * world, and it shouldn't degrade the quality of the plan too
325 * much anyway to err in this direction.
327 if (curpages < 10 && rel->rd_rel->relpages == 0)
330 /* report estimated # pages */
332 /* quick exit if rel is clearly empty */
338 /* coerce values in pg_class to more desirable types */
339 relpages = (BlockNumber) rel->rd_rel->relpages;
340 reltuples = (double) rel->rd_rel->reltuples;
343 * If it's an index, discount the metapage. This is a kluge
344 * because it assumes more than it ought to about index contents;
345 * it's reasonably OK for btrees but a bit suspect otherwise.
347 if (rel->rd_rel->relkind == RELKIND_INDEX &&
353 /* estimate number of tuples from previous tuple density */
355 density = reltuples / (double) relpages;
359 * When we have no data because the relation was truncated,
360 * estimate tuple width from attribute datatypes. We assume
361 * here that the pages are completely full, which is OK for
362 * tables (since they've presumably not been VACUUMed yet) but
363 * is probably an overestimate for indexes. Fortunately
364 * get_relation_info() can clamp the overestimate to the
365 * parent table's size.
367 * Note: this code intentionally disregards alignment
368 * considerations, because (a) that would be gilding the lily
369 * considering how crude the estimate is, and (b) it creates
370 * platform dependencies in the default plans which are kind
371 * of a headache for regression testing.
373 int32 tuple_width = 0;
376 for (i = 1; i <= RelationGetNumberOfAttributes(rel); i++)
378 Form_pg_attribute att = rel->rd_att->attrs[i - 1];
381 if (att->attisdropped)
383 /* This should match set_rel_width() in costsize.c */
384 item_width = get_attavgwidth(RelationGetRelid(rel), i);
387 item_width = get_typavgwidth(att->atttypid,
389 Assert(item_width > 0);
391 if (attr_widths != NULL)
392 attr_widths[i] = item_width;
393 tuple_width += item_width;
395 tuple_width += sizeof(HeapTupleHeaderData);
396 tuple_width += sizeof(ItemPointerData);
397 /* note: integer division is intentional here */
398 density = (BLCKSZ - sizeof(PageHeaderData)) / tuple_width;
400 *tuples = rint(density * (double) curpages);
402 case RELKIND_SEQUENCE:
403 /* Sequences always have a known size */
408 /* else it has no disk storage; probably shouldn't get here? */
417 * get_relation_constraints
419 * Retrieve the CHECK constraint expressions of the given relation.
421 * Returns a List (possibly empty) of constraint expressions. Each one
422 * has been canonicalized, and its Vars are changed to have the varno
423 * indicated by rel->relid. This allows the expressions to be easily
424 * compared to expressions taken from WHERE.
426 * Note: at present this is invoked at most once per relation per planner
427 * run, and in many cases it won't be invoked at all, so there seems no
428 * point in caching the data in RelOptInfo.
431 get_relation_constraints(Oid relationObjectId, RelOptInfo *rel)
434 Index varno = rel->relid;
439 * We assume the relation has already been safely locked.
441 relation = heap_open(relationObjectId, NoLock);
443 constr = relation->rd_att->constr;
446 int num_check = constr->num_check;
449 for (i = 0; i < num_check; i++)
453 cexpr = stringToNode(constr->check[i].ccbin);
456 * Run each expression through const-simplification and
457 * canonicalization. This is not just an optimization, but is
458 * necessary, because we will be comparing it to
459 * similarly-processed qual clauses, and may fail to detect valid
460 * matches without this. This must match the processing done to
461 * qual clauses in preprocess_expression()! (We can skip the
462 * stuff involving subqueries, however, since we don't allow any
463 * in check constraints.)
465 cexpr = eval_const_expressions(cexpr);
467 cexpr = (Node *) canonicalize_qual((Expr *) cexpr);
470 * Also mark any coercion format fields as "don't care", so that
471 * we can match to both explicit and implicit coercions.
473 set_coercionform_dontcare(cexpr);
475 /* Fix Vars to have the desired varno */
477 ChangeVarNodes(cexpr, 1, varno, 0);
480 * Finally, convert to implicit-AND format (that is, a List) and
481 * append the resulting item(s) to our output list.
483 result = list_concat(result,
484 make_ands_implicit((Expr *) cexpr));
488 heap_close(relation, NoLock);
495 * relation_excluded_by_constraints
497 * Detect whether the relation need not be scanned because it has either
498 * self-inconsistent restrictions, or restrictions inconsistent with the
499 * relation's CHECK constraints.
501 * Note: this examines only rel->relid and rel->baserestrictinfo; therefore
502 * it can be called before filling in other fields of the RelOptInfo.
505 relation_excluded_by_constraints(RelOptInfo *rel, RangeTblEntry *rte)
507 List *safe_restrictions;
508 List *constraint_pred;
509 List *safe_constraints;
512 /* Skip the test if constraint exclusion is disabled */
513 if (!constraint_exclusion)
517 * Check for self-contradictory restriction clauses. We dare not make
518 * deductions with non-immutable functions, but any immutable clauses that
519 * are self-contradictory allow us to conclude the scan is unnecessary.
521 * Note: strip off RestrictInfo because predicate_refuted_by() isn't
522 * expecting to see any in its predicate argument.
524 safe_restrictions = NIL;
525 foreach(lc, rel->baserestrictinfo)
527 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
529 if (!contain_mutable_functions((Node *) rinfo->clause))
530 safe_restrictions = lappend(safe_restrictions, rinfo->clause);
533 if (predicate_refuted_by(safe_restrictions, safe_restrictions))
536 /* Only plain relations have constraints */
537 if (rte->rtekind != RTE_RELATION || rte->inh)
540 /* OK to fetch the constraint expressions */
541 constraint_pred = get_relation_constraints(rte->relid, rel);
544 * We do not currently enforce that CHECK constraints contain only
545 * immutable functions, so it's necessary to check here. We daren't draw
546 * conclusions from plan-time evaluation of non-immutable functions. Since
547 * they're ANDed, we can just ignore any mutable constraints in the list,
548 * and reason about the rest.
550 safe_constraints = NIL;
551 foreach(lc, constraint_pred)
553 Node *pred = (Node *) lfirst(lc);
555 if (!contain_mutable_functions(pred))
556 safe_constraints = lappend(safe_constraints, pred);
560 * The constraints are effectively ANDed together, so we can just try to
561 * refute the entire collection at once. This may allow us to make proofs
562 * that would fail if we took them individually.
564 * Note: we use rel->baserestrictinfo, not safe_restrictions as might seem
565 * an obvious optimization. Some of the clauses might be OR clauses that
566 * have volatile and nonvolatile subclauses, and it's OK to make
567 * deductions with the nonvolatile parts.
569 if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo))
577 * build_physical_tlist
579 * Build a targetlist consisting of exactly the relation's user attributes,
580 * in order. The executor can special-case such tlists to avoid a projection
581 * step at runtime, so we use such tlists preferentially for scan nodes.
583 * Exception: if there are any dropped columns, we punt and return NIL.
584 * Ideally we would like to handle the dropped-column case too. However this
585 * creates problems for ExecTypeFromTL, which may be asked to build a tupdesc
586 * for a tlist that includes vars of no-longer-existent types. In theory we
587 * could dig out the required info from the pg_attribute entries of the
588 * relation, but that data is not readily available to ExecTypeFromTL.
589 * For now, we don't apply the physical-tlist optimization when there are
592 * We also support building a "physical" tlist for subqueries, functions,
593 * and values lists, since the same optimization can occur in SubqueryScan,
594 * FunctionScan, and ValuesScan nodes.
597 build_physical_tlist(PlannerInfo *root, RelOptInfo *rel)
600 Index varno = rel->relid;
601 RangeTblEntry *rte = planner_rt_fetch(varno, root);
610 switch (rte->rtekind)
613 /* Assume we already have adequate lock */
614 relation = heap_open(rte->relid, NoLock);
616 numattrs = RelationGetNumberOfAttributes(relation);
617 for (attrno = 1; attrno <= numattrs; attrno++)
619 Form_pg_attribute att_tup = relation->rd_att->attrs[attrno - 1];
621 if (att_tup->attisdropped)
623 /* found a dropped col, so punt */
634 tlist = lappend(tlist,
635 makeTargetEntry((Expr *) var,
641 heap_close(relation, NoLock);
645 subquery = rte->subquery;
646 foreach(l, subquery->targetList)
648 TargetEntry *tle = (TargetEntry *) lfirst(l);
651 * A resjunk column of the subquery can be reflected as
652 * resjunk in the physical tlist; we need not punt.
656 exprType((Node *) tle->expr),
657 exprTypmod((Node *) tle->expr),
660 tlist = lappend(tlist,
661 makeTargetEntry((Expr *) var,
669 expandRTE(rte, varno, 0, true /* include dropped */ ,
673 var = (Var *) lfirst(l);
676 * A non-Var in expandRTE's output means a dropped column;
685 tlist = lappend(tlist,
686 makeTargetEntry((Expr *) var,
694 expandRTE(rte, varno, 0, false /* dropped not applicable */ ,
698 var = (Var *) lfirst(l);
700 tlist = lappend(tlist,
701 makeTargetEntry((Expr *) var,
710 elog(ERROR, "unsupported RTE kind %d in build_physical_tlist",
719 * restriction_selectivity
721 * Returns the selectivity of a specified restriction operator clause.
722 * This code executes registered procedures stored in the
723 * operator relation, by calling the function manager.
725 * See clause_selectivity() for the meaning of the additional parameters.
728 restriction_selectivity(PlannerInfo *root,
733 RegProcedure oprrest = get_oprrest(operator);
737 * if the oprrest procedure is missing for whatever reason, use a
741 return (Selectivity) 0.5;
743 result = DatumGetFloat8(OidFunctionCall4(oprrest,
744 PointerGetDatum(root),
745 ObjectIdGetDatum(operator),
746 PointerGetDatum(args),
747 Int32GetDatum(varRelid)));
749 if (result < 0.0 || result > 1.0)
750 elog(ERROR, "invalid restriction selectivity: %f", result);
752 return (Selectivity) result;
758 * Returns the selectivity of a specified join operator clause.
759 * This code executes registered procedures stored in the
760 * operator relation, by calling the function manager.
763 join_selectivity(PlannerInfo *root,
768 RegProcedure oprjoin = get_oprjoin(operator);
772 * if the oprjoin procedure is missing for whatever reason, use a
776 return (Selectivity) 0.5;
778 result = DatumGetFloat8(OidFunctionCall4(oprjoin,
779 PointerGetDatum(root),
780 ObjectIdGetDatum(operator),
781 PointerGetDatum(args),
782 Int16GetDatum(jointype)));
784 if (result < 0.0 || result > 1.0)
785 elog(ERROR, "invalid join selectivity: %f", result);
787 return (Selectivity) result;
791 * find_inheritance_children
793 * Returns a list containing the OIDs of all relations which
794 * inherit *directly* from the relation with OID 'inhparent'.
796 * XXX might be a good idea to create an index on pg_inherits' inhparent
797 * field, so that we can use an indexscan instead of sequential scan here.
798 * However, in typical databases pg_inherits won't have enough entries to
799 * justify an indexscan...
802 find_inheritance_children(Oid inhparent)
807 HeapTuple inheritsTuple;
812 * Can skip the scan if pg_class shows the relation has never had a
815 if (!has_subclass(inhparent))
819 Anum_pg_inherits_inhparent,
820 BTEqualStrategyNumber, F_OIDEQ,
821 ObjectIdGetDatum(inhparent));
822 relation = heap_open(InheritsRelationId, AccessShareLock);
823 scan = heap_beginscan(relation, SnapshotNow, 1, key);
824 while ((inheritsTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
826 inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid;
827 list = lappend_oid(list, inhrelid);
830 heap_close(relation, AccessShareLock);
837 * In the current implementation, has_subclass returns whether a
838 * particular class *might* have a subclass. It will not return the
839 * correct result if a class had a subclass which was later dropped.
840 * This is because relhassubclass in pg_class is not updated when a
841 * subclass is dropped, primarily because of concurrency concerns.
843 * Currently has_subclass is only used as an efficiency hack to skip
844 * unnecessary inheritance searches, so this is OK.
847 has_subclass(Oid relationId)
852 tuple = SearchSysCache(RELOID,
853 ObjectIdGetDatum(relationId),
855 if (!HeapTupleIsValid(tuple))
856 elog(ERROR, "cache lookup failed for relation %u", relationId);
858 result = ((Form_pg_class) GETSTRUCT(tuple))->relhassubclass;
859 ReleaseSysCache(tuple);
866 * Detect whether there is a unique index on the specified attribute
867 * of the specified relation, thus allowing us to conclude that all
868 * the (non-null) values of the attribute are distinct.
871 has_unique_index(RelOptInfo *rel, AttrNumber attno)
875 foreach(ilist, rel->indexlist)
877 IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
880 * Note: ignore partial indexes, since they don't allow us to conclude
881 * that all attr values are distinct. We don't take any interest in
882 * expressional indexes either. Also, a multicolumn unique index
883 * doesn't allow us to conclude that just the specified attr is
887 index->ncolumns == 1 &&
888 index->indexkeys[0] == attno &&
889 index->indpred == NIL)