]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/clausesel.c
First cut at unifying regular selectivity estimation with indexscan
[postgresql] / src / backend / optimizer / path / clausesel.c
1 /*-------------------------------------------------------------------------
2  *
3  * clausesel.c
4  *        Routines to compute clause selectivities
5  *
6  * Copyright (c) 1994, Regents of the University of California
7  *
8  *
9  * IDENTIFICATION
10  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "catalog/pg_operator.h"
17 #include "optimizer/clauses.h"
18 #include "optimizer/cost.h"
19 #include "optimizer/internal.h"
20 #include "optimizer/plancat.h"
21 #include "optimizer/restrictinfo.h"
22 #include "parser/parsetree.h"
23 #include "utils/lsyscache.h"
24
25
26 /****************************************************************************
27  *              ROUTINES TO COMPUTE SELECTIVITIES
28  ****************************************************************************/
29
30 /*
31  * restrictlist_selectivity -
32  *        Compute the selectivity of an implicitly-ANDed list of RestrictInfo
33  *        clauses.
34  *
35  * This is the same as clauselist_selectivity except for the representation
36  * of the clause list.
37  */
38 Selectivity
39 restrictlist_selectivity(Query *root,
40                                                  List *restrictinfo_list,
41                                                  int varRelid)
42 {
43         List       *clauselist = get_actual_clauses(restrictinfo_list);
44         Selectivity     result;
45
46         result = clauselist_selectivity(root, clauselist, varRelid);
47         freeList(clauselist);
48         return result;
49 }
50
51 /*
52  * clauselist_selectivity -
53  *        Compute the selectivity of an implicitly-ANDed list of boolean
54  *        expression clauses.  The list can be empty, in which case 1.0
55  *        must be returned.
56  *
57  * See clause_selectivity() for the meaning of the varRelid parameter.
58  */
59 Selectivity
60 clauselist_selectivity(Query *root,
61                                            List *clauses,
62                                            int varRelid)
63 {
64         Selectivity             s1 = 1.0;
65         List               *clause;
66
67         /* Use the product of the selectivities of the subclauses.
68          * XXX this is too optimistic, since the subclauses
69          * are very likely not independent...
70          */
71         foreach(clause, clauses)
72         {
73                 Selectivity     s2 = clause_selectivity(root,
74                                                                                         (Node *) lfirst(clause),
75                                                                                         varRelid);
76                 s1 = s1 * s2;
77         }
78         return s1;
79 }
80
81 /*
82  * clause_selectivity -
83  *        Compute the selectivity of a general boolean expression clause.
84  *
85  * varRelid is either 0 or a rangetable index.
86  *
87  * When varRelid is not 0, only variables belonging to that relation are
88  * considered in computing selectivity; other vars are treated as constants
89  * of unknown values.  This is appropriate for estimating the selectivity of
90  * a join clause that is being used as a restriction clause in a scan of a
91  * nestloop join's inner relation --- varRelid should then be the ID of the
92  * inner relation.
93  *
94  * When varRelid is 0, all variables are treated as variables.  This
95  * is appropriate for ordinary join clauses and restriction clauses.
96  */
97 Selectivity
98 clause_selectivity(Query *root,
99                                    Node *clause,
100                                    int varRelid)
101 {
102         Selectivity             s1 = 1.0;       /* default for any unhandled clause type */
103
104         if (clause == NULL)
105                 return s1;
106         if (IsA(clause, Var))
107         {
108                 /*
109                  * we have a bool Var.  This is exactly equivalent to the clause:
110                  * reln.attribute = 't' so we compute the selectivity as if that
111                  * is what we have. The magic #define constants are a hack.  I
112                  * didn't want to have to do system cache look ups to find out all
113                  * of that info.
114                  */
115                 Index   varno = ((Var *) clause)->varno;
116
117                 if (varRelid == 0 || varRelid == varno)
118                         s1 = restriction_selectivity(F_EQSEL,
119                                                                                  BooleanEqualOperator,
120                                                                                  getrelid(varno, root->rtable),
121                                                                                  ((Var *) clause)->varattno,
122                                                                                  Int8GetDatum(true),
123                                                                                  SEL_CONSTANT | SEL_RIGHT);
124                 /* an outer-relation bool var is taken as always true... */
125         }
126         else if (IsA(clause, Param))
127         {
128                 /* XXX any way to do better? */
129                 s1 = 1.0;
130         }
131         else if (IsA(clause, Const))
132         {
133                 /* bool constant is pretty easy... */
134                 s1 = ((bool) ((Const *) clause)->constvalue) ? 1.0 : 0.0;
135         }
136         else if (not_clause(clause))
137         {
138                 /* inverse of the selectivity of the underlying clause */
139                 s1 = 1.0 - clause_selectivity(root,
140                                                                           (Node*) get_notclausearg((Expr*) clause),
141                                                                           varRelid);
142         }
143         else if (and_clause(clause))
144         {
145                 /* share code with clauselist_selectivity() */
146                 s1 = clauselist_selectivity(root,
147                                                                         ((Expr *) clause)->args,
148                                                                         varRelid);
149         }
150         else if (or_clause(clause))
151         {
152                 /*
153                  * Selectivities for an 'or' clause are computed as s1+s2 - s1*s2
154                  * to account for the probable overlap of selected tuple sets.
155                  * XXX is this too conservative?
156                  */
157                 List   *arg;
158                 s1 = 0.0;
159                 foreach(arg, ((Expr *) clause)->args)
160                 {
161                         Selectivity     s2 = clause_selectivity(root,
162                                                                                                 (Node *) lfirst(arg),
163                                                                                                 varRelid);
164                         s1 = s1 + s2 - s1 * s2;
165                 }
166         }
167         else if (is_opclause(clause))
168         {
169                 Oid                     opno = ((Oper *) ((Expr *) clause)->oper)->opno;
170                 bool            is_join_clause;
171
172                 if (varRelid != 0)
173                 {
174                         /*
175                          * If we are considering a nestloop join then all clauses
176                          * are restriction clauses, since we are only interested in
177                          * the one relation.
178                          */
179                         is_join_clause = false;
180                 }
181                 else
182                 {
183                         /*
184                          * Otherwise, it's a join if there's more than one relation used.
185                          */
186                         is_join_clause = (NumRelids(clause) > 1);
187                 }
188
189                 if (is_join_clause)
190                 {
191                         /* Estimate selectivity for a join clause. */
192                         RegProcedure oprjoin = get_oprjoin(opno);
193
194                         /*
195                          * if the oprjoin procedure is missing for whatever reason, use a
196                          * selectivity of 0.5
197                          */
198                         if (!oprjoin)
199                                 s1 = (Selectivity) 0.5;
200                         else
201                         {
202                                 int                     relid1,
203                                                         relid2;
204                                 AttrNumber      attno1,
205                                                         attno2;
206                                 Oid                     reloid1,
207                                                         reloid2;
208
209                                 get_rels_atts(clause, &relid1, &attno1, &relid2, &attno2);
210                                 reloid1 = relid1 ? getrelid(relid1, root->rtable) : InvalidOid;
211                                 reloid2 = relid2 ? getrelid(relid2, root->rtable) : InvalidOid;
212                                 s1 = join_selectivity(oprjoin, opno,
213                                                                           reloid1, attno1,
214                                                                           reloid2, attno2);
215                         }
216                 }
217                 else
218                 {
219                         /* Estimate selectivity for a restriction clause. */
220                         RegProcedure oprrest = get_oprrest(opno);
221
222                         /*
223                          * if the oprrest procedure is missing for whatever reason, use a
224                          * selectivity of 0.5
225                          */
226                         if (!oprrest)
227                                 s1 = (Selectivity) 0.5;
228                         else
229                         {
230                                 int                     relidx;
231                                 AttrNumber      attno;
232                                 Datum           constval;
233                                 int                     flag;
234                                 Oid                     reloid;
235
236                                 get_relattval(clause, varRelid,
237                                                           &relidx, &attno, &constval, &flag);
238                                 reloid = relidx ? getrelid(relidx, root->rtable) : InvalidOid;
239                                 s1 = restriction_selectivity(oprrest, opno,
240                                                                                          reloid, attno,
241                                                                                          constval, flag);
242                         }
243                 }
244         }
245         else if (is_funcclause(clause))
246         {
247                 /*
248                  * This is not an operator, so we guess at the selectivity. THIS
249                  * IS A HACK TO GET V4 OUT THE DOOR.  FUNCS SHOULD BE ABLE TO HAVE
250                  * SELECTIVITIES THEMSELVES.       -- JMH 7/9/92
251                  */
252                 s1 = (Selectivity) 0.3333333;
253         }
254         else if (is_subplan(clause))
255         {
256                 /*
257                  * Just for the moment! FIX ME! - vadim 02/04/98
258                  */
259                 s1 = 1.0;
260         }
261
262         return s1;
263 }