/*-------------------------------------------------------------------------
 *
 * selfuncs.c
 *	  Selectivity functions and index cost estimation functions for
 *	  standard operators and index access methods.
 *
 *	  Selectivity routines are registered in the pg_operator catalog
 *	  in the "oprrest" and "oprjoin" attributes.
 *
 *	  Index cost functions are located via the index AM's API struct,
 *	  which is obtained from the handler function registered in pg_am.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/selfuncs.c
 *
 *-------------------------------------------------------------------------
 */

/*----------
 * Operator selectivity estimation functions are called to estimate the
 * selectivity of WHERE clauses whose top-level operator is their operator.
 * We divide the problem into two cases:
 *		Restriction clause estimation: the clause involves vars of just
 *			one relation.
 *		Join clause estimation: the clause involves vars of multiple rels.
 * Join selectivity estimation is far more difficult and usually less accurate
 * than restriction estimation.
 *
 * When dealing with the inner scan of a nestloop join, we consider the
 * join's joinclauses as restriction clauses for the inner relation, and
 * treat vars of the outer relation as parameters (a/k/a constants of unknown
 * values).  So, restriction estimators need to be able to accept an argument
 * telling which relation is to be treated as the variable.
 *
 * The call convention for a restriction estimator (oprrest function) is
 *
 *		Selectivity oprrest (PlannerInfo *root,
 *							 Oid operator,
 *							 List *args,
 *							 int varRelid);
 *
 * root: general information about the query (rtable and RelOptInfo lists
 * are particularly important for the estimator).
 * operator: OID of the specific operator in question.
 * args: argument list from the operator clause.
 * varRelid: if not zero, the relid (rtable index) of the relation to
 * be treated as the variable relation.  May be zero if the args list
 * is known to contain vars of only one relation.
 *
 * This is represented at the SQL level (in pg_proc) as
 *
 *		float8 oprrest (internal, oid, internal, int4);
 *
 * The result is a selectivity, that is, a fraction (0 to 1) of the rows
 * of the relation that are expected to produce a TRUE result for the
 * given operator.
 *
 * The call convention for a join estimator (oprjoin function) is similar
 * except that varRelid is not needed, and instead join information is
 * supplied:
 *
 *		Selectivity oprjoin (PlannerInfo *root,
 *							 Oid operator,
 *							 List *args,
 *							 JoinType jointype,
 *							 SpecialJoinInfo *sjinfo);
 *
 *		float8 oprjoin (internal, oid, internal, int2, internal);
 *
 * (Before Postgres 8.4, join estimators had only the first four of these
 * parameters.  That signature is still allowed, but deprecated.)  The
 * relationship between jointype and sjinfo is explained in the comments for
 * clause_selectivity() --- the short version is that jointype is usually
 * best ignored in favor of examining sjinfo.
 *
 * Join selectivity for regular inner and outer joins is defined as the
 * fraction (0 to 1) of the cross product of the relations that is expected
 * to produce a TRUE result for the given operator.  For both semi and anti
 * joins, however, the selectivity is defined as the fraction of the left-hand
 * side relation's rows that are expected to have a match (ie, at least one
 * row with a TRUE result) in the right-hand side.
 *
 * For both oprrest and oprjoin functions, the operator's input collation OID
 * (if any) is passed using the standard fmgr mechanism, so that the estimator
 * function can fetch it with PG_GET_COLLATION().  Note, however, that all
 * statistics in pg_statistic are currently built using the database's default
 * collation.  Thus, in most cases where we are looking at statistics, we
 * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
 * We expect that the error induced by doing this is usually not large enough
 * to justify complicating matters.
 *----------
 */
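
/*
 * Illustrative example: a user-defined operator hooks into these estimators
 * through the RESTRICT and JOIN clauses of CREATE OPERATOR.  The "===" name
 * below is purely hypothetical; eqsel (defined in this file) and eqjoinsel
 * are the stock equality estimators referenced from pg_operator's oprrest
 * and oprjoin columns.
 *
 *		CREATE OPERATOR === (
 *			LEFTARG = text,
 *			RIGHTARG = text,
 *			PROCEDURE = texteq,
 *			COMMUTATOR = ===,
 *			RESTRICT = eqsel,
 *			JOIN = eqjoinsel
 *		);
 */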

#include "postgres.h"

#include <ctype.h>
#include <float.h>
#include <math.h>

#include "access/gin.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/predtest.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/nabstime.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/tqual.h"
#include "utils/typcache.h"
#include "utils/varlena.h"


/* Hooks for plugins to get control when we ask for stats */
get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;

static double var_eq_const(VariableStatData *vardata, Oid operator,
			 Datum constval, bool constisnull,
			 bool varonleft);
static double var_eq_non_const(VariableStatData *vardata, Oid operator,
				 Node *other,
				 bool varonleft);
static double ineq_histogram_selectivity(PlannerInfo *root,
						   VariableStatData *vardata,
						   FmgrInfo *opproc, bool isgt,
						   Datum constval, Oid consttype);
static double eqjoinsel_inner(Oid operator,
				VariableStatData *vardata1, VariableStatData *vardata2);
static double eqjoinsel_semi(Oid operator,
			   VariableStatData *vardata1, VariableStatData *vardata2,
			   RelOptInfo *inner_rel);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid);
static void convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound);
static void convert_bytea_to_scalar(Datum value,
						double *scaledvalue,
						Datum lobound,
						double *scaledlobound,
						Datum hibound,
						double *scaledhibound);
static double convert_one_string_to_scalar(char *value,
							 int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
static void examine_simple_variable(PlannerInfo *root, Var *var,
						VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
				   Oid sortop, Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
						  VariableStatData *vardata,
						  Oid sortop,
						  Datum *min, Datum *max);
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static Selectivity prefix_selectivity(PlannerInfo *root,
				   VariableStatData *vardata,
				   Oid vartype, Oid opfamily, Const *prefixcon);
static Selectivity like_selectivity(const char *patt, int pattlen,
				 bool case_insensitive);
static Selectivity regex_selectivity(const char *patt, int pattlen,
				  bool case_insensitive,
				  int fixed_prefix_len);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);
static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);


/*
 *		eqsel			- Selectivity of "=" for any data types.
 *
 * Note: this routine is also used to estimate selectivity for some
 * operators that are not "=" but have comparable selectivity behavior,
 * such as "~=" (geometric approximate-match).  Even for "=", we must
 * keep in mind that the left and right datatypes may differ.
 */
Datum
eqsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	double		selec;

	/*
	 * If expression is not variable = something or something = variable, then
	 * punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

	/*
	 * We can do a lot better if the something is a constant.  (Note: the
	 * Const might result from estimation rather than being a simple constant
	 * in the query.)
	 */
	if (IsA(other, Const))
		selec = var_eq_const(&vardata, operator,
							 ((Const *) other)->constvalue,
							 ((Const *) other)->constisnull,
							 varonleft);
	else
		selec = var_eq_non_const(&vardata, operator, other,
								 varonleft);

	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 * var_eq_const --- eqsel for var = const case
 *
 * This is split out so that some other estimation functions can use it.
 */
static double
var_eq_const(VariableStatData *vardata, Oid operator,
			 Datum constval, bool constisnull,
			 bool varonleft)
{
	double		selec;
	bool		isdefault;

	/*
	 * If the constant is NULL, assume operator is strict and return zero, ie,
	 * operator will never return TRUE.
	 */
	if (constisnull)
		return 0.0;

	/*
	 * If we matched the var to a unique index or DISTINCT clause, assume
	 * there is exactly one match regardless of anything else.  (This is
	 * slightly bogus, since the index or clause's equality operator might be
	 * different from ours, but it's much more likely to be right than
	 * ignoring the information.)
	 */
	if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
		return 1.0 / vardata->rel->tuples;

	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		Datum	   *values;
		int			nvalues;
		float4	   *numbers;
		int			nnumbers;
		bool		match = false;
		int			i;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/*
		 * Is the constant "=" to any of the column's most common values?
		 * (Although the given operator may not really be "=", we will assume
		 * that seeing whether it returns TRUE is an appropriate test.  If you
		 * don't like this, maybe you shouldn't be using eqsel for your
		 * operator...)
		 */
		if (get_attstatsslot(vardata->statsTuple,
							 vardata->atttype, vardata->atttypmod,
							 STATISTIC_KIND_MCV, InvalidOid,
							 NULL,
							 &values, &nvalues,
							 &numbers, &nnumbers))
		{
			FmgrInfo	eqproc;

			fmgr_info(get_opcode(operator), &eqproc);

			for (i = 0; i < nvalues; i++)
			{
				/* be careful to apply operator right way 'round */
				if (varonleft)
					match = DatumGetBool(FunctionCall2Coll(&eqproc,
														   DEFAULT_COLLATION_OID,
														   values[i],
														   constval));
				else
					match = DatumGetBool(FunctionCall2Coll(&eqproc,
														   DEFAULT_COLLATION_OID,
														   constval,
														   values[i]));
				if (match)
					break;
			}
		}
		else
		{
			/* no most-common-value info available */
			values = NULL;
			numbers = NULL;
			i = nvalues = nnumbers = 0;
		}

		if (match)
		{
			/*
			 * Constant is "=" to this common value.  We know selectivity
			 * exactly (or as exactly as ANALYZE could calculate it, anyway).
			 */
			selec = numbers[i];
		}
		else
		{
			/*
			 * Comparison is against a constant that is neither NULL nor any
			 * of the common values.  Its selectivity cannot be more than
			 * this:
			 */
			double		sumcommon = 0.0;
			double		otherdistinct;

			for (i = 0; i < nnumbers; i++)
				sumcommon += numbers[i];
			selec = 1.0 - sumcommon - stats->stanullfrac;
			CLAMP_PROBABILITY(selec);

			/*
			 * and in fact it's probably a good deal less. We approximate that
			 * all the not-common values share this remaining fraction
			 * equally, so we divide by the number of other distinct values.
			 */
			otherdistinct = get_variable_numdistinct(vardata, &isdefault) - nnumbers;
			if (otherdistinct > 1)
				selec /= otherdistinct;

			/*
			 * Another cross-check: selectivity shouldn't be estimated as more
			 * than the least common "most common value".
			 */
			if (nnumbers > 0 && selec > numbers[nnumbers - 1])
				selec = numbers[nnumbers - 1];
		}
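
		/*
		 * Illustrative arithmetic (hypothetical statistics): suppose the MCV
		 * fractions sum to 0.55, stanullfrac is 0.05, and ndistinct is 100
		 * with 10 MCV entries.  Then the no-match branch above starts from
		 * 1.0 - 0.55 - 0.05 = 0.40 and divides by the 90 remaining distinct
		 * values, giving about 0.0044, further capped by the least common
		 * MCV frequency.
		 */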

		free_attstatsslot(vardata->atttype, values, nvalues,
						  numbers, nnumbers);
	}
	else
	{
		/*
		 * No ANALYZE stats available, so make a guess using estimated number
		 * of distinct values and assuming they are equally common. (The guess
		 * is unlikely to be very good, but we do know a few special cases.)
		 */
		selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
	}

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return selec;
}

/*
 * var_eq_non_const --- eqsel for var = something-other-than-const case
 */
static double
var_eq_non_const(VariableStatData *vardata, Oid operator,
				 Node *other,
				 bool varonleft)
{
	double		selec;
	bool		isdefault;

	/*
	 * If we matched the var to a unique index or DISTINCT clause, assume
	 * there is exactly one match regardless of anything else.  (This is
	 * slightly bogus, since the index or clause's equality operator might be
	 * different from ours, but it's much more likely to be right than
	 * ignoring the information.)
	 */
	if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
		return 1.0 / vardata->rel->tuples;

	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		double		ndistinct;
		float4	   *numbers;
		int			nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/*
		 * Search is for a value that we do not know a priori, but we will
		 * assume it is not NULL.  Estimate the selectivity as non-null
		 * fraction divided by number of distinct values, so that we get a
		 * result averaged over all possible values whether common or
		 * uncommon.  (Essentially, we are assuming that the not-yet-known
		 * comparison value is equally likely to be any of the possible
		 * values, regardless of their frequency in the table.  Is that a good
		 * idea?)
		 */
		selec = 1.0 - stats->stanullfrac;
		ndistinct = get_variable_numdistinct(vardata, &isdefault);
		if (ndistinct > 1)
			selec /= ndistinct;
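
		/*
		 * Illustrative arithmetic (hypothetical statistics): with
		 * stanullfrac = 0.01 and ndistinct = 200, the estimate so far is
		 * (1.0 - 0.01) / 200 = 0.00495, before the MCV cross-check below.
		 */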

		/*
		 * Cross-check: selectivity should never be estimated as more than the
		 * most common value's.
		 */
		if (get_attstatsslot(vardata->statsTuple,
							 vardata->atttype, vardata->atttypmod,
							 STATISTIC_KIND_MCV, InvalidOid,
							 NULL,
							 NULL, NULL,
							 &numbers, &nnumbers))
		{
			if (nnumbers > 0 && selec > numbers[0])
				selec = numbers[0];
			free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
		}
	}
	else
	{
		/*
		 * No ANALYZE stats available, so make a guess using estimated number
		 * of distinct values and assuming they are equally common. (The guess
		 * is unlikely to be very good, but we do know a few special cases.)
		 */
		selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
	}

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return selec;
}

/*
 *		neqsel			- Selectivity of "!=" for any data types.
 *
 * This routine is also used for some operators that are not "!="
 * but have comparable selectivity behavior.  See above comments
 * for eqsel().
 */
Datum
neqsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	Oid			eqop;
	float8		result;

	/*
	 * We want 1 - eqsel() where the equality operator is the one associated
	 * with this != operator, that is, its negator.
	 */
	eqop = get_negator(operator);
	if (eqop)
	{
		result = DatumGetFloat8(DirectFunctionCall4(eqsel,
													PointerGetDatum(root),
													ObjectIdGetDatum(eqop),
													PointerGetDatum(args),
													Int32GetDatum(varRelid)));
	}
	else
	{
		/* Use default selectivity (should we raise an error instead?) */
		result = DEFAULT_EQ_SEL;
	}
	result = 1.0 - result;
	PG_RETURN_FLOAT8(result);
}

/*
 *	scalarineqsel		- Selectivity of "<", "<=", ">", ">=" for scalars.
 *
 * This is the guts of both scalarltsel and scalargtsel.  The caller has
 * commuted the clause, if necessary, so that we can treat the variable as
 * being on the left.  The caller must also make sure that the other side
 * of the clause is a non-null Const, and dissect same into a value and
 * datatype.
 *
 * This routine works for any datatype (or pair of datatypes) known to
 * convert_to_scalar().  If it is applied to some other datatype,
 * it will return a default estimate.
 */
static double
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
			  VariableStatData *vardata, Datum constval, Oid consttype)
{
	Form_pg_statistic stats;
	FmgrInfo	opproc;
	double		mcv_selec,
				hist_selec,
				sumcommon;
	double		selec;

	if (!HeapTupleIsValid(vardata->statsTuple))
	{
		/* no stats available, so default result */
		return DEFAULT_INEQ_SEL;
	}
	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

	fmgr_info(get_opcode(operator), &opproc);

	/*
	 * If we have most-common-values info, add up the fractions of the MCV
	 * entries that satisfy MCV OP CONST.  These fractions contribute directly
	 * to the result selectivity.  Also add up the total fraction represented
	 * by MCV entries.
	 */
	mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
								&sumcommon);

	/*
	 * If there is a histogram, determine which bin the constant falls in, and
	 * compute the resulting contribution to selectivity.
	 */
	hist_selec = ineq_histogram_selectivity(root, vardata, &opproc, isgt,
											constval, consttype);

	/*
	 * Now merge the results from the MCV and histogram calculations,
	 * realizing that the histogram covers only the non-null values that are
	 * not listed in MCV.
	 */
	selec = 1.0 - stats->stanullfrac - sumcommon;

	if (hist_selec >= 0.0)
		selec *= hist_selec;
	else
	{
		/*
		 * If no histogram but there are values not accounted for by MCV,
		 * arbitrarily assume half of them will match.
		 */
		selec *= 0.5;
	}

	selec += mcv_selec;
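
	/*
	 * Illustrative arithmetic (hypothetical statistics): with
	 * stanullfrac = 0.02, sumcommon = 0.18, hist_selec = 0.25 and
	 * mcv_selec = 0.10, we get (1.0 - 0.02 - 0.18) * 0.25 + 0.10 = 0.30.
	 */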

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return selec;
}

/*
 *	mcv_selectivity			- Examine the MCV list for selectivity estimates
 *
 * Determine the fraction of the variable's MCV population that satisfies
 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.  Also
 * compute the fraction of the total column population represented by the MCV
 * list.  This code will work for any boolean-returning predicate operator.
 *
 * The function result is the MCV selectivity, and the fraction of the
 * total population is returned into *sumcommonp.  Zeroes are returned
 * if there is no MCV list.
 */
double
mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
				Datum constval, bool varonleft,
				double *sumcommonp)
{
	double		mcv_selec,
				sumcommon;
	Datum	   *values;
	int			nvalues;
	float4	   *numbers;
	int			nnumbers;
	int			i;

	mcv_selec = 0.0;
	sumcommon = 0.0;

	if (HeapTupleIsValid(vardata->statsTuple) &&
		get_attstatsslot(vardata->statsTuple,
						 vardata->atttype, vardata->atttypmod,
						 STATISTIC_KIND_MCV, InvalidOid,
						 NULL,
						 &values, &nvalues,
						 &numbers, &nnumbers))
	{
		for (i = 0; i < nvalues; i++)
		{
			if (varonleft ?
				DatumGetBool(FunctionCall2Coll(opproc,
											   DEFAULT_COLLATION_OID,
											   values[i],
											   constval)) :
				DatumGetBool(FunctionCall2Coll(opproc,
											   DEFAULT_COLLATION_OID,
											   constval,
											   values[i])))
				mcv_selec += numbers[i];
			sumcommon += numbers[i];
		}
		free_attstatsslot(vardata->atttype, values, nvalues,
						  numbers, nnumbers);
	}

	*sumcommonp = sumcommon;
	return mcv_selec;
}

/*
 *	histogram_selectivity	- Examine the histogram for selectivity estimates
 *
 * Determine the fraction of the variable's histogram entries that satisfy
 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
 *
 * This code will work for any boolean-returning predicate operator, whether
 * or not it has anything to do with the histogram sort operator.  We are
 * essentially using the histogram just as a representative sample.  However,
 * small histograms are unlikely to be all that representative, so the caller
 * should be prepared to fall back on some other estimation approach when the
 * histogram is missing or very small.  It may also be prudent to combine this
 * approach with another one when the histogram is small.
 *
 * If the actual histogram size is not at least min_hist_size, we won't bother
 * to do the calculation at all.  Also, if the n_skip parameter is > 0, we
 * ignore the first and last n_skip histogram elements, on the grounds that
 * they are outliers and hence not very representative.  Typical values for
 * these parameters are 10 and 1.
 *
 * The function result is the selectivity, or -1 if there is no histogram
 * or it's smaller than min_hist_size.
 *
 * The output parameter *hist_size receives the actual histogram size,
 * or zero if no histogram.  Callers may use this number to decide how
 * much faith to put in the function result.
 *
 * Note that the result disregards both the most-common-values (if any) and
 * null entries.  The caller is expected to combine this result with
 * statistics for those portions of the column population.  It may also be
 * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
 */
double
histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
					  Datum constval, bool varonleft,
					  int min_hist_size, int n_skip,
					  int *hist_size)
{
	double		result;
	Datum	   *values;
	int			nvalues;

	/* check sanity of parameters */
	Assert(n_skip >= 0);
	Assert(min_hist_size > 2 * n_skip);

	if (HeapTupleIsValid(vardata->statsTuple) &&
		get_attstatsslot(vardata->statsTuple,
						 vardata->atttype, vardata->atttypmod,
						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
						 NULL,
						 &values, &nvalues,
						 NULL, NULL))
	{
		*hist_size = nvalues;
		if (nvalues >= min_hist_size)
		{
			int			nmatch = 0;
			int			i;

			for (i = n_skip; i < nvalues - n_skip; i++)
			{
				if (varonleft ?
					DatumGetBool(FunctionCall2Coll(opproc,
												   DEFAULT_COLLATION_OID,
												   values[i],
												   constval)) :
					DatumGetBool(FunctionCall2Coll(opproc,
												   DEFAULT_COLLATION_OID,
												   constval,
												   values[i])))
					nmatch++;
			}
			result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip));
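
			/*
			 * Illustrative arithmetic (hypothetical numbers): with a
			 * 100-entry histogram and n_skip = 1, entries 1..98 are
			 * examined; if 25 of them satisfy the predicate, the result is
			 * 25 / (100 - 2) = 0.255.
			 */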
		}
		else
			result = -1;
		free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
	}
	else
	{
		*hist_size = 0;
		result = -1;
	}

	return result;
}

/*
 *	ineq_histogram_selectivity	- Examine the histogram for scalarineqsel
 *
 * Determine the fraction of the variable's histogram population that
 * satisfies the inequality condition, ie, VAR < CONST or VAR > CONST.
 *
 * Returns -1 if there is no histogram (valid results will always be >= 0).
 *
 * Note that the result disregards both the most-common-values (if any) and
 * null entries.  The caller is expected to combine this result with
 * statistics for those portions of the column population.
 */
static double
ineq_histogram_selectivity(PlannerInfo *root,
						   VariableStatData *vardata,
						   FmgrInfo *opproc, bool isgt,
						   Datum constval, Oid consttype)
{
	double		hist_selec;
	Oid			hist_op;
	Datum	   *values;
	int			nvalues;

	hist_selec = -1.0;

	/*
	 * Someday, ANALYZE might store more than one histogram per rel/att,
	 * corresponding to more than one possible sort ordering defined for the
	 * column type.  However, to make that work we will need to figure out
	 * which staop to search for --- it's not necessarily the one we have at
	 * hand!  (For example, we might have a '<=' operator rather than the '<'
	 * operator that will appear in staop.)  For now, assume that whatever
	 * appears in pg_statistic is sorted the same way our operator sorts, or
	 * the reverse way if isgt is TRUE.
	 */
	if (HeapTupleIsValid(vardata->statsTuple) &&
		get_attstatsslot(vardata->statsTuple,
						 vardata->atttype, vardata->atttypmod,
						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
						 &hist_op,
						 &values, &nvalues,
						 NULL, NULL))
	{
		if (nvalues > 1)
		{
			/*
			 * Use binary search to find proper location, ie, the first slot
			 * at which the comparison fails.  (If the given operator isn't
			 * actually sort-compatible with the histogram, you'll get garbage
			 * results ... but probably not any more garbage-y than you would
			 * from the old linear search.)
			 *
			 * If the binary search accesses the first or last histogram
			 * entry, we try to replace that endpoint with the true column min
			 * or max as found by get_actual_variable_range().  This
			 * ameliorates misestimates when the min or max is moving as a
			 * result of changes since the last ANALYZE.  Note that this could
			 * result in effectively including MCVs into the histogram that
			 * weren't there before, but we don't try to correct for that.
			 */
			double		histfrac;
			int			lobound = 0;	/* first possible slot to search */
			int			hibound = nvalues;	/* last+1 slot to search */
			bool		have_end = false;

			/*
			 * If there are only two histogram entries, we'll want up-to-date
			 * values for both.  (If there are more than two, we need at most
			 * one of them to be updated, so we deal with that within the
			 * loop.)
			 */
			if (nvalues == 2)
				have_end = get_actual_variable_range(root,
													 vardata,
													 hist_op,
													 &values[0],
													 &values[1]);

			while (lobound < hibound)
			{
				int			probe = (lobound + hibound) / 2;
				bool		ltcmp;

				/*
				 * If we find ourselves about to compare to the first or last
				 * histogram entry, first try to replace it with the actual
				 * current min or max (unless we already did so above).
				 */
				if (probe == 0 && nvalues > 2)
					have_end = get_actual_variable_range(root,
														 vardata,
														 hist_op,
														 &values[0],
														 NULL);
				else if (probe == nvalues - 1 && nvalues > 2)
					have_end = get_actual_variable_range(root,
														 vardata,
														 hist_op,
														 NULL,
														 &values[probe]);

				ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
													   DEFAULT_COLLATION_OID,
													   values[probe],
													   constval));
				if (isgt)
					ltcmp = !ltcmp;
				if (ltcmp)
					lobound = probe + 1;
				else
					hibound = probe;
			}

			if (lobound <= 0)
			{
				/* Constant is below lower histogram boundary. */
				histfrac = 0.0;
			}
			else if (lobound >= nvalues)
			{
				/* Constant is above upper histogram boundary. */
				histfrac = 1.0;
			}
			else
			{
				int			i = lobound;
				double		val,
							high,
							low;
				double		binfrac;

				/*
				 * We have values[i-1] <= constant <= values[i].
				 *
				 * Convert the constant and the two nearest bin boundary
				 * values to a uniform comparison scale, and do a linear
				 * interpolation within this bin.
				 */
				if (convert_to_scalar(constval, consttype, &val,
									  values[i - 1], values[i],
									  vardata->vartype,
									  &low, &high))
				{
					if (high <= low)
					{
						/* cope if bin boundaries appear identical */
						binfrac = 0.5;
					}
					else if (val <= low)
						binfrac = 0.0;
					else if (val >= high)
						binfrac = 1.0;
					else
					{
						binfrac = (val - low) / (high - low);

						/*
						 * Watch out for the possibility that we got a NaN or
						 * Infinity from the division.  This can happen
						 * despite the previous checks, if for example "low"
						 * is -Infinity.
						 */
						if (isnan(binfrac) ||
							binfrac < 0.0 || binfrac > 1.0)
							binfrac = 0.5;
					}
				}
				else
				{
					/*
					 * Ideally we'd produce an error here, on the grounds that
					 * the given operator shouldn't have scalarXXsel
					 * registered as its selectivity func unless we can deal
					 * with its operand types.  But currently, all manner of
					 * stuff is invoking scalarXXsel, so give a default
					 * estimate until that can be fixed.
					 */
					binfrac = 0.5;
				}

				/*
				 * Now, compute the overall selectivity across the values
				 * represented by the histogram.  We have i-1 full bins and
				 * binfrac partial bin below the constant.
				 */
				histfrac = (double) (i - 1) + binfrac;
				histfrac /= (double) (nvalues - 1);
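
				/*
				 * Illustrative arithmetic (hypothetical numbers): with 101
				 * histogram boundaries (100 bins), lobound = 38 and
				 * binfrac = 0.25, histfrac = (37 + 0.25) / 100 = 0.3725.
				 */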
			}

			/*
			 * Now histfrac = fraction of histogram entries below the
			 * constant.
			 *
			 * Account for "<" vs ">"
			 */
			hist_selec = isgt ? (1.0 - histfrac) : histfrac;

			/*
			 * The histogram boundaries are only approximate to begin with,
			 * and may well be out of date anyway.  Therefore, don't believe
			 * extremely small or large selectivity estimates --- unless we
			 * got actual current endpoint values from the table.
			 */
			if (have_end)
				CLAMP_PROBABILITY(hist_selec);
			else
			{
				if (hist_selec < 0.0001)
					hist_selec = 0.0001;
				else if (hist_selec > 0.9999)
					hist_selec = 0.9999;
			}
		}

		free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
	}

	return hist_selec;
}

/*
 *		scalarltsel		- Selectivity of "<" (also "<=") for scalars.
 */
Datum
scalarltsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Datum		constval;
	Oid			consttype;
	bool		isgt;
	double		selec;

	/*
	 * If expression is not variable op something or something op variable,
	 * then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
	}

	/*
	 * If the constant is NULL, assume operator is strict and return zero, ie,
	 * operator will never return TRUE.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	constval = ((Const *) other)->constvalue;
	consttype = ((Const *) other)->consttype;

	/*
	 * Force the var to be on the left to simplify logic in scalarineqsel.
	 */
	if (varonleft)
	{
		/* we have var < other */
		isgt = false;
	}
	else
	{
		/* we have other < var, commute to make var > other */
		operator = get_commutator(operator);
		if (!operator)
		{
			/* Use default selectivity (should we raise an error instead?) */
			ReleaseVariableStats(vardata);
			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
		}
		isgt = true;
	}

	selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);

	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 *		scalargtsel		- Selectivity of ">" (also ">=") for scalars.
1037  */
1038 Datum
1039 scalargtsel(PG_FUNCTION_ARGS)
1040 {
1041         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1042         Oid                     operator = PG_GETARG_OID(1);
1043         List       *args = (List *) PG_GETARG_POINTER(2);
1044         int                     varRelid = PG_GETARG_INT32(3);
1045         VariableStatData vardata;
1046         Node       *other;
1047         bool            varonleft;
1048         Datum           constval;
1049         Oid                     consttype;
1050         bool            isgt;
1051         double          selec;
1052
1053         /*
1054          * If expression is not variable op something or something op variable,
1055          * then punt and return a default estimate.
1056          */
1057         if (!get_restriction_variable(root, args, varRelid,
1058                                                                   &vardata, &other, &varonleft))
1059                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1060
1061         /*
1062          * Can't do anything useful if the something is not a constant, either.
1063          */
1064         if (!IsA(other, Const))
1065         {
1066                 ReleaseVariableStats(vardata);
1067                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1068         }
1069
1070         /*
1071          * If the constant is NULL, assume operator is strict and return zero, ie,
1072          * operator will never return TRUE.
1073          */
1074         if (((Const *) other)->constisnull)
1075         {
1076                 ReleaseVariableStats(vardata);
1077                 PG_RETURN_FLOAT8(0.0);
1078         }
1079         constval = ((Const *) other)->constvalue;
1080         consttype = ((Const *) other)->consttype;
1081
1082         /*
1083          * Force the var to be on the left to simplify logic in scalarineqsel.
1084          */
1085         if (varonleft)
1086         {
1087                 /* we have var > other */
1088                 isgt = true;
1089         }
1090         else
1091         {
1092                 /* we have other > var, commute to make var < other */
1093                 operator = get_commutator(operator);
1094                 if (!operator)
1095                 {
1096                         /* Use default selectivity (should we raise an error instead?) */
1097                         ReleaseVariableStats(vardata);
1098                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1099                 }
1100                 isgt = false;
1101         }
1102
1103         selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
1104
1105         ReleaseVariableStats(vardata);
1106
1107         PG_RETURN_FLOAT8((float8) selec);
1108 }
1109
1110 /*
1111  * patternsel                   - Generic code for pattern-match selectivity.
1112  */
1113 static double
1114 patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
1115 {
1116         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1117         Oid                     operator = PG_GETARG_OID(1);
1118         List       *args = (List *) PG_GETARG_POINTER(2);
1119         int                     varRelid = PG_GETARG_INT32(3);
1120         Oid                     collation = PG_GET_COLLATION();
1121         VariableStatData vardata;
1122         Node       *other;
1123         bool            varonleft;
1124         Datum           constval;
1125         Oid                     consttype;
1126         Oid                     vartype;
1127         Oid                     opfamily;
1128         Pattern_Prefix_Status pstatus;
1129         Const      *patt;
1130         Const      *prefix = NULL;
1131         Selectivity rest_selec = 0;
1132         double          result;
1133
1134         /*
1135          * If this is for a NOT LIKE or similar operator, get the corresponding
1136          * positive-match operator and work with that.  Set result to the correct
1137          * default estimate, too.
1138          */
1139         if (negate)
1140         {
1141                 operator = get_negator(operator);
1142                 if (!OidIsValid(operator))
1143                         elog(ERROR, "patternsel called for operator without a negator");
1144                 result = 1.0 - DEFAULT_MATCH_SEL;
1145         }
1146         else
1147         {
1148                 result = DEFAULT_MATCH_SEL;
1149         }
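        /*
         * For instance, when this is reached via nlikesel the operator is
         * NOT LIKE ("!~~"); get_negator maps it to LIKE ("~~"), the rest of
         * the function estimates the positive match, and the final return
         * statement flips the result back, which is why the default here
         * starts at 1.0 - DEFAULT_MATCH_SEL.
         */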
1150
1151         /*
1152          * If expression is not variable op constant, then punt and return a
1153          * default estimate.
1154          */
1155         if (!get_restriction_variable(root, args, varRelid,
1156                                                                   &vardata, &other, &varonleft))
1157                 return result;
1158         if (!varonleft || !IsA(other, Const))
1159         {
1160                 ReleaseVariableStats(vardata);
1161                 return result;
1162         }
1163
1164         /*
1165          * If the constant is NULL, assume operator is strict and return zero, ie,
1166          * operator will never return TRUE.  (It's zero even for a negator op.)
1167          */
1168         if (((Const *) other)->constisnull)
1169         {
1170                 ReleaseVariableStats(vardata);
1171                 return 0.0;
1172         }
1173         constval = ((Const *) other)->constvalue;
1174         consttype = ((Const *) other)->consttype;
1175
1176         /*
1177          * The right-hand const is type text or bytea for all supported operators.
1178          * We do not expect to see binary-compatible types here, since
1179          * const-folding should have relabeled the const to exactly match the
1180          * operator's declared type.
1181          */
1182         if (consttype != TEXTOID && consttype != BYTEAOID)
1183         {
1184                 ReleaseVariableStats(vardata);
1185                 return result;
1186         }
1187
1188         /*
1189          * Similarly, the exposed type of the left-hand side should be one of
1190          * those we know.  (Do not look at vardata.atttype, which might be
1191          * something binary-compatible but different.)  We can use it to choose
1192          * the index opfamily from which we must draw the comparison operators.
1193          *
1194          * NOTE: It would be more correct to use the PATTERN opfamilies than the
1195          * simple ones, but at the moment ANALYZE will not generate statistics for
1196          * the PATTERN operators.  But our results are so approximate anyway that
1197          * it probably hardly matters.
1198          */
1199         vartype = vardata.vartype;
1200
1201         switch (vartype)
1202         {
1203                 case TEXTOID:
1204                         opfamily = TEXT_BTREE_FAM_OID;
1205                         break;
1206                 case BPCHAROID:
1207                         opfamily = BPCHAR_BTREE_FAM_OID;
1208                         break;
1209                 case NAMEOID:
1210                         opfamily = NAME_BTREE_FAM_OID;
1211                         break;
1212                 case BYTEAOID:
1213                         opfamily = BYTEA_BTREE_FAM_OID;
1214                         break;
1215                 default:
1216                         ReleaseVariableStats(vardata);
1217                         return result;
1218         }
1219
1220         /*
1221          * Pull out any fixed prefix implied by the pattern, and estimate the
1222          * fractional selectivity of the remainder of the pattern.  Unlike many of
1223          * the other functions in this file, we use the pattern operator's actual
1224          * collation for this step.  This is not because we expect the collation
1225          * to make a big difference in the selectivity estimate (it seldom would),
1226          * but because we want to be sure we cache compiled regexps under the
1227          * right cache key, so that they can be re-used at runtime.
1228          */
1229         patt = (Const *) other;
1230         pstatus = pattern_fixed_prefix(patt, ptype, collation,
1231                                                                    &prefix, &rest_selec);
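        /*
         * Illustration with hypothetical patterns: for LIKE 'abc%' the fixed
         * prefix is 'abc' and pstatus is Pattern_Prefix_Partial, with
         * rest_selec covering the trailing '%'; for LIKE 'abc' (no wildcards)
         * pstatus is Pattern_Prefix_Exact; and for a pattern with no usable
         * prefix, such as LIKE '%abc', prefix comes back NULL
         * (Pattern_Prefix_None).
         */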
1232
1233         /*
1234          * If necessary, coerce the prefix constant to the right type.
1235          */
1236         if (prefix && prefix->consttype != vartype)
1237         {
1238                 char       *prefixstr;
1239
1240                 switch (prefix->consttype)
1241                 {
1242                         case TEXTOID:
1243                                 prefixstr = TextDatumGetCString(prefix->constvalue);
1244                                 break;
1245                         case BYTEAOID:
1246                                 prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
1247                                                                                                                 prefix->constvalue));
1248                                 break;
1249                         default:
1250                                 elog(ERROR, "unrecognized consttype: %u",
1251                                          prefix->consttype);
1252                                 ReleaseVariableStats(vardata);
1253                                 return result;
1254                 }
1255                 prefix = string_to_const(prefixstr, vartype);
1256                 pfree(prefixstr);
1257         }
1258
1259         if (pstatus == Pattern_Prefix_Exact)
1260         {
1261                 /*
1262                  * Pattern specifies an exact match, so pretend operator is '='
1263                  */
1264                 Oid                     eqopr = get_opfamily_member(opfamily, vartype, vartype,
1265                                                                                                 BTEqualStrategyNumber);
1266
1267                 if (eqopr == InvalidOid)
1268                         elog(ERROR, "no = operator for opfamily %u", opfamily);
1269                 result = var_eq_const(&vardata, eqopr, prefix->constvalue,
1270                                                           false, true);
1271         }
1272         else
1273         {
1274                 /*
1275                  * Not exact-match pattern.  If we have a sufficiently large
1276                  * histogram, estimate selectivity for the histogram part of the
1277                  * population by counting matches in the histogram.  If not, estimate
1278                  * selectivity of the fixed prefix and remainder of pattern
1279                  * separately, then combine the two to get an estimate of the
1280                  * selectivity for the part of the column population represented by
1281                  * the histogram.  (For histograms of 10 to 100 entries, we combine
1282                  * these two approaches.)
1283                  *
1284                  * We then add up data for any most-common-value (MCV) entries; these are
1285                  * not in the histogram population, and we can get exact answers for
1286                  * them by applying the pattern operator, so there's no reason to
1287                  * approximate.  (If the MCVs cover a significant part of the total
1288                  * population, this gives us a big leg up in accuracy.)
1289                  */
1290                 Selectivity selec;
1291                 int                     hist_size;
1292                 FmgrInfo        opproc;
1293                 double          nullfrac,
1294                                         mcv_selec,
1295                                         sumcommon;
1296
1297                 /* Try to use the histogram entries to get selectivity */
1298                 fmgr_info(get_opcode(operator), &opproc);
1299
1300                 selec = histogram_selectivity(&vardata, &opproc, constval, true,
1301                                                                           10, 1, &hist_size);
1302
1303                 /* If fewer than 100 histogram entries, bring the heuristic estimate into play */
1304                 if (hist_size < 100)
1305                 {
1306                         Selectivity heursel;
1307                         Selectivity prefixsel;
1308
1309                         if (pstatus == Pattern_Prefix_Partial)
1310                                 prefixsel = prefix_selectivity(root, &vardata, vartype,
1311                                                                                            opfamily, prefix);
1312                         else
1313                                 prefixsel = 1.0;
1314                         heursel = prefixsel * rest_selec;
1315
1316                         if (selec < 0)          /* fewer than 10 histogram entries? */
1317                                 selec = heursel;
1318                         else
1319                         {
1320                                 /*
1321                                  * For histogram sizes from 10 to 100, we combine the
1322                                  * histogram and heuristic selectivities, putting progressively
1323                                  * more trust in the histogram as its size grows.
1324                                  */
1325                                 double          hist_weight = hist_size / 100.0;
1326
1327                                 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
1328                         }
1329                 }
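                /*
                 * Rough arithmetic sketch (numbers are hypothetical): with
                 * hist_size = 40, hist_weight is 0.4, so the blended estimate
                 * is 0.4 * (histogram match fraction) + 0.6 * heursel.
                 */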
1330
1331                 /* In any case, don't believe extremely small or large estimates. */
1332                 if (selec < 0.0001)
1333                         selec = 0.0001;
1334                 else if (selec > 0.9999)
1335                         selec = 0.9999;
1336
1337                 /*
1338                  * If we have most-common-values info, add up the fractions of the MCV
1339                  * entries that satisfy MCV OP PATTERN.  These fractions contribute
1340                  * directly to the result selectivity.  Also add up the total fraction
1341                  * represented by MCV entries.
1342                  */
1343                 mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
1344                                                                         &sumcommon);
1345
1346                 if (HeapTupleIsValid(vardata.statsTuple))
1347                         nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
1348                 else
1349                         nullfrac = 0.0;
1350
1351                 /*
1352                  * Now merge the results from the MCV and histogram calculations,
1353                  * realizing that the histogram covers only the non-null values that
1354                  * are not listed in the MCV list.
1355                  */
1356                 selec *= 1.0 - nullfrac - sumcommon;
1357                 selec += mcv_selec;
1358
1359                 /* result should be in range, but make sure... */
1360                 CLAMP_PROBABILITY(selec);
1361                 result = selec;
1362         }
1363
1364         if (prefix)
1365         {
1366                 pfree(DatumGetPointer(prefix->constvalue));
1367                 pfree(prefix);
1368         }
1369
1370         ReleaseVariableStats(vardata);
1371
1372         return negate ? (1.0 - result) : result;
1373 }
1374
1375 /*
1376  *              regexeqsel              - Selectivity of regular-expression pattern match.
1377  */
1378 Datum
1379 regexeqsel(PG_FUNCTION_ARGS)
1380 {
1381         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
1382 }
1383
1384 /*
1385  *              icregexeqsel    - Selectivity of case-insensitive regex match.
1386  */
1387 Datum
1388 icregexeqsel(PG_FUNCTION_ARGS)
1389 {
1390         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
1391 }
1392
1393 /*
1394  *              likesel                 - Selectivity of LIKE pattern match.
1395  */
1396 Datum
1397 likesel(PG_FUNCTION_ARGS)
1398 {
1399         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
1400 }
1401
1402 /*
1403  *              iclikesel                       - Selectivity of ILIKE pattern match.
1404  */
1405 Datum
1406 iclikesel(PG_FUNCTION_ARGS)
1407 {
1408         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
1409 }
1410
1411 /*
1412  *              regexnesel              - Selectivity of regular-expression pattern non-match.
1413  */
1414 Datum
1415 regexnesel(PG_FUNCTION_ARGS)
1416 {
1417         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
1418 }
1419
1420 /*
1421  *              icregexnesel    - Selectivity of case-insensitive regex non-match.
1422  */
1423 Datum
1424 icregexnesel(PG_FUNCTION_ARGS)
1425 {
1426         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
1427 }
1428
1429 /*
1430  *              nlikesel                - Selectivity of LIKE pattern non-match.
1431  */
1432 Datum
1433 nlikesel(PG_FUNCTION_ARGS)
1434 {
1435         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
1436 }
1437
1438 /*
1439  *              icnlikesel              - Selectivity of ILIKE pattern non-match.
1440  */
1441 Datum
1442 icnlikesel(PG_FUNCTION_ARGS)
1443 {
1444         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
1445 }
1446
1447 /*
1448  *              boolvarsel              - Selectivity of Boolean variable.
1449  *
1450  * This can actually be called on any boolean-valued expression.  If it
1451  * involves only Vars of the specified relation, and if there are statistics
1452  * about the Var or expression (the latter is possible if it's indexed) then
1453  * we'll produce a real estimate; otherwise it's just a default.
1454  */
1455 Selectivity
1456 boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
1457 {
1458         VariableStatData vardata;
1459         double          selec;
1460
1461         examine_variable(root, arg, varRelid, &vardata);
1462         if (HeapTupleIsValid(vardata.statsTuple))
1463         {
1464                 /*
1465                  * A boolean variable V is equivalent to the clause V = 't', so we
1466                  * compute the selectivity as if that is what we have.
1467                  */
1468                 selec = var_eq_const(&vardata, BooleanEqualOperator,
1469                                                          BoolGetDatum(true), false, true);
1470         }
1471         else if (is_funcclause(arg))
1472         {
1473                 /*
1474                  * If we have no stats and it's a function call, estimate 0.3333333.
1475                  * This seems a pretty unprincipled choice, but Postgres has been
1476                  * using that estimate for function calls since 1992.  The hoariness
1477                  * of this behavior suggests that we should not be in too much hurry
1478                  * to use another value.
1479                  */
1480                 selec = 0.3333333;
1481         }
1482         else
1483         {
1484                 /* Otherwise, the default estimate is 0.5 */
1485                 selec = 0.5;
1486         }
1487         ReleaseVariableStats(vardata);
1488         return selec;
1489 }
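/*
 * For example (hypothetical queries): "WHERE boolcol" is estimated from the
 * column's statistics as if it were "boolcol = true"; "WHERE f(x)" with no
 * stats available gets the historical 0.3333333; any other boolean expression
 * without statistics gets 0.5.
 */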
1490
1491 /*
1492  *              booltestsel             - Selectivity of BooleanTest Node.
1493  */
1494 Selectivity
1495 booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
1496                         int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1497 {
1498         VariableStatData vardata;
1499         double          selec;
1500
1501         examine_variable(root, arg, varRelid, &vardata);
1502
1503         if (HeapTupleIsValid(vardata.statsTuple))
1504         {
1505                 Form_pg_statistic stats;
1506                 double          freq_null;
1507                 Datum      *values;
1508                 int                     nvalues;
1509                 float4     *numbers;
1510                 int                     nnumbers;
1511
1512                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1513                 freq_null = stats->stanullfrac;
1514
1515                 if (get_attstatsslot(vardata.statsTuple,
1516                                                          vardata.atttype, vardata.atttypmod,
1517                                                          STATISTIC_KIND_MCV, InvalidOid,
1518                                                          NULL,
1519                                                          &values, &nvalues,
1520                                                          &numbers, &nnumbers)
1521                         && nnumbers > 0)
1522                 {
1523                         double          freq_true;
1524                         double          freq_false;
1525
1526                         /*
1527                          * Get first MCV frequency and derive frequency for true.
1528                          */
1529                         if (DatumGetBool(values[0]))
1530                                 freq_true = numbers[0];
1531                         else
1532                                 freq_true = 1.0 - numbers[0] - freq_null;
1533
1534                         /*
1535                          * Next derive the frequency for false, then use these frequencies
1536                          * to compute the selectivity for each test type.
1537                          */
1538                         freq_false = 1.0 - freq_true - freq_null;
1539
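                        /*
                         * Worked example with hypothetical statistics: if the first MCV
                         * is "true" with frequency 0.6 and stanullfrac is 0.1, then
                         * freq_true = 0.6 and freq_false = 0.3, so IS TRUE yields 0.6,
                         * IS FALSE 0.3, IS NOT TRUE 0.4 (false plus nulls), and
                         * IS UNKNOWN 0.1.
                         */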
1540                         switch (booltesttype)
1541                         {
1542                                 case IS_UNKNOWN:
1543                                         /* select only NULL values */
1544                                         selec = freq_null;
1545                                         break;
1546                                 case IS_NOT_UNKNOWN:
1547                                         /* select non-NULL values */
1548                                         selec = 1.0 - freq_null;
1549                                         break;
1550                                 case IS_TRUE:
1551                                         /* select only TRUE values */
1552                                         selec = freq_true;
1553                                         break;
1554                                 case IS_NOT_TRUE:
1555                                         /* select non-TRUE values */
1556                                         selec = 1.0 - freq_true;
1557                                         break;
1558                                 case IS_FALSE:
1559                                         /* select only FALSE values */
1560                                         selec = freq_false;
1561                                         break;
1562                                 case IS_NOT_FALSE:
1563                                         /* select non-FALSE values */
1564                                         selec = 1.0 - freq_false;
1565                                         break;
1566                                 default:
1567                                         elog(ERROR, "unrecognized booltesttype: %d",
1568                                                  (int) booltesttype);
1569                                         selec = 0.0;    /* Keep compiler quiet */
1570                                         break;
1571                         }
1572
1573                         free_attstatsslot(vardata.atttype, values, nvalues,
1574                                                           numbers, nnumbers);
1575                 }
1576                 else
1577                 {
1578                         /*
1579                          * No most-common-value info available. Still have null fraction
1580                          * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1581                          * for null fraction and assume a 50-50 split of TRUE and FALSE.
1582                          */
1583                         switch (booltesttype)
1584                         {
1585                                 case IS_UNKNOWN:
1586                                         /* select only NULL values */
1587                                         selec = freq_null;
1588                                         break;
1589                                 case IS_NOT_UNKNOWN:
1590                                         /* select non-NULL values */
1591                                         selec = 1.0 - freq_null;
1592                                         break;
1593                                 case IS_TRUE:
1594                                 case IS_FALSE:
1595                                         /* Assume we select half of the non-NULL values */
1596                                         selec = (1.0 - freq_null) / 2.0;
1597                                         break;
1598                                 case IS_NOT_TRUE:
1599                                 case IS_NOT_FALSE:
1600                                         /* Assume we select NULLs plus half of the non-NULLs */
1601                                         /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1602                                         selec = (freq_null + 1.0) / 2.0;
1603                                         break;
1604                                 default:
1605                                         elog(ERROR, "unrecognized booltesttype: %d",
1606                                                  (int) booltesttype);
1607                                         selec = 0.0;    /* Keep compiler quiet */
1608                                         break;
1609                         }
1610                 }
1611         }
1612         else
1613         {
1614                 /*
1615                  * If we can't get variable statistics for the argument, perhaps
1616                  * clause_selectivity can do something with it.  We ignore the
1617                  * possibility of a NULL value when using clause_selectivity, and just
1618                  * assume the value is either TRUE or FALSE.
1619                  */
1620                 switch (booltesttype)
1621                 {
1622                         case IS_UNKNOWN:
1623                                 selec = DEFAULT_UNK_SEL;
1624                                 break;
1625                         case IS_NOT_UNKNOWN:
1626                                 selec = DEFAULT_NOT_UNK_SEL;
1627                                 break;
1628                         case IS_TRUE:
1629                         case IS_NOT_FALSE:
1630                                 selec = (double) clause_selectivity(root, arg,
1631                                                                                                         varRelid,
1632                                                                                                         jointype, sjinfo);
1633                                 break;
1634                         case IS_FALSE:
1635                         case IS_NOT_TRUE:
1636                                 selec = 1.0 - (double) clause_selectivity(root, arg,
1637                                                                                                                   varRelid,
1638                                                                                                                   jointype, sjinfo);
1639                                 break;
1640                         default:
1641                                 elog(ERROR, "unrecognized booltesttype: %d",
1642                                          (int) booltesttype);
1643                                 selec = 0.0;    /* Keep compiler quiet */
1644                                 break;
1645                 }
1646         }
1647
1648         ReleaseVariableStats(vardata);
1649
1650         /* result should be in range, but make sure... */
1651         CLAMP_PROBABILITY(selec);
1652
1653         return (Selectivity) selec;
1654 }
1655
1656 /*
1657  *              nulltestsel             - Selectivity of NullTest Node.
1658  */
1659 Selectivity
1660 nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
1661                         int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1662 {
1663         VariableStatData vardata;
1664         double          selec;
1665
1666         examine_variable(root, arg, varRelid, &vardata);
1667
1668         if (HeapTupleIsValid(vardata.statsTuple))
1669         {
1670                 Form_pg_statistic stats;
1671                 double          freq_null;
1672
1673                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1674                 freq_null = stats->stanullfrac;
1675
1676                 switch (nulltesttype)
1677                 {
1678                         case IS_NULL:
1679
1680                                 /*
1681                                  * Use freq_null directly.
1682                                  */
1683                                 selec = freq_null;
1684                                 break;
1685                         case IS_NOT_NULL:
1686
1687                                 /*
1688                                  * Select not unknown (not null) values. Calculate from
1689                                  * freq_null.
1690                                  */
1691                                 selec = 1.0 - freq_null;
1692                                 break;
1693                         default:
1694                                 elog(ERROR, "unrecognized nulltesttype: %d",
1695                                          (int) nulltesttype);
1696                                 return (Selectivity) 0; /* keep compiler quiet */
1697                 }
1698         }
1699         else
1700         {
1701                 /*
1702                  * No ANALYZE stats available, so make a guess
1703                  */
1704                 switch (nulltesttype)
1705                 {
1706                         case IS_NULL:
1707                                 selec = DEFAULT_UNK_SEL;
1708                                 break;
1709                         case IS_NOT_NULL:
1710                                 selec = DEFAULT_NOT_UNK_SEL;
1711                                 break;
1712                         default:
1713                                 elog(ERROR, "unrecognized nulltesttype: %d",
1714                                          (int) nulltesttype);
1715                                 return (Selectivity) 0; /* keep compiler quiet */
1716                 }
1717         }
1718
1719         ReleaseVariableStats(vardata);
1720
1721         /* result should be in range, but make sure... */
1722         CLAMP_PROBABILITY(selec);
1723
1724         return (Selectivity) selec;
1725 }
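/*
 * For example (hypothetical stats): a column with stanullfrac = 0.25 gives
 * selectivity 0.25 for IS NULL and 0.75 for IS NOT NULL; without ANALYZE
 * statistics, the DEFAULT_UNK_SEL / DEFAULT_NOT_UNK_SEL constants are used
 * instead.
 */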
1726
1727 /*
1728  * strip_array_coercion - strip binary-compatible relabeling from an array expr
1729  *
1730  * For array values, the parser normally generates ArrayCoerceExpr conversions,
1731  * but it seems possible that RelabelType might show up.  Also, the planner
1732  * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1733  * so we need to be ready to deal with more than one level.
1734  */
1735 static Node *
1736 strip_array_coercion(Node *node)
1737 {
1738         for (;;)
1739         {
1740                 if (node && IsA(node, ArrayCoerceExpr) &&
1741                         ((ArrayCoerceExpr *) node)->elemfuncid == InvalidOid)
1742                 {
1743                         node = (Node *) ((ArrayCoerceExpr *) node)->arg;
1744                 }
1745                 else if (node && IsA(node, RelabelType))
1746                 {
1747                         /* We don't really expect this case, but may as well cope */
1748                         node = (Node *) ((RelabelType *) node)->arg;
1749                 }
1750                 else
1751                         break;
1752         }
1753         return node;
1754 }
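/*
 * For instance, if a varchar[] column is compared as text[], the parser wraps
 * the Var in a binary-compatible ArrayCoerceExpr (elemfuncid = InvalidOid);
 * strip_array_coercion peels that off (along with any RelabelType or further
 * no-op coercions) so callers see the underlying array expression.
 */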
1755
1756 /*
1757  *              scalararraysel          - Selectivity of ScalarArrayOpExpr Node.
1758  */
1759 Selectivity
1760 scalararraysel(PlannerInfo *root,
1761                            ScalarArrayOpExpr *clause,
1762                            bool is_join_clause,
1763                            int varRelid,
1764                            JoinType jointype,
1765                            SpecialJoinInfo *sjinfo)
1766 {
1767         Oid                     operator = clause->opno;
1768         bool            useOr = clause->useOr;
1769         bool            isEquality = false;
1770         bool            isInequality = false;
1771         Node       *leftop;
1772         Node       *rightop;
1773         Oid                     nominal_element_type;
1774         Oid                     nominal_element_collation;
1775         TypeCacheEntry *typentry;
1776         RegProcedure oprsel;
1777         FmgrInfo        oprselproc;
1778         Selectivity s1;
1779         Selectivity s1disjoint;
1780
1781         /* First, deconstruct the expression */
1782         Assert(list_length(clause->args) == 2);
1783         leftop = (Node *) linitial(clause->args);
1784         rightop = (Node *) lsecond(clause->args);
1785
1786         /* aggressively reduce both sides to constants */
1787         leftop = estimate_expression_value(root, leftop);
1788         rightop = estimate_expression_value(root, rightop);
1789
1790         /* get nominal (after relabeling) element type of rightop */
1791         nominal_element_type = get_base_element_type(exprType(rightop));
1792         if (!OidIsValid(nominal_element_type))
1793                 return (Selectivity) 0.5;               /* probably shouldn't happen */
1794         /* get nominal collation, too, for generating constants */
1795         nominal_element_collation = exprCollation(rightop);
1796
1797         /* look through any binary-compatible relabeling of rightop */
1798         rightop = strip_array_coercion(rightop);
1799
1800         /*
1801          * Detect whether the operator is the default equality or inequality
1802          * operator of the array element type.
1803          */
1804         typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1805         if (OidIsValid(typentry->eq_opr))
1806         {
1807                 if (operator == typentry->eq_opr)
1808                         isEquality = true;
1809                 else if (get_negator(operator) == typentry->eq_opr)
1810                         isInequality = true;
1811         }
1812
1813         /*
1814          * If it is equality or inequality, we might be able to estimate this as a
1815          * form of array containment; for instance "const = ANY(column)" can be
1816          * treated as "ARRAY[const] <@ column".  scalararraysel_containment tries
1817          * that, and returns the selectivity estimate if successful, or -1 if not.
1818          */
1819         if ((isEquality || isInequality) && !is_join_clause)
1820         {
1821                 s1 = scalararraysel_containment(root, leftop, rightop,
1822                                                                                 nominal_element_type,
1823                                                                                 isEquality, useOr, varRelid);
1824                 if (s1 >= 0.0)
1825                         return s1;
1826         }
1827
1828         /*
1829          * Look up the underlying operator's selectivity estimator. Punt if it
1830          * hasn't got one.
1831          */
1832         if (is_join_clause)
1833                 oprsel = get_oprjoin(operator);
1834         else
1835                 oprsel = get_oprrest(operator);
1836         if (!oprsel)
1837                 return (Selectivity) 0.5;
1838         fmgr_info(oprsel, &oprselproc);
1839
1840         /*
1841          * In the array-containment check above, we must only believe that an
1842          * operator is equality or inequality if it is the default btree equality
1843          * operator (or its negator) for the element type, since those are the
1844          * operators that array containment will use.  But in what follows, we can
1845          * be a little laxer, and also believe that any operators using eqsel() or
1846          * neqsel() as selectivity estimator act like equality or inequality.
1847          */
1848         if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1849                 isEquality = true;
1850         else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1851                 isInequality = true;
1852
1853         /*
1854          * We consider three cases:
1855          *
1856          * 1. rightop is an Array constant: deconstruct the array, apply the
1857          * operator's selectivity function for each array element, and merge the
1858          * results in the same way that clausesel.c does for AND/OR combinations.
1859          *
1860          * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1861          * function for each element of the ARRAY[] construct, and merge.
1862          *
1863          * 3. otherwise, make a guess ...
1864          */
1865         if (rightop && IsA(rightop, Const))
1866         {
1867                 Datum           arraydatum = ((Const *) rightop)->constvalue;
1868                 bool            arrayisnull = ((Const *) rightop)->constisnull;
1869                 ArrayType  *arrayval;
1870                 int16           elmlen;
1871                 bool            elmbyval;
1872                 char            elmalign;
1873                 int                     num_elems;
1874                 Datum      *elem_values;
1875                 bool       *elem_nulls;
1876                 int                     i;
1877
1878                 if (arrayisnull)                /* qual can't succeed if null array */
1879                         return (Selectivity) 0.0;
1880                 arrayval = DatumGetArrayTypeP(arraydatum);
1881                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1882                                                          &elmlen, &elmbyval, &elmalign);
1883                 deconstruct_array(arrayval,
1884                                                   ARR_ELEMTYPE(arrayval),
1885                                                   elmlen, elmbyval, elmalign,
1886                                                   &elem_values, &elem_nulls, &num_elems);
1887
1888                 /*
1889                  * For generic operators, we assume the probability of success is
1890                  * independent for each array element.  But for "= ANY" or "<> ALL",
1891                  * if the array elements are distinct (which'd typically be the case)
1892                  * then the probabilities are disjoint, and we should just sum them.
1893                  *
1894                  * If we were being really tense we would try to confirm that the
1895                  * elements are all distinct, but that would be expensive and it
1896                  * doesn't seem to be worth the cycles; it would amount to penalizing
1897                  * well-written queries in favor of poorly-written ones.  However, we
1898                  * do protect ourselves a little bit by checking whether the
1899                  * disjointness assumption leads to an impossible (out of range)
1900                  * probability; if so, we fall back to the normal calculation.
1901                  */
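                /*
                 * Numeric sketch (hypothetical selectivities): for
                 * "x = ANY (ARRAY[1,2,3])" with each element matching 1% of
                 * the rows, the independence formula below gives about 0.0297
                 * while the disjoint sum gives 0.03; the latter is used as
                 * long as it stays within [0, 1].
                 */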
1902                 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1903
1904                 for (i = 0; i < num_elems; i++)
1905                 {
1906                         List       *args;
1907                         Selectivity s2;
1908
1909                         args = list_make2(leftop,
1910                                                           makeConst(nominal_element_type,
1911                                                                                 -1,
1912                                                                                 nominal_element_collation,
1913                                                                                 elmlen,
1914                                                                                 elem_values[i],
1915                                                                                 elem_nulls[i],
1916                                                                                 elmbyval));
1917                         if (is_join_clause)
1918                                 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1919                                                                                                           clause->inputcollid,
1920                                                                                                           PointerGetDatum(root),
1921                                                                                                   ObjectIdGetDatum(operator),
1922                                                                                                           PointerGetDatum(args),
1923                                                                                                           Int16GetDatum(jointype),
1924                                                                                                    PointerGetDatum(sjinfo)));
1925                         else
1926                                 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1927                                                                                                           clause->inputcollid,
1928                                                                                                           PointerGetDatum(root),
1929                                                                                                   ObjectIdGetDatum(operator),
1930                                                                                                           PointerGetDatum(args),
1931                                                                                                    Int32GetDatum(varRelid)));
1932
1933                         if (useOr)
1934                         {
1935                                 s1 = s1 + s2 - s1 * s2;
1936                                 if (isEquality)
1937                                         s1disjoint += s2;
1938                         }
1939                         else
1940                         {
1941                                 s1 = s1 * s2;
1942                                 if (isInequality)
1943                                         s1disjoint += s2 - 1.0;
1944                         }
1945                 }
1946
1947                 /* accept disjoint-probability estimate if in range */
1948                 if ((useOr ? isEquality : isInequality) &&
1949                         s1disjoint >= 0.0 && s1disjoint <= 1.0)
1950                         s1 = s1disjoint;
1951         }
1952         else if (rightop && IsA(rightop, ArrayExpr) &&
1953                          !((ArrayExpr *) rightop)->multidims)
1954         {
1955                 ArrayExpr  *arrayexpr = (ArrayExpr *) rightop;
1956                 int16           elmlen;
1957                 bool            elmbyval;
1958                 ListCell   *l;
1959
1960                 get_typlenbyval(arrayexpr->element_typeid,
1961                                                 &elmlen, &elmbyval);
1962
1963                 /*
1964                  * We use the assumption of disjoint probabilities here too, although
1965                  * the odds of equal array elements are rather higher if the elements
1966                  * are not all constants (which they won't be, else constant folding
1967                  * would have reduced the ArrayExpr to a Const).  In this path it's
1968                  * critical to have the sanity check on the s1disjoint estimate.
1969                  */
1970                 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1971
1972                 foreach(l, arrayexpr->elements)
1973                 {
1974                         Node       *elem = (Node *) lfirst(l);
1975                         List       *args;
1976                         Selectivity s2;
1977
1978                         /*
1979                          * Theoretically, if elem isn't of nominal_element_type we should
1980                          * insert a RelabelType, but it seems unlikely that any operator
1981                          * estimation function would really care ...
1982                          */
1983                         args = list_make2(leftop, elem);
1984                         if (is_join_clause)
1985                                 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1986                                                                                                           clause->inputcollid,
1987                                                                                                           PointerGetDatum(root),
1988                                                                                                   ObjectIdGetDatum(operator),
1989                                                                                                           PointerGetDatum(args),
1990                                                                                                           Int16GetDatum(jointype),
1991                                                                                                    PointerGetDatum(sjinfo)));
1992                         else
1993                                 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1994                                                                                                           clause->inputcollid,
1995                                                                                                           PointerGetDatum(root),
1996                                                                                                   ObjectIdGetDatum(operator),
1997                                                                                                           PointerGetDatum(args),
1998                                                                                                    Int32GetDatum(varRelid)));
1999
2000                         if (useOr)
2001                         {
2002                                 s1 = s1 + s2 - s1 * s2;
2003                                 if (isEquality)
2004                                         s1disjoint += s2;
2005                         }
2006                         else
2007                         {
2008                                 s1 = s1 * s2;
2009                                 if (isInequality)
2010                                         s1disjoint += s2 - 1.0;
2011                         }
2012                 }
2013
2014                 /* accept disjoint-probability estimate if in range */
2015                 if ((useOr ? isEquality : isInequality) &&
2016                         s1disjoint >= 0.0 && s1disjoint <= 1.0)
2017                         s1 = s1disjoint;
2018         }
2019         else
2020         {
2021                 CaseTestExpr *dummyexpr;
2022                 List       *args;
2023                 Selectivity s2;
2024                 int                     i;
2025
2026                 /*
2027                  * We need a dummy rightop to pass to the operator selectivity
2028                  * routine.  It can be pretty much anything that doesn't look like a
2029                  * constant; CaseTestExpr is a convenient choice.
2030                  */
2031                 dummyexpr = makeNode(CaseTestExpr);
2032                 dummyexpr->typeId = nominal_element_type;
2033                 dummyexpr->typeMod = -1;
2034                 dummyexpr->collation = clause->inputcollid;
2035                 args = list_make2(leftop, dummyexpr);
2036                 if (is_join_clause)
2037                         s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2038                                                                                                   clause->inputcollid,
2039                                                                                                   PointerGetDatum(root),
2040                                                                                                   ObjectIdGetDatum(operator),
2041                                                                                                   PointerGetDatum(args),
2042                                                                                                   Int16GetDatum(jointype),
2043                                                                                                   PointerGetDatum(sjinfo)));
2044                 else
2045                         s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2046                                                                                                   clause->inputcollid,
2047                                                                                                   PointerGetDatum(root),
2048                                                                                                   ObjectIdGetDatum(operator),
2049                                                                                                   PointerGetDatum(args),
2050                                                                                                   Int32GetDatum(varRelid)));
2051                 s1 = useOr ? 0.0 : 1.0;
2052
2053                 /*
2054                  * Arbitrarily assume 10 elements in the eventual array value (see
2055                  * also estimate_array_length).  We don't risk an assumption of
2056                  * disjoint probabilities here.
2057                  */
2058                 for (i = 0; i < 10; i++)
2059                 {
2060                         if (useOr)
2061                                 s1 = s1 + s2 - s1 * s2;
2062                         else
2063                                 s1 = s1 * s2;
2064                 }
2065         }
2066
2067         /* result should be in range, but make sure... */
2068         CLAMP_PROBABILITY(s1);
2069
2070         return s1;
2071 }
2072
2073 /*
2074  * Estimate number of elements in the array yielded by an expression.
2075  *
2076  * It's important that this agree with scalararraysel.
2077  */
2078 int
2079 estimate_array_length(Node *arrayexpr)
2080 {
2081         /* look through any binary-compatible relabeling of arrayexpr */
2082         arrayexpr = strip_array_coercion(arrayexpr);
2083
2084         if (arrayexpr && IsA(arrayexpr, Const))
2085         {
2086                 Datum           arraydatum = ((Const *) arrayexpr)->constvalue;
2087                 bool            arrayisnull = ((Const *) arrayexpr)->constisnull;
2088                 ArrayType  *arrayval;
2089
2090                 if (arrayisnull)
2091                         return 0;
2092                 arrayval = DatumGetArrayTypeP(arraydatum);
2093                 return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2094         }
2095         else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2096                          !((ArrayExpr *) arrayexpr)->multidims)
2097         {
2098                 return list_length(((ArrayExpr *) arrayexpr)->elements);
2099         }
2100         else
2101         {
2102                 /* default guess --- see also scalararraysel */
2103                 return 10;
2104         }
2105 }
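/*
 * For example (hypothetical expressions): "x = ANY ('{1,2,3}'::int[])" yields
 * 3, "x = ANY (ARRAY[a, b])" yields 2, and a non-constant array expression
 * falls back to the default guess of 10.
 */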
2106
2107 /*
2108  *              rowcomparesel           - Selectivity of RowCompareExpr Node.
2109  *
2110  * We estimate RowCompare selectivity by considering just the first (high
2111  * order) columns, which makes it equivalent to an ordinary OpExpr.  While
2112  * this estimate could be refined by considering additional columns, it
2113  * seems unlikely that we could do a lot better without multi-column
2114  * statistics.
2115  */
2116 Selectivity
2117 rowcomparesel(PlannerInfo *root,
2118                           RowCompareExpr *clause,
2119                           int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
2120 {
2121         Selectivity s1;
2122         Oid                     opno = linitial_oid(clause->opnos);
2123         Oid                     inputcollid = linitial_oid(clause->inputcollids);
2124         List       *opargs;
2125         bool            is_join_clause;
2126
2127         /* Build equivalent arg list for single operator */
2128         opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2129
2130         /*
2131          * Decide if it's a join clause.  This should match clausesel.c's
2132          * treat_as_join_clause(), except that we intentionally consider only the
2133          * leading columns and not the rest of the clause.
2134          */
2135         if (varRelid != 0)
2136         {
2137                 /*
2138                  * Caller is forcing restriction mode (eg, because we are examining an
2139                  * inner indexscan qual).
2140                  */
2141                 is_join_clause = false;
2142         }
2143         else if (sjinfo == NULL)
2144         {
2145                 /*
2146                  * It must be a restriction clause, since it's being evaluated at a
2147                  * scan node.
2148                  */
2149                 is_join_clause = false;
2150         }
2151         else
2152         {
2153                 /*
2154                  * Otherwise, it's a join if there's more than one relation used.
2155                  */
2156                 is_join_clause = (NumRelids((Node *) opargs) > 1);
2157         }
2158
2159         if (is_join_clause)
2160         {
2161                 /* Estimate selectivity for a join clause. */
2162                 s1 = join_selectivity(root, opno,
2163                                                           opargs,
2164                                                           inputcollid,
2165                                                           jointype,
2166                                                           sjinfo);
2167         }
2168         else
2169         {
2170                 /* Estimate selectivity for a restriction clause. */
2171                 s1 = restriction_selectivity(root, opno,
2172                                                                          opargs,
2173                                                                          inputcollid,
2174                                                                          varRelid);
2175         }
2176
2177         return s1;
2178 }
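/*
 * For example (hypothetical clause): "(a, b) < (5, 10)" is estimated as if it
 * were simply "a < 5", using that operator's restriction or join estimator as
 * appropriate.
 */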
2179
2180 /*
2181  *              eqjoinsel               - Join selectivity of "="
2182  */
2183 Datum
2184 eqjoinsel(PG_FUNCTION_ARGS)
2185 {
2186         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2187         Oid                     operator = PG_GETARG_OID(1);
2188         List       *args = (List *) PG_GETARG_POINTER(2);
2189
2190 #ifdef NOT_USED
2191         JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
2192 #endif
2193         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2194         double          selec;
2195         VariableStatData vardata1;
2196         VariableStatData vardata2;
2197         bool            join_is_reversed;
2198         RelOptInfo *inner_rel;
2199
2200         get_join_variables(root, args, sjinfo,
2201                                            &vardata1, &vardata2, &join_is_reversed);
2202
2203         switch (sjinfo->jointype)
2204         {
2205                 case JOIN_INNER:
2206                 case JOIN_LEFT:
2207                 case JOIN_FULL:
2208                         selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2209                         break;
2210                 case JOIN_SEMI:
2211                 case JOIN_ANTI:
2212
2213                         /*
2214                          * Look up the join's inner relation.  min_righthand is sufficient
2215                          * information because neither SEMI nor ANTI joins permit any
2216                          * reassociation into or out of their RHS, so the righthand will
2217                          * always be exactly that set of rels.
2218                          */
2219                         inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2220
2221                         if (!join_is_reversed)
2222                                 selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2223                                                                            inner_rel);
2224                         else
2225                                 selec = eqjoinsel_semi(get_commutator(operator),
2226                                                                            &vardata2, &vardata1,
2227                                                                            inner_rel);
2228                         break;
2229                 default:
2230                         /* other values not expected here */
2231                         elog(ERROR, "unrecognized join type: %d",
2232                                  (int) sjinfo->jointype);
2233                         selec = 0;                      /* keep compiler quiet */
2234                         break;
2235         }
2236
2237         ReleaseVariableStats(vardata1);
2238         ReleaseVariableStats(vardata2);
2239
2240         CLAMP_PROBABILITY(selec);
2241
2242         PG_RETURN_FLOAT8((float8) selec);
2243 }
2244
2245 /*
2246  * eqjoinsel_inner --- eqjoinsel for normal inner join
2247  *
2248  * We also use this for LEFT/FULL outer joins; it's not presently clear
2249  * that it's worth trying to distinguish them here.
2250  */
2251 static double
2252 eqjoinsel_inner(Oid operator,
2253                                 VariableStatData *vardata1, VariableStatData *vardata2)
2254 {
2255         double          selec;
2256         double          nd1;
2257         double          nd2;
2258         bool            isdefault1;
2259         bool            isdefault2;
2260         Form_pg_statistic stats1 = NULL;
2261         Form_pg_statistic stats2 = NULL;
2262         bool            have_mcvs1 = false;
2263         Datum      *values1 = NULL;
2264         int                     nvalues1 = 0;
2265         float4     *numbers1 = NULL;
2266         int                     nnumbers1 = 0;
2267         bool            have_mcvs2 = false;
2268         Datum      *values2 = NULL;
2269         int                     nvalues2 = 0;
2270         float4     *numbers2 = NULL;
2271         int                     nnumbers2 = 0;
2272
2273         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2274         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2275
2276         if (HeapTupleIsValid(vardata1->statsTuple))
2277         {
2278                 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2279                 have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
2280                                                                           vardata1->atttype,
2281                                                                           vardata1->atttypmod,
2282                                                                           STATISTIC_KIND_MCV,
2283                                                                           InvalidOid,
2284                                                                           NULL,
2285                                                                           &values1, &nvalues1,
2286                                                                           &numbers1, &nnumbers1);
2287         }
2288
2289         if (HeapTupleIsValid(vardata2->statsTuple))
2290         {
2291                 stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
2292                 have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
2293                                                                           vardata2->atttype,
2294                                                                           vardata2->atttypmod,
2295                                                                           STATISTIC_KIND_MCV,
2296                                                                           InvalidOid,
2297                                                                           NULL,
2298                                                                           &values2, &nvalues2,
2299                                                                           &numbers2, &nnumbers2);
2300         }
2301
2302         if (have_mcvs1 && have_mcvs2)
2303         {
2304                 /*
2305                  * We have most-common-value lists for both relations.  Run through
2306                  * the lists to see which MCVs actually join to each other with the
2307                  * given operator.  This allows us to determine the exact join
2308                  * selectivity for the portion of the relations represented by the MCV
2309                  * lists.  We still have to estimate for the remaining population, but
2310                  * in a skewed distribution this gives us a big leg up in accuracy.
2311                  * For motivation see the analysis in Y. Ioannidis and S.
2312                  * Christodoulakis, "On the propagation of errors in the size of join
2313                  * results", Technical Report 1018, Computer Science Dept., University
2314                  * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2315                  */
2316                 FmgrInfo        eqproc;
2317                 bool       *hasmatch1;
2318                 bool       *hasmatch2;
2319                 double          nullfrac1 = stats1->stanullfrac;
2320                 double          nullfrac2 = stats2->stanullfrac;
2321                 double          matchprodfreq,
2322                                         matchfreq1,
2323                                         matchfreq2,
2324                                         unmatchfreq1,
2325                                         unmatchfreq2,
2326                                         otherfreq1,
2327                                         otherfreq2,
2328                                         totalsel1,
2329                                         totalsel2;
2330                 int                     i,
2331                                         nmatches;
2332
2333                 fmgr_info(get_opcode(operator), &eqproc);
2334                 hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
2335                 hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
2336
2337                 /*
2338                  * Note we assume that each MCV will match at most one member of the
2339                  * other MCV list.  If the operator isn't really equality, there could
2340                  * be multiple matches --- but we don't look for them, both for speed
2341                  * and because the math wouldn't add up...
2342                  */
2343                 matchprodfreq = 0.0;
2344                 nmatches = 0;
2345                 for (i = 0; i < nvalues1; i++)
2346                 {
2347                         int                     j;
2348
2349                         for (j = 0; j < nvalues2; j++)
2350                         {
2351                                 if (hasmatch2[j])
2352                                         continue;
2353                                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
2354                                                                                                    DEFAULT_COLLATION_OID,
2355                                                                                                    values1[i],
2356                                                                                                    values2[j])))
2357                                 {
2358                                         hasmatch1[i] = hasmatch2[j] = true;
2359                                         matchprodfreq += numbers1[i] * numbers2[j];
2360                                         nmatches++;
2361                                         break;
2362                                 }
2363                         }
2364                 }
2365                 CLAMP_PROBABILITY(matchprodfreq);
2366                 /* Sum up frequencies of matched and unmatched MCVs */
2367                 matchfreq1 = unmatchfreq1 = 0.0;
2368                 for (i = 0; i < nvalues1; i++)
2369                 {
2370                         if (hasmatch1[i])
2371                                 matchfreq1 += numbers1[i];
2372                         else
2373                                 unmatchfreq1 += numbers1[i];
2374                 }
2375                 CLAMP_PROBABILITY(matchfreq1);
2376                 CLAMP_PROBABILITY(unmatchfreq1);
2377                 matchfreq2 = unmatchfreq2 = 0.0;
2378                 for (i = 0; i < nvalues2; i++)
2379                 {
2380                         if (hasmatch2[i])
2381                                 matchfreq2 += numbers2[i];
2382                         else
2383                                 unmatchfreq2 += numbers2[i];
2384                 }
2385                 CLAMP_PROBABILITY(matchfreq2);
2386                 CLAMP_PROBABILITY(unmatchfreq2);
2387                 pfree(hasmatch1);
2388                 pfree(hasmatch2);
2389
2390                 /*
2391                  * Compute total frequency of non-null values that are not in the MCV
2392                  * lists.
2393                  */
2394                 otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2395                 otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2396                 CLAMP_PROBABILITY(otherfreq1);
2397                 CLAMP_PROBABILITY(otherfreq2);
2398
2399                 /*
2400                  * We can estimate the total selectivity from the point of view of
2401                  * relation 1 as: the known selectivity for matched MCVs, plus
2402                  * unmatched MCVs that are assumed to match against random members of
2403                  * relation 2's non-MCV population, plus non-MCV values that are
2404                  * assumed to match against random members of relation 2's unmatched
2405                  * MCVs plus non-MCV values.
2406                  */
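                /*
                 * Editorial note (assumed figures, not from the source): with, say,
                 * matchprodfreq = 0.10, unmatchfreq1 = 0.20, otherfreq1 = 0.30,
                 * unmatchfreq2 = 0.10, otherfreq2 = 0.40, nd2 = 150, nvalues2 = 100
                 * and nmatches = 50, the computation below gives
                 *              totalsel1 = 0.10
                 *                        + 0.20 * 0.40 / (150 - 100)
                 *                        + 0.30 * (0.40 + 0.10) / (150 - 50)
                 *                        = 0.10 + 0.0016 + 0.0015 = 0.1031
                 */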
2407                 totalsel1 = matchprodfreq;
2408                 if (nd2 > nvalues2)
2409                         totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
2410                 if (nd2 > nmatches)
2411                         totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2412                                 (nd2 - nmatches);
2413                 /* Same estimate from the point of view of relation 2. */
2414                 totalsel2 = matchprodfreq;
2415                 if (nd1 > nvalues1)
2416                         totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
2417                 if (nd1 > nmatches)
2418                         totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2419                                 (nd1 - nmatches);
2420
2421                 /*
2422                  * Use the smaller of the two estimates.  This can be justified in
2423                  * essentially the same terms as given below for the no-stats case: to
2424                  * a first approximation, we are estimating from the point of view of
2425                  * the relation with smaller nd.
2426                  */
2427                 selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2428         }
2429         else
2430         {
2431                 /*
2432                  * We do not have MCV lists for both sides.  Estimate the join
2433                  * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2434                  * is plausible if we assume that the join operator is strict and the
2435                  * non-null values are about equally distributed: a given non-null
2436                  * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2437                  * of rel2, so total join rows are at most
2438                  * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2439                  * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2440                  * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2441                  * with MIN() is an upper bound.  Using the MIN() means we estimate
2442                  * from the point of view of the relation with smaller nd (since the
2443                  * larger nd is determining the MIN).  It is reasonable to assume that
2444                  * most tuples in this rel will have join partners, so the bound is
2445                  * probably reasonably tight and should be taken as-is.
2446                  *
2447                  * XXX Can we be smarter if we have an MCV list for just one side? It
2448                  * seems that if we assume equal distribution for the other side, we
2449                  * end up with the same answer anyway.
2450                  */
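                /*
                 * Editorial illustration (assumed numbers, not from the source): if
                 * nd1 = 1000, nd2 = 50, nullfrac1 = 0.1 and nullfrac2 = 0.0, the
                 * fallback below yields (1 - 0.1) * (1 - 0.0) / 1000 = 0.0009,
                 * i.e. we divide by the larger of the two ndistinct estimates.
                 */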
2451                 double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2452                 double          nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2453
2454                 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2455                 if (nd1 > nd2)
2456                         selec /= nd1;
2457                 else
2458                         selec /= nd2;
2459         }
2460
2461         if (have_mcvs1)
2462                 free_attstatsslot(vardata1->atttype, values1, nvalues1,
2463                                                   numbers1, nnumbers1);
2464         if (have_mcvs2)
2465                 free_attstatsslot(vardata2->atttype, values2, nvalues2,
2466                                                   numbers2, nnumbers2);
2467
2468         return selec;
2469 }
2470
2471 /*
2472  * eqjoinsel_semi --- eqjoinsel for semi join
2473  *
2474  * (Also used for anti join, which we are supposed to estimate the same way.)
2475  * Caller has ensured that vardata1 is the LHS variable.
2476  */
2477 static double
2478 eqjoinsel_semi(Oid operator,
2479                            VariableStatData *vardata1, VariableStatData *vardata2,
2480                            RelOptInfo *inner_rel)
2481 {
2482         double          selec;
2483         double          nd1;
2484         double          nd2;
2485         bool            isdefault1;
2486         bool            isdefault2;
2487         Form_pg_statistic stats1 = NULL;
2488         bool            have_mcvs1 = false;
2489         Datum      *values1 = NULL;
2490         int                     nvalues1 = 0;
2491         float4     *numbers1 = NULL;
2492         int                     nnumbers1 = 0;
2493         bool            have_mcvs2 = false;
2494         Datum      *values2 = NULL;
2495         int                     nvalues2 = 0;
2496         float4     *numbers2 = NULL;
2497         int                     nnumbers2 = 0;
2498
2499         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2500         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2501
2502         /*
2503          * We clamp nd2 to be not more than what we estimate the inner relation's
2504          * size to be.  This is intuitively somewhat reasonable since obviously
2505          * there can't be more than that many distinct values coming from the
2506          * inner rel.  The reason for the asymmetry (ie, that we don't clamp nd1
2507          * likewise) is that this is the only pathway by which restriction clauses
2508          * applied to the inner rel will affect the join result size estimate,
2509          * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2510          * only the outer rel's size.  If we clamped nd1 we'd be double-counting
2511          * the selectivity of outer-rel restrictions.
2512          *
2513          * We can apply this clamping both with respect to the base relation from
2514          * which the join variable comes (if there is just one), and to the
2515          * immediate inner input relation of the current join.
2516          *
2517          * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2518          * great, maybe, but it didn't come out of nowhere either.  This is most
2519          * helpful when the inner relation is empty and consequently has no stats.
2520          */
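        /*
         * Editorial illustration (hypothetical numbers): if the inner side's
         * column has nd2 = 10000 distinct values per pg_statistic, but the
         * restriction clauses leave only inner_rel->rows = 500, the code below
         * clamps nd2 to 500 and stops treating it as a default estimate.
         */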
2521         if (vardata2->rel)
2522         {
2523                 if (nd2 >= vardata2->rel->rows)
2524                 {
2525                         nd2 = vardata2->rel->rows;
2526                         isdefault2 = false;
2527                 }
2528         }
2529         if (nd2 >= inner_rel->rows)
2530         {
2531                 nd2 = inner_rel->rows;
2532                 isdefault2 = false;
2533         }
2534
2535         if (HeapTupleIsValid(vardata1->statsTuple))
2536         {
2537                 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2538                 have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
2539                                                                           vardata1->atttype,
2540                                                                           vardata1->atttypmod,
2541                                                                           STATISTIC_KIND_MCV,
2542                                                                           InvalidOid,
2543                                                                           NULL,
2544                                                                           &values1, &nvalues1,
2545                                                                           &numbers1, &nnumbers1);
2546         }
2547
2548         if (HeapTupleIsValid(vardata2->statsTuple))
2549         {
2550                 have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
2551                                                                           vardata2->atttype,
2552                                                                           vardata2->atttypmod,
2553                                                                           STATISTIC_KIND_MCV,
2554                                                                           InvalidOid,
2555                                                                           NULL,
2556                                                                           &values2, &nvalues2,
2557                                                                           &numbers2, &nnumbers2);
2558         }
2559
2560         if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
2561         {
2562                 /*
2563                  * We have most-common-value lists for both relations.  Run through
2564                  * the lists to see which MCVs actually join to each other with the
2565                  * given operator.  This allows us to determine the exact join
2566                  * selectivity for the portion of the relations represented by the MCV
2567                  * lists.  We still have to estimate for the remaining population, but
2568                  * in a skewed distribution this gives us a big leg up in accuracy.
2569                  */
2570                 FmgrInfo        eqproc;
2571                 bool       *hasmatch1;
2572                 bool       *hasmatch2;
2573                 double          nullfrac1 = stats1->stanullfrac;
2574                 double          matchfreq1,
2575                                         uncertainfrac,
2576                                         uncertain;
2577                 int                     i,
2578                                         nmatches,
2579                                         clamped_nvalues2;
2580
2581                 /*
2582                  * The clamping above could have resulted in nd2 being less than
2583                  * nvalues2, in which case we assume that precisely the nd2 most
2584                  * common values in the relation will appear in the join input, and so
2585                  * compare to only the first nd2 members of the MCV list.  Of course
2586                  * this is frequently wrong, but it's the best bet we can make.
2587                  */
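                /*
                 * Editorial illustration (hypothetical numbers): if nvalues2 = 100
                 * but nd2 was clamped to 25 above, only the 25 most common entries
                 * of the inner MCV list are considered as possible matches.
                 */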
2588                 clamped_nvalues2 = Min(nvalues2, nd2);
2589
2590                 fmgr_info(get_opcode(operator), &eqproc);
2591                 hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
2592                 hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2593
2594                 /*
2595                  * Note we assume that each MCV will match at most one member of the
2596                  * other MCV list.  If the operator isn't really equality, there could
2597                  * be multiple matches --- but we don't look for them, both for speed
2598                  * and because the math wouldn't add up...
2599                  */
2600                 nmatches = 0;
2601                 for (i = 0; i < nvalues1; i++)
2602                 {
2603                         int                     j;
2604
2605                         for (j = 0; j < clamped_nvalues2; j++)
2606                         {
2607                                 if (hasmatch2[j])
2608                                         continue;
2609                                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
2610                                                                                                    DEFAULT_COLLATION_OID,
2611                                                                                                    values1[i],
2612                                                                                                    values2[j])))
2613                                 {
2614                                         hasmatch1[i] = hasmatch2[j] = true;
2615                                         nmatches++;
2616                                         break;
2617                                 }
2618                         }
2619                 }
2620                 /* Sum up frequencies of matched MCVs */
2621                 matchfreq1 = 0.0;
2622                 for (i = 0; i < nvalues1; i++)
2623                 {
2624                         if (hasmatch1[i])
2625                                 matchfreq1 += numbers1[i];
2626                 }
2627                 CLAMP_PROBABILITY(matchfreq1);
2628                 pfree(hasmatch1);
2629                 pfree(hasmatch2);
2630
2631                 /*
2632                  * Now we need to estimate the fraction of relation 1 that has at
2633                  * least one join partner.  We know for certain that the matched MCVs
2634                  * do, so that gives us a lower bound, but we're really in the dark
2635                  * about everything else.  Our crude approach is: if nd1 <= nd2 then
2636                  * assume all non-null rel1 rows have join partners, else assume for
2637                  * the uncertain rows that a fraction nd2/nd1 have join partners. We
2638                  * can discount the known-matched MCVs from the distinct-values counts
2639                  * before doing the division.
2640                  *
2641                  * Crude as the above is, it's completely useless if we don't have
2642                  * reliable ndistinct values for both sides.  Hence, if either nd1 or
2643                  * nd2 is default, punt and assume half of the uncertain rows have
2644                  * join partners.
2645                  */
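                /*
                 * Editorial illustration (assumed numbers, not from the source):
                 * with matchfreq1 = 0.40, nullfrac1 = 0.10, nmatches = 50,
                 * nd1 = 1000 and nd2 = 200 (both non-default), the steps below give
                 *              uncertain = 1 - 0.40 - 0.10 = 0.50
                 *              uncertainfrac = (200 - 50) / (1000 - 50) ~= 0.158
                 *              selec ~= 0.40 + 0.158 * 0.50 ~= 0.48
                 */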
2646                 if (!isdefault1 && !isdefault2)
2647                 {
2648                         nd1 -= nmatches;
2649                         nd2 -= nmatches;
2650                         if (nd1 <= nd2 || nd2 < 0)
2651                                 uncertainfrac = 1.0;
2652                         else
2653                                 uncertainfrac = nd2 / nd1;
2654                 }
2655                 else
2656                         uncertainfrac = 0.5;
2657                 uncertain = 1.0 - matchfreq1 - nullfrac1;
2658                 CLAMP_PROBABILITY(uncertain);
2659                 selec = matchfreq1 + uncertainfrac * uncertain;
2660         }
2661         else
2662         {
2663                 /*
2664                  * Without MCV lists for both sides, we can only use the heuristic
2665                  * about nd1 vs nd2.
2666                  */
2667                 double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2668
2669                 if (!isdefault1 && !isdefault2)
2670                 {
2671                         if (nd1 <= nd2 || nd2 < 0)
2672                                 selec = 1.0 - nullfrac1;
2673                         else
2674                                 selec = (nd2 / nd1) * (1.0 - nullfrac1);
2675                 }
2676                 else
2677                         selec = 0.5 * (1.0 - nullfrac1);
2678         }
2679
2680         if (have_mcvs1)
2681                 free_attstatsslot(vardata1->atttype, values1, nvalues1,
2682                                                   numbers1, nnumbers1);
2683         if (have_mcvs2)
2684                 free_attstatsslot(vardata2->atttype, values2, nvalues2,
2685                                                   numbers2, nnumbers2);
2686
2687         return selec;
2688 }
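
#ifdef NOT_USED
/*
 * Editorial sketch, not part of the original file: a stand-alone restatement
 * of eqjoinsel_semi's no-MCV fallback above, with made-up parameter names,
 * to make the nd1-vs-nd2 heuristic concrete.  For example, nd1 = 1000,
 * nd2 = 200, nullfrac1 = 0.1 and reliable (non-default) ndistinct estimates
 * give (200.0 / 1000.0) * (1.0 - 0.1) = 0.18.
 */
static double
eqjoinsel_semi_fallback_sketch(double nd1, double nd2,
                               bool isdefault1, bool isdefault2,
                               double nullfrac1)
{
        if (!isdefault1 && !isdefault2)
        {
                if (nd1 <= nd2 || nd2 < 0)
                        return 1.0 - nullfrac1;
                return (nd2 / nd1) * (1.0 - nullfrac1);
        }
        return 0.5 * (1.0 - nullfrac1);
}
#endif                                                  /* NOT_USED */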
2689
2690 /*
2691  *              neqjoinsel              - Join selectivity of "!="
2692  */
2693 Datum
2694 neqjoinsel(PG_FUNCTION_ARGS)
2695 {
2696         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2697         Oid                     operator = PG_GETARG_OID(1);
2698         List       *args = (List *) PG_GETARG_POINTER(2);
2699         JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
2700         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2701         Oid                     eqop;
2702         float8          result;
2703
2704         /*
2705          * We want 1 - eqjoinsel() where the equality operator is the one
2706          * associated with this != operator, that is, its negator.
2707          */
2708         eqop = get_negator(operator);
2709         if (eqop)
2710         {
2711                 result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
2712                                                                                                         PointerGetDatum(root),
2713                                                                                                         ObjectIdGetDatum(eqop),
2714                                                                                                         PointerGetDatum(args),
2715                                                                                                         Int16GetDatum(jointype),
2716                                                                                                         PointerGetDatum(sjinfo)));
2717         }
2718         else
2719         {
2720                 /* Use default selectivity (should we raise an error instead?) */
2721                 result = DEFAULT_EQ_SEL;
2722         }
2723         result = 1.0 - result;
2724         PG_RETURN_FLOAT8(result);
2725 }
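
/*
 * Editorial note (hypothetical figures): if eqjoinsel for the negator
 * equality operator comes out as 0.002, neqjoinsel reports 1 - 0.002 = 0.998;
 * when no negator is available it reports 1 - DEFAULT_EQ_SEL instead.
 */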
2726
2727 /*
2728  *              scalarltjoinsel - Join selectivity of "<" and "<=" for scalars
2729  */
2730 Datum
2731 scalarltjoinsel(PG_FUNCTION_ARGS)
2732 {
2733         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2734 }
2735
2736 /*
2737  *              scalargtjoinsel - Join selectivity of ">" and ">=" for scalars
2738  */
2739 Datum
2740 scalargtjoinsel(PG_FUNCTION_ARGS)
2741 {
2742         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2743 }
2744
2745 /*
2746  * patternjoinsel               - Generic code for pattern-match join selectivity.
2747  */
2748 static double
2749 patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
2750 {
2751         /* For the moment we just punt. */
2752         return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
2753 }
2754
2755 /*
2756  *              regexeqjoinsel  - Join selectivity of regular-expression pattern match.
2757  */
2758 Datum
2759 regexeqjoinsel(PG_FUNCTION_ARGS)
2760 {
2761         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
2762 }
2763
2764 /*
2765  *              icregexeqjoinsel        - Join selectivity of case-insensitive regex match.
2766  */
2767 Datum
2768 icregexeqjoinsel(PG_FUNCTION_ARGS)
2769 {
2770         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
2771 }
2772
2773 /*
2774  *              likejoinsel                     - Join selectivity of LIKE pattern match.
2775  */
2776 Datum
2777 likejoinsel(PG_FUNCTION_ARGS)
2778 {
2779         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
2780 }
2781
2782 /*
2783  *              iclikejoinsel                   - Join selectivity of ILIKE pattern match.
2784  */
2785 Datum
2786 iclikejoinsel(PG_FUNCTION_ARGS)
2787 {
2788         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
2789 }
2790
2791 /*
2792  *              regexnejoinsel  - Join selectivity of regex non-match.
2793  */
2794 Datum
2795 regexnejoinsel(PG_FUNCTION_ARGS)
2796 {
2797         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
2798 }
2799
2800 /*
2801  *              icregexnejoinsel        - Join selectivity of case-insensitive regex non-match.
2802  */
2803 Datum
2804 icregexnejoinsel(PG_FUNCTION_ARGS)
2805 {
2806         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
2807 }
2808
2809 /*
2810  *              nlikejoinsel            - Join selectivity of LIKE pattern non-match.
2811  */
2812 Datum
2813 nlikejoinsel(PG_FUNCTION_ARGS)
2814 {
2815         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
2816 }
2817
2818 /*
2819  *              icnlikejoinsel          - Join selectivity of ILIKE pattern non-match.
2820  */
2821 Datum
2822 icnlikejoinsel(PG_FUNCTION_ARGS)
2823 {
2824         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
2825 }
2826
2827 /*
2828  * mergejoinscansel                     - Scan selectivity of merge join.
2829  *
2830  * A merge join will stop as soon as it exhausts either input stream.
2831  * Therefore, if we can estimate the ranges of both input variables,
2832  * we can estimate how much of the input will actually be read.  This
2833  * can have a considerable impact on the cost when using indexscans.
2834  *
2835  * Also, we can estimate how much of each input has to be read before the
2836  * first join pair is found, which will affect the join's startup time.
2837  *
2838  * clause should be a clause already known to be mergejoinable.  opfamily,
2839  * strategy, and nulls_first specify the sort ordering being used.
2840  *
2841  * The outputs are:
2842  *              *leftstart is set to the fraction of the left-hand variable expected
2843  *               to be scanned before the first join pair is found (0 to 1).
2844  *              *leftend is set to the fraction of the left-hand variable expected
2845  *               to be scanned before the join terminates (0 to 1).
2846  *              *rightstart, *rightend similarly for the right-hand variable.
2847  */
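/*
 * Editorial illustration (assumed, uniformly distributed data): if the
 * left-hand variable ranges over 1..1000 and the right-hand variable over
 * 1..500 under an ascending sort, the left scan can stop once it passes the
 * right-hand maximum, so *leftend comes out near 0.5 while *rightend stays
 * 1.0; both sides contain the smallest values, so *leftstart and *rightstart
 * remain 0.0.
 */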
2848 void
2849 mergejoinscansel(PlannerInfo *root, Node *clause,
2850                                  Oid opfamily, int strategy, bool nulls_first,
2851                                  Selectivity *leftstart, Selectivity *leftend,
2852                                  Selectivity *rightstart, Selectivity *rightend)
2853 {
2854         Node       *left,
2855                            *right;
2856         VariableStatData leftvar,
2857                                 rightvar;
2858         int                     op_strategy;
2859         Oid                     op_lefttype;
2860         Oid                     op_righttype;
2861         Oid                     opno,
2862                                 lsortop,
2863                                 rsortop,
2864                                 lstatop,
2865                                 rstatop,
2866                                 ltop,
2867                                 leop,
2868                                 revltop,
2869                                 revleop;
2870         bool            isgt;
2871         Datum           leftmin,
2872                                 leftmax,
2873                                 rightmin,
2874                                 rightmax;
2875         double          selec;
2876
2877         /* Set default results if we can't figure anything out. */
2878         /* XXX should default "start" fraction be a bit more than 0? */
2879         *leftstart = *rightstart = 0.0;
2880         *leftend = *rightend = 1.0;
2881
2882         /* Deconstruct the merge clause */
2883         if (!is_opclause(clause))
2884                 return;                                 /* shouldn't happen */
2885         opno = ((OpExpr *) clause)->opno;
2886         left = get_leftop((Expr *) clause);
2887         right = get_rightop((Expr *) clause);
2888         if (!right)
2889                 return;                                 /* shouldn't happen */
2890
2891         /* Look for stats for the inputs */
2892         examine_variable(root, left, 0, &leftvar);
2893         examine_variable(root, right, 0, &rightvar);
2894
2895         /* Extract the operator's declared left/right datatypes */
2896         get_op_opfamily_properties(opno, opfamily, false,
2897                                                            &op_strategy,
2898                                                            &op_lefttype,
2899                                                            &op_righttype);
2900         Assert(op_strategy == BTEqualStrategyNumber);
2901
2902         /*
2903          * Look up the various operators we need.  If we don't find them all, it
2904          * probably means the opfamily is broken, but we just fail silently.
2905          *
2906          * Note: we expect that pg_statistic histograms will be sorted by the '<'
2907          * operator, regardless of which sort direction we are considering.
2908          */
2909         switch (strategy)
2910         {
2911                 case BTLessStrategyNumber:
2912                         isgt = false;
2913                         if (op_lefttype == op_righttype)
2914                         {
2915                                 /* easy case */
2916                                 ltop = get_opfamily_member(opfamily,
2917                                                                                    op_lefttype, op_righttype,
2918                                                                                    BTLessStrategyNumber);
2919                                 leop = get_opfamily_member(opfamily,
2920                                                                                    op_lefttype, op_righttype,
2921                                                                                    BTLessEqualStrategyNumber);
2922                                 lsortop = ltop;
2923                                 rsortop = ltop;
2924                                 lstatop = lsortop;
2925                                 rstatop = rsortop;
2926                                 revltop = ltop;
2927                                 revleop = leop;
2928                         }
2929                         else
2930                         {
2931                                 ltop = get_opfamily_member(opfamily,
2932                                                                                    op_lefttype, op_righttype,
2933                                                                                    BTLessStrategyNumber);
2934                                 leop = get_opfamily_member(opfamily,
2935                                                                                    op_lefttype, op_righttype,
2936                                                                                    BTLessEqualStrategyNumber);
2937                                 lsortop = get_opfamily_member(opfamily,
2938                                                                                           op_lefttype, op_lefttype,
2939                                                                                           BTLessStrategyNumber);
2940                                 rsortop = get_opfamily_member(opfamily,
2941                                                                                           op_righttype, op_righttype,
2942                                                                                           BTLessStrategyNumber);
2943                                 lstatop = lsortop;
2944                                 rstatop = rsortop;
2945                                 revltop = get_opfamily_member(opfamily,
2946                                                                                           op_righttype, op_lefttype,
2947                                                                                           BTLessStrategyNumber);
2948                                 revleop = get_opfamily_member(opfamily,
2949                                                                                           op_righttype, op_lefttype,
2950                                                                                           BTLessEqualStrategyNumber);
2951                         }
2952                         break;
2953                 case BTGreaterStrategyNumber:
2954                         /* descending-order case */
2955                         isgt = true;
2956                         if (op_lefttype == op_righttype)
2957                         {
2958                                 /* easy case */
2959                                 ltop = get_opfamily_member(opfamily,
2960                                                                                    op_lefttype, op_righttype,
2961                                                                                    BTGreaterStrategyNumber);
2962                                 leop = get_opfamily_member(opfamily,
2963                                                                                    op_lefttype, op_righttype,
2964                                                                                    BTGreaterEqualStrategyNumber);
2965                                 lsortop = ltop;
2966                                 rsortop = ltop;
2967                                 lstatop = get_opfamily_member(opfamily,
2968                                                                                           op_lefttype, op_lefttype,
2969                                                                                           BTLessStrategyNumber);
2970                                 rstatop = lstatop;
2971                                 revltop = ltop;
2972                                 revleop = leop;
2973                         }
2974                         else
2975                         {
2976                                 ltop = get_opfamily_member(opfamily,
2977                                                                                    op_lefttype, op_righttype,
2978                                                                                    BTGreaterStrategyNumber);
2979                                 leop = get_opfamily_member(opfamily,
2980                                                                                    op_lefttype, op_righttype,
2981                                                                                    BTGreaterEqualStrategyNumber);
2982                                 lsortop = get_opfamily_member(opfamily,
2983                                                                                           op_lefttype, op_lefttype,
2984                                                                                           BTGreaterStrategyNumber);
2985                                 rsortop = get_opfamily_member(opfamily,
2986                                                                                           op_righttype, op_righttype,
2987                                                                                           BTGreaterStrategyNumber);
2988                                 lstatop = get_opfamily_member(opfamily,
2989                                                                                           op_lefttype, op_lefttype,
2990                                                                                           BTLessStrategyNumber);
2991                                 rstatop = get_opfamily_member(opfamily,
2992                                                                                           op_righttype, op_righttype,
2993                                                                                           BTLessStrategyNumber);
2994                                 revltop = get_opfamily_member(opfamily,
2995                                                                                           op_righttype, op_lefttype,
2996                                                                                           BTGreaterStrategyNumber);
2997                                 revleop = get_opfamily_member(opfamily,
2998                                                                                           op_righttype, op_lefttype,
2999                                                                                           BTGreaterEqualStrategyNumber);
3000                         }
3001                         break;
3002                 default:
3003                         goto fail;                      /* shouldn't get here */
3004         }
3005
3006         if (!OidIsValid(lsortop) ||
3007                 !OidIsValid(rsortop) ||
3008                 !OidIsValid(lstatop) ||
3009                 !OidIsValid(rstatop) ||
3010                 !OidIsValid(ltop) ||
3011                 !OidIsValid(leop) ||
3012                 !OidIsValid(revltop) ||
3013                 !OidIsValid(revleop))
3014                 goto fail;                              /* insufficient info in catalogs */
3015
3016         /* Try to get ranges of both inputs */
3017         if (!isgt)
3018         {
3019                 if (!get_variable_range(root, &leftvar, lstatop,
3020                                                                 &leftmin, &leftmax))
3021                         goto fail;                      /* no range available from stats */
3022                 if (!get_variable_range(root, &rightvar, rstatop,
3023                                                                 &rightmin, &rightmax))
3024                         goto fail;                      /* no range available from stats */
3025         }
3026         else
3027         {
3028                 /* need to swap the max and min */
3029                 if (!get_variable_range(root, &leftvar, lstatop,
3030                                                                 &leftmax, &leftmin))
3031                         goto fail;                      /* no range available from stats */
3032                 if (!get_variable_range(root, &rightvar, rstatop,
3033                                                                 &rightmax, &rightmin))
3034                         goto fail;                      /* no range available from stats */
3035         }
3036
3037         /*
3038          * Now, the fraction of the left variable that will be scanned is the
3039          * fraction that's <= the right-side maximum value.  But only believe
3040          * non-default estimates, else stick with our 1.0.
3041          */
3042         selec = scalarineqsel(root, leop, isgt, &leftvar,
3043                                                   rightmax, op_righttype);
3044         if (selec != DEFAULT_INEQ_SEL)
3045                 *leftend = selec;
3046
3047         /* And similarly for the right variable. */
3048         selec = scalarineqsel(root, revleop, isgt, &rightvar,
3049                                                   leftmax, op_lefttype);
3050         if (selec != DEFAULT_INEQ_SEL)
3051                 *rightend = selec;
3052
3053         /*
3054          * Only one of the two "end" fractions can really be less than 1.0;
3055          * believe the smaller estimate and reset the other one to exactly 1.0. If
3056          * we get exactly equal estimates (as can easily happen with self-joins),
3057          * believe neither.
3058          */
3059         if (*leftend > *rightend)
3060                 *leftend = 1.0;
3061         else if (*leftend < *rightend)
3062                 *rightend = 1.0;
3063         else
3064                 *leftend = *rightend = 1.0;
3065
3066         /*
3067          * Also, the fraction of the left variable that will be scanned before the
3068          * first join pair is found is the fraction that's < the right-side
3069          * minimum value.  But only believe non-default estimates, else stick with
3070          * our own default.
3071          */
3072         selec = scalarineqsel(root, ltop, isgt, &leftvar,
3073                                                   rightmin, op_righttype);
3074         if (selec != DEFAULT_INEQ_SEL)
3075                 *leftstart = selec;
3076
3077         /* And similarly for the right variable. */
3078         selec = scalarineqsel(root, revltop, isgt, &rightvar,
3079                                                   leftmin, op_lefttype);
3080         if (selec != DEFAULT_INEQ_SEL)
3081                 *rightstart = selec;
3082
3083         /*
3084          * Only one of the two "start" fractions can really be more than zero;
3085          * believe the larger estimate and reset the other one to exactly 0.0. If
3086          * we get exactly equal estimates (as can easily happen with self-joins),
3087          * believe neither.
3088          */
3089         if (*leftstart < *rightstart)
3090                 *leftstart = 0.0;
3091         else if (*leftstart > *rightstart)
3092                 *rightstart = 0.0;
3093         else
3094                 *leftstart = *rightstart = 0.0;
3095
3096         /*
3097          * If the sort order is nulls-first, we're going to have to skip over any
3098          * nulls too.  These would not have been counted by scalarineqsel, and we
3099          * can safely add in this fraction regardless of whether we believe
3100          * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
3101          */
3102         if (nulls_first)
3103         {
3104                 Form_pg_statistic stats;
3105
3106                 if (HeapTupleIsValid(leftvar.statsTuple))
3107                 {
3108                         stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
3109                         *leftstart += stats->stanullfrac;
3110                         CLAMP_PROBABILITY(*leftstart);
3111                         *leftend += stats->stanullfrac;
3112                         CLAMP_PROBABILITY(*leftend);
3113                 }
3114                 if (HeapTupleIsValid(rightvar.statsTuple))
3115                 {
3116                         stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
3117                         *rightstart += stats->stanullfrac;
3118                         CLAMP_PROBABILITY(*rightstart);
3119                         *rightend += stats->stanullfrac;
3120                         CLAMP_PROBABILITY(*rightend);
3121                 }
3122         }
3123
3124         /* Disbelieve start >= end, just in case that can happen */
3125         if (*leftstart >= *leftend)
3126         {
3127                 *leftstart = 0.0;
3128                 *leftend = 1.0;
3129         }
3130         if (*rightstart >= *rightend)
3131         {
3132                 *rightstart = 0.0;
3133                 *rightend = 1.0;
3134         }
3135
3136 fail:
3137         ReleaseVariableStats(leftvar);
3138         ReleaseVariableStats(rightvar);
3139 }
3140
3141
3142 /*
3143  * Helper routine for estimate_num_groups: add an item to a list of
3144  * GroupVarInfos, but only if it's not known equal to any of the existing
3145  * entries.
3146  */
3147 typedef struct
3148 {
3149         Node       *var;                        /* might be an expression, not just a Var */
3150         RelOptInfo *rel;                        /* relation it belongs to */
3151         double          ndistinct;              /* # distinct values */
3152 } GroupVarInfo;
3153
3154 static List *
3155 add_unique_group_var(PlannerInfo *root, List *varinfos,
3156                                          Node *var, VariableStatData *vardata)
3157 {
3158         GroupVarInfo *varinfo;
3159         double          ndistinct;
3160         bool            isdefault;
3161         ListCell   *lc;
3162
3163         ndistinct = get_variable_numdistinct(vardata, &isdefault);
3164
3165         /* cannot use foreach here because of possible list_delete */
3166         lc = list_head(varinfos);
3167         while (lc)
3168         {
3169                 varinfo = (GroupVarInfo *) lfirst(lc);
3170
3171                 /* must advance lc before list_delete possibly pfree's it */
3172                 lc = lnext(lc);
3173
3174                 /* Drop exact duplicates */
3175                 if (equal(var, varinfo->var))
3176                         return varinfos;
3177
3178                 /*
3179                  * Drop known-equal vars, but only if they belong to different
3180                  * relations (see comments for estimate_num_groups)
3181                  */
3182                 if (vardata->rel != varinfo->rel &&
3183                         exprs_known_equal(root, var, varinfo->var))
3184                 {
3185                         if (varinfo->ndistinct <= ndistinct)
3186                         {
3187                                 /* Keep older item, forget new one */
3188                                 return varinfos;
3189                         }
3190                         else
3191                         {
3192                                 /* Delete the older item */
3193                                 varinfos = list_delete_ptr(varinfos, varinfo);
3194                         }
3195                 }
3196         }
3197
3198         varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3199
3200         varinfo->var = var;
3201         varinfo->rel = vardata->rel;
3202         varinfo->ndistinct = ndistinct;
3203         varinfos = lappend(varinfos, varinfo);
3204         return varinfos;
3205 }
3206
3207 /*
3208  * estimate_num_groups          - Estimate number of groups in a grouped query
3209  *
3210  * Given a query having a GROUP BY clause, estimate how many groups there
3211  * will be --- ie, the number of distinct combinations of the GROUP BY
3212  * expressions.
3213  *
3214  * This routine is also used to estimate the number of rows emitted by
3215  * a DISTINCT filtering step; that is an isomorphic problem.  (Note:
3216  * actually, we only use it for DISTINCT when there's no grouping or
3217  * aggregation ahead of the DISTINCT.)
3218  *
3219  * Inputs:
3220  *      root - the query
3221  *      groupExprs - list of expressions being grouped by
3222  *      input_rows - number of rows estimated to arrive at the group/unique
3223  *              filter step
3224  *      pgset - NULL, or a List** pointing to a grouping set to filter the
3225  *              groupExprs against
3226  *
3227  * Given the lack of any cross-correlation statistics in the system, it's
3228  * impossible to do anything really trustworthy with GROUP BY conditions
3229  * involving multiple Vars.  We should however avoid assuming the worst
3230  * case (all possible cross-product terms actually appear as groups) since
3231  * very often the grouped-by Vars are highly correlated.  Our current approach
3232  * is as follows:
3233  *      1.  Expressions yielding boolean are assumed to contribute two groups,
3234  *              independently of their content, and are ignored in the subsequent
3235  *              steps.  This is mainly because tests like "col IS NULL" break the
3236  *              heuristic used in step 2 especially badly.
3237  *      2.  Reduce the given expressions to a list of unique Vars used.  For
3238  *              example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3239  *              It is clearly correct not to count the same Var more than once.
3240  *              It is also reasonable to treat f(x) the same as x: f() cannot
3241  *              increase the number of distinct values (unless it is volatile,
3242  *              which we consider unlikely for grouping), but it probably won't
3243  *              reduce the number of distinct values much either.
3244  *              As a special case, if a GROUP BY expression can be matched to an
3245  *              expressional index for which we have statistics, then we treat the
3246  *              whole expression as though it were just a Var.
3247  *      3.  If the list contains Vars of different relations that are known equal
3248  *              due to equivalence classes, then drop all but one of the Vars from each
3249  *              known-equal set, keeping the one with smallest estimated # of values
3250  *              (since the extra values of the others can't appear in joined rows).
3251  *              Note the reason we only consider Vars of different relations is that
3252  *              if we considered ones of the same rel, we'd be double-counting the
3253  *              restriction selectivity of the equality in the next step.
3254  *      4.  For Vars within a single source rel, we multiply together the numbers
3255  *              of values, clamp to the number of rows in the rel (divided by 10 if
3256  *              more than one Var), and then multiply by a factor based on the
3257  *              selectivity of the restriction clauses for that rel.  When there's
3258  *              more than one Var, the initial product is probably too high (it's the
3259  *              worst case) but clamping to a fraction of the rel's rows seems to be a
3260  *              helpful heuristic for not letting the estimate get out of hand.  (The
3261  *              factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
3262  *              we multiply by to adjust for the restriction selectivity assumes that
3263  *              the restriction clauses are independent of the grouping, which may not
3264  *              be a valid assumption, but it's hard to do better.
3265  *      5.  If there are Vars from multiple rels, we repeat step 4 for each such
3266  *              rel, and multiply the results together.
3267  * Note that rels not containing grouped Vars are ignored completely, as are
3268  * join clauses.  Such rels cannot increase the number of groups, and we
3269  * assume such clauses do not reduce the number either (somewhat bogus,
3270  * but we don't have the info to do better).
3271  */
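/*
 * Editorial illustration (hypothetical numbers): for GROUP BY a, b where a
 * and b come from the same 10000-row rel with ndistinct(a) = 100 and
 * ndistinct(b) = 50, step 4 starts from the product 100 * 50 = 5000, clamps
 * it to 10000 / 10 = 1000 because more than one Var is involved, and then
 * scales that down further according to the rel's restriction selectivity.
 */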
3272 double
3273 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3274                                         List **pgset)
3275 {
3276         List       *varinfos = NIL;
3277         double          numdistinct;
3278         ListCell   *l;
3279         int                     i;
3280
3281         /*
3282          * We don't ever want to return an estimate of zero groups, as that tends
3283          * to lead to division-by-zero and other unpleasantness.  The input_rows
3284          * estimate is usually already at least 1, but clamp it just in case it
3285          * isn't.
3286          */
3287         input_rows = clamp_row_est(input_rows);
3288
3289         /*
3290          * If no grouping columns, there's exactly one group.  (This can't happen
3291          * for normal cases with GROUP BY or DISTINCT, but it is possible for
3292          * corner cases with set operations.)
3293          */
3294         if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3295                 return 1.0;
3296
3297         /*
3298          * Count groups derived from boolean grouping expressions.  For other
3299          * expressions, find the unique Vars used, treating an expression as a Var
3300          * if we can find stats for it.  For each one, record the statistical
3301          * estimate of number of distinct values (total in its table, without
3302          * regard for filtering).
3303          */
3304         numdistinct = 1.0;
3305
3306         i = 0;
3307         foreach(l, groupExprs)
3308         {
3309                 Node       *groupexpr = (Node *) lfirst(l);
3310                 VariableStatData vardata;
3311                 List       *varshere;
3312                 ListCell   *l2;
3313
3314                 /* is expression in this grouping set? */
3315                 if (pgset && !list_member_int(*pgset, i++))
3316                         continue;
3317
3318                 /* Short-circuit for expressions returning boolean */
3319                 if (exprType(groupexpr) == BOOLOID)
3320                 {
3321                         numdistinct *= 2.0;
3322                         continue;
3323                 }
3324
3325                 /*
3326                  * If examine_variable is able to deduce anything about the GROUP BY
3327                  * expression, treat it as a single variable even if it's really more
3328                  * complicated.
3329                  */
3330                 examine_variable(root, groupexpr, 0, &vardata);
3331                 if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3332                 {
3333                         varinfos = add_unique_group_var(root, varinfos,
3334                                                                                         groupexpr, &vardata);
3335                         ReleaseVariableStats(vardata);
3336                         continue;
3337                 }
3338                 ReleaseVariableStats(vardata);
3339
3340                 /*
3341                  * Else pull out the component Vars.  Handle PlaceHolderVars by
3342                  * recursing into their arguments (effectively assuming that the
3343                  * PlaceHolderVar doesn't change the number of groups, which boils
3344                  * down to ignoring the possible addition of nulls to the result set).
3345                  */
3346                 varshere = pull_var_clause(groupexpr,
3347                                                                    PVC_RECURSE_AGGREGATES |
3348                                                                    PVC_RECURSE_WINDOWFUNCS |
3349                                                                    PVC_RECURSE_PLACEHOLDERS);
3350
3351                 /*
3352                  * If we find any variable-free GROUP BY item, then either it is a
3353                  * constant (and we can ignore it) or it contains a volatile function;
3354                  * in the latter case we punt and assume that each input row will
3355                  * yield a distinct group.
3356                  */
3357                 if (varshere == NIL)
3358                 {
3359                         if (contain_volatile_functions(groupexpr))
3360                                 return input_rows;
3361                         continue;
3362                 }
3363
3364                 /*
3365                  * Else add variables to varinfos list
3366                  */
3367                 foreach(l2, varshere)
3368                 {
3369                         Node       *var = (Node *) lfirst(l2);
3370
3371                         examine_variable(root, var, 0, &vardata);
3372                         varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3373                         ReleaseVariableStats(vardata);
3374                 }
3375         }
3376
3377         /*
3378          * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3379          * list.
3380          */
3381         if (varinfos == NIL)
3382         {
3383                 /* Guard against out-of-range answers */
3384                 if (numdistinct > input_rows)
3385                         numdistinct = input_rows;
3386                 return numdistinct;
3387         }
3388
3389         /*
3390          * Group Vars by relation and estimate total numdistinct.
3391          *
3392          * For each iteration of the outer loop, we process the frontmost Var in
3393          * varinfos, plus all other Vars in the same relation.  We remove these
 3394                  * Vars from the newvarinfos list for the next iteration.  This is the
 3395                  * easiest way to group Vars of the same rel together.
3396          */
3397         do
3398         {
3399                 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3400                 RelOptInfo *rel = varinfo1->rel;
3401                 double          reldistinct = varinfo1->ndistinct;
3402                 double          relmaxndistinct = reldistinct;
3403                 int                     relvarcount = 1;
3404                 List       *newvarinfos = NIL;
3405
3406                 /*
3407                  * Get the product of numdistinct estimates of the Vars for this rel.
3408                  * Also, construct new varinfos list of remaining Vars.
3409                  */
3410                 for_each_cell(l, lnext(list_head(varinfos)))
3411                 {
3412                         GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3413
3414                         if (varinfo2->rel == varinfo1->rel)
3415                         {
3416                                 reldistinct *= varinfo2->ndistinct;
3417                                 if (relmaxndistinct < varinfo2->ndistinct)
3418                                         relmaxndistinct = varinfo2->ndistinct;
3419                                 relvarcount++;
3420                         }
3421                         else
3422                         {
3423                                 /* not time to process varinfo2 yet */
3424                                 newvarinfos = lcons(varinfo2, newvarinfos);
3425                         }
3426                 }
3427
3428                 /*
3429                  * Sanity check --- don't divide by zero if empty relation.
3430                  */
3431                 Assert(rel->reloptkind == RELOPT_BASEREL);
3432                 if (rel->tuples > 0)
3433                 {
3434                         /*
3435                          * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3436                          * fudge factor is because the Vars are probably correlated but we
3437                          * don't know by how much.  We should never clamp to less than the
3438                          * largest ndistinct value for any of the Vars, though, since
3439                          * there will surely be at least that many groups.
3440                          */
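			/*
			 * For example, with two grouping Vars on a 1,000,000-tuple rel
			 * whose ndistinct estimates are 1,000 and 5,000, the raw product
			 * of 5,000,000 is clamped to 100,000 (tuples / 10); the clamp
			 * could never drop below 5,000, the larger of the two ndistincts.
			 */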
3441                         double          clamp = rel->tuples;
3442
3443                         if (relvarcount > 1)
3444                         {
3445                                 clamp *= 0.1;
3446                                 if (clamp < relmaxndistinct)
3447                                 {
3448                                         clamp = relmaxndistinct;
3449                                         /* for sanity in case some ndistinct is too large: */
3450                                         if (clamp > rel->tuples)
3451                                                 clamp = rel->tuples;
3452                                 }
3453                         }
3454                         if (reldistinct > clamp)
3455                                 reldistinct = clamp;
3456
3457                         /*
3458                          * Update the estimate based on the restriction selectivity,
3459                          * guarding against division by zero when reldistinct is zero.
3460                          * Also skip this if we know that we are returning all rows.
3461                          */
3462                         if (reldistinct > 0 && rel->rows < rel->tuples)
3463                         {
3464                                 /*
3465                                  * Given a table containing N rows with n distinct values in a
3466                                  * uniform distribution, if we select p rows at random then
3467                                  * the expected number of distinct values selected is
3468                                  *
3469                                  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3470                                  *
3471                                  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3472                                  *
3473                                  * See "Approximating block accesses in database
3474                                  * organizations", S. B. Yao, Communications of the ACM,
3475                                  * Volume 20 Issue 4, April 1977 Pages 260-261.
3476                                  *
3477                                  * Alternatively, re-arranging the terms from the factorials,
3478                                  * this may be written as
3479                                  *
3480                                  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3481                                  *
3482                                  * This form of the formula is more efficient to compute in
3483                                  * the common case where p is larger than N/n.  Additionally,
3484                                  * as pointed out by Dell'Era, if i << N for all terms in the
3485                                  * product, it can be approximated by
3486                                  *
3487                                  * n * (1 - ((N-p)/N)^(N/n))
3488                                  *
3489                                  * See "Expected distinct values when selecting from a bag
3490                                  * without replacement", Alberto Dell'Era,
3491                                  * http://www.adellera.it/investigations/distinct_balls/.
3492                                  *
3493                                  * The condition i << N is equivalent to n >> 1, so this is a
3494                                  * good approximation when the number of distinct values in
3495                                  * the table is large.  It turns out that this formula also
3496                                  * works well even when n is small.
3497                                  */
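				/*
				 * For example, with N = 1,000,000, n = 1,000 and p = 10,000,
				 * ((N-p)/N)^(N/n) = 0.99^1000 is about 4.3e-5, so nearly all
				 * of the 1,000 distinct values are expected to show up among
				 * the selected rows.
				 */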
3498                                 reldistinct *=
3499                                         (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3500                                                          rel->tuples / reldistinct));
3501                         }
3502                         reldistinct = clamp_row_est(reldistinct);
3503
3504                         /*
3505                          * Update estimate of total distinct groups.
3506                          */
3507                         numdistinct *= reldistinct;
3508                 }
3509
3510                 varinfos = newvarinfos;
3511         } while (varinfos != NIL);
3512
3513         numdistinct = ceil(numdistinct);
3514
3515         /* Guard against out-of-range answers */
3516         if (numdistinct > input_rows)
3517                 numdistinct = input_rows;
3518         if (numdistinct < 1.0)
3519                 numdistinct = 1.0;
3520
3521         return numdistinct;
3522 }
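
/*
 * A minimal, self-contained sketch of the Yao/Dell'Era computation used in
 * estimate_num_groups() above.  It is illustrative only and compiled out;
 * the function and parameter names below are not part of PostgreSQL proper.
 * Given table size N, number of distinct values n, and number of selected
 * rows p, it returns the expected number of distinct values appearing in
 * the selected rows.
 */
#ifdef NOT_USED
static double
example_selected_ndistinct(double N, double n, double p)
{
	double		d;

	if (N <= 0 || n <= 0)
		return 0;
	if (p >= N)
		return n;				/* selecting everything keeps every value */
	/* n * (1 - ((N-p)/N)^(N/n)), as derived in estimate_num_groups() */
	d = n * (1.0 - pow((N - p) / N, N / n));
	return clamp_row_est(d);
}
#endif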
3523
3524 /*
3525  * Estimate hash bucketsize fraction (ie, number of entries in a bucket
3526  * divided by total tuples in relation) if the specified expression is used
3527  * as a hash key.
3528  *
3529  * XXX This is really pretty bogus since we're effectively assuming that the
3530  * distribution of hash keys will be the same after applying restriction
3531  * clauses as it was in the underlying relation.  However, we are not nearly
3532  * smart enough to figure out how the restrict clauses might change the
3533  * distribution, so this will have to do for now.
3534  *
3535  * We are passed the number of buckets the executor will use for the given
3536  * input relation.  If the data were perfectly distributed, with the same
3537  * number of tuples going into each available bucket, then the bucketsize
3538  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
3539  * only if (a) there are at least nbuckets distinct data values, and (b)
3540  * we have a not-too-skewed data distribution.  Otherwise the buckets will
3541  * be nonuniformly occupied.  If the other relation in the join has a key
3542  * distribution similar to this one's, then the most-loaded buckets are
3543  * exactly those that will be probed most often.  Therefore, the "average"
3544  * bucket size for costing purposes should really be taken as something close
3545  * to the "worst case" bucket size.  We try to estimate this by adjusting the
3546  * fraction if there are too few distinct data values, and then scaling up
3547  * by the ratio of the most common value's frequency to the average frequency.
3548  *
3549  * If no statistics are available, use a default estimate of 0.1.  This will
3550  * discourage use of a hash rather strongly if the inner relation is large,
3551  * which is what we want.  We do not want to hash unless we know that the
3552  * inner rel is well-dispersed (or the alternatives seem much worse).
3553  */
3554 Selectivity
3555 estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
3556 {
3557         VariableStatData vardata;
3558         double          estfract,
3559                                 ndistinct,
3560                                 stanullfrac,
3561                                 mcvfreq,
3562                                 avgfreq;
3563         bool            isdefault;
3564         float4     *numbers;
3565         int                     nnumbers;
3566
3567         examine_variable(root, hashkey, 0, &vardata);
3568
3569         /* Get number of distinct values */
3570         ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3571
3572         /* If ndistinct isn't real, punt and return 0.1, per comments above */
3573         if (isdefault)
3574         {
3575                 ReleaseVariableStats(vardata);
3576                 return (Selectivity) 0.1;
3577         }
3578
3579         /* Get fraction that are null */
3580         if (HeapTupleIsValid(vardata.statsTuple))
3581         {
3582                 Form_pg_statistic stats;
3583
3584                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3585                 stanullfrac = stats->stanullfrac;
3586         }
3587         else
3588                 stanullfrac = 0.0;
3589
3590         /* Compute avg freq of all distinct data values in raw relation */
3591         avgfreq = (1.0 - stanullfrac) / ndistinct;
3592
3593         /*
3594          * Adjust ndistinct to account for restriction clauses.  Observe we are
3595          * assuming that the data distribution is affected uniformly by the
3596          * restriction clauses!
3597          *
3598          * XXX Possibly better way, but much more expensive: multiply by
3599          * selectivity of rel's restriction clauses that mention the target Var.
3600          */
3601         if (vardata.rel && vardata.rel->tuples > 0)
3602         {
3603                 ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3604                 ndistinct = clamp_row_est(ndistinct);
3605         }
3606
3607         /*
3608          * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3609          * number of buckets is less than the expected number of distinct values;
3610          * otherwise it is 1/ndistinct.
3611          */
3612         if (ndistinct > nbuckets)
3613                 estfract = 1.0 / nbuckets;
3614         else
3615                 estfract = 1.0 / ndistinct;
3616
3617         /*
3618          * Look up the frequency of the most common value, if available.
3619          */
3620         mcvfreq = 0.0;
3621
3622         if (HeapTupleIsValid(vardata.statsTuple))
3623         {
3624                 if (get_attstatsslot(vardata.statsTuple,
3625                                                          vardata.atttype, vardata.atttypmod,
3626                                                          STATISTIC_KIND_MCV, InvalidOid,
3627                                                          NULL,
3628                                                          NULL, NULL,
3629                                                          &numbers, &nnumbers))
3630                 {
3631                         /*
3632                          * The first MCV stat is for the most common value.
3633                          */
3634                         if (nnumbers > 0)
3635                                 mcvfreq = numbers[0];
3636                         free_attstatsslot(vardata.atttype, NULL, 0,
3637                                                           numbers, nnumbers);
3638                 }
3639         }
3640
3641         /*
3642          * Adjust estimated bucketsize upward to account for skewed distribution.
3643          */
3644         if (avgfreq > 0.0 && mcvfreq > avgfreq)
3645                 estfract *= mcvfreq / avgfreq;
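	/*
	 * For example, with 500 distinct values hashed into 1024 buckets the
	 * initial estimate is 1/500 = 0.002; if the most common value accounts
	 * for 10% of the rows while the average value accounts for 0.2%, the
	 * fraction is scaled up by a factor of 50 to 0.1.
	 */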
3646
3647         /*
3648          * Clamp bucketsize to sane range (the above adjustment could easily
3649          * produce an out-of-range result).  We set the lower bound a little above
3650          * zero, since zero isn't a very sane result.
3651          */
3652         if (estfract < 1.0e-6)
3653                 estfract = 1.0e-6;
3654         else if (estfract > 1.0)
3655                 estfract = 1.0;
3656
3657         ReleaseVariableStats(vardata);
3658
3659         return (Selectivity) estfract;
3660 }
3661
3662
3663 /*-------------------------------------------------------------------------
3664  *
3665  * Support routines
3666  *
3667  *-------------------------------------------------------------------------
3668  */
3669
3670 /*
3671  * convert_to_scalar
3672  *        Convert non-NULL values of the indicated types to the comparison
3673  *        scale needed by scalarineqsel().
3674  *        Returns "true" if successful.
3675  *
3676  * XXX this routine is a hack: ideally we should look up the conversion
3677  * subroutines in pg_type.
3678  *
3679  * All numeric datatypes are simply converted to their equivalent
3680  * "double" values.  (NUMERIC values that are outside the range of "double"
3681  * are clamped to +/- HUGE_VAL.)
3682  *
3683  * String datatypes are converted by convert_string_to_scalar(),
3684  * which is explained below.  The reason why this routine deals with
3685  * three values at a time, not just one, is that we need it for strings.
3686  *
 3687  * The bytea datatype is just different enough from strings that it has
 3688  * to be treated separately.
3689  *
3690  * The several datatypes representing absolute times are all converted
3691  * to Timestamp, which is actually a double, and then we just use that
3692  * double value.  Note this will give correct results even for the "special"
3693  * values of Timestamp, since those are chosen to compare correctly;
3694  * see timestamp_cmp.
3695  *
3696  * The several datatypes representing relative times (intervals) are all
3697  * converted to measurements expressed in seconds.
3698  */
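/*
 * For example, when scalarineqsel() interpolates within a histogram for a
 * clause such as "datecol < '2017-01-01'", the comparison constant and the
 * two adjacent histogram bin boundaries are all passed through this routine,
 * so the interpolation itself is simple arithmetic on doubles.
 */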
3699 static bool
3700 convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
3701                                   Datum lobound, Datum hibound, Oid boundstypid,
3702                                   double *scaledlobound, double *scaledhibound)
3703 {
3704         /*
3705          * Both the valuetypid and the boundstypid should exactly match the
3706          * declared input type(s) of the operator we are invoked for, so we just
3707          * error out if either is not recognized.
3708          *
3709          * XXX The histogram we are interpolating between points of could belong
3710          * to a column that's only binary-compatible with the declared type. In
3711          * essence we are assuming that the semantics of binary-compatible types
3712          * are enough alike that we can use a histogram generated with one type's
3713          * operators to estimate selectivity for the other's.  This is outright
3714          * wrong in some cases --- in particular signed versus unsigned
3715          * interpretation could trip us up.  But it's useful enough in the
3716          * majority of cases that we do it anyway.  Should think about more
3717          * rigorous ways to do it.
3718          */
3719         switch (valuetypid)
3720         {
3721                         /*
3722                          * Built-in numeric types
3723                          */
3724                 case BOOLOID:
3725                 case INT2OID:
3726                 case INT4OID:
3727                 case INT8OID:
3728                 case FLOAT4OID:
3729                 case FLOAT8OID:
3730                 case NUMERICOID:
3731                 case OIDOID:
3732                 case REGPROCOID:
3733                 case REGPROCEDUREOID:
3734                 case REGOPEROID:
3735                 case REGOPERATOROID:
3736                 case REGCLASSOID:
3737                 case REGTYPEOID:
3738                 case REGCONFIGOID:
3739                 case REGDICTIONARYOID:
3740                 case REGROLEOID:
3741                 case REGNAMESPACEOID:
3742                         *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
3743                         *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
3744                         *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
3745                         return true;
3746
3747                         /*
3748                          * Built-in string types
3749                          */
3750                 case CHAROID:
3751                 case BPCHAROID:
3752                 case VARCHAROID:
3753                 case TEXTOID:
3754                 case NAMEOID:
3755                         {
3756                                 char       *valstr = convert_string_datum(value, valuetypid);
3757                                 char       *lostr = convert_string_datum(lobound, boundstypid);
3758                                 char       *histr = convert_string_datum(hibound, boundstypid);
3759
3760                                 convert_string_to_scalar(valstr, scaledvalue,
3761                                                                                  lostr, scaledlobound,
3762                                                                                  histr, scaledhibound);
3763                                 pfree(valstr);
3764                                 pfree(lostr);
3765                                 pfree(histr);
3766                                 return true;
3767                         }
3768
3769                         /*
3770                          * Built-in bytea type
3771                          */
3772                 case BYTEAOID:
3773                         {
3774                                 convert_bytea_to_scalar(value, scaledvalue,
3775                                                                                 lobound, scaledlobound,
3776                                                                                 hibound, scaledhibound);
3777                                 return true;
3778                         }
3779
3780                         /*
3781                          * Built-in time types
3782                          */
3783                 case TIMESTAMPOID:
3784                 case TIMESTAMPTZOID:
3785                 case ABSTIMEOID:
3786                 case DATEOID:
3787                 case INTERVALOID:
3788                 case RELTIMEOID:
3789                 case TINTERVALOID:
3790                 case TIMEOID:
3791                 case TIMETZOID:
3792                         *scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
3793                         *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
3794                         *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
3795                         return true;
3796
3797                         /*
3798                          * Built-in network types
3799                          */
3800                 case INETOID:
3801                 case CIDROID:
3802                 case MACADDROID:
3803                         *scaledvalue = convert_network_to_scalar(value, valuetypid);
3804                         *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
3805                         *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
3806                         return true;
3807         }
3808         /* Don't know how to convert */
3809         *scaledvalue = *scaledlobound = *scaledhibound = 0;
3810         return false;
3811 }
3812
3813 /*
3814  * Do convert_to_scalar()'s work for any numeric data type.
3815  */
3816 static double
3817 convert_numeric_to_scalar(Datum value, Oid typid)
3818 {
3819         switch (typid)
3820         {
3821                 case BOOLOID:
3822                         return (double) DatumGetBool(value);
3823                 case INT2OID:
3824                         return (double) DatumGetInt16(value);
3825                 case INT4OID:
3826                         return (double) DatumGetInt32(value);
3827                 case INT8OID:
3828                         return (double) DatumGetInt64(value);
3829                 case FLOAT4OID:
3830                         return (double) DatumGetFloat4(value);
3831                 case FLOAT8OID:
3832                         return (double) DatumGetFloat8(value);
3833                 case NUMERICOID:
3834                         /* Note: out-of-range values will be clamped to +-HUGE_VAL */
3835                         return (double)
3836                                 DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
3837                                                                                                    value));
3838                 case OIDOID:
3839                 case REGPROCOID:
3840                 case REGPROCEDUREOID:
3841                 case REGOPEROID:
3842                 case REGOPERATOROID:
3843                 case REGCLASSOID:
3844                 case REGTYPEOID:
3845                 case REGCONFIGOID:
3846                 case REGDICTIONARYOID:
3847                 case REGROLEOID:
3848                 case REGNAMESPACEOID:
3849                         /* we can treat OIDs as integers... */
3850                         return (double) DatumGetObjectId(value);
3851         }
3852
3853         /*
3854          * Can't get here unless someone tries to use scalarltsel/scalargtsel on
3855          * an operator with one numeric and one non-numeric operand.
3856          */
3857         elog(ERROR, "unsupported type: %u", typid);
3858         return 0;
3859 }
3860
3861 /*
3862  * Do convert_to_scalar()'s work for any character-string data type.
3863  *
3864  * String datatypes are converted to a scale that ranges from 0 to 1,
3865  * where we visualize the bytes of the string as fractional digits.
3866  *
3867  * We do not want the base to be 256, however, since that tends to
3868  * generate inflated selectivity estimates; few databases will have
3869  * occurrences of all 256 possible byte values at each position.
3870  * Instead, use the smallest and largest byte values seen in the bounds
3871  * as the estimated range for each byte, after some fudging to deal with
3872  * the fact that we probably aren't going to see the full range that way.
3873  *
3874  * An additional refinement is that we discard any common prefix of the
3875  * three strings before computing the scaled values.  This allows us to
3876  * "zoom in" when we encounter a narrow data range.  An example is a phone
3877  * number database where all the values begin with the same area code.
3878  * (Actually, the bounds will be adjacent histogram-bin-boundary values,
3879  * so this is more likely to happen than you might think.)
3880  */
3881 static void
3882 convert_string_to_scalar(char *value,
3883                                                  double *scaledvalue,
3884                                                  char *lobound,
3885                                                  double *scaledlobound,
3886                                                  char *hibound,
3887                                                  double *scaledhibound)
3888 {
3889         int                     rangelo,
3890                                 rangehi;
3891         char       *sptr;
3892
3893         rangelo = rangehi = (unsigned char) hibound[0];
3894         for (sptr = lobound; *sptr; sptr++)
3895         {
3896                 if (rangelo > (unsigned char) *sptr)
3897                         rangelo = (unsigned char) *sptr;
3898                 if (rangehi < (unsigned char) *sptr)
3899                         rangehi = (unsigned char) *sptr;
3900         }
3901         for (sptr = hibound; *sptr; sptr++)
3902         {
3903                 if (rangelo > (unsigned char) *sptr)
3904                         rangelo = (unsigned char) *sptr;
3905                 if (rangehi < (unsigned char) *sptr)
3906                         rangehi = (unsigned char) *sptr;
3907         }
3908         /* If range includes any upper-case ASCII chars, make it include all */
3909         if (rangelo <= 'Z' && rangehi >= 'A')
3910         {
3911                 if (rangelo > 'A')
3912                         rangelo = 'A';
3913                 if (rangehi < 'Z')
3914                         rangehi = 'Z';
3915         }
3916         /* Ditto lower-case */
3917         if (rangelo <= 'z' && rangehi >= 'a')
3918         {
3919                 if (rangelo > 'a')
3920                         rangelo = 'a';
3921                 if (rangehi < 'z')
3922                         rangehi = 'z';
3923         }
3924         /* Ditto digits */
3925         if (rangelo <= '9' && rangehi >= '0')
3926         {
3927                 if (rangelo > '0')
3928                         rangelo = '0';
3929                 if (rangehi < '9')
3930                         rangehi = '9';
3931         }
3932
3933         /*
 3934          * If the range includes fewer than 10 chars, assume we have not got enough
 3935          * data, and make it include the regular ASCII set.
3936          */
3937         if (rangehi - rangelo < 9)
3938         {
3939                 rangelo = ' ';
3940                 rangehi = 127;
3941         }
3942
3943         /*
3944          * Now strip any common prefix of the three strings.
3945          */
3946         while (*lobound)
3947         {
3948                 if (*lobound != *hibound || *lobound != *value)
3949                         break;
3950                 lobound++, hibound++, value++;
3951         }
3952
3953         /*
3954          * Now we can do the conversions.
3955          */
3956         *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
3957         *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
3958         *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
3959 }
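
/*
 * For example, with histogram bounds "555-0100" and "555-0199" and probe
 * value "555-0142", the common prefix "555-01" is stripped, so the scaled
 * results are computed from just the suffixes "00", "42" and "99", giving
 * much better resolution within the narrow data range.
 */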
3960
3961 static double
3962 convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
3963 {
3964         int                     slen = strlen(value);
3965         double          num,
3966                                 denom,
3967                                 base;
3968
3969         if (slen <= 0)
3970                 return 0.0;                             /* empty string has scalar value 0 */
3971
3972         /*
3973          * There seems little point in considering more than a dozen bytes from
3974          * the string.  Since base is at least 10, that will give us nominal
3975          * resolution of at least 12 decimal digits, which is surely far more
3976          * precision than this estimation technique has got anyway (especially in
3977          * non-C locales).  Also, even with the maximum possible base of 256, this
3978          * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
3979          * overflow on any known machine.
3980          */
3981         if (slen > 12)
3982                 slen = 12;
3983
3984         /* Convert initial characters to fraction */
3985         base = rangehi - rangelo + 1;
3986         num = 0.0;
3987         denom = base;
3988         while (slen-- > 0)
3989         {
3990                 int                     ch = (unsigned char) *value++;
3991
3992                 if (ch < rangelo)
3993                         ch = rangelo - 1;
3994                 else if (ch > rangehi)
3995                         ch = rangehi + 1;
3996                 num += ((double) (ch - rangelo)) / denom;
3997                 denom *= base;
3998         }
3999
4000         return num;
4001 }
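
/*
 * Illustrative check of the conversion above (compiled out; not used
 * anywhere): with rangelo = 'a' and rangehi = 'z' the base is 26, so "cab"
 * should come out as 2/26 + 0/26^2 + 1/26^3, roughly 0.077, much as if
 * "0.cab" were read as a base-26 fraction.
 */
#ifdef NOT_USED
static void
convert_one_string_to_scalar_example(void)
{
	char		buf[] = "cab";
	double		v = convert_one_string_to_scalar(buf, 'a', 'z');

	Assert(v > 0.076 && v < 0.078);
}
#endif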
4002
4003 /*
4004  * Convert a string-type Datum into a palloc'd, null-terminated string.
4005  *
4006  * When using a non-C locale, we must pass the string through strxfrm()
4007  * before continuing, so as to generate correct locale-specific results.
4008  */
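/*
 * (strcmp() applied to two strxfrm() outputs is defined to sort the same
 * way strcoll() would sort the original strings, which is what makes the
 * byte-oriented scaling in convert_string_to_scalar() locale-correct.)
 */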
4009 static char *
4010 convert_string_datum(Datum value, Oid typid)
4011 {
4012         char       *val;
4013
4014         switch (typid)
4015         {
4016                 case CHAROID:
4017                         val = (char *) palloc(2);
4018                         val[0] = DatumGetChar(value);
4019                         val[1] = '\0';
4020                         break;
4021                 case BPCHAROID:
4022                 case VARCHAROID:
4023                 case TEXTOID:
4024                         val = TextDatumGetCString(value);
4025                         break;
4026                 case NAMEOID:
4027                         {
4028                                 NameData   *nm = (NameData *) DatumGetPointer(value);
4029
4030                                 val = pstrdup(NameStr(*nm));
4031                                 break;
4032                         }
4033                 default:
4034
4035                         /*
4036                          * Can't get here unless someone tries to use scalarltsel on an
4037                          * operator with one string and one non-string operand.
4038                          */
4039                         elog(ERROR, "unsupported type: %u", typid);
4040                         return NULL;
4041         }
4042
4043         if (!lc_collate_is_c(DEFAULT_COLLATION_OID))
4044         {
4045                 char       *xfrmstr;
4046                 size_t          xfrmlen;
 4047                 size_t          xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4048
4049                 /*
4050                  * XXX: We could guess at a suitable output buffer size and only call
4051                  * strxfrm twice if our guess is too small.
4052                  *
 4053                  * XXX: strxfrm doesn't support UTF-8 encoding on Win32; it can return
 4054                  * bogus data or set an error.  This is not really a problem unless it
 4055                  * crashes, since it will only cause an estimation error, nothing
 4056                  * fatal.
4057                  */
4058 #if _MSC_VER == 1400                    /* VS.Net 2005 */
4059
 4060                 /*
 4061                  * See:
 4062                  * http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99694
 4063                  */
4064                 {
4065                         char            x[1];
4066
4067                         xfrmlen = strxfrm(x, val, 0);
4068                 }
4069 #else
4070                 xfrmlen = strxfrm(NULL, val, 0);
4071 #endif
4072 #ifdef WIN32
4073
4074                 /*
4075                  * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4076                  * of trying to allocate this much memory (and fail), just return the
4077                  * original string unmodified as if we were in the C locale.
4078                  */
4079                 if (xfrmlen == INT_MAX)
4080                         return val;
4081 #endif
4082                 xfrmstr = (char *) palloc(xfrmlen + 1);
4083                 xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4084
4085                 /*
4086                  * Some systems (e.g., glibc) can return a smaller value from the
4087                  * second call than the first; thus the Assert must be <= not ==.
4088                  */
4089                 Assert(xfrmlen2 <= xfrmlen);
4090                 pfree(val);
4091                 val = xfrmstr;
4092         }
4093
4094         return val;
4095 }
4096
4097 /*
4098  * Do convert_to_scalar()'s work for any bytea data type.
4099  *
4100  * Very similar to convert_string_to_scalar except we can't assume
4101  * null-termination and therefore pass explicit lengths around.
4102  *
4103  * Also, assumptions about likely "normal" ranges of characters have been
4104  * removed - a data range of 0..255 is always used, for now.  (Perhaps
4105  * someday we will add information about actual byte data range to
4106  * pg_statistic.)
4107  */
4108 static void
4109 convert_bytea_to_scalar(Datum value,
4110                                                 double *scaledvalue,
4111                                                 Datum lobound,
4112                                                 double *scaledlobound,
4113                                                 Datum hibound,
4114                                                 double *scaledhibound)
4115 {
4116         int                     rangelo,
4117                                 rangehi,
4118                                 valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
4119                                 loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
4120                                 hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
4121                                 i,
4122                                 minlen;
4123         unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
4124                            *lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
4125                            *histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
4126
4127         /*
4128          * Assume bytea data is uniformly distributed across all byte values.
4129          */
4130         rangelo = 0;
4131         rangehi = 255;
4132
4133         /*
4134          * Now strip any common prefix of the three strings.
4135          */
4136         minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4137         for (i = 0; i < minlen; i++)
4138         {
4139                 if (*lostr != *histr || *lostr != *valstr)
4140                         break;
4141                 lostr++, histr++, valstr++;
4142                 loboundlen--, hiboundlen--, valuelen--;
4143         }
4144
4145         /*
4146          * Now we can do the conversions.
4147          */
4148         *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4149         *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4150         *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4151 }
4152
4153 static double
4154 convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
4155                                                         int rangelo, int rangehi)
4156 {
4157         double          num,
4158                                 denom,
4159                                 base;
4160
4161         if (valuelen <= 0)
4162                 return 0.0;                             /* empty string has scalar value 0 */
4163
4164         /*
 4165          * Since base is 256, we need not consider more than about 10 chars (even
 4166          * this many seems like overkill).
4167          */
4168         if (valuelen > 10)
4169                 valuelen = 10;
4170
4171         /* Convert initial characters to fraction */
4172         base = rangehi - rangelo + 1;
4173         num = 0.0;
4174         denom = base;
4175         while (valuelen-- > 0)
4176         {
4177                 int                     ch = *value++;
4178
4179                 if (ch < rangelo)
4180                         ch = rangelo - 1;
4181                 else if (ch > rangehi)
4182                         ch = rangehi + 1;
4183                 num += ((double) (ch - rangelo)) / denom;
4184                 denom *= base;
4185         }
4186
4187         return num;
4188 }
4189
4190 /*
4191  * Do convert_to_scalar()'s work for any timevalue data type.
4192  */
4193 static double
4194 convert_timevalue_to_scalar(Datum value, Oid typid)
4195 {
4196         switch (typid)
4197         {
4198                 case TIMESTAMPOID:
4199                         return DatumGetTimestamp(value);
4200                 case TIMESTAMPTZOID:
4201                         return DatumGetTimestampTz(value);
4202                 case ABSTIMEOID:
4203                         return DatumGetTimestamp(DirectFunctionCall1(abstime_timestamp,
4204                                                                                                                  value));
4205                 case DATEOID:
4206                         return date2timestamp_no_overflow(DatumGetDateADT(value));
4207                 case INTERVALOID:
4208                         {
4209                                 Interval   *interval = DatumGetIntervalP(value);
4210
4211                                 /*
4212                                  * Convert the month part of Interval to days using assumed
4213                                  * average month length of 365.25/12.0 days.  Not too
4214                                  * accurate, but plenty good enough for our purposes.
4215                                  */
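				/*
				 * For example, an interval of '1 mon 2 days' contributes the
				 * same amount as one of 32.4375 days.
				 */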
4216                                 return interval->time + interval->day * (double) USECS_PER_DAY +
4217                                         interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4218                         }
4219                 case RELTIMEOID:
4220                         return (DatumGetRelativeTime(value) * 1000000.0);
4221                 case TINTERVALOID:
4222                         {
4223                                 TimeInterval tinterval = DatumGetTimeInterval(value);
4224
4225                                 if (tinterval->status != 0)
4226                                         return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
4227                                 return 0;               /* for lack of a better idea */
4228                         }
4229                 case TIMEOID:
4230                         return DatumGetTimeADT(value);
4231                 case TIMETZOID:
4232                         {
4233                                 TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
4234
4235                                 /* use GMT-equivalent time */
4236                                 return (double) (timetz->time + (timetz->zone * 1000000.0));
4237                         }
4238         }
4239
4240         /*
4241          * Can't get here unless someone tries to use scalarltsel/scalargtsel on
4242          * an operator with one timevalue and one non-timevalue operand.
4243          */
4244         elog(ERROR, "unsupported type: %u", typid);
4245         return 0;
4246 }
4247
4248
4249 /*
4250  * get_restriction_variable
4251  *              Examine the args of a restriction clause to see if it's of the
4252  *              form (variable op pseudoconstant) or (pseudoconstant op variable),
4253  *              where "variable" could be either a Var or an expression in vars of a
4254  *              single relation.  If so, extract information about the variable,
4255  *              and also indicate which side it was on and the other argument.
4256  *
4257  * Inputs:
4258  *      root: the planner info
4259  *      args: clause argument list
4260  *      varRelid: see specs for restriction selectivity functions
4261  *
4262  * Outputs: (these are valid only if TRUE is returned)
4263  *      *vardata: gets information about variable (see examine_variable)
4264  *      *other: gets other clause argument, aggressively reduced to a constant
4265  *      *varonleft: set TRUE if variable is on the left, FALSE if on the right
4266  *
4267  * Returns TRUE if a variable is identified, otherwise FALSE.
4268  *
4269  * Note: if there are Vars on both sides of the clause, we must fail, because
4270  * callers are expecting that the other side will act like a pseudoconstant.
4271  */
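/*
 * For example, given the clause "t.x < 42" (and varRelid = 0), *vardata is
 * filled from t.x, *other becomes the Const 42, and *varonleft is set to
 * TRUE; for "42 > t.x" the same data is returned with *varonleft = FALSE.
 */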
4272 bool
4273 get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
4274                                                  VariableStatData *vardata, Node **other,
4275                                                  bool *varonleft)
4276 {
4277         Node       *left,
4278                            *right;
4279         VariableStatData rdata;
4280
4281         /* Fail if not a binary opclause (probably shouldn't happen) */
4282         if (list_length(args) != 2)
4283                 return false;
4284
4285         left = (Node *) linitial(args);
4286         right = (Node *) lsecond(args);
4287
4288         /*
4289          * Examine both sides.  Note that when varRelid is nonzero, Vars of other
4290          * relations will be treated as pseudoconstants.
4291          */
4292         examine_variable(root, left, varRelid, vardata);
4293         examine_variable(root, right, varRelid, &rdata);
4294
4295         /*
4296          * If one side is a variable and the other not, we win.
4297          */
4298         if (vardata->rel && rdata.rel == NULL)
4299         {
4300                 *varonleft = true;
4301                 *other = estimate_expression_value(root, rdata.var);
4302                 /* Assume we need no ReleaseVariableStats(rdata) here */
4303                 return true;
4304         }
4305
4306         if (vardata->rel == NULL && rdata.rel)
4307         {
4308                 *varonleft = false;
4309                 *other = estimate_expression_value(root, vardata->var);
4310                 /* Assume we need no ReleaseVariableStats(*vardata) here */
4311                 *vardata = rdata;
4312                 return true;
4313         }
4314
4315         /* Oops, clause has wrong structure (probably var op var) */
4316         ReleaseVariableStats(*vardata);
4317         ReleaseVariableStats(rdata);
4318
4319         return false;
4320 }
4321
4322 /*
4323  * get_join_variables
4324  *              Apply examine_variable() to each side of a join clause.
4325  *              Also, attempt to identify whether the join clause has the same
4326  *              or reversed sense compared to the SpecialJoinInfo.
4327  *
4328  * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
4329  * or "reversed" if it is "rhs_var OP lhs_var".  In complicated cases
4330  * where we can't tell for sure, we default to assuming it's normal.
4331  */
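/*
 * For example, if sjinfo describes "a LEFT JOIN b", then the clause
 * "a.x = b.y" is reported as normal, while "b.y = a.x" sets
 * *join_is_reversed.
 */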
4332 void
4333 get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
4334                                    VariableStatData *vardata1, VariableStatData *vardata2,
4335                                    bool *join_is_reversed)
4336 {
4337         Node       *left,
4338                            *right;
4339
4340         if (list_length(args) != 2)
4341                 elog(ERROR, "join operator should take two arguments");
4342
4343         left = (Node *) linitial(args);
4344         right = (Node *) lsecond(args);
4345
4346         examine_variable(root, left, 0, vardata1);
4347         examine_variable(root, right, 0, vardata2);
4348
4349         if (vardata1->rel &&
4350                 bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4351                 *join_is_reversed = true;               /* var1 is on RHS */
4352         else if (vardata2->rel &&
4353                          bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4354                 *join_is_reversed = true;               /* var2 is on LHS */
4355         else
4356                 *join_is_reversed = false;
4357 }
4358
4359 /*
4360  * examine_variable
4361  *              Try to look up statistical data about an expression.
4362  *              Fill in a VariableStatData struct to describe the expression.
4363  *
4364  * Inputs:
4365  *      root: the planner info
4366  *      node: the expression tree to examine
4367  *      varRelid: see specs for restriction selectivity functions
4368  *
4369  * Outputs: *vardata is filled as follows:
4370  *      var: the input expression (with any binary relabeling stripped, if
4371  *              it is or contains a variable; but otherwise the type is preserved)
4372  *      rel: RelOptInfo for relation containing variable; NULL if expression
4373  *              contains no Vars (NOTE this could point to a RelOptInfo of a
4374  *              subquery, not one in the current query).
4375  *      statsTuple: the pg_statistic entry for the variable, if one exists;
4376  *              otherwise NULL.
4377  *      freefunc: pointer to a function to release statsTuple with.
4378  *      vartype: exposed type of the expression; this should always match
4379  *              the declared input type of the operator we are estimating for.
4380  *      atttype, atttypmod: type data to pass to get_attstatsslot().  This is
4381  *              commonly the same as the exposed type of the variable argument,
4382  *              but can be different in binary-compatible-type cases.
4383  *      isunique: TRUE if we were able to match the var to a unique index or a
4384  *              single-column DISTINCT clause, implying its values are unique for
4385  *              this query.  (Caution: this should be trusted for statistical
4386  *              purposes only, since we do not check indimmediate nor verify that
4387  *              the exact same definition of equality applies.)
4388  *
4389  * Caller is responsible for doing ReleaseVariableStats() before exiting.
4390  */
4391 void
4392 examine_variable(PlannerInfo *root, Node *node, int varRelid,
4393                                  VariableStatData *vardata)
4394 {
4395         Node       *basenode;
4396         Relids          varnos;
4397         RelOptInfo *onerel;
4398
4399         /* Make sure we don't return dangling pointers in vardata */
4400         MemSet(vardata, 0, sizeof(VariableStatData));
4401
4402         /* Save the exposed type of the expression */
4403         vardata->vartype = exprType(node);
4404
4405         /* Look inside any binary-compatible relabeling */
4406
4407         if (IsA(node, RelabelType))
4408                 basenode = (Node *) ((RelabelType *) node)->arg;
4409         else
4410                 basenode = node;
4411
4412         /* Fast path for a simple Var */
4413
4414         if (IsA(basenode, Var) &&
4415                 (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4416         {
4417                 Var                *var = (Var *) basenode;
4418
4419                 /* Set up result fields other than the stats tuple */
4420                 vardata->var = basenode;        /* return Var without relabeling */
4421                 vardata->rel = find_base_rel(root, var->varno);
4422                 vardata->atttype = var->vartype;
4423                 vardata->atttypmod = var->vartypmod;
4424                 vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4425
4426                 /* Try to locate some stats */
4427                 examine_simple_variable(root, var, vardata);
4428
4429                 return;
4430         }
4431
4432         /*
4433          * Okay, it's a more complicated expression.  Determine variable
4434          * membership.  Note that when varRelid isn't zero, only vars of that
4435          * relation are considered "real" vars.
4436          */
4437         varnos = pull_varnos(basenode);
4438
4439         onerel = NULL;
4440
4441         switch (bms_membership(varnos))
4442         {
4443                 case BMS_EMPTY_SET:
4444                         /* No Vars at all ... must be pseudo-constant clause */
4445                         break;
4446                 case BMS_SINGLETON:
4447                         if (varRelid == 0 || bms_is_member(varRelid, varnos))
4448                         {
4449                                 onerel = find_base_rel(root,
4450                                            (varRelid ? varRelid : bms_singleton_member(varnos)));
4451                                 vardata->rel = onerel;
4452                                 node = basenode;        /* strip any relabeling */
4453                         }
4454                         /* else treat it as a constant */
4455                         break;
4456                 case BMS_MULTIPLE:
4457                         if (varRelid == 0)
4458                         {
4459                                 /* treat it as a variable of a join relation */
4460                                 vardata->rel = find_join_rel(root, varnos);
4461                                 node = basenode;        /* strip any relabeling */
4462                         }
4463                         else if (bms_is_member(varRelid, varnos))
4464                         {
4465                                 /* ignore the vars belonging to other relations */
4466                                 vardata->rel = find_base_rel(root, varRelid);
4467                                 node = basenode;        /* strip any relabeling */
4468                                 /* note: no point in expressional-index search here */
4469                         }
4470                         /* else treat it as a constant */
4471                         break;
4472         }
4473
4474         bms_free(varnos);
4475
4476         vardata->var = node;
4477         vardata->atttype = exprType(node);
4478         vardata->atttypmod = exprTypmod(node);
4479
4480         if (onerel)
4481         {
4482                 /*
4483                  * We have an expression in vars of a single relation.  Try to match
4484                  * it to expressional index columns, in hopes of finding some
4485                  * statistics.
4486                  *
4487                  * XXX it's conceivable that there are multiple matches with different
4488                  * index opfamilies; if so, we need to pick one that matches the
4489                  * operator we are estimating for.  FIXME later.
4490                  */
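		/*
		 * For example, an expression like lower(name) can be matched to an
		 * index declared on lower(name); ANALYZE gathers pg_statistic rows
		 * for such expression index columns, and that is what we look up
		 * below.
		 */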
4491                 ListCell   *ilist;
4492
4493                 foreach(ilist, onerel->indexlist)
4494                 {
4495                         IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4496                         ListCell   *indexpr_item;
4497                         int                     pos;
4498
4499                         indexpr_item = list_head(index->indexprs);
4500                         if (indexpr_item == NULL)
4501                                 continue;               /* no expressions here... */
4502
4503                         for (pos = 0; pos < index->ncolumns; pos++)
4504                         {
4505                                 if (index->indexkeys[pos] == 0)
4506                                 {
4507                                         Node       *indexkey;
4508
4509                                         if (indexpr_item == NULL)
4510                                                 elog(ERROR, "too few entries in indexprs list");
4511                                         indexkey = (Node *) lfirst(indexpr_item);
4512                                         if (indexkey && IsA(indexkey, RelabelType))
4513                                                 indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4514                                         if (equal(node, indexkey))
4515                                         {
4516                                                 /*
4517                                                  * Found a match ... is it a unique index? Tests here
4518                                                  * should match has_unique_index().
4519                                                  */
4520                                                 if (index->unique &&
4521                                                         index->ncolumns == 1 &&
4522                                                         (index->indpred == NIL || index->predOK))
4523                                                         vardata->isunique = true;
4524
4525                                                 /*
4526                                                  * Has it got stats?  We only consider stats for
4527                                                  * non-partial indexes, since partial indexes probably
4528                                                  * don't reflect whole-relation statistics; the above
4529                                                  * check for uniqueness is the only info we take from
4530                                                  * a partial index.
4531                                                  *
4532                                                  * An index stats hook, however, must make its own
4533                                                  * decisions about what to do with partial indexes.
4534                                                  */
4535                                                 if (get_index_stats_hook &&
4536                                                         (*get_index_stats_hook) (root, index->indexoid,
4537                                                                                                          pos + 1, vardata))
4538                                                 {
4539                                                         /*
4540                                                          * The hook took control of acquiring a stats
4541                                                          * tuple.  If it did supply a tuple, it'd better
4542                                                          * have supplied a freefunc.
4543                                                          */
4544                                                         if (HeapTupleIsValid(vardata->statsTuple) &&
4545                                                                 !vardata->freefunc)
4546                                                                 elog(ERROR, "no function provided to release variable stats with");
4547                                                 }
4548                                                 else if (index->indpred == NIL)
4549                                                 {
4550                                                         vardata->statsTuple =
4551                                                                 SearchSysCache3(STATRELATTINH,
4552                                                                                    ObjectIdGetDatum(index->indexoid),
4553                                                                                                 Int16GetDatum(pos + 1),
4554                                                                                                 BoolGetDatum(false));
4555                                                         vardata->freefunc = ReleaseSysCache;
4556                                                 }
4557                                                 if (vardata->statsTuple)
4558                                                         break;
4559                                         }
4560                                         indexpr_item = lnext(indexpr_item);
4561                                 }
4562                         }
4563                         if (vardata->statsTuple)
4564                                 break;
4565                 }
4566         }
4567 }
4568
4569 /*
4570  * examine_simple_variable
4571  *              Handle a simple Var for examine_variable
4572  *
4573  * This is split out as a subroutine so that we can recurse to deal with
4574  * Vars referencing subqueries.
4575  *
4576  * We already filled in all the fields of *vardata except for the stats tuple.
4577  */
4578 static void
4579 examine_simple_variable(PlannerInfo *root, Var *var,
4580                                                 VariableStatData *vardata)
4581 {
4582         RangeTblEntry *rte = root->simple_rte_array[var->varno];
4583
4584         Assert(IsA(rte, RangeTblEntry));
4585
4586         if (get_relation_stats_hook &&
4587                 (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
4588         {
4589                 /*
4590                  * The hook took control of acquiring a stats tuple.  If it did supply
4591                  * a tuple, it'd better have supplied a freefunc.
4592                  */
4593                 if (HeapTupleIsValid(vardata->statsTuple) &&
4594                         !vardata->freefunc)
4595                         elog(ERROR, "no function provided to release variable stats with");
4596         }
4597         else if (rte->rtekind == RTE_RELATION)
4598         {
4599                 /*
4600                  * Plain table or parent of an inheritance appendrel, so look up the
4601                  * column in pg_statistic
4602                  */
4603                 vardata->statsTuple = SearchSysCache3(STATRELATTINH,
4604                                                                                           ObjectIdGetDatum(rte->relid),
4605                                                                                           Int16GetDatum(var->varattno),
4606                                                                                           BoolGetDatum(rte->inh));
4607                 vardata->freefunc = ReleaseSysCache;
4608         }
4609         else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
4610         {
4611                 /*
4612                  * Plain subquery (not one that was converted to an appendrel).
4613                  */
4614                 Query      *subquery = rte->subquery;
4615                 RelOptInfo *rel;
4616                 TargetEntry *ste;
4617
4618                 /*
4619                  * Punt if it's a whole-row var rather than a plain column reference.
4620                  */
4621                 if (var->varattno == InvalidAttrNumber)
4622                         return;
4623
4624                 /*
4625                  * Punt if subquery uses set operations or GROUP BY, as these will
4626                  * mash underlying columns' stats beyond recognition.  (Set ops are
4627                  * particularly nasty; if we forged ahead, we would return stats
4628                  * relevant to only the leftmost subselect...)  DISTINCT is also
4629                  * problematic, but we check that later because there is a possibility
4630                  * of learning something even with it.
4631                  */
4632                 if (subquery->setOperations ||
4633                         subquery->groupClause)
4634                         return;
4635
4636                 /*
4637                  * OK, fetch RelOptInfo for subquery.  Note that we don't change the
4638                  * rel returned in vardata, since caller expects it to be a rel of the
4639                  * caller's query level.  Because we might already be recursing, we
4640                  * can't use that rel pointer either, but have to look up the Var's
4641                  * rel afresh.
4642                  */
4643                 rel = find_base_rel(root, var->varno);
4644
4645                 /* If the subquery hasn't been planned yet, we have to punt */
4646                 if (rel->subroot == NULL)
4647                         return;
4648                 Assert(IsA(rel->subroot, PlannerInfo));
4649
4650                 /*
4651                  * Switch our attention to the subquery as mangled by the planner. It
4652                  * was okay to look at the pre-planning version for the tests above,
4653                  * but now we need a Var that will refer to the subroot's live
4654                  * RelOptInfos.  For instance, if any subquery pullup happened during
4655                  * planning, Vars in the targetlist might have gotten replaced, and we
4656                  * need to see the replacement expressions.
4657                  */
4658                 subquery = rel->subroot->parse;
4659                 Assert(IsA(subquery, Query));
4660
4661                 /* Get the subquery output expression referenced by the upper Var */
4662                 ste = get_tle_by_resno(subquery->targetList, var->varattno);
4663                 if (ste == NULL || ste->resjunk)
4664                         elog(ERROR, "subquery %s does not have attribute %d",
4665                                  rte->eref->aliasname, var->varattno);
4666                 var = (Var *) ste->expr;
4667
4668                 /*
4669                  * If subquery uses DISTINCT, we can't make use of any stats for the
4670                  * variable ... but, if it's the only DISTINCT column, we are entitled
4671                  * to consider it unique.  We do the test this way so that it works
4672                  * for cases involving DISTINCT ON.
4673                  */
4674                 if (subquery->distinctClause)
4675                 {
4676                         if (list_length(subquery->distinctClause) == 1 &&
4677                                 targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
4678                                 vardata->isunique = true;
4679                         /* cannot go further */
4680                         return;
4681                 }
4682
4683                 /*
4684                  * If the subquery originated from a view with the security_barrier
4685                  * attribute, we must not look at the variable's statistics, though it
4686                  * seems all right to notice the existence of a DISTINCT clause. So
4687                  * stop here.
4688                  *
4689                  * This is probably a harsher restriction than necessary; it's
4690                  * certainly OK for the selectivity estimator (which is a C function,
4691                  * and therefore omnipotent anyway) to look at the statistics.  But
4692                  * many selectivity estimators will happily *invoke the operator
4693                  * function* to try to work out a good estimate - and that's not OK.
4694                  * So for now, don't dig down for stats.
4695                  */
4696                 if (rte->security_barrier)
4697                         return;
4698
4699                 /* Can only handle a simple Var of subquery's query level */
4700                 if (var && IsA(var, Var) &&
4701                         var->varlevelsup == 0)
4702                 {
4703                         /*
4704                          * OK, recurse into the subquery.  Note that the original setting
4705                          * of vardata->isunique (which will surely be false) is left
4706                          * unchanged in this situation.  That's what we want, since even
4707                          * if the underlying column is unique, the subquery may have
4708                          * joined to other tables in a way that creates duplicates.
4709                          */
4710                         examine_simple_variable(rel->subroot, var, vardata);
4711                 }
4712         }
4713         else
4714         {
4715                 /*
4716                  * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE.  (We
4717                  * won't see RTE_JOIN here because join alias Vars have already been
4718                  * flattened.)  There's not much we can do with function outputs, but
4719                  * maybe someday try to be smarter about VALUES and/or CTEs.
4720                  */
4721         }
4722 }
4723
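/*
 * A worked example of the subquery handling above (the query shown is
 * purely hypothetical): for
 *
 *              SELECT * FROM (SELECT DISTINCT x FROM tab) ss WHERE ss.x = 42;
 *
 * the sub-select's single DISTINCT column lets examine_simple_variable()
 * mark ss.x as unique, although it still stops short of digging into the
 * underlying column statistics.  Had the subquery used GROUP BY or a set
 * operation instead, we would have punted entirely, per the comments above.
 */
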
4724 /*
4725  * get_variable_numdistinct
4726  *        Estimate the number of distinct values of a variable.
4727  *
4728  * vardata: results of examine_variable
4729  * *isdefault: set to TRUE if the result is a default rather than based on
4730  * anything meaningful.
4731  *
4732  * NB: be careful to produce a positive integral result, since callers may
4733  * compare the result to exact integer counts, or might divide by it.
4734  */
4735 double
4736 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
4737 {
4738         double          stadistinct;
4739         double          stanullfrac = 0.0;
4740         double          ntuples;
4741
4742         *isdefault = false;
4743
4744         /*
4745          * Determine the stadistinct value to use.  There are cases where we can
4746          * get an estimate even without a pg_statistic entry, or can get a better
4747          * value than is in pg_statistic.  Grab stanullfrac too if we can find it
4748          * (otherwise, assume no nulls, for lack of any better idea).
4749          */
4750         if (HeapTupleIsValid(vardata->statsTuple))
4751         {
4752                 /* Use the pg_statistic entry */
4753                 Form_pg_statistic stats;
4754
4755                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
4756                 stadistinct = stats->stadistinct;
4757                 stanullfrac = stats->stanullfrac;
4758         }
4759         else if (vardata->vartype == BOOLOID)
4760         {
4761                 /*
4762                  * Special-case boolean columns: presumably, two distinct values.
4763                  *
4764                  * Are there any other datatypes we should wire in special estimates
4765                  * for?
4766                  */
4767                 stadistinct = 2.0;
4768         }
4769         else
4770         {
4771                 /*
4772                  * We don't keep statistics for system columns, but in some cases we
4773                  * can infer distinctness anyway.
4774                  */
4775                 if (vardata->var && IsA(vardata->var, Var))
4776                 {
4777                         switch (((Var *) vardata->var)->varattno)
4778                         {
4779                                 case ObjectIdAttributeNumber:
4780                                 case SelfItemPointerAttributeNumber:
4781                                         stadistinct = -1.0; /* unique (and all non null) */
4782                                         break;
4783                                 case TableOidAttributeNumber:
4784                                         stadistinct = 1.0;      /* only 1 value */
4785                                         break;
4786                                 default:
4787                                         stadistinct = 0.0;      /* means "unknown" */
4788                                         break;
4789                         }
4790                 }
4791                 else
4792                         stadistinct = 0.0;      /* means "unknown" */
4793
4794                 /*
4795                  * XXX consider using estimate_num_groups on expressions?
4796                  */
4797         }
4798
4799         /*
4800          * If there is a unique index or DISTINCT clause for the variable, assume
4801          * it is unique no matter what pg_statistic says; the statistics could be
4802          * out of date, or we might have found a partial unique index that proves
4803          * the var is unique for this query.  However, we'd better still believe
4804          * the null-fraction statistic.
4805          */
4806         if (vardata->isunique)
4807                 stadistinct = -1.0 * (1.0 - stanullfrac);
4808
4809         /*
4810          * If we had an absolute estimate, use that.
4811          */
4812         if (stadistinct > 0.0)
4813                 return clamp_row_est(stadistinct);
4814
4815         /*
4816          * Otherwise we need to get the relation size; punt if not available.
4817          */
4818         if (vardata->rel == NULL)
4819         {
4820                 *isdefault = true;
4821                 return DEFAULT_NUM_DISTINCT;
4822         }
4823         ntuples = vardata->rel->tuples;
4824         if (ntuples <= 0.0)
4825         {
4826                 *isdefault = true;
4827                 return DEFAULT_NUM_DISTINCT;
4828         }
4829
4830         /*
4831          * If we had a relative estimate, use that.
4832          */
4833         if (stadistinct < 0.0)
4834                 return clamp_row_est(-stadistinct * ntuples);
4835
4836         /*
4837          * With no data, estimate ndistinct = ntuples if the table is small, else
4838          * use default.  We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
4839          * that the behavior isn't discontinuous.
4840          */
4841         if (ntuples < DEFAULT_NUM_DISTINCT)
4842                 return clamp_row_est(ntuples);
4843
4844         *isdefault = true;
4845         return DEFAULT_NUM_DISTINCT;
4846 }
4847
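/*
 * Worked examples of the conventions in get_variable_numdistinct() (all of
 * the numbers here are hypothetical): a stadistinct of -0.25 on a relation
 * with rel->tuples = 10000 is a relative estimate and yields
 * clamp_row_est(0.25 * 10000) = 2500 distinct values; a positive stadistinct
 * such as 120 is taken at face value; and a stadistinct of 0 ("unknown")
 * yields clamp_row_est(ntuples) when the table's tuple count is below
 * DEFAULT_NUM_DISTINCT, and falls back to DEFAULT_NUM_DISTINCT (setting
 * *isdefault) for anything larger.
 */
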
4848 /*
4849  * get_variable_range
4850  *              Estimate the minimum and maximum value of the specified variable.
4851  *              If successful, store values in *min and *max, and return TRUE.
4852  *              If no data available, return FALSE.
4853  *
4854  * sortop is the "<" comparison operator to use.  This should generally
4855  * be "<" not ">", as only the former is likely to be found in pg_statistic.
4856  */
4857 static bool
4858 get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
4859                                    Datum *min, Datum *max)
4860 {
4861         Datum           tmin = 0;
4862         Datum           tmax = 0;
4863         bool            have_data = false;
4864         int16           typLen;
4865         bool            typByVal;
4866         Datum      *values;
4867         int                     nvalues;
4868         int                     i;
4869
4870         /*
4871          * XXX It's very tempting to try to use the actual column min and max, if
4872          * we can get them relatively cheaply with an index probe.  However, since
4873          * this function is called many times during join planning, that could
4874          * have unpleasant effects on planning speed.  Need more investigation
4875          * before enabling this.
4876          */
4877 #ifdef NOT_USED
4878         if (get_actual_variable_range(root, vardata, sortop, min, max))
4879                 return true;
4880 #endif
4881
4882         if (!HeapTupleIsValid(vardata->statsTuple))
4883         {
4884                 /* no stats available, so default result */
4885                 return false;
4886         }
4887
4888         get_typlenbyval(vardata->atttype, &typLen, &typByVal);
4889
4890         /*
4891          * If there is a histogram, grab the first and last values.
4892          *
4893          * If there is a histogram that is sorted by some operator other than
4894          * the one we want, fail --- this suggests that there is data we can't
4895          * use.
4896          */
4897         if (get_attstatsslot(vardata->statsTuple,
4898                                                  vardata->atttype, vardata->atttypmod,
4899                                                  STATISTIC_KIND_HISTOGRAM, sortop,
4900                                                  NULL,
4901                                                  &values, &nvalues,
4902                                                  NULL, NULL))
4903         {
4904                 if (nvalues > 0)
4905                 {
4906                         tmin = datumCopy(values[0], typByVal, typLen);
4907                         tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
4908                         have_data = true;
4909                 }
4910                 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
4911         }
4912         else if (get_attstatsslot(vardata->statsTuple,
4913                                                           vardata->atttype, vardata->atttypmod,
4914                                                           STATISTIC_KIND_HISTOGRAM, InvalidOid,
4915                                                           NULL,
4916                                                           &values, &nvalues,
4917                                                           NULL, NULL))
4918         {
4919                 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
4920                 return false;
4921         }
4922
4923         /*
4924          * If we have most-common-values info, look for extreme MCVs.  This is
4925          * needed even if we also have a histogram, since the histogram excludes
4926          * the MCVs.  However, usually the MCVs will not be the extreme values, so
4927          * avoid unnecessary data copying.
4928          */
4929         if (get_attstatsslot(vardata->statsTuple,
4930                                                  vardata->atttype, vardata->atttypmod,
4931                                                  STATISTIC_KIND_MCV, InvalidOid,
4932                                                  NULL,
4933                                                  &values, &nvalues,
4934                                                  NULL, NULL))
4935         {
4936                 bool            tmin_is_mcv = false;
4937                 bool            tmax_is_mcv = false;
4938                 FmgrInfo        opproc;
4939
4940                 fmgr_info(get_opcode(sortop), &opproc);
4941
4942                 for (i = 0; i < nvalues; i++)
4943                 {
4944                         if (!have_data)
4945                         {
4946                                 tmin = tmax = values[i];
4947                                 tmin_is_mcv = tmax_is_mcv = have_data = true;
4948                                 continue;
4949                         }
4950                         if (DatumGetBool(FunctionCall2Coll(&opproc,
4951                                                                                            DEFAULT_COLLATION_OID,
4952                                                                                            values[i], tmin)))
4953                         {
4954                                 tmin = values[i];
4955                                 tmin_is_mcv = true;
4956                         }
4957                         if (DatumGetBool(FunctionCall2Coll(&opproc,
4958                                                                                            DEFAULT_COLLATION_OID,
4959                                                                                            tmax, values[i])))
4960                         {
4961                                 tmax = values[i];
4962                                 tmax_is_mcv = true;
4963                         }
4964                 }
4965                 if (tmin_is_mcv)
4966                         tmin = datumCopy(tmin, typByVal, typLen);
4967                 if (tmax_is_mcv)
4968                         tmax = datumCopy(tmax, typByVal, typLen);
4969                 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
4970         }
4971
4972         *min = tmin;
4973         *max = tmax;
4974         return have_data;
4975 }
4976
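/*
 * To illustrate the MCV widening above with hypothetical numbers: if the
 * histogram for a column spans [10, 95] but its most-common-values list
 * contains 100, the MCV loop replaces tmax with 100, since the histogram
 * deliberately excludes the MCVs and so can understate the true extremes.
 * Only an MCV that actually becomes the new minimum or maximum gets
 * datumCopy'd out.
 */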
4977
4978 /*
4979  * get_actual_variable_range
4980  *              Attempt to identify the current *actual* minimum and/or maximum
4981  *              of the specified variable, by looking for a suitable btree index
4982  *              and fetching its low and/or high values.
4983  *              If successful, store values in *min and *max, and return TRUE.
4984  *              (Either pointer can be NULL if that endpoint isn't needed.)
4985  *              If no data available, return FALSE.
4986  *
4987  * sortop is the "<" comparison operator to use.
4988  */
4989 static bool
4990 get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
4991                                                   Oid sortop,
4992                                                   Datum *min, Datum *max)
4993 {
4994         bool            have_data = false;
4995         RelOptInfo *rel = vardata->rel;
4996         RangeTblEntry *rte;
4997         ListCell   *lc;
4998
4999         /* No hope if no relation or it doesn't have indexes */
5000         if (rel == NULL || rel->indexlist == NIL)
5001                 return false;
5002         /* If it has indexes it must be a plain relation */
5003         rte = root->simple_rte_array[rel->relid];
5004         Assert(rte->rtekind == RTE_RELATION);
5005
5006         /* Search through the indexes to see if any match our problem */
5007         foreach(lc, rel->indexlist)
5008         {
5009                 IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
5010                 ScanDirection indexscandir;
5011
5012                 /* Ignore non-btree indexes */
5013                 if (index->relam != BTREE_AM_OID)
5014                         continue;
5015
5016                 /*
5017                  * Ignore partial indexes --- we only want stats that cover the entire
5018                  * relation.
5019                  */
5020                 if (index->indpred != NIL)
5021                         continue;
5022
5023                 /*
5024                  * The index list might include hypothetical indexes inserted by a
5025                  * get_relation_info hook --- don't try to access them.
5026                  */
5027                 if (index->hypothetical)
5028                         continue;
5029
5030                 /*
5031                  * The first index column must match the desired variable and sort
5032                  * operator --- but we can use a descending-order index.
5033                  */
5034                 if (!match_index_to_operand(vardata->var, 0, index))
5035                         continue;
5036                 switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
5037                 {
5038                         case BTLessStrategyNumber:
5039                                 if (index->reverse_sort[0])
5040                                         indexscandir = BackwardScanDirection;
5041                                 else
5042                                         indexscandir = ForwardScanDirection;
5043                                 break;
5044                         case BTGreaterStrategyNumber:
5045                                 if (index->reverse_sort[0])
5046                                         indexscandir = ForwardScanDirection;
5047                                 else
5048                                         indexscandir = BackwardScanDirection;
5049                                 break;
5050                         default:
5051                                 /* index doesn't match the sortop */
5052                                 continue;
5053                 }
5054
5055                 /*
5056                  * Found a suitable index to extract data from.  We'll need an EState
5057                  * and a bunch of other infrastructure.
5058                  */
5059                 {
5060                         EState     *estate;
5061                         ExprContext *econtext;
5062                         MemoryContext tmpcontext;
5063                         MemoryContext oldcontext;
5064                         Relation        heapRel;
5065                         Relation        indexRel;
5066                         IndexInfo  *indexInfo;
5067                         TupleTableSlot *slot;
5068                         int16           typLen;
5069                         bool            typByVal;
5070                         ScanKeyData scankeys[1];
5071                         IndexScanDesc index_scan;
5072                         HeapTuple       tup;
5073                         Datum           values[INDEX_MAX_KEYS];
5074                         bool            isnull[INDEX_MAX_KEYS];
5075                         SnapshotData SnapshotDirty;
5076
5077                         estate = CreateExecutorState();
5078                         econtext = GetPerTupleExprContext(estate);
5079                         /* Make sure any cruft is generated in the econtext's memory */
5080                         tmpcontext = econtext->ecxt_per_tuple_memory;
5081                         oldcontext = MemoryContextSwitchTo(tmpcontext);
5082
5083                         /*
5084                          * Open the table and index so we can read from them.  We should
5085                          * already have at least AccessShareLock on the table, but not
5086                          * necessarily on the index.
5087                          */
5088                         heapRel = heap_open(rte->relid, NoLock);
5089                         indexRel = index_open(index->indexoid, AccessShareLock);
5090
5091                         /* extract index key information from the index's pg_index info */
5092                         indexInfo = BuildIndexInfo(indexRel);
5093
5094                         /* set up a tuple slot, datatype info, and a dirty snapshot */
5095                         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
5096                         econtext->ecxt_scantuple = slot;
5097                         get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5098                         InitDirtySnapshot(SnapshotDirty);
5099
5100                         /* set up an IS NOT NULL scan key so that we ignore nulls */
5101                         ScanKeyEntryInitialize(&scankeys[0],
5102                                                                    SK_ISNULL | SK_SEARCHNOTNULL,
5103                                                                    1,   /* index col to scan */
5104                                                                    InvalidStrategy,             /* no strategy */
5105                                                                    InvalidOid,  /* no strategy subtype */
5106                                                                    InvalidOid,  /* no collation */
5107                                                                    InvalidOid,  /* no reg proc for this */
5108                                                                    (Datum) 0);  /* constant */
5109
5110                         have_data = true;
5111
5112                         /* If min is requested ... */
5113                         if (min)
5114                         {
5115                                 /*
5116                                  * In principle, we should scan the index with our current
5117                                  * active snapshot, which is the best approximation we've got
5118                                  * to what the query will see when executed.  But that won't
5119                                  * be exact if a new snap is taken before running the query,
5120                                  * and it can be very expensive if a lot of uncommitted rows
5121                                  * exist at the end of the index (because we'll laboriously
5122                                  * fetch each one and reject it).  What seems like a good
5123                                  * compromise is to use SnapshotDirty.  That will accept
5124                                  * uncommitted rows, and thus avoid fetching multiple heap
5125                                  * tuples in this scenario.  On the other hand, it will reject
5126                                  * known-dead rows, and thus not give a bogus answer when the
5127                                  * extreme value has been deleted; that case motivates not
5128                                  * using SnapshotAny here.
5129                                  */
5130                                 index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
5131                                                                                          1, 0);
5132                                 index_rescan(index_scan, scankeys, 1, NULL, 0);
5133
5134                                 /* Fetch first tuple in sortop's direction */
5135                                 if ((tup = index_getnext(index_scan,
5136                                                                                  indexscandir)) != NULL)
5137                                 {
5138                                         /* Extract the index column values from the heap tuple */
5139                                         ExecStoreTuple(tup, slot, InvalidBuffer, false);
5140                                         FormIndexDatum(indexInfo, slot, estate,
5141                                                                    values, isnull);
5142
5143                                         /* Shouldn't have got a null, but be careful */
5144                                         if (isnull[0])
5145                                                 elog(ERROR, "found unexpected null value in index \"%s\"",
5146                                                          RelationGetRelationName(indexRel));
5147
5148                                         /* Copy the index column value out to caller's context */
5149                                         MemoryContextSwitchTo(oldcontext);
5150                                         *min = datumCopy(values[0], typByVal, typLen);
5151                                         MemoryContextSwitchTo(tmpcontext);
5152                                 }
5153                                 else
5154                                         have_data = false;
5155
5156                                 index_endscan(index_scan);
5157                         }
5158
5159                         /* If max is requested, and we didn't already find the index to be empty */
5160                         if (max && have_data)
5161                         {
5162                                 index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
5163                                                                                          1, 0);
5164                                 index_rescan(index_scan, scankeys, 1, NULL, 0);
5165
5166                                 /* Fetch first tuple in reverse direction */
5167                                 if ((tup = index_getnext(index_scan,
5168                                                                                  -indexscandir)) != NULL)
5169                                 {
5170                                         /* Extract the index column values from the heap tuple */
5171                                         ExecStoreTuple(tup, slot, InvalidBuffer, false);
5172                                         FormIndexDatum(indexInfo, slot, estate,
5173                                                                    values, isnull);
5174
5175                                         /* Shouldn't have got a null, but be careful */
5176                                         if (isnull[0])
5177                                                 elog(ERROR, "found unexpected null value in index \"%s\"",
5178                                                          RelationGetRelationName(indexRel));
5179
5180                                         /* Copy the index column value out to caller's context */
5181                                         MemoryContextSwitchTo(oldcontext);
5182                                         *max = datumCopy(values[0], typByVal, typLen);
5183                                         MemoryContextSwitchTo(tmpcontext);
5184                                 }
5185                                 else
5186                                         have_data = false;
5187
5188                                 index_endscan(index_scan);
5189                         }
5190
5191                         /* Clean everything up */
5192                         ExecDropSingleTupleTableSlot(slot);
5193
5194                         index_close(indexRel, AccessShareLock);
5195                         heap_close(heapRel, NoLock);
5196
5197                         MemoryContextSwitchTo(oldcontext);
5198                         FreeExecutorState(estate);
5199
5200                         /* And we're done */
5201                         break;
5202                 }
5203         }
5204
5205         return have_data;
5206 }
5207
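/*
 * Example of the scan-direction logic above (the index definition is
 * hypothetical): with sortop "<" and an index built on (x DESC),
 * reverse_sort[0] is true, so BTLessStrategyNumber maps to
 * BackwardScanDirection and the column minimum is fetched from the end of
 * the index; the maximum is then fetched with the opposite direction,
 * -indexscandir.
 */
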
5208 /*
5209  * find_join_input_rel
5210  *              Look up the input relation for a join.
5211  *
5212  * The input relation's RelOptInfo is expected to have been constructed
5213  * already.
5214  */
5215 static RelOptInfo *
5216 find_join_input_rel(PlannerInfo *root, Relids relids)
5217 {
5218         RelOptInfo *rel = NULL;
5219
5220         switch (bms_membership(relids))
5221         {
5222                 case BMS_EMPTY_SET:
5223                         /* should not happen */
5224                         break;
5225                 case BMS_SINGLETON:
5226                         rel = find_base_rel(root, bms_singleton_member(relids));
5227                         break;
5228                 case BMS_MULTIPLE:
5229                         rel = find_join_rel(root, relids);
5230                         break;
5231         }
5232
5233         if (rel == NULL)
5234                 elog(ERROR, "could not find RelOptInfo for given relids");
5235
5236         return rel;
5237 }
5238
5239
5240 /*-------------------------------------------------------------------------
5241  *
5242  * Pattern analysis functions
5243  *
5244  * These routines support analysis of LIKE and regular-expression patterns
5245  * by the planner/optimizer.  It's important that they agree with the
5246  * regular-expression code in backend/regex/ and the LIKE code in
5247  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
5248  * must be conservative: if we report a string longer than the true fixed
5249  * prefix, the query may return outright wrong answers, rather than merely
5250  * a bad selectivity estimate!
5251  *
5252  * Note that the prefix-analysis functions are called from
5253  * backend/optimizer/path/indxpath.c as well as from routines in this file.
5254  *
5255  *-------------------------------------------------------------------------
5256  */
5257
5258 /*
5259  * Check whether char is a letter (and, hence, subject to case-folding)
5260  *
5261  * In multibyte character sets, we can't use isalpha, and it does not seem
5262  * worth trying to convert to wchar_t to use iswalpha.  Instead, just assume
5263  * any multibyte char is potentially case-varying.
5264  */
5265 static int
5266 pattern_char_isalpha(char c, bool is_multibyte,
5267                                          pg_locale_t locale, bool locale_is_c)
5268 {
5269         if (locale_is_c)
5270                 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
5271         else if (is_multibyte && IS_HIGHBIT_SET(c))
5272                 return true;
5273 #ifdef HAVE_LOCALE_T
5274         else if (locale)
5275                 return isalpha_l((unsigned char) c, locale);
5276 #endif
5277         else
5278                 return isalpha((unsigned char) c);
5279 }
5280
5281 /*
5282  * Extract the fixed prefix, if any, for a pattern.
5283  *
5284  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
5285  *      or to NULL if no fixed prefix exists for the pattern.
5286  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
5287  *      selectivity of the remainder of the pattern (without any fixed prefix).
5288  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
5289  *
5290  * The return value distinguishes no fixed prefix, a partial prefix,
5291  * or an exact-match-only pattern.
5292  */
5293
5294 static Pattern_Prefix_Status
5295 like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
5296                                   Const **prefix_const, Selectivity *rest_selec)
5297 {
5298         char       *match;
5299         char       *patt;
5300         int                     pattlen;
5301         Oid                     typeid = patt_const->consttype;
5302         int                     pos,
5303                                 match_pos;
5304         bool            is_multibyte = (pg_database_encoding_max_length() > 1);
5305         pg_locale_t locale = 0;
5306         bool            locale_is_c = false;
5307
5308         /* the right-hand const is type text or bytea */
5309         Assert(typeid == BYTEAOID || typeid == TEXTOID);
5310
5311         if (case_insensitive)
5312         {
5313                 if (typeid == BYTEAOID)
5314                         ereport(ERROR,
5315                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5316                         errmsg("case insensitive matching not supported on type bytea")));
5317
5318                 /* If case-insensitive, we need locale info */
5319                 if (lc_ctype_is_c(collation))
5320                         locale_is_c = true;
5321                 else if (collation != DEFAULT_COLLATION_OID)
5322                 {
5323                         if (!OidIsValid(collation))
5324                         {
5325                                 /*
5326                                  * This typically means that the parser could not resolve a
5327                                  * conflict of implicit collations, so report it that way.
5328                                  */
5329                                 ereport(ERROR,
5330                                                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
5331                                                  errmsg("could not determine which collation to use for ILIKE"),
5332                                                  errhint("Use the COLLATE clause to set the collation explicitly.")));
5333                         }
5334                         locale = pg_newlocale_from_collation(collation);
5335                 }
5336         }
5337
5338         if (typeid != BYTEAOID)
5339         {
5340                 patt = TextDatumGetCString(patt_const->constvalue);
5341                 pattlen = strlen(patt);
5342         }
5343         else
5344         {
5345                 bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
5346
5347                 pattlen = VARSIZE_ANY_EXHDR(bstr);
5348                 patt = (char *) palloc(pattlen);
5349                 memcpy(patt, VARDATA_ANY(bstr), pattlen);
5350                 Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
5351         }
5352
5353         match = palloc(pattlen + 1);
5354         match_pos = 0;
5355         for (pos = 0; pos < pattlen; pos++)
5356         {
5357                 /* % and _ are wildcard characters in LIKE */
5358                 if (patt[pos] == '%' ||
5359                         patt[pos] == '_')
5360                         break;
5361
5362                 /* Backslash escapes the next character */
5363                 if (patt[pos] == '\\')
5364                 {
5365                         pos++;
5366                         if (pos >= pattlen)
5367                                 break;
5368                 }
5369
5370                 /* Stop if case-varying character (it's sort of a wildcard) */
5371                 if (case_insensitive &&
5372                   pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
5373                         break;
5374
5375                 match[match_pos++] = patt[pos];
5376         }
5377
5378         match[match_pos] = '\0';
5379
5380         if (typeid != BYTEAOID)
5381                 *prefix_const = string_to_const(match, typeid);
5382         else
5383                 *prefix_const = string_to_bytea_const(match, match_pos);
5384
5385         if (rest_selec != NULL)
5386                 *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
5387                                                                            case_insensitive);
5388
5389         pfree(patt);
5390         pfree(match);
5391
5392         /* in LIKE, an empty pattern is an exact match! */
5393         if (pos == pattlen)
5394                 return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
5395
5396         if (match_pos > 0)
5397                 return Pattern_Prefix_Partial;
5398
5399         return Pattern_Prefix_None;
5400 }
5401
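/*
 * A worked example of the escape handling above, using a hypothetical
 * pattern: for LIKE 'ab\%c%', the backslash is dropped and the escaped '%'
 * is kept as a literal character, so the extracted prefix is "ab%c"
 * (Pattern_Prefix_Partial) and only the final, unescaped '%' is passed on
 * to like_selectivity() as the remainder.
 */
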
5402 static Pattern_Prefix_Status
5403 regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
5404                                    Const **prefix_const, Selectivity *rest_selec)
5405 {
5406         Oid                     typeid = patt_const->consttype;
5407         char       *prefix;
5408         bool            exact;
5409
5410         /*
5411          * This should be unnecessary, since there are no bytea regex operators
5412          * defined.  Note, therefore, that the rest of this function has *not* been
5413          * made safe for binary (possibly NUL-containing) strings.
5414          */
5415         if (typeid == BYTEAOID)
5416                 ereport(ERROR,
5417                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5418                  errmsg("regular-expression matching not supported on type bytea")));
5419
5420         /* Use the regexp machinery to extract the prefix, if any */
5421         prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
5422                                                                  case_insensitive, collation,
5423                                                                  &exact);
5424
5425         if (prefix == NULL)
5426         {
5427                 *prefix_const = NULL;
5428
5429                 if (rest_selec != NULL)
5430                 {
5431                         char       *patt = TextDatumGetCString(patt_const->constvalue);
5432
5433                         *rest_selec = regex_selectivity(patt, strlen(patt),
5434                                                                                         case_insensitive,
5435                                                                                         0);
5436                         pfree(patt);
5437                 }
5438
5439                 return Pattern_Prefix_None;
5440         }
5441
5442         *prefix_const = string_to_const(prefix, typeid);
5443
5444         if (rest_selec != NULL)
5445         {
5446                 if (exact)
5447                 {
5448                         /* Exact match, so there's no additional selectivity */
5449                         *rest_selec = 1.0;
5450                 }
5451                 else
5452                 {
5453                         char       *patt = TextDatumGetCString(patt_const->constvalue);
5454
5455                         *rest_selec = regex_selectivity(patt, strlen(patt),
5456                                                                                         case_insensitive,
5457                                                                                         strlen(prefix));
5458                         pfree(patt);
5459                 }
5460         }
5461
5462         pfree(prefix);
5463
5464         if (exact)
5465                 return Pattern_Prefix_Exact;    /* pattern specifies exact match */
5466         else
5467                 return Pattern_Prefix_Partial;
5468 }
5469
5470 Pattern_Prefix_Status
5471 pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
5472                                          Const **prefix, Selectivity *rest_selec)
5473 {
5474         Pattern_Prefix_Status result;
5475
5476         switch (ptype)
5477         {
5478                 case Pattern_Type_Like:
5479                         result = like_fixed_prefix(patt, false, collation,
5480                                                                            prefix, rest_selec);
5481                         break;
5482                 case Pattern_Type_Like_IC:
5483                         result = like_fixed_prefix(patt, true, collation,
5484                                                                            prefix, rest_selec);
5485                         break;
5486                 case Pattern_Type_Regex:
5487                         result = regex_fixed_prefix(patt, false, collation,
5488                                                                                 prefix, rest_selec);
5489                         break;
5490                 case Pattern_Type_Regex_IC:
5491                         result = regex_fixed_prefix(patt, true, collation,
5492                                                                                 prefix, rest_selec);
5493                         break;
5494                 default:
5495                         elog(ERROR, "unrecognized ptype: %d", (int) ptype);
5496                         result = Pattern_Prefix_None;           /* keep compiler quiet */
5497                         break;
5498         }
5499         return result;
5500 }
5501
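/*
 * A minimal usage sketch for pattern_fixed_prefix(), shown for illustration
 * only and fenced off with NOT_USED since nothing in this file calls it;
 * the pattern "foo%bar" is hypothetical.
 */
#ifdef NOT_USED
static void
pattern_fixed_prefix_example(void)
{
        Const      *patt = string_to_const("foo%bar", TEXTOID);
        Const      *prefix = NULL;
        Selectivity rest_selec = 0;
        Pattern_Prefix_Status pstatus;

        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
                                       DEFAULT_COLLATION_OID,
                                       &prefix, &rest_selec);
        /* pstatus is Pattern_Prefix_Partial here, and prefix holds "foo" */
}
#endif
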
5502 /*
5503  * Estimate the selectivity of a fixed prefix for a pattern match.
5504  *
5505  * A fixed prefix "foo" is estimated as the selectivity of the expression
5506  * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
5507  *
5508  * The selectivity estimate is with respect to the portion of the column
5509  * population represented by the histogram --- the caller must fold this
5510  * together with info about MCVs and NULLs.
5511  *
5512  * We use the >= and < operators from the specified btree opfamily to do the
5513  * estimation.  The given variable and Const must be of the associated
5514  * datatype.
5515  *
5516  * XXX Note: we make use of the upper bound to estimate operator selectivity
5517  * even if the locale is such that we cannot rely on the upper-bound string.
5518  * The selectivity only needs to be approximately right anyway, so it seems
5519  * more useful to use the upper-bound code than not.
5520  */
5521 static Selectivity
5522 prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
5523                                    Oid vartype, Oid opfamily, Const *prefixcon)
5524 {
5525         Selectivity prefixsel;
5526         Oid                     cmpopr;
5527         FmgrInfo        opproc;
5528         Const      *greaterstrcon;
5529         Selectivity eq_sel;
5530
5531         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5532                                                                  BTGreaterEqualStrategyNumber);
5533         if (cmpopr == InvalidOid)
5534                 elog(ERROR, "no >= operator for opfamily %u", opfamily);
5535         fmgr_info(get_opcode(cmpopr), &opproc);
5536
5537         prefixsel = ineq_histogram_selectivity(root, vardata, &opproc, true,
5538                                                                                    prefixcon->constvalue,
5539                                                                                    prefixcon->consttype);
5540
5541         if (prefixsel < 0.0)
5542         {
5543                 /* No histogram is present ... return a suitable default estimate */
5544                 return DEFAULT_MATCH_SEL;
5545         }
5546
5547         /*-------
5548          * If we can create a string larger than the prefix, say
5549          *      "x < greaterstr".
5550          *-------
5551          */
5552         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5553                                                                  BTLessStrategyNumber);
5554         if (cmpopr == InvalidOid)
5555                 elog(ERROR, "no < operator for opfamily %u", opfamily);
5556         fmgr_info(get_opcode(cmpopr), &opproc);
5557         greaterstrcon = make_greater_string(prefixcon, &opproc,
5558                                                                                 DEFAULT_COLLATION_OID);
5559         if (greaterstrcon)
5560         {
5561                 Selectivity topsel;
5562
5563                 topsel = ineq_histogram_selectivity(root, vardata, &opproc, false,
5564                                                                                         greaterstrcon->constvalue,
5565                                                                                         greaterstrcon->consttype);
5566
5567                 /* ineq_histogram_selectivity worked before, so it shouldn't fail now */
5568                 Assert(topsel >= 0.0);
5569
5570                 /*
5571                  * Merge the two selectivities in the same way as for a range query
5572                  * (see clauselist_selectivity()).  Note that we don't need to worry
5573                  * about double-exclusion of nulls, since ineq_histogram_selectivity
5574                  * doesn't count those anyway.
5575                  */
5576                 prefixsel = topsel + prefixsel - 1.0;
5577         }
5578
5579         /*
5580          * If the prefix is long then the two bounding values might be too close
5581          * together for the histogram to distinguish them usefully, resulting in a
5582          * zero estimate (plus or minus roundoff error). To avoid returning a
5583          * ridiculously small estimate, compute the estimated selectivity for
5584          * "variable = 'foo'", and clamp to that. (Obviously, the resultant
5585          * estimate should be at least that.)
5586          *
5587          * We apply this even if we couldn't make a greater string.  That case
5588          * suggests that the prefix is near the maximum possible, and thus
5589          * probably off the end of the histogram, and thus we probably got a very
5590          * small estimate from the >= condition; so we still need to clamp.
5591          */
5592         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5593                                                                  BTEqualStrategyNumber);
5594         if (cmpopr == InvalidOid)
5595                 elog(ERROR, "no = operator for opfamily %u", opfamily);
5596         eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
5597                                                   false, true);
5598
5599         prefixsel = Max(prefixsel, eq_sel);
5600
5601         return prefixsel;
5602 }
5603
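/*
 * Putting the pieces of prefix_selectivity() together with hypothetical
 * histogram fractions: for prefix 'foo', suppose P(x >= 'foo') = 0.40 and,
 * with greaterstr = 'fop', P(x < 'fop') = 0.65.  The range-style merge
 * gives 0.65 + 0.40 - 1.0 = 0.05, which is then clamped from below by the
 * "x = 'foo'" estimate in case the two bounds were too close together for
 * the histogram to resolve.
 */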
5604
5605 /*
5606  * Estimate the selectivity of a pattern of the specified type.
5607  * Note that any fixed prefix of the pattern will have been removed already,
5608  * so actually we may be looking at just a fragment of the pattern.
5609  *
5610  * For now, we use a very simplistic approach: fixed characters reduce the
5611  * selectivity a good deal, character ranges reduce it a little,
5612  * wildcards (such as % for LIKE or .* for regex) increase it.
5613  */
5614
5615 #define FIXED_CHAR_SEL  0.20    /* about 1/5 */
5616 #define CHAR_RANGE_SEL  0.25
5617 #define ANY_CHAR_SEL    0.9             /* not 1, since it won't match end-of-string */
5618 #define FULL_WILDCARD_SEL 5.0
5619 #define PARTIAL_WILDCARD_SEL 2.0
5620
5621 static Selectivity
5622 like_selectivity(const char *patt, int pattlen, bool case_insensitive)
5623 {
5624         Selectivity sel = 1.0;
5625         int                     pos;
5626
5627         /* Skip any leading wildcards; they're already factored into initial sel */
5628         for (pos = 0; pos < pattlen; pos++)
5629         {
5630                 if (patt[pos] != '%' && patt[pos] != '_')
5631                         break;
5632         }
5633
5634         for (; pos < pattlen; pos++)
5635         {
5636                 /* % and _ are wildcard characters in LIKE */
5637                 if (patt[pos] == '%')
5638                         sel *= FULL_WILDCARD_SEL;
5639                 else if (patt[pos] == '_')
5640                         sel *= ANY_CHAR_SEL;
5641                 else if (patt[pos] == '\\')
5642                 {
5643                         /* Backslash quotes the next character */
5644                         pos++;
5645                         if (pos >= pattlen)
5646                                 break;
5647                         sel *= FIXED_CHAR_SEL;
5648                 }
5649                 else
5650                         sel *= FIXED_CHAR_SEL;
5651         }
5652         /* Could get sel > 1 if multiple wildcards */
5653         if (sel > 1.0)
5654                 sel = 1.0;
5655         return sel;
5656 }
5657
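/*
 * A worked example for like_selectivity(), using a hypothetical remainder
 * pattern "%ab_c": the leading % is skipped (it is already reflected in the
 * initial selectivity), then 'a' and 'b' each contribute FIXED_CHAR_SEL,
 * '_' contributes ANY_CHAR_SEL, and 'c' another FIXED_CHAR_SEL, giving
 * 0.2 * 0.2 * 0.9 * 0.2 = 0.0072.
 */
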
5658 static Selectivity
5659 regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
5660 {
5661         Selectivity sel = 1.0;
5662         int                     paren_depth = 0;
5663         int                     paren_pos = 0;  /* dummy init to keep compiler quiet */
5664         int                     pos;
5665
5666         for (pos = 0; pos < pattlen; pos++)
5667         {
5668                 if (patt[pos] == '(')
5669                 {
5670                         if (paren_depth == 0)
5671                                 paren_pos = pos;        /* remember start of parenthesized item */
5672                         paren_depth++;
5673                 }
5674                 else if (patt[pos] == ')' && paren_depth > 0)
5675                 {
5676                         paren_depth--;
5677                         if (paren_depth == 0)
5678                                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
5679                                                                                          pos - (paren_pos + 1),
5680                                                                                          case_insensitive);
5681                 }
5682                 else if (patt[pos] == '|' && paren_depth == 0)
5683                 {
5684                         /*
5685                          * If unquoted | is present at paren level 0 in pattern, we have
5686                          * multiple alternatives; sum their probabilities.
5687                          */
5688                         sel += regex_selectivity_sub(patt + (pos + 1),
5689                                                                                  pattlen - (pos + 1),
5690                                                                                  case_insensitive);
5691                         break;                          /* rest of pattern is now processed */
5692                 }
5693                 else if (patt[pos] == '[')
5694                 {
5695                         bool            negclass = false;
5696
5697                         if (patt[++pos] == '^')
5698                         {
5699                                 negclass = true;
5700                                 pos++;
5701                         }
5702                         if (patt[pos] == ']')           /* ']' at start of class is not
5703                                                                                  * special */
5704                                 pos++;
5705                         while (pos < pattlen && patt[pos] != ']')
5706                                 pos++;
5707                         if (paren_depth == 0)
5708                                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
5709                 }
5710                 else if (patt[pos] == '.')
5711                 {
5712                         if (paren_depth == 0)
5713                                 sel *= ANY_CHAR_SEL;
5714                 }
5715                 else if (patt[pos] == '*' ||
5716                                  patt[pos] == '?' ||
5717                                  patt[pos] == '+')
5718                 {
5719                         /* Ought to be smarter about quantifiers... */
5720                         if (paren_depth == 0)
5721                                 sel *= PARTIAL_WILDCARD_SEL;
5722                 }
5723                 else if (patt[pos] == '{')
5724                 {
5725                         while (pos < pattlen && patt[pos] != '}')
5726                                 pos++;
5727                         if (paren_depth == 0)
5728                                 sel *= PARTIAL_WILDCARD_SEL;
5729                 }
5730                 else if (patt[pos] == '\\')
5731                 {
5732                         /* backslash quotes the next character */
5733                         pos++;
5734                         if (pos >= pattlen)
5735                                 break;
5736                         if (paren_depth == 0)
5737                                 sel *= FIXED_CHAR_SEL;
5738                 }
5739                 else
5740                 {
5741                         if (paren_depth == 0)
5742                                 sel *= FIXED_CHAR_SEL;
5743                 }
5744         }
5745         /* Could get sel > 1 if multiple wildcards */
5746         if (sel > 1.0)
5747                 sel = 1.0;
5748         return sel;
5749 }
5750
5751 static Selectivity
5752 regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
5753                                   int fixed_prefix_len)
5754 {
5755         Selectivity sel;
5756
5757         /* If patt doesn't end with $, consider it to have a trailing wildcard */
5758         if (pattlen > 0 && patt[pattlen - 1] == '$' &&
5759                 (pattlen == 1 || patt[pattlen - 2] != '\\'))
5760         {
5761                 /* has trailing $ */
5762                 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
5763         }
5764         else
5765         {
5766                 /* no trailing $ */
5767                 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
5768                 sel *= FULL_WILDCARD_SEL;
5769         }
5770
5771         /* If there's a fixed prefix, discount its selectivity */
5772         if (fixed_prefix_len > 0)
5773                 sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
5774
5775         /* Make sure result stays in range */
5776         CLAMP_PROBABILITY(sel);
5777         return sel;
5778 }
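/*
 * A minimal standalone sketch of the arithmetic implemented above, using
 * made-up DEMO_* constants; the real FIXED_CHAR_SEL, ANY_CHAR_SEL,
 * PARTIAL_WILDCARD_SEL and FULL_WILDCARD_SEL macros are defined earlier in
 * this file and their values are not reproduced here.  The sketch walks the
 * pattern fragment "a.b*" (no trailing '$', no fixed prefix, so the
 * fixed-prefix discount is omitted): one factor per pattern element, a
 * full-wildcard factor for the unanchored tail, then a clamp as
 * CLAMP_PROBABILITY would do.
 */
#define DEMO_FIXED_CHAR_SEL			0.20	/* illustrative value only */
#define DEMO_ANY_CHAR_SEL			0.90	/* illustrative value only */
#define DEMO_PARTIAL_WILDCARD_SEL	2.0		/* illustrative value only */
#define DEMO_FULL_WILDCARD_SEL		5.0		/* illustrative value only */

static double
demo_regex_selectivity(void)
{
	double		sel;

	/* per-element factors for the pattern fragment "a.b*" */
	sel = DEMO_FIXED_CHAR_SEL			/* 'a' */
		* DEMO_ANY_CHAR_SEL				/* '.' */
		* DEMO_FIXED_CHAR_SEL			/* 'b' */
		* DEMO_PARTIAL_WILDCARD_SEL;	/* '*' */

	/* no trailing '$', so treat the pattern as having a trailing wildcard */
	sel *= DEMO_FULL_WILDCARD_SEL;

	/* keep the result a valid probability */
	if (sel > 1.0)
		sel = 1.0;
	return sel;
}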
5779
5780
5781 /*
5782  * For bytea, the increment function need only increment the current byte
5783  * (there are no multibyte characters to worry about).
5784  */
5785 static bool
5786 byte_increment(unsigned char *ptr, int len)
5787 {
5788         if (*ptr >= 255)
5789                 return false;
5790         (*ptr)++;
5791         return true;
5792 }
5793
5794 /*
5795  * Try to generate a string greater than the given string or any
5796  * string it is a prefix of.  If successful, return a palloc'd string
5797  * in the form of a Const node; else return NULL.
5798  *
5799  * The caller must provide the appropriate "less than" comparison function
5800  * for testing the strings, along with the collation to use.
5801  *
5802  * The key requirement here is that given a prefix string, say "foo",
5803  * we must be able to generate another string "fop" that is greater than
5804  * all strings "foobar" starting with "foo".  We can test that we have
5805  * generated a string greater than the prefix string, but in non-C collations
5806  * that is not a bulletproof guarantee that no extension of the string will
5807  * sort after it; an example is that "foo " is less than "foo!", but it
5808  * is not clear that a "dictionary" sort ordering will consider "foo!" less
5809  * than "foo bar".  CAUTION: Therefore, this function should be used only for
5810  * estimation purposes when working in a non-C collation.
5811  *
5812  * To try to catch most cases where an extended string might otherwise sort
5813  * before the result value, we determine which of the strings "Z", "z", "y",
5814  * and "9" is seen as largest by the collation, and append that to the given
5815  * prefix before trying to find a string that compares as larger.
5816  *
5817  * To search for a greater string, we repeatedly "increment" the rightmost
5818  * character, using an encoding-specific character incrementer function.
5819  * When it's no longer possible to increment the last character, we truncate
5820  * off that character and start incrementing the next-to-rightmost.
5821  * For example, if "z" were the last character in the sort order, then we
5822  * could produce "foo" as a string greater than "fonz".
5823  *
5824  * This could be rather slow in the worst case, but in most cases we
5825  * won't have to try more than one or two strings before succeeding.
5826  *
5827  * Note that it's important for the character incrementer not to be too anal
5828  * about producing every possible character code, since in some cases the only
5829  * way to get a larger string is to increment a previous character position.
5830  * So we don't want to spend too much time trying every possible character
5831  * code at the last position.  A good rule of thumb is to be sure that we
5832  * don't try more than 256*K values for a K-byte character (and definitely
5833  * not 256^K, which is what an exhaustive search would approach).
5834  */
5835 Const *
5836 make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
5837 {
5838         Oid                     datatype = str_const->consttype;
5839         char       *workstr;
5840         int                     len;
5841         Datum           cmpstr;
5842         text       *cmptxt = NULL;
5843         mbcharacter_incrementer charinc;
5844
5845         /*
5846          * Get a modifiable copy of the prefix string in C-string format, and set
5847          * up the string we will compare to as a Datum.  In C locale this can just
5848          * be the given prefix string, otherwise we need to add a suffix.  Types
5849          * NAME and BYTEA sort bytewise so they don't need a suffix either.
5850          */
5851         if (datatype == NAMEOID)
5852         {
5853                 workstr = DatumGetCString(DirectFunctionCall1(nameout,
5854                                                                                                           str_const->constvalue));
5855                 len = strlen(workstr);
5856                 cmpstr = str_const->constvalue;
5857         }
5858         else if (datatype == BYTEAOID)
5859         {
5860                 bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
5861
5862                 len = VARSIZE_ANY_EXHDR(bstr);
5863                 workstr = (char *) palloc(len);
5864                 memcpy(workstr, VARDATA_ANY(bstr), len);
5865                 Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
5866                 cmpstr = str_const->constvalue;
5867         }
5868         else
5869         {
5870                 workstr = TextDatumGetCString(str_const->constvalue);
5871                 len = strlen(workstr);
5872                 if (lc_collate_is_c(collation) || len == 0)
5873                         cmpstr = str_const->constvalue;
5874                 else
5875                 {
5876                         /* If first time through, determine the suffix to use */
5877                         static char suffixchar = 0;
5878                         static Oid      suffixcollation = 0;
5879
5880                         if (!suffixchar || suffixcollation != collation)
5881                         {
5882                                 char       *best;
5883
5884                                 best = "Z";
5885                                 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
5886                                         best = "z";
5887                                 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
5888                                         best = "y";
5889                                 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
5890                                         best = "9";
5891                                 suffixchar = *best;
5892                                 suffixcollation = collation;
5893                         }
5894
5895                         /* And build the string to compare to */
5896                         cmptxt = (text *) palloc(VARHDRSZ + len + 1);
5897                         SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
5898                         memcpy(VARDATA(cmptxt), workstr, len);
5899                         *(VARDATA(cmptxt) + len) = suffixchar;
5900                         cmpstr = PointerGetDatum(cmptxt);
5901                 }
5902         }
5903
5904         /* Select appropriate character-incrementer function */
5905         if (datatype == BYTEAOID)
5906                 charinc = byte_increment;
5907         else
5908                 charinc = pg_database_encoding_character_incrementer();
5909
5910         /* And search ... */
5911         while (len > 0)
5912         {
5913                 int                     charlen;
5914                 unsigned char *lastchar;
5915
5916                 /* Identify the last character --- for bytea, just the last byte */
5917                 if (datatype == BYTEAOID)
5918                         charlen = 1;
5919                 else
5920                         charlen = len - pg_mbcliplen(workstr, len, len - 1);
5921                 lastchar = (unsigned char *) (workstr + len - charlen);
5922
5923                 /*
5924                  * Try to generate a larger string by incrementing the last character
5925                  * (for BYTEA, we treat each byte as a character).
5926                  *
5927                  * Note: the incrementer function is expected to return true if it's
5928                  * generated a valid-per-the-encoding new character, otherwise false.
5929                  * The contents of the character on false return are unspecified.
5930                  */
5931                 while (charinc(lastchar, charlen))
5932                 {
5933                         Const      *workstr_const;
5934
5935                         if (datatype == BYTEAOID)
5936                                 workstr_const = string_to_bytea_const(workstr, len);
5937                         else
5938                                 workstr_const = string_to_const(workstr, datatype);
5939
5940                         if (DatumGetBool(FunctionCall2Coll(ltproc,
5941                                                                                            collation,
5942                                                                                            cmpstr,
5943                                                                                            workstr_const->constvalue)))
5944                         {
5945                                 /* Successfully made a string larger than cmpstr */
5946                                 if (cmptxt)
5947                                         pfree(cmptxt);
5948                                 pfree(workstr);
5949                                 return workstr_const;
5950                         }
5951
5952                         /* No good, release unusable value and try again */
5953                         pfree(DatumGetPointer(workstr_const->constvalue));
5954                         pfree(workstr_const);
5955                 }
5956
5957                 /*
5958                  * No luck here, so truncate off the last character and try to
5959                  * increment the next one.
5960                  */
5961                 len -= charlen;
5962                 workstr[len] = '\0';
5963         }
5964
5965         /* Failed... */
5966         if (cmptxt)
5967                 pfree(cmptxt);
5968         pfree(workstr);
5969
5970         return NULL;
5971 }
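/*
 * A simplified standalone sketch of the search loop above, restricted to
 * single-byte ASCII data in C locale, so a plain strcmp() is a valid
 * "greater than the prefix" test and no collation-dependent suffix is
 * needed.  demo_make_greater_string() is purely illustrative: it increments
 * the last byte, and when that byte cannot be pushed any higher it truncates
 * it and tries the previous position, just as the real code does with
 * encoding-aware character increments.
 */
#include <stdlib.h>
#include <string.h>

static char *
demo_make_greater_string(const char *prefix)
{
	size_t		len = strlen(prefix);
	char	   *work = strdup(prefix);

	if (work == NULL)
		return NULL;

	while (len > 0)
	{
		unsigned char *lastchar = (unsigned char *) (work + len - 1);

		/* try successively larger values for the last byte */
		while (*lastchar < 0x7E)	/* stay within printable ASCII */
		{
			(*lastchar)++;
			if (strcmp(work, prefix) > 0)
				return work;	/* e.g. "foo" -> "fop" */
		}

		/* no luck; drop the last byte and increment the one before it */
		work[--len] = '\0';
	}

	free(work);					/* can't generate anything larger */
	return NULL;
}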
5972
5973 /*
5974  * Generate a Datum of the appropriate type from a C string.
5975  * Note that all of the supported types are pass-by-ref, so the
5976  * returned value should be pfree'd if no longer needed.
5977  */
5978 static Datum
5979 string_to_datum(const char *str, Oid datatype)
5980 {
5981         Assert(str != NULL);
5982
5983         /*
5984          * We cheat a little by assuming that CStringGetTextDatum() will do for
5985          * bpchar and varchar constants too...
5986          */
5987         if (datatype == NAMEOID)
5988                 return DirectFunctionCall1(namein, CStringGetDatum(str));
5989         else if (datatype == BYTEAOID)
5990                 return DirectFunctionCall1(byteain, CStringGetDatum(str));
5991         else
5992                 return CStringGetTextDatum(str);
5993 }
5994
5995 /*
5996  * Generate a Const node of the appropriate type from a C string.
5997  */
5998 static Const *
5999 string_to_const(const char *str, Oid datatype)
6000 {
6001         Datum           conval = string_to_datum(str, datatype);
6002         Oid                     collation;
6003         int                     constlen;
6004
6005         /*
6006          * We only need to support a few datatypes here, so hard-wire properties
6007          * instead of incurring the expense of catalog lookups.
6008          */
6009         switch (datatype)
6010         {
6011                 case TEXTOID:
6012                 case VARCHAROID:
6013                 case BPCHAROID:
6014                         collation = DEFAULT_COLLATION_OID;
6015                         constlen = -1;
6016                         break;
6017
6018                 case NAMEOID:
6019                         collation = InvalidOid;
6020                         constlen = NAMEDATALEN;
6021                         break;
6022
6023                 case BYTEAOID:
6024                         collation = InvalidOid;
6025                         constlen = -1;
6026                         break;
6027
6028                 default:
6029                         elog(ERROR, "unexpected datatype in string_to_const: %u",
6030                                  datatype);
6031                         return NULL;
6032         }
6033
6034         return makeConst(datatype, -1, collation, constlen,
6035                                          conval, false, false);
6036 }
6037
6038 /*
6039  * Generate a Const node of bytea type from a binary C string and a length.
6040  */
6041 static Const *
6042 string_to_bytea_const(const char *str, size_t str_len)
6043 {
6044         bytea      *bstr = palloc(VARHDRSZ + str_len);
6045         Datum           conval;
6046
6047         memcpy(VARDATA(bstr), str, str_len);
6048         SET_VARSIZE(bstr, VARHDRSZ + str_len);
6049         conval = PointerGetDatum(bstr);
6050
6051         return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
6052 }
6053
6054 /*-------------------------------------------------------------------------
6055  *
6056  * Index cost estimation functions
6057  *
6058  *-------------------------------------------------------------------------
6059  */
6060
6061 List *
6062 deconstruct_indexquals(IndexPath *path)
6063 {
6064         List       *result = NIL;
6065         IndexOptInfo *index = path->indexinfo;
6066         ListCell   *lcc,
6067                            *lci;
6068
6069         forboth(lcc, path->indexquals, lci, path->indexqualcols)
6070         {
6071                 RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(lcc));
6072                 int                     indexcol = lfirst_int(lci);
6073                 Expr       *clause;
6074                 Node       *leftop,
6075                                    *rightop;
6076                 IndexQualInfo *qinfo;
6077
6078                 clause = rinfo->clause;
6079
6080                 qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
6081                 qinfo->rinfo = rinfo;
6082                 qinfo->indexcol = indexcol;
6083
6084                 if (IsA(clause, OpExpr))
6085                 {
6086                         qinfo->clause_op = ((OpExpr *) clause)->opno;
6087                         leftop = get_leftop(clause);
6088                         rightop = get_rightop(clause);
6089                         if (match_index_to_operand(leftop, indexcol, index))
6090                         {
6091                                 qinfo->varonleft = true;
6092                                 qinfo->other_operand = rightop;
6093                         }
6094                         else
6095                         {
6096                                 Assert(match_index_to_operand(rightop, indexcol, index));
6097                                 qinfo->varonleft = false;
6098                                 qinfo->other_operand = leftop;
6099                         }
6100                 }
6101                 else if (IsA(clause, RowCompareExpr))
6102                 {
6103                         RowCompareExpr *rc = (RowCompareExpr *) clause;
6104
6105                         qinfo->clause_op = linitial_oid(rc->opnos);
6106                         /* Examine only first columns to determine left/right sides */
6107                         if (match_index_to_operand((Node *) linitial(rc->largs),
6108                                                                            indexcol, index))
6109                         {
6110                                 qinfo->varonleft = true;
6111                                 qinfo->other_operand = (Node *) rc->rargs;
6112                         }
6113                         else
6114                         {
6115                                 Assert(match_index_to_operand((Node *) linitial(rc->rargs),
6116                                                                                           indexcol, index));
6117                                 qinfo->varonleft = false;
6118                                 qinfo->other_operand = (Node *) rc->largs;
6119                         }
6120                 }
6121                 else if (IsA(clause, ScalarArrayOpExpr))
6122                 {
6123                         ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6124
6125                         qinfo->clause_op = saop->opno;
6126                         /* index column is always on the left in this case */
6127                         Assert(match_index_to_operand((Node *) linitial(saop->args),
6128                                                                                   indexcol, index));
6129                         qinfo->varonleft = true;
6130                         qinfo->other_operand = (Node *) lsecond(saop->args);
6131                 }
6132                 else if (IsA(clause, NullTest))
6133                 {
6134                         qinfo->clause_op = InvalidOid;
6135                         Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
6136                                                                                   indexcol, index));
6137                         qinfo->varonleft = true;
6138                         qinfo->other_operand = NULL;
6139                 }
6140                 else
6141                 {
6142                         elog(ERROR, "unsupported indexqual type: %d",
6143                                  (int) nodeTag(clause));
6144                 }
6145
6146                 result = lappend(result, qinfo);
6147         }
6148         return result;
6149 }
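/*
 * A toy standalone analogue of the normalization performed above: given the
 * two arguments of a binary clause, record which side holds the index column
 * and which expression is the "other operand", so that later code never has
 * to care about argument order.  Real clauses are expression trees matched
 * with match_index_to_operand(), not strings; the demo_* names below are
 * invented for illustration.
 */
#include <stdbool.h>
#include <string.h>

typedef struct DemoQualInfo
{
	bool		varonleft;		/* is the index column the left argument? */
	const char *other_operand;	/* the non-index side of the clause */
} DemoQualInfo;

static DemoQualInfo
demo_classify_qual(const char *leftop, const char *rightop,
				   const char *index_column)
{
	DemoQualInfo qinfo;

	if (strcmp(leftop, index_column) == 0)
	{
		qinfo.varonleft = true;
		qinfo.other_operand = rightop;
	}
	else
	{
		/* otherwise the index column must be the right argument */
		qinfo.varonleft = false;
		qinfo.other_operand = leftop;
	}
	return qinfo;
}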
6150
6151 /*
6152  * Simple function to compute the total eval cost of the "other operands"
6153  * in an IndexQualInfo list.  Since we know these will be evaluated just
6154  * once per scan, there's no need to distinguish startup from per-row cost.
6155  */
6156 static Cost
6157 other_operands_eval_cost(PlannerInfo *root, List *qinfos)
6158 {
6159         Cost            qual_arg_cost = 0;
6160         ListCell   *lc;
6161
6162         foreach(lc, qinfos)
6163         {
6164                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
6165                 QualCost        index_qual_cost;
6166
6167                 cost_qual_eval_node(&index_qual_cost, qinfo->other_operand, root);
6168                 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6169         }
6170         return qual_arg_cost;
6171 }
6172
6173 /*
6174  * Get other-operand eval cost for an index orderby list.
6175  *
6176  * Index orderby expressions aren't represented as RestrictInfos (since they
6177  * aren't boolean, usually).  So we can't apply deconstruct_indexquals to
6178  * them.  However, they are much simpler to deal with since they are always
6179  * OpExprs and the index column is always on the left.
6180  */
6181 static Cost
6182 orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
6183 {
6184         Cost            qual_arg_cost = 0;
6185         ListCell   *lc;
6186
6187         foreach(lc, path->indexorderbys)
6188         {
6189                 Expr       *clause = (Expr *) lfirst(lc);
6190                 Node       *other_operand;
6191                 QualCost        index_qual_cost;
6192
6193                 if (IsA(clause, OpExpr))
6194                 {
6195                         other_operand = get_rightop(clause);
6196                 }
6197                 else
6198                 {
6199                         elog(ERROR, "unsupported indexorderby type: %d",
6200                                  (int) nodeTag(clause));
6201                         other_operand = NULL;           /* keep compiler quiet */
6202                 }
6203
6204                 cost_qual_eval_node(&index_qual_cost, other_operand, root);
6205                 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6206         }
6207         return qual_arg_cost;
6208 }
6209
6210 void
6211 genericcostestimate(PlannerInfo *root,
6212                                         IndexPath *path,
6213                                         double loop_count,
6214                                         List *qinfos,
6215                                         GenericCosts *costs)
6216 {
6217         IndexOptInfo *index = path->indexinfo;
6218         List       *indexQuals = path->indexquals;
6219         List       *indexOrderBys = path->indexorderbys;
6220         Cost            indexStartupCost;
6221         Cost            indexTotalCost;
6222         Selectivity indexSelectivity;
6223         double          indexCorrelation;
6224         double          numIndexPages;
6225         double          numIndexTuples;
6226         double          spc_random_page_cost;
6227         double          num_sa_scans;
6228         double          num_outer_scans;
6229         double          num_scans;
6230         double          qual_op_cost;
6231         double          qual_arg_cost;
6232         List       *selectivityQuals;
6233         ListCell   *l;
6234
6235         /*
6236          * If the index is partial, AND the index predicate with the explicitly
6237          * given indexquals to produce a more accurate idea of the index
6238          * selectivity.
6239          */
6240         selectivityQuals = add_predicate_to_quals(index, indexQuals);
6241
6242         /*
6243          * Check for ScalarArrayOpExpr index quals, and estimate the number of
6244          * index scans that will be performed.
6245          */
6246         num_sa_scans = 1;
6247         foreach(l, indexQuals)
6248         {
6249                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6250
6251                 if (IsA(rinfo->clause, ScalarArrayOpExpr))
6252                 {
6253                         ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6254                         int                     alength = estimate_array_length(lsecond(saop->args));
6255
6256                         if (alength > 1)
6257                                 num_sa_scans *= alength;
6258                 }
6259         }
6260
6261         /* Estimate the fraction of main-table tuples that will be visited */
6262         indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6263                                                                                           index->rel->relid,
6264                                                                                           JOIN_INNER,
6265                                                                                           NULL);
6266
6267         /*
6268          * If caller didn't give us an estimate, estimate the number of index
6269          * tuples that will be visited.  We do it in this rather peculiar-looking
6270          * way in order to get the right answer for partial indexes.
6271          */
6272         numIndexTuples = costs->numIndexTuples;
6273         if (numIndexTuples <= 0.0)
6274         {
6275                 numIndexTuples = indexSelectivity * index->rel->tuples;
6276
6277                 /*
6278                  * The above calculation counts all the tuples visited across all
6279                  * scans induced by ScalarArrayOpExpr nodes.  We want to consider the
6280                  * average per-indexscan number, so adjust.  This is a handy place to
6281                  * round to integer, too.  (If caller supplied tuple estimate, it's
6282                  * responsible for handling these considerations.)
6283                  */
6284                 numIndexTuples = rint(numIndexTuples / num_sa_scans);
6285         }
6286
6287         /*
6288          * We can bound the number of tuples by the index size in any case. Also,
6289          * always estimate at least one tuple is touched, even when
6290          * indexSelectivity estimate is tiny.
6291          */
6292         if (numIndexTuples > index->tuples)
6293                 numIndexTuples = index->tuples;
6294         if (numIndexTuples < 1.0)
6295                 numIndexTuples = 1.0;
6296
6297         /*
6298          * Estimate the number of index pages that will be retrieved.
6299          *
6300          * We use the simplistic method of taking a pro-rata fraction of the total
6301          * number of index pages.  In effect, this counts only leaf pages and not
6302          * any overhead such as index metapage or upper tree levels.
6303          *
6304          * In practice access to upper index levels is often nearly free because
6305          * those tend to stay in cache under load; moreover, the cost involved is
6306          * highly dependent on index type.  We therefore ignore such costs here
6307          * and leave it to the caller to add a suitable charge if needed.
6308          */
6309         if (index->pages > 1 && index->tuples > 1)
6310                 numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6311         else
6312                 numIndexPages = 1.0;
6313
6314         /* fetch estimated page cost for tablespace containing index */
6315         get_tablespace_page_costs(index->reltablespace,
6316                                                           &spc_random_page_cost,
6317                                                           NULL);
6318
6319         /*
6320          * Now compute the disk access costs.
6321          *
6322          * The above calculations are all per-index-scan.  However, if we are in a
6323          * nestloop inner scan, we can expect the scan to be repeated (with
6324          * different search keys) for each row of the outer relation.  Likewise,
6325          * ScalarArrayOpExpr quals result in multiple index scans.  This creates
6326          * the potential for cache effects to reduce the number of disk page
6327          * fetches needed.  We want to estimate the average per-scan I/O cost in
6328          * the presence of caching.
6329          *
6330          * We use the Mackert-Lohman formula (see costsize.c for details) to
6331          * estimate the total number of page fetches that occur.  While this
6332          * wasn't what it was designed for, it seems a reasonable model anyway.
6333          * Note that we are counting pages not tuples anymore, so we take N = T =
6334          * index size, as if there were one "tuple" per page.
6335          */
6336         num_outer_scans = loop_count;
6337         num_scans = num_sa_scans * num_outer_scans;
6338
6339         if (num_scans > 1)
6340         {
6341                 double          pages_fetched;
6342
6343                 /* total page fetches ignoring cache effects */
6344                 pages_fetched = numIndexPages * num_scans;
6345
6346                 /* use Mackert and Lohman formula to adjust for cache effects */
6347                 pages_fetched = index_pages_fetched(pages_fetched,
6348                                                                                         index->pages,
6349                                                                                         (double) index->pages,
6350                                                                                         root);
6351
6352                 /*
6353                  * Now compute the total disk access cost, and then report a pro-rated
6354                  * share for each outer scan.  (Don't pro-rate for ScalarArrayOpExpr,
6355                  * since that's internal to the indexscan.)
6356                  */
6357                 indexTotalCost = (pages_fetched * spc_random_page_cost)
6358                         / num_outer_scans;
6359         }
6360         else
6361         {
6362                 /*
6363                  * For a single index scan, we just charge spc_random_page_cost per
6364                  * page touched.
6365                  */
6366                 indexTotalCost = numIndexPages * spc_random_page_cost;
6367         }
6368
6369         /*
6370          * CPU cost: any complex expressions in the indexquals will need to be
6371          * evaluated once at the start of the scan to reduce them to runtime keys
6372          * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
6373          * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6374          * indexqual operator.  Because we have numIndexTuples as a per-scan
6375          * number, we have to multiply by num_sa_scans to get the correct result
6376          * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
6377          * ORDER BY expressions.
6378          *
6379          * Note: this neglects the possible costs of rechecking lossy operators.
6380          * Detecting that that might be needed seems more expensive than it's
6381          * worth, though, considering all the other inaccuracies here ...
6382          */
6383         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
6384                 orderby_operands_eval_cost(root, path);
6385         qual_op_cost = cpu_operator_cost *
6386                 (list_length(indexQuals) + list_length(indexOrderBys));
6387
6388         indexStartupCost = qual_arg_cost;
6389         indexTotalCost += qual_arg_cost;
6390         indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6391
6392         /*
6393          * Generic assumption about index correlation: there isn't any.
6394          */
6395         indexCorrelation = 0.0;
6396
6397         /*
6398          * Return everything to caller.
6399          */
6400         costs->indexStartupCost = indexStartupCost;
6401         costs->indexTotalCost = indexTotalCost;
6402         costs->indexSelectivity = indexSelectivity;
6403         costs->indexCorrelation = indexCorrelation;
6404         costs->numIndexPages = numIndexPages;
6405         costs->numIndexTuples = numIndexTuples;
6406         costs->spc_random_page_cost = spc_random_page_cost;
6407         costs->num_sa_scans = num_sa_scans;
6408 }
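/*
 * A condensed standalone sketch of the disk-access arithmetic above, with
 * plain doubles for all inputs and the cache-effect correction left out: the
 * real code runs the raw page-fetch count through index_pages_fetched() (the
 * Mackert-Lohman model in costsize.c) before charging for it, which this
 * illustration does not attempt to reproduce.
 */
#include <math.h>

static double
demo_generic_index_io_cost(double numIndexTuples,	/* tuples visited per scan */
						   double index_tuples,		/* total tuples in index */
						   double index_pages,		/* total pages in index */
						   double num_sa_scans,		/* scans from SAOP quals */
						   double loop_count,		/* nestloop repetitions */
						   double spc_random_page_cost)
{
	double		numIndexPages;
	double		num_scans = num_sa_scans * loop_count;

	/* pro-rata share of the index's pages, at least one page */
	if (index_pages > 1 && index_tuples > 1)
		numIndexPages = ceil(numIndexTuples * index_pages / index_tuples);
	else
		numIndexPages = 1.0;

	if (num_scans > 1)
	{
		/* total fetches over all repeated scans, ignoring caching */
		double		pages_fetched = numIndexPages * num_scans;

		/* report a pro-rated share per outer (nestloop) scan only */
		return (pages_fetched * spc_random_page_cost) / loop_count;
	}

	/* single scan: charge spc_random_page_cost per page touched */
	return numIndexPages * spc_random_page_cost;
}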
6409
6410 /*
6411  * If the index is partial, add its predicate to the given qual list.
6412  *
6413  * ANDing the index predicate with the explicitly given indexquals produces
6414  * a more accurate idea of the index's selectivity.  However, we need to be
6415  * careful not to insert redundant clauses, because clauselist_selectivity()
6416  * is easily fooled into computing a too-low selectivity estimate.  Our
6417  * approach is to add only the predicate clause(s) that cannot be proven to
6418  * be implied by the given indexquals.  This successfully handles cases such
6419  * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
6420  * There are many other cases where we won't detect redundancy, leading to a
6421  * too-low selectivity estimate, which will bias the system in favor of using
6422  * partial indexes where possible.  That is not necessarily bad though.
6423  *
6424  * Note that indexQuals contains RestrictInfo nodes while the indpred
6425  * does not, so the output list will be mixed.  This is OK for both
6426  * predicate_implied_by() and clauselist_selectivity(), but might be
6427  * problematic if the result were passed to other things.
6428  */
6429 static List *
6430 add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
6431 {
6432         List       *predExtraQuals = NIL;
6433         ListCell   *lc;
6434
6435         if (index->indpred == NIL)
6436                 return indexQuals;
6437
6438         foreach(lc, index->indpred)
6439         {
6440                 Node       *predQual = (Node *) lfirst(lc);
6441                 List       *oneQual = list_make1(predQual);
6442
6443                 if (!predicate_implied_by(oneQual, indexQuals))
6444                         predExtraQuals = list_concat(predExtraQuals, oneQual);
6445         }
6446         /* list_concat avoids modifying the passed-in indexQuals list */
6447         return list_concat(predExtraQuals, indexQuals);
6448 }
6449
6450
6451 void
6452 btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6453                            Cost *indexStartupCost, Cost *indexTotalCost,
6454                            Selectivity *indexSelectivity, double *indexCorrelation,
6455                            double *indexPages)
6456 {
6457         IndexOptInfo *index = path->indexinfo;
6458         List       *qinfos;
6459         GenericCosts costs;
6460         Oid                     relid;
6461         AttrNumber      colnum;
6462         VariableStatData vardata;
6463         double          numIndexTuples;
6464         Cost            descentCost;
6465         List       *indexBoundQuals;
6466         int                     indexcol;
6467         bool            eqQualHere;
6468         bool            found_saop;
6469         bool            found_is_null_op;
6470         double          num_sa_scans;
6471         ListCell   *lc;
6472
6473         /* Do preliminary analysis of indexquals */
6474         qinfos = deconstruct_indexquals(path);
6475
6476         /*
6477          * For a btree scan, only leading '=' quals plus inequality quals for the
6478          * immediately next attribute contribute to index selectivity (these are
6479          * the "boundary quals" that determine the starting and stopping points of
6480          * the index scan).  Additional quals can suppress visits to the heap, so
6481          * it's OK to count them in indexSelectivity, but they should not count
6482          * for estimating numIndexTuples.  So we must examine the given indexquals
6483          * to find out which ones count as boundary quals.  We rely on the
6484          * knowledge that they are given in index column order.
6485          *
6486          * For a RowCompareExpr, we consider only the first column, just as
6487          * rowcomparesel() does.
6488          *
6489          * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
6490          * index scans not one, but the ScalarArrayOpExpr's operator can be
6491          * considered to act the same as it normally does.
6492          */
6493         indexBoundQuals = NIL;
6494         indexcol = 0;
6495         eqQualHere = false;
6496         found_saop = false;
6497         found_is_null_op = false;
6498         num_sa_scans = 1;
6499         foreach(lc, qinfos)
6500         {
6501                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
6502                 RestrictInfo *rinfo = qinfo->rinfo;
6503                 Expr       *clause = rinfo->clause;
6504                 Oid                     clause_op;
6505                 int                     op_strategy;
6506
6507                 if (indexcol != qinfo->indexcol)
6508                 {
6509                         /* Beginning of a new column's quals */
6510                         if (!eqQualHere)
6511                                 break;                  /* done if no '=' qual for indexcol */
6512                         eqQualHere = false;
6513                         indexcol++;
6514                         if (indexcol != qinfo->indexcol)
6515                                 break;                  /* no quals at all for indexcol */
6516                 }
6517
6518                 if (IsA(clause, ScalarArrayOpExpr))
6519                 {
6520                         int                     alength = estimate_array_length(qinfo->other_operand);
6521
6522                         found_saop = true;
6523                         /* count up number of SA scans induced by indexBoundQuals only */
6524                         if (alength > 1)
6525                                 num_sa_scans *= alength;
6526                 }
6527                 else if (IsA(clause, NullTest))
6528                 {
6529                         NullTest   *nt = (NullTest *) clause;
6530
6531                         if (nt->nulltesttype == IS_NULL)
6532                         {
6533                                 found_is_null_op = true;
6534                                 /* IS NULL is like = for selectivity determination purposes */
6535                                 eqQualHere = true;
6536                         }
6537                 }
6538
6539                 /*
6540                  * We would need to commute the clause_op if not varonleft, except
6541                  * that we only care if it's equality or not, so that refinement is
6542                  * unnecessary.
6543                  */
6544                 clause_op = qinfo->clause_op;
6545
6546                 /* check for equality operator */
6547                 if (OidIsValid(clause_op))
6548                 {
6549                         op_strategy = get_op_opfamily_strategy(clause_op,
6550                                                                                                    index->opfamily[indexcol]);
6551                         Assert(op_strategy != 0);       /* not a member of opfamily?? */
6552                         if (op_strategy == BTEqualStrategyNumber)
6553                                 eqQualHere = true;
6554                 }
6555
6556                 indexBoundQuals = lappend(indexBoundQuals, rinfo);
6557         }
6558
6559         /*
6560          * If index is unique and we found an '=' clause for each column, we can
6561          * just assume numIndexTuples = 1 and skip the expensive
6562          * clauselist_selectivity calculations.  However, a ScalarArrayOp or
6563          * NullTest invalidates that theory, even though it sets eqQualHere.
6564          */
6565         if (index->unique &&
6566                 indexcol == index->ncolumns - 1 &&
6567                 eqQualHere &&
6568                 !found_saop &&
6569                 !found_is_null_op)
6570                 numIndexTuples = 1.0;
6571         else
6572         {
6573                 List       *selectivityQuals;
6574                 Selectivity btreeSelectivity;
6575
6576                 /*
6577                  * If the index is partial, AND the index predicate with the
6578                  * index-bound quals to produce a more accurate idea of the number of
6579                  * rows covered by the bound conditions.
6580                  */
6581                 selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
6582
6583                 btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
6584                                                                                                   index->rel->relid,
6585                                                                                                   JOIN_INNER,
6586                                                                                                   NULL);
6587                 numIndexTuples = btreeSelectivity * index->rel->tuples;
6588
6589                 /*
6590                  * As in genericcostestimate(), we have to adjust for any
6591                  * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
6592                  * to integer.
6593                  */
6594                 numIndexTuples = rint(numIndexTuples / num_sa_scans);
6595         }
6596
6597         /*
6598          * Now do generic index cost estimation.
6599          */
6600         MemSet(&costs, 0, sizeof(costs));
6601         costs.numIndexTuples = numIndexTuples;
6602
6603         genericcostestimate(root, path, loop_count, qinfos, &costs);
6604
6605         /*
6606          * Add a CPU-cost component to represent the costs of initial btree
6607          * descent.  We don't charge any I/O cost for touching upper btree levels,
6608          * since they tend to stay in cache, but we still have to do about log2(N)
6609          * comparisons to descend a btree of N leaf tuples.  We charge one
6610          * cpu_operator_cost per comparison.
6611          *
6612          * If there are ScalarArrayOpExprs, charge this once per SA scan.  The
6613          * ones after the first one are not startup cost so far as the overall
6614          * plan is concerned, so add them only to "total" cost.
6615          */
6616         if (index->tuples > 1)          /* avoid computing log(0) */
6617         {
6618                 descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
6619                 costs.indexStartupCost += descentCost;
6620                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
6621         }
6622
6623         /*
6624          * Even though we're not charging I/O cost for touching upper btree pages,
6625          * it's still reasonable to charge some CPU cost per page descended
6626          * through.  Moreover, if we had no such charge at all, bloated indexes
6627          * would appear to have the same search cost as unbloated ones, at least
6628          * in cases where only a single leaf page is expected to be visited.  This
6629          * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
6630          * touched.  The number of such pages is btree tree height plus one (ie,
6631          * we charge for the leaf page too).  As above, charge once per SA scan.
6632          */
6633         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
6634         costs.indexStartupCost += descentCost;
6635         costs.indexTotalCost += costs.num_sa_scans * descentCost;
6636
6637         /*
6638          * If we can get an estimate of the first column's ordering correlation C
6639          * from pg_statistic, estimate the index correlation as C for a
6640          * single-column index, or C * 0.75 for multiple columns. (The idea here
6641          * is that multiple columns dilute the importance of the first column's
6642          * ordering, but don't negate it entirely.  Before 8.0 we divided the
6643          * correlation by the number of columns, but that seems too strong.)
6644          */
6645         MemSet(&vardata, 0, sizeof(vardata));
6646
6647         if (index->indexkeys[0] != 0)
6648         {
6649                 /* Simple variable --- look to stats for the underlying table */
6650                 RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
6651
6652                 Assert(rte->rtekind == RTE_RELATION);
6653                 relid = rte->relid;
6654                 Assert(relid != InvalidOid);
6655                 colnum = index->indexkeys[0];
6656
6657                 if (get_relation_stats_hook &&
6658                         (*get_relation_stats_hook) (root, rte, colnum, &vardata))
6659                 {
6660                         /*
6661                          * The hook took control of acquiring a stats tuple.  If it did
6662                          * supply a tuple, it'd better have supplied a freefunc.
6663                          */
6664                         if (HeapTupleIsValid(vardata.statsTuple) &&
6665                                 !vardata.freefunc)
6666                                 elog(ERROR, "no function provided to release variable stats with");
6667                 }
6668                 else
6669                 {
6670                         vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6671                                                                                                  ObjectIdGetDatum(relid),
6672                                                                                                  Int16GetDatum(colnum),
6673                                                                                                  BoolGetDatum(rte->inh));
6674                         vardata.freefunc = ReleaseSysCache;
6675                 }
6676         }
6677         else
6678         {
6679                 /* Expression --- maybe there are stats for the index itself */
6680                 relid = index->indexoid;
6681                 colnum = 1;
6682
6683                 if (get_index_stats_hook &&
6684                         (*get_index_stats_hook) (root, relid, colnum, &vardata))
6685                 {
6686                         /*
6687                          * The hook took control of acquiring a stats tuple.  If it did
6688                          * supply a tuple, it'd better have supplied a freefunc.
6689                          */
6690                         if (HeapTupleIsValid(vardata.statsTuple) &&
6691                                 !vardata.freefunc)
6692                                 elog(ERROR, "no function provided to release variable stats with");
6693                 }
6694                 else
6695                 {
6696                         vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6697                                                                                                  ObjectIdGetDatum(relid),
6698                                                                                                  Int16GetDatum(colnum),
6699                                                                                                  BoolGetDatum(false));
6700                         vardata.freefunc = ReleaseSysCache;
6701                 }
6702         }
6703
6704         if (HeapTupleIsValid(vardata.statsTuple))
6705         {
6706                 Oid                     sortop;
6707                 float4     *numbers;
6708                 int                     nnumbers;
6709
6710                 sortop = get_opfamily_member(index->opfamily[0],
6711                                                                          index->opcintype[0],
6712                                                                          index->opcintype[0],
6713                                                                          BTLessStrategyNumber);
6714                 if (OidIsValid(sortop) &&
6715                         get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
6716                                                          STATISTIC_KIND_CORRELATION,
6717                                                          sortop,
6718                                                          NULL,
6719                                                          NULL, NULL,
6720                                                          &numbers, &nnumbers))
6721                 {
6722                         double          varCorrelation;
6723
6724                         Assert(nnumbers == 1);
6725                         varCorrelation = numbers[0];
6726
6727                         if (index->reverse_sort[0])
6728                                 varCorrelation = -varCorrelation;
6729
6730                         if (index->ncolumns > 1)
6731                                 costs.indexCorrelation = varCorrelation * 0.75;
6732                         else
6733                                 costs.indexCorrelation = varCorrelation;
6734
6735                         free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
6736                 }
6737         }
6738
6739         ReleaseVariableStats(vardata);
6740
6741         *indexStartupCost = costs.indexStartupCost;
6742         *indexTotalCost = costs.indexTotalCost;
6743         *indexSelectivity = costs.indexSelectivity;
6744         *indexCorrelation = costs.indexCorrelation;
6745         *indexPages = costs.numIndexPages;
6746 }
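/*
 * A standalone sketch of the btree descent charges added above: roughly
 * ceil(log2(N)) comparisons to locate the starting leaf page, plus a
 * somewhat arbitrary 50 * cpu_operator_cost per page descended through
 * (tree height plus the leaf page itself).  The startup side is charged
 * once, while the total-cost side is multiplied by the number of
 * ScalarArrayOpExpr-induced scans.  DemoDescentCosts and the demo_* name
 * are invented for this illustration.
 */
#include <math.h>

typedef struct DemoDescentCosts
{
	double		startup;
	double		total;
} DemoDescentCosts;

static DemoDescentCosts
demo_btree_descent_cost(double index_tuples, int tree_height,
						double num_sa_scans, double cpu_operator_cost)
{
	DemoDescentCosts cost = {0.0, 0.0};
	double		descentCost;

	if (index_tuples > 1)		/* avoid computing log(0) */
	{
		descentCost = ceil(log(index_tuples) / log(2.0)) * cpu_operator_cost;
		cost.startup += descentCost;
		cost.total += num_sa_scans * descentCost;
	}

	/* per-page CPU charge, so bloated indexes look costlier to descend */
	descentCost = (tree_height + 1) * 50.0 * cpu_operator_cost;
	cost.startup += descentCost;
	cost.total += num_sa_scans * descentCost;

	return cost;
}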
6747
6748 void
6749 hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6750                                  Cost *indexStartupCost, Cost *indexTotalCost,
6751                                  Selectivity *indexSelectivity, double *indexCorrelation,
6752                                  double *indexPages)
6753 {
6754         List       *qinfos;
6755         GenericCosts costs;
6756
6757         /* Do preliminary analysis of indexquals */
6758         qinfos = deconstruct_indexquals(path);
6759
6760         MemSet(&costs, 0, sizeof(costs));
6761
6762         genericcostestimate(root, path, loop_count, qinfos, &costs);
6763
6764         /*
6765          * A hash index has no descent costs as such, since the index AM can go
6766          * directly to the target bucket after computing the hash value.  There
6767          * are a couple of other hash-specific costs that we could conceivably add
6768          * here, though:
6769          *
6770          * Ideally we'd charge spc_random_page_cost for each page in the target
6771          * bucket, not just the numIndexPages pages that genericcostestimate
6772          * thought we'd visit.  However in most cases we don't know which bucket
6773          * that will be.  There's no point in considering the average bucket size
6774          * because the hash AM makes sure that's always one page.
6775          *
6776          * Likewise, we could consider charging some CPU for each index tuple in
6777          * the bucket, if we knew how many there were.  But the per-tuple cost is
6778          * just a hash value comparison, not a general datatype-dependent
6779          * comparison, so any such charge ought to be quite a bit less than
6780          * cpu_operator_cost; which makes it probably not worth worrying about.
6781          *
6782          * A bigger issue is that chance hash-value collisions will result in
6783          * wasted probes into the heap.  We don't currently attempt to model this
6784          * cost on the grounds that it's rare, but maybe it's not rare enough.
6785          * (Any fix for this ought to consider the generic lossy-operator problem,
6786          * though; it's not entirely hash-specific.)
6787          */
6788
6789         *indexStartupCost = costs.indexStartupCost;
6790         *indexTotalCost = costs.indexTotalCost;
6791         *indexSelectivity = costs.indexSelectivity;
6792         *indexCorrelation = costs.indexCorrelation;
6793         *indexPages = costs.numIndexPages;
6794 }
6795
6796 void
6797 gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6798                                  Cost *indexStartupCost, Cost *indexTotalCost,
6799                                  Selectivity *indexSelectivity, double *indexCorrelation,
6800                                  double *indexPages)
6801 {
6802         IndexOptInfo *index = path->indexinfo;
6803         List       *qinfos;
6804         GenericCosts costs;
6805         Cost            descentCost;
6806
6807         /* Do preliminary analysis of indexquals */
6808         qinfos = deconstruct_indexquals(path);
6809
6810         MemSet(&costs, 0, sizeof(costs));
6811
6812         genericcostestimate(root, path, loop_count, qinfos, &costs);
6813
6814         /*
6815          * We model index descent costs similarly to those for btree, but to do
6816          * that we first need an idea of the tree height.  We somewhat arbitrarily
6817          * assume that the fanout is 100, meaning the tree height is at most
6818          * log100(index->pages).
6819          *
6820          * Although this computation isn't really expensive enough to require
6821          * caching, we might as well use index->tree_height to cache it.
6822          */
6823         if (index->tree_height < 0) /* unknown? */
6824         {
6825                 if (index->pages > 1)   /* avoid computing log(0) */
6826                         index->tree_height = (int) (log(index->pages) / log(100.0));
6827                 else
6828                         index->tree_height = 0;
6829         }
6830
6831         /*
6832          * Add a CPU-cost component to represent the costs of initial descent. We
6833          * just use log(N) here not log2(N) since the branching factor isn't
6834          * necessarily two anyway.  As for btree, charge once per SA scan.
6835          */
6836         if (index->tuples > 1)          /* avoid computing log(0) */
6837         {
6838                 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
6839                 costs.indexStartupCost += descentCost;
6840                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
6841         }
6842
6843         /*
6844          * Likewise add a per-page charge, calculated the same as for btrees.
6845          */
6846         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
6847         costs.indexStartupCost += descentCost;
6848         costs.indexTotalCost += costs.num_sa_scans * descentCost;
6849
6850         *indexStartupCost = costs.indexStartupCost;
6851         *indexTotalCost = costs.indexTotalCost;
6852         *indexSelectivity = costs.indexSelectivity;
6853         *indexCorrelation = costs.indexCorrelation;
6854         *indexPages = costs.numIndexPages;
6855 }
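/*
 * A standalone sketch of the GiST/SP-GiST variant above: the tree height is
 * guessed from the page count assuming a fanout of roughly 100, and the
 * descent comparison charge uses a natural log(N) rather than log2(N)
 * because the branching factor isn't necessarily two.  Only the startup
 * component is shown; as in the btree case, the total-cost side multiplies
 * each charge by the number of ScalarArrayOpExpr-induced scans.
 */
#include <math.h>

static double
demo_gist_descent_startup_cost(double index_pages, double index_tuples,
							   double cpu_operator_cost)
{
	int			tree_height;
	double		cost = 0.0;

	/* guess tree height from page count, assuming ~100-way fanout */
	if (index_pages > 1)		/* avoid computing log(0) */
		tree_height = (int) (log(index_pages) / log(100.0));
	else
		tree_height = 0;

	/* initial descent comparisons */
	if (index_tuples > 1)		/* avoid computing log(0) */
		cost += ceil(log(index_tuples)) * cpu_operator_cost;

	/* same per-page charge as the btree case */
	cost += (tree_height + 1) * 50.0 * cpu_operator_cost;

	return cost;
}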
6856
6857 void
6858 spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6859                                 Cost *indexStartupCost, Cost *indexTotalCost,
6860                                 Selectivity *indexSelectivity, double *indexCorrelation,
6861                                 double *indexPages)
6862 {
6863         IndexOptInfo *index = path->indexinfo;
6864         List       *qinfos;
6865         GenericCosts costs;
6866         Cost            descentCost;
6867
6868         /* Do preliminary analysis of indexquals */
6869         qinfos = deconstruct_indexquals(path);
6870
6871         MemSet(&costs, 0, sizeof(costs));
6872
6873         genericcostestimate(root, path, loop_count, qinfos, &costs);
6874
6875         /*
6876          * We model index descent costs similarly to those for btree, but to do
6877          * that we first need an idea of the tree height.  We somewhat arbitrarily
6878          * assume that the fanout is 100, meaning the tree height is at most
6879          * log100(index->pages).
6880          *
6881          * Although this computation isn't really expensive enough to require
6882          * caching, we might as well use index->tree_height to cache it.
6883          */
6884         if (index->tree_height < 0) /* unknown? */
6885         {
6886                 if (index->pages > 1)   /* avoid computing log(0) */
6887                         index->tree_height = (int) (log(index->pages) / log(100.0));
6888                 else
6889                         index->tree_height = 0;
6890         }
6891
6892         /*
6893          * Add a CPU-cost component to represent the costs of initial descent. We
6894          * just use log(N) here not log2(N) since the branching factor isn't
6895          * necessarily two anyway.  As for btree, charge once per SA scan.
6896          */
6897         if (index->tuples > 1)          /* avoid computing log(0) */
6898         {
6899                 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
6900                 costs.indexStartupCost += descentCost;
6901                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
6902         }
6903
6904         /*
6905          * Likewise add a per-page charge, calculated the same as for btrees.
6906          */
6907         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
6908         costs.indexStartupCost += descentCost;
6909         costs.indexTotalCost += costs.num_sa_scans * descentCost;
6910
6911         *indexStartupCost = costs.indexStartupCost;
6912         *indexTotalCost = costs.indexTotalCost;
6913         *indexSelectivity = costs.indexSelectivity;
6914         *indexCorrelation = costs.indexCorrelation;
6915         *indexPages = costs.numIndexPages;
6916 }
6917
6918
6919 /*
6920  * Support routines for gincostestimate
6921  */
6922
6923 typedef struct
6924 {
6925         bool            haveFullScan;
6926         double          partialEntries;
6927         double          exactEntries;
6928         double          searchEntries;
6929         double          arrayScans;
6930 } GinQualCounts;
6931
6932 /*
6933  * Estimate the number of index terms that need to be searched for while
6934  * testing the given GIN query, and increment the counts in *counts
6935  * appropriately.  If the query is unsatisfiable, return false.
6936  */
6937 static bool
6938 gincost_pattern(IndexOptInfo *index, int indexcol,
6939                                 Oid clause_op, Datum query,
6940                                 GinQualCounts *counts)
6941 {
6942         Oid                     extractProcOid;
6943         Oid                     collation;
6944         int                     strategy_op;
6945         Oid                     lefttype,
6946                                 righttype;
6947         int32           nentries = 0;
6948         bool       *partial_matches = NULL;
6949         Pointer    *extra_data = NULL;
6950         bool       *nullFlags = NULL;
6951         int32           searchMode = GIN_SEARCH_MODE_DEFAULT;
6952         int32           i;
6953
6954         /*
6955          * Get the operator's strategy number and declared input data types within
6956          * the index opfamily.  (We don't need the latter, but we use
6957          * get_op_opfamily_properties because it will throw an error if it fails to
6958          * find a matching pg_amop entry.)
6959          */
6960         get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
6961                                                            &strategy_op, &lefttype, &righttype);
6962
6963         /*
6964          * GIN always uses the "default" support functions, which are those with
6965          * lefttype == righttype == the opclass' opcintype (see
6966          * IndexSupportInitialize in relcache.c).
6967          */
6968         extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
6969                                                                            index->opcintype[indexcol],
6970                                                                            index->opcintype[indexcol],
6971                                                                            GIN_EXTRACTQUERY_PROC);
6972
6973         if (!OidIsValid(extractProcOid))
6974         {
6975                 /* should not happen; throw same error as index_getprocinfo */
6976                 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
6977                          GIN_EXTRACTQUERY_PROC, indexcol + 1,
6978                          get_rel_name(index->indexoid));
6979         }
6980
6981         /*
6982          * Choose collation to pass to extractProc (should match initGinState).
6983          */
6984         if (OidIsValid(index->indexcollations[indexcol]))
6985                 collation = index->indexcollations[indexcol];
6986         else
6987                 collation = DEFAULT_COLLATION_OID;
6988
6989         OidFunctionCall7Coll(extractProcOid,
6990                                                  collation,
6991                                                  query,
6992                                                  PointerGetDatum(&nentries),
6993                                                  UInt16GetDatum(strategy_op),
6994                                                  PointerGetDatum(&partial_matches),
6995                                                  PointerGetDatum(&extra_data),
6996                                                  PointerGetDatum(&nullFlags),
6997                                                  PointerGetDatum(&searchMode));
6998
6999         if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
7000         {
7001                 /* No match is possible */
7002                 return false;
7003         }
7004
7005         for (i = 0; i < nentries; i++)
7006         {
7007                 /*
7008                  * For a partial match we don't have any information to estimate the
7009                  * number of matched entries in the index, so we just estimate it as 100
7010                  */
7011                 if (partial_matches && partial_matches[i])
7012                         counts->partialEntries += 100;
7013                 else
7014                         counts->exactEntries++;
7015
7016                 counts->searchEntries++;
7017         }
7018
7019         if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
7020         {
7021                 /* Treat "include empty" like an exact-match item */
7022                 counts->exactEntries++;
7023                 counts->searchEntries++;
7024         }
7025         else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
7026         {
7027                 /* It's GIN_SEARCH_MODE_ALL */
7028                 counts->haveFullScan = true;
7029         }
7030
7031         return true;
7032 }
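
/*
 * For example (assuming the built-in tsvector opclass), a constant query such
 * as to_tsquery('foo & bar') would typically extract two non-partial entries,
 * adding 2 to both exactEntries and searchEntries, while a prefix query such
 * as to_tsquery('foo:*') is a partial match and is counted as 100
 * partialEntries plus one searchEntries.
 */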
7033
7034 /*
7035  * Estimate the number of index terms that need to be searched for while
7036  * testing the given GIN index clause, and increment the counts in *counts
7037  * appropriately.  If the query is unsatisfiable, return false.
7038  */
7039 static bool
7040 gincost_opexpr(PlannerInfo *root,
7041                            IndexOptInfo *index,
7042                            IndexQualInfo *qinfo,
7043                            GinQualCounts *counts)
7044 {
7045         int                     indexcol = qinfo->indexcol;
7046         Oid                     clause_op = qinfo->clause_op;
7047         Node       *operand = qinfo->other_operand;
7048
7049         if (!qinfo->varonleft)
7050         {
7051                 /* must commute the operator */
7052                 clause_op = get_commutator(clause_op);
7053         }
7054
7055         /* aggressively reduce to a constant, and look through relabeling */
7056         operand = estimate_expression_value(root, operand);
7057
7058         if (IsA(operand, RelabelType))
7059                 operand = (Node *) ((RelabelType *) operand)->arg;
7060
7061         /*
7062          * It's impossible to call the extractQuery method for an unknown operand,
7063          * so unless the operand is a Const we can't do much; just assume there
7064          * will be one ordinary search entry from the operand at runtime.
7065          */
7066         if (!IsA(operand, Const))
7067         {
7068                 counts->exactEntries++;
7069                 counts->searchEntries++;
7070                 return true;
7071         }
7072
7073         /* If Const is null, there can be no matches */
7074         if (((Const *) operand)->constisnull)
7075                 return false;
7076
7077         /* Otherwise, apply extractQuery and get the actual term counts */
7078         return gincost_pattern(index, indexcol, clause_op,
7079                                                    ((Const *) operand)->constvalue,
7080                                                    counts);
7081 }
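
/*
 * As an illustration of the non-Const path above: for a hypothetical clause
 * such as "tsv @@ to_tsquery($1)", where the right-hand side cannot be
 * reduced to a constant at plan time, we simply assume one exact entry and
 * one search entry per scan rather than calling extractQuery.
 */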
7082
7083 /*
7084  * Estimate the number of index terms that need to be searched for while
7085  * testing the given GIN index clause, and increment the counts in *counts
7086  * appropriately.  If the query is unsatisfiable, return false.
7087  *
7088  * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
7089  * each of which involves one value from the RHS array, plus all the
7090  * non-array quals (if any).  To model this, we average the counts across
7091  * the RHS elements, and add the averages to the counts in *counts (which
7092  * correspond to per-indexscan costs).  We also multiply counts->arrayScans
7093  * by N, causing gincostestimate to scale up its estimates accordingly.
7094  */
7095 static bool
7096 gincost_scalararrayopexpr(PlannerInfo *root,
7097                                                   IndexOptInfo *index,
7098                                                   IndexQualInfo *qinfo,
7099                                                   double numIndexEntries,
7100                                                   GinQualCounts *counts)
7101 {
7102         int                     indexcol = qinfo->indexcol;
7103         Oid                     clause_op = qinfo->clause_op;
7104         Node       *rightop = qinfo->other_operand;
7105         ArrayType  *arrayval;
7106         int16           elmlen;
7107         bool            elmbyval;
7108         char            elmalign;
7109         int                     numElems;
7110         Datum      *elemValues;
7111         bool       *elemNulls;
7112         GinQualCounts arraycounts;
7113         int                     numPossible = 0;
7114         int                     i;
7115
7116         Assert(((ScalarArrayOpExpr *) qinfo->rinfo->clause)->useOr);
7117
7118         /* aggressively reduce to a constant, and look through relabeling */
7119         rightop = estimate_expression_value(root, rightop);
7120
7121         if (IsA(rightop, RelabelType))
7122                 rightop = (Node *) ((RelabelType *) rightop)->arg;
7123
7124         /*
7125          * It's impossible to call the extractQuery method for an unknown operand,
7126          * so unless the operand is a Const we can't do much; just assume there
7127          * will be one ordinary search entry from each array entry at runtime, and
7128          * fall back on a probably-bad estimate of the number of array entries.
7129          */
7130         if (!IsA(rightop, Const))
7131         {
7132                 counts->exactEntries++;
7133                 counts->searchEntries++;
7134                 counts->arrayScans *= estimate_array_length(rightop);
7135                 return true;
7136         }
7137
7138         /* If Const is null, there can be no matches */
7139         if (((Const *) rightop)->constisnull)
7140                 return false;
7141
7142         /* Otherwise, extract the array elements and iterate over them */
7143         arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
7144         get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
7145                                                  &elmlen, &elmbyval, &elmalign);
7146         deconstruct_array(arrayval,
7147                                           ARR_ELEMTYPE(arrayval),
7148                                           elmlen, elmbyval, elmalign,
7149                                           &elemValues, &elemNulls, &numElems);
7150
7151         memset(&arraycounts, 0, sizeof(arraycounts));
7152
7153         for (i = 0; i < numElems; i++)
7154         {
7155                 GinQualCounts elemcounts;
7156
7157                 /* NULL can't match anything, so ignore, as the executor will */
7158                 if (elemNulls[i])
7159                         continue;
7160
7161                 /* Otherwise, apply extractQuery and get the actual term counts */
7162                 memset(&elemcounts, 0, sizeof(elemcounts));
7163
7164                 if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
7165                                                         &elemcounts))
7166                 {
7167                         /* We ignore array elements that are unsatisfiable patterns */
7168                         numPossible++;
7169
7170                         if (elemcounts.haveFullScan)
7171                         {
7172                                 /*
7173                                  * Full index scan will be required.  We treat this as if
7174                                  * every key in the index had been listed in the query; is
7175                                  * that reasonable?
7176                                  */
7177                                 elemcounts.partialEntries = 0;
7178                                 elemcounts.exactEntries = numIndexEntries;
7179                                 elemcounts.searchEntries = numIndexEntries;
7180                         }
7181                         arraycounts.partialEntries += elemcounts.partialEntries;
7182                         arraycounts.exactEntries += elemcounts.exactEntries;
7183                         arraycounts.searchEntries += elemcounts.searchEntries;
7184                 }
7185         }
7186
7187         if (numPossible == 0)
7188         {
7189                 /* No satisfiable patterns in the array */
7190                 return false;
7191         }
7192
7193         /*
7194          * Now add the averages to the global counts.  This will give us an
7195          * estimate of the average number of terms searched for in each indexscan,
7196          * including contributions from both array and non-array quals.
7197          */
7198         counts->partialEntries += arraycounts.partialEntries / numPossible;
7199         counts->exactEntries += arraycounts.exactEntries / numPossible;
7200         counts->searchEntries += arraycounts.searchEntries / numPossible;
7201
7202         counts->arrayScans *= numPossible;
7203
7204         return true;
7205 }
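
/*
 * Worked example: for an indexqual of the form "col op ANY (ARRAY[a, b, c])"
 * where all three elements are non-null and satisfiable, and each extracts a
 * single non-partial entry, we end up with arraycounts.exactEntries =
 * arraycounts.searchEntries = 3 and numPossible = 3.  The caller's counts
 * therefore gain 3/3 = 1 exact entry and 1 search entry per indexscan, while
 * counts->arrayScans is multiplied by 3 to reflect the three separate scans
 * performed at runtime.
 */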
7206
7207 /*
7208  * GIN has search behavior completely different from other index types
7209  */
7210 void
7211 gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7212                                 Cost *indexStartupCost, Cost *indexTotalCost,
7213                                 Selectivity *indexSelectivity, double *indexCorrelation,
7214                                 double *indexPages)
7215 {
7216         IndexOptInfo *index = path->indexinfo;
7217         List       *indexQuals = path->indexquals;
7218         List       *indexOrderBys = path->indexorderbys;
7219         List       *qinfos;
7220         ListCell   *l;
7221         List       *selectivityQuals;
7222         double          numPages = index->pages,
7223                                 numTuples = index->tuples;
7224         double          numEntryPages,
7225                                 numDataPages,
7226                                 numPendingPages,
7227                                 numEntries;
7228         GinQualCounts counts;
7229         bool            matchPossible;
7230         double          partialScale;
7231         double          entryPagesFetched,
7232                                 dataPagesFetched,
7233                                 dataPagesFetchedBySel;
7234         double          qual_op_cost,
7235                                 qual_arg_cost,
7236                                 spc_random_page_cost,
7237                                 outer_scans;
7238         Relation        indexRel;
7239         GinStatsData ginStats;
7240
7241         /* Do preliminary analysis of indexquals */
7242         qinfos = deconstruct_indexquals(path);
7243
7244         /*
7245          * Obtain statistical information from the meta page, if possible.  Else
7246          * set ginStats to zeroes, and we'll cope below.
7247          */
7248         if (!index->hypothetical)
7249         {
7250                 indexRel = index_open(index->indexoid, AccessShareLock);
7251                 ginGetStats(indexRel, &ginStats);
7252                 index_close(indexRel, AccessShareLock);
7253         }
7254         else
7255         {
7256                 memset(&ginStats, 0, sizeof(ginStats));
7257         }
7258
7259         /*
7260          * Assuming we got valid (nonzero) stats at all, nPendingPages can be
7261          * trusted, but the other fields are data as of the last VACUUM.  We can
7262          * scale them up to account for growth since then, but that method only
7263          * goes so far; in the worst case, the stats might be for a completely
7264          * empty index, and scaling them will produce pretty bogus numbers.
7265          * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
7266          * it's grown more than that, fall back to estimating things only from the
7267          * assumed-accurate index size.  But we'll trust nPendingPages in any case
7268          * so long as it's not clearly insane, ie, more than the index size.
7269          */
7270         if (ginStats.nPendingPages < numPages)
7271                 numPendingPages = ginStats.nPendingPages;
7272         else
7273                 numPendingPages = 0;
7274
7275         if (numPages > 0 && ginStats.nTotalPages <= numPages &&
7276                 ginStats.nTotalPages > numPages / 4 &&
7277                 ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
7278         {
7279                 /*
7280                  * OK, the stats seem close enough to sane to be trusted.  But we
7281                  * still need to scale them by the ratio numPages / nTotalPages to
7282                  * account for growth since the last VACUUM.
7283                  */
7284                 double          scale = numPages / ginStats.nTotalPages;
7285
7286                 numEntryPages = ceil(ginStats.nEntryPages * scale);
7287                 numDataPages = ceil(ginStats.nDataPages * scale);
7288                 numEntries = ceil(ginStats.nEntries * scale);
7289                 /* ensure we didn't round up too much */
7290                 numEntryPages = Min(numEntryPages, numPages - numPendingPages);
7291                 numDataPages = Min(numDataPages,
7292                                                    numPages - numPendingPages - numEntryPages);
7293         }
7294         else
7295         {
7296                 /*
7297                  * We might get here because it's a hypothetical index, or an index
7298                  * created pre-9.1 and never vacuumed since upgrading (in which case
7299                  * its stats would read as zeroes), or just because it's grown too
7300                  * much since the last VACUUM for us to put our faith in scaling.
7301                  *
7302                  * Invent some plausible internal statistics based on the index page
7303                  * count (and clamp that to at least 10 pages, just in case).  We
7304                  * estimate that 90% of the index is entry pages, and the rest is data
7305                  * pages.  Estimate 100 entries per entry page; this is rather bogus
7306                  * since it'll depend on the size of the keys, but it's more robust
7307                  * than trying to predict the number of entries per heap tuple.
7308                  */
7309                 numPages = Max(numPages, 10);
7310                 numEntryPages = floor((numPages - numPendingPages) * 0.90);
7311                 numDataPages = numPages - numPendingPages - numEntryPages;
7312                 numEntries = floor(numEntryPages * 100);
7313         }
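
        /*
         * Worked example with assumed statistics: if numPages = 1200 while the
         * last VACUUM saw nTotalPages = 1000, nEntryPages = 600, nDataPages =
         * 350 and nEntries = 62500, the stats pass the 4X-growth test and are
         * scaled by 1200/1000 = 1.2, giving numEntryPages = 720, numDataPages =
         * 420 and numEntries = 75000.  Had nTotalPages been only 250 (less than
         * numPages / 4), we would instead have fallen through to the
         * 90%-entry-pages heuristic above.
         */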
7314
7315         /* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
7316         if (numEntries < 1)
7317                 numEntries = 1;
7318
7319         /*
7320          * Include predicate in selectivityQuals (should match
7321          * genericcostestimate)
7322          */
7323         if (index->indpred != NIL)
7324         {
7325                 List       *predExtraQuals = NIL;
7326
7327                 foreach(l, index->indpred)
7328                 {
7329                         Node       *predQual = (Node *) lfirst(l);
7330                         List       *oneQual = list_make1(predQual);
7331
7332                         if (!predicate_implied_by(oneQual, indexQuals))
7333                                 predExtraQuals = list_concat(predExtraQuals, oneQual);
7334                 }
7335                 /* list_concat avoids modifying the passed-in indexQuals list */
7336                 selectivityQuals = list_concat(predExtraQuals, indexQuals);
7337         }
7338         else
7339                 selectivityQuals = indexQuals;
7340
7341         /* Estimate the fraction of main-table tuples that will be visited */
7342         *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
7343                                                                                            index->rel->relid,
7344                                                                                            JOIN_INNER,
7345                                                                                            NULL);
7346
7347         /* fetch estimated page cost for tablespace containing index */
7348         get_tablespace_page_costs(index->reltablespace,
7349                                                           &spc_random_page_cost,
7350                                                           NULL);
7351
7352         /*
7353          * Generic assumption about index correlation: there isn't any.
7354          */
7355         *indexCorrelation = 0.0;
7356
7357         /*
7358          * Examine quals to estimate number of search entries & partial matches
7359          */
7360         memset(&counts, 0, sizeof(counts));
7361         counts.arrayScans = 1;
7362         matchPossible = true;
7363
7364         foreach(l, qinfos)
7365         {
7366                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
7367                 Expr       *clause = qinfo->rinfo->clause;
7368
7369                 if (IsA(clause, OpExpr))
7370                 {
7371                         matchPossible = gincost_opexpr(root,
7372                                                                                    index,
7373                                                                                    qinfo,
7374                                                                                    &counts);
7375                         if (!matchPossible)
7376                                 break;
7377                 }
7378                 else if (IsA(clause, ScalarArrayOpExpr))
7379                 {
7380                         matchPossible = gincost_scalararrayopexpr(root,
7381                                                                                                           index,
7382                                                                                                           qinfo,
7383                                                                                                           numEntries,
7384                                                                                                           &counts);
7385                         if (!matchPossible)
7386                                 break;
7387                 }
7388                 else
7389                 {
7390                         /* shouldn't be anything else for a GIN index */
7391                         elog(ERROR, "unsupported GIN indexqual type: %d",
7392                                  (int) nodeTag(clause));
7393                 }
7394         }
7395
7396         /* Fall out if there were any provably-unsatisfiable quals */
7397         if (!matchPossible)
7398         {
7399                 *indexStartupCost = 0;
7400                 *indexTotalCost = 0;
7401                 *indexSelectivity = 0;
7402                 return;
7403         }
7404
7405         if (counts.haveFullScan || indexQuals == NIL)
7406         {
7407                 /*
7408                  * Full index scan will be required.  We treat this as if every key in
7409                  * the index had been listed in the query; is that reasonable?
7410                  */
7411                 counts.partialEntries = 0;
7412                 counts.exactEntries = numEntries;
7413                 counts.searchEntries = numEntries;
7414         }
7415
7416         /* Will we have more than one iteration of a nestloop scan? */
7417         outer_scans = loop_count;
7418
7419         /*
7420          * Compute the cost to begin the scan; first of all, account for the
7421          * pending list.
7422          */
7423         entryPagesFetched = numPendingPages;
7424
7425         /*
7426          * Estimate the number of entry pages read.  We need to do
7427          * counts.searchEntries searches.  Use a power function as a rough model,
7428          * though the number of tuples on leaf pages is usually much greater.  Here
7429          * we include all searches in the entry tree, including the search for the
7430          * first entry in the partial match algorithm.
7431          */
7432         entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
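
        /*
         * For example, using the figures from the statistics example above
         * (numEntryPages = 720, numEntries = 75000) and assuming two search
         * entries, one exact and one partial-match, rint(pow(720, 0.15)) is 3,
         * so this adds ceil(2 * 3) = 6 entry pages on top of any pending-list
         * pages.
         */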
7433
7434         /*
7435          * Add an estimate of the entry pages read by the partial match algorithm.
7436          * It's a scan over the leaf pages in the entry tree.  We don't have any
7437          * useful stats here, so estimate it as a proportion.  Because
7438          * counts.partialEntries is pretty bogus (see code above), it may exceed
7439          * numEntries; clamp the proportion to ensure sanity.
7440          */
7441         partialScale = counts.partialEntries / numEntries;
7442         partialScale = Min(partialScale, 1.0);
7443
7444         entryPagesFetched += ceil(numEntryPages * partialScale);
7445
7446         /*
7447          * The partial match algorithm reads all data pages before doing the actual
7448          * scan, so it's a startup cost.  Again, we don't have any useful stats
7449          * here, so estimate it as a proportion.
7450          */
7451         dataPagesFetched = ceil(numDataPages * partialScale);
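
        /*
         * Continuing the example: the partial-match qual contributed
         * counts.partialEntries = 100, so partialScale = 100 / 75000, about
         * 0.0013.  That adds ceil(720 * 0.0013) = 1 more entry page above,
         * and charges ceil(420 * 0.0013) = 1 data page here as startup work.
         */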
7452
7453         /*
7454          * Calculate cache effects if more than one scan due to nestloops or array
7455          * quals.  The result is pro-rated per nestloop scan, but the array qual
7456          * factor shouldn't be pro-rated (compare genericcostestimate).
7457          */
7458         if (outer_scans > 1 || counts.arrayScans > 1)
7459         {
7460                 entryPagesFetched *= outer_scans * counts.arrayScans;
7461                 entryPagesFetched = index_pages_fetched(entryPagesFetched,
7462                                                                                                 (BlockNumber) numEntryPages,
7463                                                                                                 numEntryPages, root);
7464                 entryPagesFetched /= outer_scans;
7465                 dataPagesFetched *= outer_scans * counts.arrayScans;
7466                 dataPagesFetched = index_pages_fetched(dataPagesFetched,
7467                                                                                            (BlockNumber) numDataPages,
7468                                                                                            numDataPages, root);
7469                 dataPagesFetched /= outer_scans;
7470         }
7471
7472         /*
7473          * Here we use random page cost because logically-close pages could be far
7474          * apart on disk.
7475          */
7476         *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
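
        /*
         * In the running example, with no pending-list pages, a single outer
         * scan, and no array quals, entryPagesFetched = 6 + 1 = 7 and
         * dataPagesFetched = 1, so with the default random_page_cost of 4.0
         * the startup charge is (7 + 1) * 4.0 = 32.0.
         */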
7477
7478         /*
7479          * Now compute the number of data pages fetched during the scan.
7480          *
7481          * We assume that every entry has the same number of items, and that there
7482          * is no overlap between them. (XXX: tsvector and array opclasses collect
7483          * statistics on the frequency of individual keys; it would be nice to use
7484          * those here.)
7485          */
7486         dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
7487
7488         /*
7489          * If there is a lot of overlap among the entries, in particular if one of
7490          * the entries is very frequent, the above calculation can grossly
7491          * under-estimate.  As a simple cross-check, calculate a lower bound based
7492          * on the overall selectivity of the quals.  At a minimum, we must read
7493          * one item pointer for each matching entry.
7494          *
7495          * The width of each item pointer varies, based on the level of
7496          * compression.  We don't have statistics on that, but an average of
7497          * around 3 bytes per item is fairly typical.
7498          */
7499         dataPagesFetchedBySel = ceil(*indexSelectivity *
7500                                                                  (numTuples / (BLCKSZ / 3)));
7501         if (dataPagesFetchedBySel > dataPagesFetched)
7502                 dataPagesFetched = dataPagesFetchedBySel;
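
        /*
         * With the default BLCKSZ of 8192, the divisor is about 2730 item
         * pointers per page.  Continuing the example with assumed values of
         * numTuples = 1,000,000 and *indexSelectivity = 0.01, the lower bound
         * is ceil(0.01 * 1000000 / 2730) = 4 pages, which overrides the
         * entry-based estimate of ceil(420 * 1 / 75000) = 1 page above.
         */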
7503
7504         /* Account for cache effects, the same as above */
7505         if (outer_scans > 1 || counts.arrayScans > 1)
7506         {
7507                 dataPagesFetched *= outer_scans * counts.arrayScans;
7508                 dataPagesFetched = index_pages_fetched(dataPagesFetched,
7509                                                                                            (BlockNumber) numDataPages,
7510                                                                                            numDataPages, root);
7511                 dataPagesFetched /= outer_scans;
7512         }
7513
7514         /* And apply random_page_cost as the cost per page */
7515         *indexTotalCost = *indexStartupCost +
7516                 dataPagesFetched * spc_random_page_cost;
7517
7518         /*
7519          * Add on index qual eval costs, much as in genericcostestimate
7520          */
7521         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
7522                 orderby_operands_eval_cost(root, path);
7523         qual_op_cost = cpu_operator_cost *
7524                 (list_length(indexQuals) + list_length(indexOrderBys));
7525
7526         *indexStartupCost += qual_arg_cost;
7527         *indexTotalCost += qual_arg_cost;
7528         *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
7529         *indexPages = dataPagesFetched;
7530 }
7531
7532 /*
7533  * BRIN has search behavior completely different from other index types
7534  */
7535 void
7536 brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7537                                  Cost *indexStartupCost, Cost *indexTotalCost,
7538                                  Selectivity *indexSelectivity, double *indexCorrelation,
7539                                  double *indexPages)
7540 {
7541         IndexOptInfo *index = path->indexinfo;
7542         List       *indexQuals = path->indexquals;
7543         List       *indexOrderBys = path->indexorderbys;
7544         double          numPages = index->pages;
7545         double          numTuples = index->tuples;
7546         List       *qinfos;
7547         Cost            spc_seq_page_cost;
7548         Cost            spc_random_page_cost;
7549         double          qual_op_cost;
7550         double          qual_arg_cost;
7551
7552         /* Do preliminary analysis of indexquals */
7553         qinfos = deconstruct_indexquals(path);
7554
7555         /* fetch estimated page cost for tablespace containing index */
7556         get_tablespace_page_costs(index->reltablespace,
7557                                                           &spc_random_page_cost,
7558                                                           &spc_seq_page_cost);
7559
7560         /*
7561          * BRIN indexes are always read in full; use that as startup cost.
7562          *
7563          * XXX maybe only include revmap pages here?
7564          */
7565         *indexStartupCost = spc_seq_page_cost * numPages * loop_count;
7566
7567         /*
7568          * To read a BRIN index there might be a bit of back and forth over
7569          * regular pages, as the revmap might point to them out of sequential order;
7570          * calculate this as reading the whole index in random order.
7571          */
7572         *indexTotalCost = spc_random_page_cost * numPages * loop_count;
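
        /*
         * For example, a 1,000-page BRIN index scanned once (loop_count = 1)
         * with the default tablespace costs (seq_page_cost = 1.0,
         * random_page_cost = 4.0) gets *indexStartupCost = 1000 and, before
         * the qual-evaluation charges added below, *indexTotalCost = 4000.
         */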
7573
7574         *indexSelectivity =
7575                 clauselist_selectivity(root, indexQuals,
7576                                                            path->indexinfo->rel->relid,
7577                                                            JOIN_INNER, NULL);
7578         *indexCorrelation = 1;
7579
7580         /*
7581          * Add on index qual eval costs, much as in genericcostestimate.
7582          */
7583         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
7584                 orderby_operands_eval_cost(root, path);
7585         qual_op_cost = cpu_operator_cost *
7586                 (list_length(indexQuals) + list_length(indexOrderBys));
7587
7588         *indexStartupCost += qual_arg_cost;
7589         *indexTotalCost += qual_arg_cost;
7590         *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
7591         *indexPages = index->pages;
7592
7593         /* XXX what about pages_per_range? */
7594 }