]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/selfuncs.c
Teach planner about the idea that a mergejoin won't necessarily read
[postgresql] / src / backend / utils / adt / selfuncs.c
1 /*-------------------------------------------------------------------------
2  *
3  * selfuncs.c
4  *        Selectivity functions and index cost estimation functions for
5  *        standard operators and index access methods.
6  *
7  *        Selectivity routines are registered in the pg_operator catalog
8  *        in the "oprrest" and "oprjoin" attributes.
9  *
10  *        Index cost functions are registered in the pg_am catalog
11  *        in the "amcostestimate" attribute.
12  *
13  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
14  * Portions Copyright (c) 1994, Regents of the University of California
15  *
16  *
17  * IDENTIFICATION
18  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.104 2002/03/01 04:09:25 tgl Exp $
19  *
20  *-------------------------------------------------------------------------
21  */
22
23 /*----------
24  * Operator selectivity estimation functions are called to estimate the
25  * selectivity of WHERE clauses whose top-level operator is their operator.
26  * We divide the problem into two cases:
27  *              Restriction clause estimation: the clause involves vars of just
28  *                      one relation.
29  *              Join clause estimation: the clause involves vars of multiple rels.
30  * Join selectivity estimation is far more difficult and usually less accurate
31  * than restriction estimation.
32  *
33  * When dealing with the inner scan of a nestloop join, we consider the
34  * join's joinclauses as restriction clauses for the inner relation, and
35  * treat vars of the outer relation as parameters (a/k/a constants of unknown
36  * values).  So, restriction estimators need to be able to accept an argument
37  * telling which relation is to be treated as the variable.
38  *
39  * The call convention for a restriction estimator (oprrest function) is
40  *
41  *              Selectivity oprrest (Query *root,
42  *                                                       Oid operator,
43  *                                                       List *args,
44  *                                                       int varRelid);
45  *
46  * root: general information about the query (rtable and RelOptInfo lists
47  * are particularly important for the estimator).
48  * operator: OID of the specific operator in question.
49  * args: argument list from the operator clause.
50  * varRelid: if not zero, the relid (rtable index) of the relation to
51  * be treated as the variable relation.  May be zero if the args list
52  * is known to contain vars of only one relation.
53  *
54  * This is represented at the SQL level (in pg_proc) as
55  *
56  *              float8 oprrest (opaque, oid, opaque, int4);
57  *
58  * The call convention for a join estimator (oprjoin function) is similar
59  * except that varRelid is not needed:
60  *
61  *              Selectivity oprjoin (Query *root,
62  *                                                       Oid operator,
63  *                                                       List *args);
64  *
65  *              float8 oprjoin (opaque, oid, opaque);
66  *----------
67  */
68
69 #include "postgres.h"
70
71 #include <ctype.h>
72 #include <math.h>
73 #ifdef USE_LOCALE
74 #include <locale.h>
75 #endif
76
77 #include "access/heapam.h"
78 #include "catalog/catname.h"
79 #include "catalog/pg_operator.h"
80 #include "catalog/pg_proc.h"
81 #include "catalog/pg_statistic.h"
82 #include "catalog/pg_type.h"
83 #include "mb/pg_wchar.h"
84 #include "nodes/makefuncs.h"
85 #include "optimizer/clauses.h"
86 #include "optimizer/cost.h"
87 #include "optimizer/pathnode.h"
88 #include "optimizer/plancat.h"
89 #include "optimizer/prep.h"
90 #include "parser/parse_func.h"
91 #include "parser/parse_oper.h"
92 #include "parser/parsetree.h"
93 #include "utils/builtins.h"
94 #include "utils/date.h"
95 #include "utils/datum.h"
96 #include "utils/int8.h"
97 #include "utils/lsyscache.h"
98 #include "utils/selfuncs.h"
99 #include "utils/syscache.h"
100
101
102 /*
103  * Note: the default selectivity estimates are not chosen entirely at random.
104  * We want them to be small enough to ensure that indexscans will be used if
105  * available, for typical table densities of ~100 tuples/page.  Thus, for
106  * example, 0.01 is not quite small enough, since that makes it appear that
107  * nearly all pages will be hit anyway.  Also, since we sometimes estimate
108  * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
109  * 1/DEFAULT_EQ_SEL.
110  */
111
112 /* default selectivity estimate for equalities such as "A = b" */
113 #define DEFAULT_EQ_SEL  0.005
114
115 /* default selectivity estimate for inequalities such as "A < b" */
116 #define DEFAULT_INEQ_SEL  (1.0 / 3.0)
117
118 /* default selectivity estimate for pattern-match operators such as LIKE */
119 #define DEFAULT_MATCH_SEL       0.005
120
121 /* default number of distinct values in a table */
122 #define DEFAULT_NUM_DISTINCT  200
123
124 /* default selectivity estimate for boolean and null test nodes */
125 #define DEFAULT_UNK_SEL                 0.005
126 #define DEFAULT_NOT_UNK_SEL             (1.0 - DEFAULT_UNK_SEL)
127 #define DEFAULT_BOOL_SEL                0.5
128
129 /*
130  * Clamp a computed probability estimate (which may suffer from roundoff or
131  * estimation errors) to valid range.  Argument must be a float variable.
132  */
133 #define CLAMP_PROBABILITY(p) \
134         do { \
135                 if (p < 0.0) \
136                         p = 0.0; \
137                 else if (p > 1.0) \
138                         p = 1.0; \
139         } while (0)
140
141
142 static bool get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max);
143 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
144                                   Datum lobound, Datum hibound, Oid boundstypid,
145                                   double *scaledlobound, double *scaledhibound);
146 static double convert_numeric_to_scalar(Datum value, Oid typid);
147 static void convert_string_to_scalar(unsigned char *value,
148                                                  double *scaledvalue,
149                                                  unsigned char *lobound,
150                                                  double *scaledlobound,
151                                                  unsigned char *hibound,
152                                                  double *scaledhibound);
153 static void convert_bytea_to_scalar(Datum value,
154                                                 double *scaledvalue,
155                                                 Datum lobound,
156                                                 double *scaledlobound,
157                                                 Datum hibound,
158                                                 double *scaledhibound);
159 static double convert_one_string_to_scalar(unsigned char *value,
160                                                          int rangelo, int rangehi);
161 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
162                                                         int rangelo, int rangehi);
163 static unsigned char *convert_string_datum(Datum value, Oid typid);
164 static double convert_timevalue_to_scalar(Datum value, Oid typid);
165 static double get_att_numdistinct(Query *root, Var *var,
166                                         Form_pg_statistic stats);
167 static bool get_restriction_var(List *args, int varRelid,
168                                         Var **var, Node **other,
169                                         bool *varonleft);
170 static void get_join_vars(List *args, Var **var1, Var **var2);
171 static Selectivity prefix_selectivity(Query *root, Var *var, char *prefix);
172 static Selectivity pattern_selectivity(char *patt, Pattern_Type ptype);
173 static bool string_lessthan(const char *str1, const char *str2,
174                                 Oid datatype);
175 static Oid      find_operator(const char *opname, Oid datatype);
176 static Datum string_to_datum(const char *str, Oid datatype);
177 static Const *string_to_const(const char *str, Oid datatype);
178
179
180 /*
181  *              eqsel                   - Selectivity of "=" for any data types.
182  *
183  * Note: this routine is also used to estimate selectivity for some
184  * operators that are not "=" but have comparable selectivity behavior,
185  * such as "~=" (geometric approximate-match).  Even for "=", we must
186  * keep in mind that the left and right datatypes may differ.
187  */
188 Datum
189 eqsel(PG_FUNCTION_ARGS)
190 {
191         Query      *root = (Query *) PG_GETARG_POINTER(0);
192         Oid                     operator = PG_GETARG_OID(1);
193         List       *args = (List *) PG_GETARG_POINTER(2);
194         int                     varRelid = PG_GETARG_INT32(3);
195         Var                *var;
196         Node       *other;
197         bool            varonleft;
198         Oid                     relid;
199         HeapTuple       statsTuple;
200         Datum      *values;
201         int                     nvalues;
202         float4     *numbers;
203         int                     nnumbers;
204         double          selec;
205
206         /*
207          * If expression is not var = something or something = var for a
208          * simple var of a real relation (no subqueries, for now), then punt
209          * and return a default estimate.
210          */
211         if (!get_restriction_var(args, varRelid,
212                                                          &var, &other, &varonleft))
213                 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
214         relid = getrelid(var->varno, root->rtable);
215         if (relid == InvalidOid)
216                 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
217
218         /*
219          * If the something is a NULL constant, assume operator is strict and
220          * return zero, ie, operator will never return TRUE.
221          */
222         if (IsA(other, Const) &&((Const *) other)->constisnull)
223                 PG_RETURN_FLOAT8(0.0);
224
225         /* get stats for the attribute, if available */
226         statsTuple = SearchSysCache(STATRELATT,
227                                                                 ObjectIdGetDatum(relid),
228                                                                 Int16GetDatum(var->varattno),
229                                                                 0, 0);
230         if (HeapTupleIsValid(statsTuple))
231         {
232                 Form_pg_statistic stats;
233
234                 stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
235
236                 if (IsA(other, Const))
237                 {
238                         /* Var is being compared to a known non-null constant */
239                         Datum           constval = ((Const *) other)->constvalue;
240                         bool            match = false;
241                         int                     i;
242
243                         /*
244                          * Is the constant "=" to any of the column's most common
245                          * values?      (Although the given operator may not really be
246                          * "=", we will assume that seeing whether it returns TRUE is
247                          * an appropriate test.  If you don't like this, maybe you
248                          * shouldn't be using eqsel for your operator...)
249                          */
250                         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
251                                                                  STATISTIC_KIND_MCV, InvalidOid,
252                                                                  &values, &nvalues,
253                                                                  &numbers, &nnumbers))
254                         {
255                                 FmgrInfo        eqproc;
256
257                                 fmgr_info(get_opcode(operator), &eqproc);
258
259                                 for (i = 0; i < nvalues; i++)
260                                 {
261                                         /* be careful to apply operator right way 'round */
262                                         if (varonleft)
263                                                 match = DatumGetBool(FunctionCall2(&eqproc,
264                                                                                                                    values[i],
265                                                                                                                    constval));
266                                         else
267                                                 match = DatumGetBool(FunctionCall2(&eqproc,
268                                                                                                                    constval,
269                                                                                                                    values[i]));
270                                         if (match)
271                                                 break;
272                                 }
273                         }
274                         else
275                         {
276                                 /* no most-common-value info available */
277                                 values = NULL;
278                                 numbers = NULL;
279                                 i = nvalues = nnumbers = 0;
280                         }
281
282                         if (match)
283                         {
284                                 /*
285                                  * Constant is "=" to this common value.  We know
286                                  * selectivity exactly (or as exactly as VACUUM could
287                                  * calculate it, anyway).
288                                  */
289                                 selec = numbers[i];
290                         }
291                         else
292                         {
293                                 /*
294                                  * Comparison is against a constant that is neither NULL
295                                  * nor any of the common values.  Its selectivity cannot
296                                  * be more than this:
297                                  */
298                                 double          sumcommon = 0.0;
299                                 double          otherdistinct;
300
301                                 for (i = 0; i < nnumbers; i++)
302                                         sumcommon += numbers[i];
303                                 selec = 1.0 - sumcommon - stats->stanullfrac;
304                                 CLAMP_PROBABILITY(selec);
305
306                                 /*
307                                  * and in fact it's probably a good deal less. We
308                                  * approximate that all the not-common values share this
309                                  * remaining fraction equally, so we divide by the number
310                                  * of other distinct values.
311                                  */
312                                 otherdistinct = get_att_numdistinct(root, var, stats)
313                                         - nnumbers;
314                                 if (otherdistinct > 1)
315                                         selec /= otherdistinct;
316
317                                 /*
318                                  * Another cross-check: selectivity shouldn't be estimated
319                                  * as more than the least common "most common value".
320                                  */
321                                 if (nnumbers > 0 && selec > numbers[nnumbers - 1])
322                                         selec = numbers[nnumbers - 1];
323                         }
324
325                         free_attstatsslot(var->vartype, values, nvalues,
326                                                           numbers, nnumbers);
327                 }
328                 else
329                 {
330                         double          ndistinct;
331
332                         /*
333                          * Search is for a value that we do not know a priori, but we
334                          * will assume it is not NULL.  Estimate the selectivity as
335                          * non-null fraction divided by number of distinct values, so
336                          * that we get a result averaged over all possible values
337                          * whether common or uncommon.  (Essentially, we are assuming
338                          * that the not-yet-known comparison value is equally likely
339                          * to be any of the possible values, regardless of their
340                          * frequency in the table.      Is that a good idea?)
341                          */
342                         selec = 1.0 - stats->stanullfrac;
343                         ndistinct = get_att_numdistinct(root, var, stats);
344                         if (ndistinct > 1)
345                                 selec /= ndistinct;
346
347                         /*
348                          * Cross-check: selectivity should never be estimated as more
349                          * than the most common value's.
350                          */
351                         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
352                                                                  STATISTIC_KIND_MCV, InvalidOid,
353                                                                  NULL, NULL,
354                                                                  &numbers, &nnumbers))
355                         {
356                                 if (nnumbers > 0 && selec > numbers[0])
357                                         selec = numbers[0];
358                                 free_attstatsslot(var->vartype, NULL, 0, numbers, nnumbers);
359                         }
360                 }
361
362                 ReleaseSysCache(statsTuple);
363         }
364         else
365         {
366                 /*
367                  * No VACUUM ANALYZE stats available, so make a guess using
368                  * estimated number of distinct values and assuming they are
369                  * equally common.      (The guess is unlikely to be very good, but we
370                  * do know a few special cases.)
371                  */
372                 selec = 1.0 / get_att_numdistinct(root, var, NULL);
373         }
374
375         /* result should be in range, but make sure... */
376         CLAMP_PROBABILITY(selec);
377
378         PG_RETURN_FLOAT8((float8) selec);
379 }
380
381 /*
382  *              neqsel                  - Selectivity of "!=" for any data types.
383  *
384  * This routine is also used for some operators that are not "!="
385  * but have comparable selectivity behavior.  See above comments
386  * for eqsel().
387  */
388 Datum
389 neqsel(PG_FUNCTION_ARGS)
390 {
391         Query      *root = (Query *) PG_GETARG_POINTER(0);
392         Oid                     operator = PG_GETARG_OID(1);
393         List       *args = (List *) PG_GETARG_POINTER(2);
394         int                     varRelid = PG_GETARG_INT32(3);
395         Oid                     eqop;
396         float8          result;
397
398         /*
399          * We want 1 - eqsel() where the equality operator is the one
400          * associated with this != operator, that is, its negator.
401          */
402         eqop = get_negator(operator);
403         if (eqop)
404         {
405                 result = DatumGetFloat8(DirectFunctionCall4(eqsel,
406                                                                                                         PointerGetDatum(root),
407                                                                                                   ObjectIdGetDatum(eqop),
408                                                                                                         PointerGetDatum(args),
409                                                                                            Int32GetDatum(varRelid)));
410         }
411         else
412         {
413                 /* Use default selectivity (should we raise an error instead?) */
414                 result = DEFAULT_EQ_SEL;
415         }
416         result = 1.0 - result;
417         PG_RETURN_FLOAT8(result);
418 }
419
420 /*
421  *      scalarineqsel           - Selectivity of "<", "<=", ">", ">=" for scalars.
422  *
423  * This is the guts of both scalarltsel and scalargtsel.  The caller has
424  * commuted the clause, if necessary, so that we can treat the Var as
425  * being on the left.  The caller must also make sure that the other side
426  * of the clause is a non-null Const, and dissect same into a value and
427  * datatype.
428  *
429  * This routine works for any datatype (or pair of datatypes) known to
430  * convert_to_scalar().  If it is applied to some other datatype,
431  * it will return a default estimate.
432  */
433 static double
434 scalarineqsel(Query *root, Oid operator, bool isgt,
435                           Var *var, Datum constval, Oid consttype)
436 {
437         Oid                     relid;
438         HeapTuple       statsTuple;
439         Form_pg_statistic stats;
440         FmgrInfo        opproc;
441         Datum      *values;
442         int                     nvalues;
443         float4     *numbers;
444         int                     nnumbers;
445         double          mcv_selec,
446                                 hist_selec,
447                                 sumcommon;
448         double          selec;
449         int                     i;
450
451         /*
452          * If expression is not var op something or something op var for a
453          * simple var of a real relation (no subqueries, for now), then punt
454          * and return a default estimate.
455          */
456         relid = getrelid(var->varno, root->rtable);
457         if (relid == InvalidOid)
458                 return DEFAULT_INEQ_SEL;
459
460         /* get stats for the attribute */
461         statsTuple = SearchSysCache(STATRELATT,
462                                                                 ObjectIdGetDatum(relid),
463                                                                 Int16GetDatum(var->varattno),
464                                                                 0, 0);
465         if (!HeapTupleIsValid(statsTuple))
466         {
467                 /* no stats available, so default result */
468                 return DEFAULT_INEQ_SEL;
469         }
470         stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
471
472         fmgr_info(get_opcode(operator), &opproc);
473
474         /*
475          * If we have most-common-values info, add up the fractions of the MCV
476          * entries that satisfy MCV OP CONST.  These fractions contribute
477          * directly to the result selectivity.  Also add up the total fraction
478          * represented by MCV entries.
479          */
480         mcv_selec = 0.0;
481         sumcommon = 0.0;
482
483         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
484                                                  STATISTIC_KIND_MCV, InvalidOid,
485                                                  &values, &nvalues,
486                                                  &numbers, &nnumbers))
487         {
488                 for (i = 0; i < nvalues; i++)
489                 {
490                         if (DatumGetBool(FunctionCall2(&opproc,
491                                                                                    values[i],
492                                                                                    constval)))
493                                 mcv_selec += numbers[i];
494                         sumcommon += numbers[i];
495                 }
496                 free_attstatsslot(var->vartype, values, nvalues, numbers, nnumbers);
497         }
498
499         /*
500          * If there is a histogram, determine which bin the constant falls in,
501          * and compute the resulting contribution to selectivity.
502          *
503          * Someday, VACUUM might store more than one histogram per rel/att,
504          * corresponding to more than one possible sort ordering defined for
505          * the column type.  However, to make that work we will need to figure
506          * out which staop to search for --- it's not necessarily the one we
507          * have at hand!  (For example, we might have a '<=' operator rather
508          * than the '<' operator that will appear in staop.)  For now, assume
509          * that whatever appears in pg_statistic is sorted the same way our
510          * operator sorts, or the reverse way if isgt is TRUE.
511          */
512         hist_selec = 0.0;
513
514         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
515                                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
516                                                  &values, &nvalues,
517                                                  NULL, NULL))
518         {
519                 if (nvalues > 1)
520                 {
521                         double          histfrac;
522                         bool            ltcmp;
523
524                         ltcmp = DatumGetBool(FunctionCall2(&opproc,
525                                                                                            values[0],
526                                                                                            constval));
527                         if (isgt)
528                                 ltcmp = !ltcmp;
529                         if (!ltcmp)
530                         {
531                                 /* Constant is below lower histogram boundary. */
532                                 histfrac = 0.0;
533                         }
534                         else
535                         {
536                                 /*
537                                  * Scan to find proper location.  This could be made
538                                  * faster by using a binary-search method, but it's
539                                  * probably not worth the trouble for typical histogram
540                                  * sizes.
541                                  */
542                                 for (i = 1; i < nvalues; i++)
543                                 {
544                                         ltcmp = DatumGetBool(FunctionCall2(&opproc,
545                                                                                                            values[i],
546                                                                                                            constval));
547                                         if (isgt)
548                                                 ltcmp = !ltcmp;
549                                         if (!ltcmp)
550                                                 break;
551                                 }
552                                 if (i >= nvalues)
553                                 {
554                                         /* Constant is above upper histogram boundary. */
555                                         histfrac = 1.0;
556                                 }
557                                 else
558                                 {
559                                         double          val,
560                                                                 high,
561                                                                 low;
562                                         double          binfrac;
563
564                                         /*
565                                          * We have values[i-1] < constant < values[i].
566                                          *
567                                          * Convert the constant and the two nearest bin boundary
568                                          * values to a uniform comparison scale, and do a
569                                          * linear interpolation within this bin.
570                                          */
571                                         if (convert_to_scalar(constval, consttype, &val,
572                                                                                   values[i - 1], values[i],
573                                                                                   var->vartype,
574                                                                                   &low, &high))
575                                         {
576                                                 if (high <= low)
577                                                 {
578                                                         /* cope if bin boundaries appear identical */
579                                                         binfrac = 0.5;
580                                                 }
581                                                 else if (val <= low)
582                                                         binfrac = 0.0;
583                                                 else if (val >= high)
584                                                         binfrac = 1.0;
585                                                 else
586                                                 {
587                                                         binfrac = (val - low) / (high - low);
588
589                                                         /*
590                                                          * Watch out for the possibility that we got a
591                                                          * NaN or Infinity from the division.  This
592                                                          * can happen despite the previous checks, if
593                                                          * for example "low" is -Infinity.
594                                                          */
595                                                         if (isnan(binfrac) ||
596                                                                 binfrac < 0.0 || binfrac > 1.0)
597                                                                 binfrac = 0.5;
598                                                 }
599                                         }
600                                         else
601                                         {
602                                                 /*
603                                                  * Ideally we'd produce an error here, on the
604                                                  * grounds that the given operator shouldn't have
605                                                  * scalarXXsel registered as its selectivity func
606                                                  * unless we can deal with its operand types.  But
607                                                  * currently, all manner of stuff is invoking
608                                                  * scalarXXsel, so give a default estimate until
609                                                  * that can be fixed.
610                                                  */
611                                                 binfrac = 0.5;
612                                         }
613
614                                         /*
615                                          * Now, compute the overall selectivity across the
616                                          * values represented by the histogram.  We have i-1
617                                          * full bins and binfrac partial bin below the
618                                          * constant.
619                                          */
620                                         histfrac = (double) (i - 1) + binfrac;
621                                         histfrac /= (double) (nvalues - 1);
622                                 }
623                         }
624
625                         /*
626                          * Now histfrac = fraction of histogram entries below the
627                          * constant.
628                          *
629                          * Account for "<" vs ">"
630                          */
631                         hist_selec = isgt ? (1.0 - histfrac) : histfrac;
632
633                         /*
634                          * The histogram boundaries are only approximate to begin
635                          * with, and may well be out of date anyway.  Therefore, don't
636                          * believe extremely small or large selectivity estimates.
637                          */
638                         if (hist_selec < 0.0001)
639                                 hist_selec = 0.0001;
640                         else if (hist_selec > 0.9999)
641                                 hist_selec = 0.9999;
642                 }
643
644                 free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
645         }
646
647         /*
648          * Now merge the results from the MCV and histogram calculations,
649          * realizing that the histogram covers only the non-null values that
650          * are not listed in MCV.
651          */
652         selec = 1.0 - stats->stanullfrac - sumcommon;
653
654         if (hist_selec > 0.0)
655                 selec *= hist_selec;
656         else
657         {
658                 /*
659                  * If no histogram but there are values not accounted for by MCV,
660                  * arbitrarily assume half of them will match.
661                  */
662                 selec *= 0.5;
663         }
664
665         selec += mcv_selec;
666
667         ReleaseSysCache(statsTuple);
668
669         /* result should be in range, but make sure... */
670         CLAMP_PROBABILITY(selec);
671
672         return selec;
673 }
674
675 /*
676  *              scalarltsel             - Selectivity of "<" (also "<=") for scalars.
677  */
678 Datum
679 scalarltsel(PG_FUNCTION_ARGS)
680 {
681         Query      *root = (Query *) PG_GETARG_POINTER(0);
682         Oid                     operator = PG_GETARG_OID(1);
683         List       *args = (List *) PG_GETARG_POINTER(2);
684         int                     varRelid = PG_GETARG_INT32(3);
685         Var                *var;
686         Node       *other;
687         Datum           constval;
688         Oid                     consttype;
689         bool            varonleft;
690         bool            isgt;
691         double          selec;
692
693         /*
694          * If expression is not var op something or something op var for a
695          * simple var of a real relation (no subqueries, for now), then punt
696          * and return a default estimate.
697          */
698         if (!get_restriction_var(args, varRelid,
699                                                          &var, &other, &varonleft))
700                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
701
702         /*
703          * Can't do anything useful if the something is not a constant,
704          * either.
705          */
706         if (!IsA(other, Const))
707                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
708
709         /*
710          * If the constant is NULL, assume operator is strict and return zero,
711          * ie, operator will never return TRUE.
712          */
713         if (((Const *) other)->constisnull)
714                 PG_RETURN_FLOAT8(0.0);
715         constval = ((Const *) other)->constvalue;
716         consttype = ((Const *) other)->consttype;
717
718         /*
719          * Force the var to be on the left to simplify logic in scalarineqsel.
720          */
721         if (varonleft)
722         {
723                 /* we have var < other */
724                 isgt = false;
725         }
726         else
727         {
728                 /* we have other < var, commute to make var > other */
729                 operator = get_commutator(operator);
730                 if (!operator)
731                 {
732                         /* Use default selectivity (should we raise an error instead?) */
733                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
734                 }
735                 isgt = true;
736         }
737
738         selec = scalarineqsel(root, operator, isgt, var, constval, consttype);
739
740         PG_RETURN_FLOAT8((float8) selec);
741 }
742
743 /*
744  *              scalargtsel             - Selectivity of ">" (also ">=") for integers.
745  */
746 Datum
747 scalargtsel(PG_FUNCTION_ARGS)
748 {
749         Query      *root = (Query *) PG_GETARG_POINTER(0);
750         Oid                     operator = PG_GETARG_OID(1);
751         List       *args = (List *) PG_GETARG_POINTER(2);
752         int                     varRelid = PG_GETARG_INT32(3);
753         Var                *var;
754         Node       *other;
755         Datum           constval;
756         Oid                     consttype;
757         bool            varonleft;
758         bool            isgt;
759         double          selec;
760
761         /*
762          * If expression is not var op something or something op var for a
763          * simple var of a real relation (no subqueries, for now), then punt
764          * and return a default estimate.
765          */
766         if (!get_restriction_var(args, varRelid,
767                                                          &var, &other, &varonleft))
768                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
769
770         /*
771          * Can't do anything useful if the something is not a constant,
772          * either.
773          */
774         if (!IsA(other, Const))
775                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
776
777         /*
778          * If the constant is NULL, assume operator is strict and return zero,
779          * ie, operator will never return TRUE.
780          */
781         if (((Const *) other)->constisnull)
782                 PG_RETURN_FLOAT8(0.0);
783         constval = ((Const *) other)->constvalue;
784         consttype = ((Const *) other)->consttype;
785
786         /*
787          * Force the var to be on the left to simplify logic in scalarineqsel.
788          */
789         if (varonleft)
790         {
791                 /* we have var > other */
792                 isgt = true;
793         }
794         else
795         {
796                 /* we have other > var, commute to make var < other */
797                 operator = get_commutator(operator);
798                 if (!operator)
799                 {
800                         /* Use default selectivity (should we raise an error instead?) */
801                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
802                 }
803                 isgt = false;
804         }
805
806         selec = scalarineqsel(root, operator, isgt, var, constval, consttype);
807
808         PG_RETURN_FLOAT8((float8) selec);
809 }
810
811 /*
812  * patternsel                   - Generic code for pattern-match selectivity.
813  */
814 static double
815 patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
816 {
817         Query      *root = (Query *) PG_GETARG_POINTER(0);
818
819 #ifdef NOT_USED
820         Oid                     operator = PG_GETARG_OID(1);
821 #endif
822         List       *args = (List *) PG_GETARG_POINTER(2);
823         int                     varRelid = PG_GETARG_INT32(3);
824         Var                *var;
825         Node       *other;
826         bool            varonleft;
827         Oid                     relid;
828         Datum           constval;
829         char       *patt;
830         Pattern_Prefix_Status pstatus;
831         char       *prefix;
832         char       *rest;
833         double          result;
834
835         /*
836          * If expression is not var op constant for a simple var of a real
837          * relation (no subqueries, for now), then punt and return a default
838          * estimate.
839          */
840         if (!get_restriction_var(args, varRelid,
841                                                          &var, &other, &varonleft))
842                 return DEFAULT_MATCH_SEL;
843         if (!varonleft || !IsA(other, Const))
844                 return DEFAULT_MATCH_SEL;
845         relid = getrelid(var->varno, root->rtable);
846         if (relid == InvalidOid)
847                 return DEFAULT_MATCH_SEL;
848
849         /*
850          * If the constant is NULL, assume operator is strict and return zero,
851          * ie, operator will never return TRUE.
852          */
853         if (((Const *) other)->constisnull)
854                 return 0.0;
855         constval = ((Const *) other)->constvalue;
856         /* the right-hand const is type text for all supported operators */
857         Assert(((Const *) other)->consttype == TEXTOID);
858         patt = DatumGetCString(DirectFunctionCall1(textout, constval));
859
860         /* divide pattern into fixed prefix and remainder */
861         pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);
862
863         if (pstatus == Pattern_Prefix_Exact)
864         {
865                 /*
866                  * Pattern specifies an exact match, so pretend operator is '='
867                  */
868                 Oid                     eqopr = find_operator("=", var->vartype);
869                 Const      *eqcon;
870                 List       *eqargs;
871
872                 if (eqopr == InvalidOid)
873                         elog(ERROR, "patternsel: no = operator for type %u",
874                                  var->vartype);
875                 eqcon = string_to_const(prefix, var->vartype);
876                 eqargs = makeList2(var, eqcon);
877                 result = DatumGetFloat8(DirectFunctionCall4(eqsel,
878                                                                                                         PointerGetDatum(root),
879                                                                                                  ObjectIdGetDatum(eqopr),
880                                                                                                  PointerGetDatum(eqargs),
881                                                                                            Int32GetDatum(varRelid)));
882         }
883         else
884         {
885                 /*
886                  * Not exact-match pattern.  We estimate selectivity of the fixed
887                  * prefix and remainder of pattern separately, then combine the
888                  * two.
889                  */
890                 Selectivity prefixsel;
891                 Selectivity restsel;
892                 Selectivity selec;
893
894                 if (pstatus == Pattern_Prefix_Partial)
895                         prefixsel = prefix_selectivity(root, var, prefix);
896                 else
897                         prefixsel = 1.0;
898                 restsel = pattern_selectivity(rest, ptype);
899                 selec = prefixsel * restsel;
900                 /* result should be in range, but make sure... */
901                 CLAMP_PROBABILITY(selec);
902                 result = selec;
903         }
904
905         if (prefix)
906                 pfree(prefix);
907         pfree(patt);
908
909         return result;
910 }
911
912 /*
913  *              regexeqsel              - Selectivity of regular-expression pattern match.
914  */
915 Datum
916 regexeqsel(PG_FUNCTION_ARGS)
917 {
918         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex));
919 }
920
921 /*
922  *              icregexeqsel    - Selectivity of case-insensitive regex match.
923  */
924 Datum
925 icregexeqsel(PG_FUNCTION_ARGS)
926 {
927         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC));
928 }
929
930 /*
931  *              likesel                 - Selectivity of LIKE pattern match.
932  */
933 Datum
934 likesel(PG_FUNCTION_ARGS)
935 {
936         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like));
937 }
938
939 /*
940  *              iclikesel                       - Selectivity of ILIKE pattern match.
941  */
942 Datum
943 iclikesel(PG_FUNCTION_ARGS)
944 {
945         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC));
946 }
947
948 /*
949  *              regexnesel              - Selectivity of regular-expression pattern non-match.
950  */
951 Datum
952 regexnesel(PG_FUNCTION_ARGS)
953 {
954         double          result;
955
956         result = patternsel(fcinfo, Pattern_Type_Regex);
957         result = 1.0 - result;
958         PG_RETURN_FLOAT8(result);
959 }
960
961 /*
962  *              icregexnesel    - Selectivity of case-insensitive regex non-match.
963  */
964 Datum
965 icregexnesel(PG_FUNCTION_ARGS)
966 {
967         double          result;
968
969         result = patternsel(fcinfo, Pattern_Type_Regex_IC);
970         result = 1.0 - result;
971         PG_RETURN_FLOAT8(result);
972 }
973
974 /*
975  *              nlikesel                - Selectivity of LIKE pattern non-match.
976  */
977 Datum
978 nlikesel(PG_FUNCTION_ARGS)
979 {
980         double          result;
981
982         result = patternsel(fcinfo, Pattern_Type_Like);
983         result = 1.0 - result;
984         PG_RETURN_FLOAT8(result);
985 }
986
987 /*
988  *              icnlikesel              - Selectivity of ILIKE pattern non-match.
989  */
990 Datum
991 icnlikesel(PG_FUNCTION_ARGS)
992 {
993         double          result;
994
995         result = patternsel(fcinfo, Pattern_Type_Like_IC);
996         result = 1.0 - result;
997         PG_RETURN_FLOAT8(result);
998 }
999
1000 /*
1001  *              booltestsel             - Selectivity of BooleanTest Node.
1002  */
1003 Selectivity
1004 booltestsel(Query *root, BooleanTest *clause, int varRelid)
1005 {
1006         Var                *var;
1007         Node       *arg;
1008         Oid                     relid;
1009         HeapTuple       statsTuple;
1010         Datum      *values;
1011         int                     nvalues;
1012         float4     *numbers;
1013         int                     nnumbers;
1014         double          selec;
1015
1016         Assert(clause && IsA(clause, BooleanTest));
1017
1018         arg = (Node *) clause->arg;
1019
1020         /*
1021          * Ignore any binary-compatible relabeling (probably unnecessary, but
1022          * can't hurt)
1023          */
1024         if (IsA(arg, RelabelType))
1025                 arg = ((RelabelType *) arg)->arg;
1026
1027         if (IsA(arg, Var) &&(varRelid == 0 || varRelid == ((Var *) arg)->varno))
1028                 var = (Var *) arg;
1029         else
1030         {
1031                 /*
1032                  * If argument is not a Var, we can't get statistics for it, but
1033                  * perhaps clause_selectivity can do something with it.  We ignore
1034                  * the possibility of a NULL value when using clause_selectivity,
1035                  * and just assume the value is either TRUE or FALSE.
1036                  */
1037                 switch (clause->booltesttype)
1038                 {
1039                         case IS_UNKNOWN:
1040                                 selec = DEFAULT_UNK_SEL;
1041                                 break;
1042                         case IS_NOT_UNKNOWN:
1043                                 selec = DEFAULT_NOT_UNK_SEL;
1044                                 break;
1045                         case IS_TRUE:
1046                         case IS_NOT_FALSE:
1047                                 selec = (double) clause_selectivity(root, arg, varRelid);
1048                                 break;
1049                         case IS_FALSE:
1050                         case IS_NOT_TRUE:
1051                                 selec = 1.0 - (double) clause_selectivity(root, arg, varRelid);
1052                                 break;
1053                         default:
1054                                 elog(ERROR, "booltestsel: unexpected booltesttype %d",
1055                                          (int) clause->booltesttype);
1056                                 selec = 0.0;    /* Keep compiler quiet */
1057                                 break;
1058                 }
1059                 return (Selectivity) selec;
1060         }
1061
1062         /* get stats for the attribute, if available */
1063         relid = getrelid(var->varno, root->rtable);
1064         if (relid == InvalidOid)
1065                 statsTuple = NULL;
1066         else
1067                 statsTuple = SearchSysCache(STATRELATT,
1068                                                                         ObjectIdGetDatum(relid),
1069                                                                         Int16GetDatum(var->varattno),
1070                                                                         0, 0);
1071
1072         if (HeapTupleIsValid(statsTuple))
1073         {
1074                 Form_pg_statistic stats;
1075                 double          freq_null;
1076
1077                 stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
1078
1079                 freq_null = stats->stanullfrac;
1080
1081                 if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1082                                                          STATISTIC_KIND_MCV, InvalidOid,
1083                                                          &values, &nvalues,
1084                                                          &numbers, &nnumbers)
1085                         && nnumbers > 0)
1086                 {
1087                         double          freq_true;
1088                         double          freq_false;
1089
1090                         /*
1091                          * Get first MCV frequency and derive frequency for true.
1092                          */
1093                         if (DatumGetBool(values[0]))
1094                                 freq_true = numbers[0];
1095                         else
1096                                 freq_true = 1.0 - numbers[0] - freq_null;
1097
1098                         /*
1099                          * Next derive freqency for false. Then use these as
1100                          * appropriate to derive frequency for each case.
1101                          */
1102                         freq_false = 1.0 - freq_true - freq_null;
1103
1104                         switch (clause->booltesttype)
1105                         {
1106                                 case IS_UNKNOWN:
1107                                         /* select only NULL values */
1108                                         selec = freq_null;
1109                                         break;
1110                                 case IS_NOT_UNKNOWN:
1111                                         /* select non-NULL values */
1112                                         selec = 1.0 - freq_null;
1113                                         break;
1114                                 case IS_TRUE:
1115                                         /* select only TRUE values */
1116                                         selec = freq_true;
1117                                         break;
1118                                 case IS_NOT_TRUE:
1119                                         /* select non-TRUE values */
1120                                         selec = 1.0 - freq_true;
1121                                         break;
1122                                 case IS_FALSE:
1123                                         /* select only FALSE values */
1124                                         selec = freq_false;
1125                                         break;
1126                                 case IS_NOT_FALSE:
1127                                         /* select non-FALSE values */
1128                                         selec = 1.0 - freq_false;
1129                                         break;
1130                                 default:
1131                                         elog(ERROR, "booltestsel: unexpected booltesttype %d",
1132                                                  (int) clause->booltesttype);
1133                                         selec = 0.0;    /* Keep compiler quiet */
1134                                         break;
1135                         }
1136
1137                         free_attstatsslot(var->vartype, values, nvalues,
1138                                                           numbers, nnumbers);
1139                 }
1140                 else
1141                 {
1142                         /*
1143                          * No most-common-value info available. Still have null
1144                          * fraction information, so use it for IS [NOT] UNKNOWN.
1145                          * Otherwise adjust for null fraction and assume an even split
1146                          * for boolean tests.
1147                          */
1148                         switch (clause->booltesttype)
1149                         {
1150                                 case IS_UNKNOWN:
1151
1152                                         /*
1153                                          * Use freq_null directly.
1154                                          */
1155                                         selec = freq_null;
1156                                         break;
1157                                 case IS_NOT_UNKNOWN:
1158
1159                                         /*
1160                                          * Select not unknown (not null) values. Calculate
1161                                          * from freq_null.
1162                                          */
1163                                         selec = 1.0 - freq_null;
1164                                         break;
1165                                 case IS_TRUE:
1166                                 case IS_NOT_TRUE:
1167                                 case IS_FALSE:
1168                                 case IS_NOT_FALSE:
1169                                         selec = (1.0 - freq_null) / 2.0;
1170                                         break;
1171                                 default:
1172                                         elog(ERROR, "booltestsel: unexpected booltesttype %d",
1173                                                  (int) clause->booltesttype);
1174                                         selec = 0.0;    /* Keep compiler quiet */
1175                                         break;
1176                         }
1177                 }
1178
1179                 ReleaseSysCache(statsTuple);
1180         }
1181         else
1182         {
1183                 /*
1184                  * No VACUUM ANALYZE stats available, so use a default value.
1185                  * (Note: not much point in recursing to clause_selectivity here.)
1186                  */
1187                 switch (clause->booltesttype)
1188                 {
1189                         case IS_UNKNOWN:
1190                                 selec = DEFAULT_UNK_SEL;
1191                                 break;
1192                         case IS_NOT_UNKNOWN:
1193                                 selec = DEFAULT_NOT_UNK_SEL;
1194                                 break;
1195                         case IS_TRUE:
1196                         case IS_NOT_TRUE:
1197                         case IS_FALSE:
1198                         case IS_NOT_FALSE:
1199                                 selec = DEFAULT_BOOL_SEL;
1200                                 break;
1201                         default:
1202                                 elog(ERROR, "booltestsel: unexpected booltesttype %d",
1203                                          (int) clause->booltesttype);
1204                                 selec = 0.0;    /* Keep compiler quiet */
1205                                 break;
1206                 }
1207         }
1208
1209         /* result should be in range, but make sure... */
1210         CLAMP_PROBABILITY(selec);
1211
1212         return (Selectivity) selec;
1213 }
1214
1215 /*
1216  *              nulltestsel             - Selectivity of NullTest Node.
1217  */
1218 Selectivity
1219 nulltestsel(Query *root, NullTest *clause, int varRelid)
1220 {
1221         Var                *var;
1222         Node       *arg;
1223         Oid                     relid;
1224         HeapTuple       statsTuple;
1225         double          selec;
1226         double          defselec;
1227         double          freq_null;
1228
1229         Assert(clause && IsA(clause, NullTest));
1230
1231         switch (clause->nulltesttype)
1232         {
1233                 case IS_NULL:
1234                         defselec = DEFAULT_UNK_SEL;
1235                         break;
1236                 case IS_NOT_NULL:
1237                         defselec = DEFAULT_NOT_UNK_SEL;
1238                         break;
1239                 default:
1240                         elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
1241                                  (int) clause->nulltesttype);
1242                         return (Selectivity) 0;         /* keep compiler quiet */
1243         }
1244
1245         arg = (Node *) clause->arg;
1246
1247         /*
1248          * Ignore any binary-compatible relabeling
1249          */
1250         if (IsA(arg, RelabelType))
1251                 arg = ((RelabelType *) arg)->arg;
1252
1253         if (IsA(arg, Var) &&(varRelid == 0 || varRelid == ((Var *) arg)->varno))
1254                 var = (Var *) arg;
1255         else
1256         {
1257                 /*
1258                  * punt if non-Var argument
1259                  */
1260                 return (Selectivity) defselec;
1261         }
1262
1263         relid = getrelid(var->varno, root->rtable);
1264         if (relid == InvalidOid)
1265                 return (Selectivity) defselec;
1266
1267         /* get stats for the attribute, if available */
1268         statsTuple = SearchSysCache(STATRELATT,
1269                                                                 ObjectIdGetDatum(relid),
1270                                                                 Int16GetDatum(var->varattno),
1271                                                                 0, 0);
1272         if (HeapTupleIsValid(statsTuple))
1273         {
1274                 Form_pg_statistic stats;
1275
1276                 stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
1277                 freq_null = stats->stanullfrac;
1278
1279                 switch (clause->nulltesttype)
1280                 {
1281                         case IS_NULL:
1282
1283                                 /*
1284                                  * Use freq_null directly.
1285                                  */
1286                                 selec = freq_null;
1287                                 break;
1288                         case IS_NOT_NULL:
1289
1290                                 /*
1291                                  * Select not unknown (not null) values. Calculate from
1292                                  * freq_null.
1293                                  */
1294                                 selec = 1.0 - freq_null;
1295                                 break;
1296                         default:
1297                                 elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
1298                                          (int) clause->nulltesttype);
1299                                 return (Selectivity) 0; /* keep compiler quiet */
1300                 }
1301
1302                 ReleaseSysCache(statsTuple);
1303         }
1304         else
1305         {
1306                 /*
1307                  * No VACUUM ANALYZE stats available, so make a guess
1308                  */
1309                 selec = defselec;
1310         }
1311
1312         /* result should be in range, but make sure... */
1313         CLAMP_PROBABILITY(selec);
1314
1315         return (Selectivity) selec;
1316 }
1317
1318 /*
1319  *              eqjoinsel               - Join selectivity of "="
1320  */
1321 Datum
1322 eqjoinsel(PG_FUNCTION_ARGS)
1323 {
1324         Query      *root = (Query *) PG_GETARG_POINTER(0);
1325         Oid                     operator = PG_GETARG_OID(1);
1326         List       *args = (List *) PG_GETARG_POINTER(2);
1327         Var                *var1;
1328         Var                *var2;
1329         double          selec;
1330
1331         get_join_vars(args, &var1, &var2);
1332
1333         if (var1 == NULL && var2 == NULL)
1334                 selec = DEFAULT_EQ_SEL;
1335         else
1336         {
1337                 HeapTuple       statsTuple1 = NULL;
1338                 HeapTuple       statsTuple2 = NULL;
1339                 Form_pg_statistic stats1 = NULL;
1340                 Form_pg_statistic stats2 = NULL;
1341                 double          nd1 = DEFAULT_NUM_DISTINCT;
1342                 double          nd2 = DEFAULT_NUM_DISTINCT;
1343                 bool            have_mcvs1 = false;
1344                 Datum      *values1 = NULL;
1345                 int                     nvalues1 = 0;
1346                 float4     *numbers1 = NULL;
1347                 int                     nnumbers1 = 0;
1348                 bool            have_mcvs2 = false;
1349                 Datum      *values2 = NULL;
1350                 int                     nvalues2 = 0;
1351                 float4     *numbers2 = NULL;
1352                 int                     nnumbers2 = 0;
1353
1354                 if (var1 != NULL)
1355                 {
1356                         /* get stats for the attribute, if available */
1357                         Oid                     relid1 = getrelid(var1->varno, root->rtable);
1358
1359                         if (relid1 != InvalidOid)
1360                         {
1361                                 statsTuple1 = SearchSysCache(STATRELATT,
1362                                                                                          ObjectIdGetDatum(relid1),
1363                                                                                    Int16GetDatum(var1->varattno),
1364                                                                                          0, 0);
1365                                 if (HeapTupleIsValid(statsTuple1))
1366                                 {
1367                                         stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
1368                                         have_mcvs1 = get_attstatsslot(statsTuple1,
1369                                                                                                   var1->vartype,
1370                                                                                                   var1->vartypmod,
1371                                                                                                   STATISTIC_KIND_MCV,
1372                                                                                                   InvalidOid,
1373                                                                                                   &values1, &nvalues1,
1374                                                                                                   &numbers1, &nnumbers1);
1375                                 }
1376
1377                                 nd1 = get_att_numdistinct(root, var1, stats1);
1378                         }
1379                 }
1380
1381                 if (var2 != NULL)
1382                 {
1383                         /* get stats for the attribute, if available */
1384                         Oid                     relid2 = getrelid(var2->varno, root->rtable);
1385
1386                         if (relid2 != InvalidOid)
1387                         {
1388                                 statsTuple2 = SearchSysCache(STATRELATT,
1389                                                                                          ObjectIdGetDatum(relid2),
1390                                                                                    Int16GetDatum(var2->varattno),
1391                                                                                          0, 0);
1392                                 if (HeapTupleIsValid(statsTuple2))
1393                                 {
1394                                         stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
1395                                         have_mcvs2 = get_attstatsslot(statsTuple2,
1396                                                                                                   var2->vartype,
1397                                                                                                   var2->vartypmod,
1398                                                                                                   STATISTIC_KIND_MCV,
1399                                                                                                   InvalidOid,
1400                                                                                                   &values2, &nvalues2,
1401                                                                                                   &numbers2, &nnumbers2);
1402                                 }
1403
1404                                 nd2 = get_att_numdistinct(root, var2, stats2);
1405                         }
1406                 }
1407
1408                 if (have_mcvs1 && have_mcvs2)
1409                 {
1410                         /*
1411                          * We have most-common-value lists for both relations.  Run
1412                          * through the lists to see which MCVs actually join to each
1413                          * other with the given operator.  This allows us to determine
1414                          * the exact join selectivity for the portion of the relations
1415                          * represented by the MCV lists.  We still have to estimate
1416                          * for the remaining population, but in a skewed distribution
1417                          * this gives us a big leg up in accuracy.      For motivation see
1418                          * the analysis in Y. Ioannidis and S. Christodoulakis, "On
1419                          * the propagation of errors in the size of join results",
1420                          * Technical Report 1018, Computer Science Dept., University
1421                          * of Wisconsin, Madison, March 1991 (available from
1422                          * ftp.cs.wisc.edu).
1423                          */
1424                         FmgrInfo        eqproc;
1425                         bool       *hasmatch1;
1426                         bool       *hasmatch2;
1427                         double          matchprodfreq,
1428                                                 matchfreq1,
1429                                                 matchfreq2,
1430                                                 unmatchfreq1,
1431                                                 unmatchfreq2,
1432                                                 otherfreq1,
1433                                                 otherfreq2,
1434                                                 totalsel1,
1435                                                 totalsel2;
1436                         int                     i,
1437                                                 nmatches;
1438
1439                         fmgr_info(get_opcode(operator), &eqproc);
1440                         hasmatch1 = (bool *) palloc(nvalues1 * sizeof(bool));
1441                         memset(hasmatch1, 0, nvalues1 * sizeof(bool));
1442                         hasmatch2 = (bool *) palloc(nvalues2 * sizeof(bool));
1443                         memset(hasmatch2, 0, nvalues2 * sizeof(bool));
1444
1445                         /*
1446                          * Note we assume that each MCV will match at most one member
1447                          * of the other MCV list.  If the operator isn't really
1448                          * equality, there could be multiple matches --- but we don't
1449                          * look for them, both for speed and because the math wouldn't
1450                          * add up...
1451                          */
1452                         matchprodfreq = 0.0;
1453                         nmatches = 0;
1454                         for (i = 0; i < nvalues1; i++)
1455                         {
1456                                 int                     j;
1457
1458                                 for (j = 0; j < nvalues2; j++)
1459                                 {
1460                                         if (hasmatch2[j])
1461                                                 continue;
1462                                         if (DatumGetBool(FunctionCall2(&eqproc,
1463                                                                                                    values1[i],
1464                                                                                                    values2[j])))
1465                                         {
1466                                                 hasmatch1[i] = hasmatch2[j] = true;
1467                                                 matchprodfreq += numbers1[i] * numbers2[j];
1468                                                 nmatches++;
1469                                                 break;
1470                                         }
1471                                 }
1472                         }
1473                         CLAMP_PROBABILITY(matchprodfreq);
1474                         /* Sum up frequencies of matched and unmatched MCVs */
1475                         matchfreq1 = unmatchfreq1 = 0.0;
1476                         for (i = 0; i < nvalues1; i++)
1477                         {
1478                                 if (hasmatch1[i])
1479                                         matchfreq1 += numbers1[i];
1480                                 else
1481                                         unmatchfreq1 += numbers1[i];
1482                         }
1483                         CLAMP_PROBABILITY(matchfreq1);
1484                         CLAMP_PROBABILITY(unmatchfreq1);
1485                         matchfreq2 = unmatchfreq2 = 0.0;
1486                         for (i = 0; i < nvalues2; i++)
1487                         {
1488                                 if (hasmatch2[i])
1489                                         matchfreq2 += numbers2[i];
1490                                 else
1491                                         unmatchfreq2 += numbers2[i];
1492                         }
1493                         CLAMP_PROBABILITY(matchfreq2);
1494                         CLAMP_PROBABILITY(unmatchfreq2);
1495                         pfree(hasmatch1);
1496                         pfree(hasmatch2);
1497
1498                         /*
1499                          * Compute total frequency of non-null values that are not in
1500                          * the MCV lists.
1501                          */
1502                         otherfreq1 = 1.0 - stats1->stanullfrac - matchfreq1 - unmatchfreq1;
1503                         otherfreq2 = 1.0 - stats2->stanullfrac - matchfreq2 - unmatchfreq2;
1504                         CLAMP_PROBABILITY(otherfreq1);
1505                         CLAMP_PROBABILITY(otherfreq2);
1506
1507                         /*
1508                          * We can estimate the total selectivity from the point of
1509                          * view of relation 1 as: the known selectivity for matched
1510                          * MCVs, plus unmatched MCVs that are assumed to match against
1511                          * random members of relation 2's non-MCV population, plus
1512                          * non-MCV values that are assumed to match against random
1513                          * members of relation 2's unmatched MCVs plus non-MCV values.
1514                          */
1515                         totalsel1 = matchprodfreq;
1516                         if (nd2 > nvalues2)
1517                                 totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
1518                         if (nd2 > nmatches)
1519                                 totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
1520                                         (nd2 - nmatches);
1521                         /* Same estimate from the point of view of relation 2. */
1522                         totalsel2 = matchprodfreq;
1523                         if (nd1 > nvalues1)
1524                                 totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
1525                         if (nd1 > nmatches)
1526                                 totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
1527                                         (nd1 - nmatches);
1528
1529                         /*
1530                          * Use the smaller of the two estimates.  This can be
1531                          * justified in essentially the same terms as given below for
1532                          * the no-stats case: to a first approximation, we are
1533                          * estimating from the point of view of the relation with
1534                          * smaller nd.
1535                          */
1536                         selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
1537                 }
1538                 else
1539                 {
1540                         /*
1541                          * We do not have MCV lists for both sides.  Estimate the join
1542                          * selectivity as MIN(1/nd1, 1/nd2).  This is plausible if we
1543                          * assume that the values are about equally distributed: a
1544                          * given tuple of rel1 will join to either 0 or N2/nd2 rows of
1545                          * rel2, so total join rows are at most N1*N2/nd2 giving a
1546                          * join selectivity of not more than 1/nd2.  By the same logic
1547                          * it is not more than 1/nd1, so MIN(1/nd1, 1/nd2) is an upper
1548                          * bound.  Using the MIN() means we estimate from the point of
1549                          * view of the relation with smaller nd (since the larger nd
1550                          * is determining the MIN).  It is reasonable to assume that
1551                          * most tuples in this rel will have join partners, so the
1552                          * bound is probably reasonably tight and should be taken
1553                          * as-is.
1554                          *
1555                          * XXX Can we be smarter if we have an MCV list for just one
1556                          * side? It seems that if we assume equal distribution for the
1557                          * other side, we end up with the same answer anyway.
1558                          */
1559                         if (nd1 > nd2)
1560                                 selec = 1.0 / nd1;
1561                         else
1562                                 selec = 1.0 / nd2;
1563                 }
1564
1565                 if (have_mcvs1)
1566                         free_attstatsslot(var1->vartype, values1, nvalues1,
1567                                                           numbers1, nnumbers1);
1568                 if (have_mcvs2)
1569                         free_attstatsslot(var2->vartype, values2, nvalues2,
1570                                                           numbers2, nnumbers2);
1571                 if (HeapTupleIsValid(statsTuple1))
1572                         ReleaseSysCache(statsTuple1);
1573                 if (HeapTupleIsValid(statsTuple2))
1574                         ReleaseSysCache(statsTuple2);
1575         }
1576
1577         CLAMP_PROBABILITY(selec);
1578
1579         PG_RETURN_FLOAT8((float8) selec);
1580 }
1581
1582 /*
1583  *              neqjoinsel              - Join selectivity of "!="
1584  */
1585 Datum
1586 neqjoinsel(PG_FUNCTION_ARGS)
1587 {
1588         Query      *root = (Query *) PG_GETARG_POINTER(0);
1589         Oid                     operator = PG_GETARG_OID(1);
1590         List       *args = (List *) PG_GETARG_POINTER(2);
1591         Oid                     eqop;
1592         float8          result;
1593
1594         /*
1595          * We want 1 - eqjoinsel() where the equality operator is the one
1596          * associated with this != operator, that is, its negator.
1597          */
1598         eqop = get_negator(operator);
1599         if (eqop)
1600         {
1601                 result = DatumGetFloat8(DirectFunctionCall3(eqjoinsel,
1602                                                                                                         PointerGetDatum(root),
1603                                                                                                   ObjectIdGetDatum(eqop),
1604                                                                                                  PointerGetDatum(args)));
1605
1606         }
1607         else
1608         {
1609                 /* Use default selectivity (should we raise an error instead?) */
1610                 result = DEFAULT_EQ_SEL;
1611         }
1612         result = 1.0 - result;
1613         PG_RETURN_FLOAT8(result);
1614 }
1615
1616 /*
1617  *              scalarltjoinsel - Join selectivity of "<" and "<=" for scalars
1618  */
1619 Datum
1620 scalarltjoinsel(PG_FUNCTION_ARGS)
1621 {
1622         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1623 }
1624
1625 /*
1626  *              scalargtjoinsel - Join selectivity of ">" and ">=" for scalars
1627  */
1628 Datum
1629 scalargtjoinsel(PG_FUNCTION_ARGS)
1630 {
1631         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1632 }
1633
1634 /*
1635  *              regexeqjoinsel  - Join selectivity of regular-expression pattern match.
1636  */
1637 Datum
1638 regexeqjoinsel(PG_FUNCTION_ARGS)
1639 {
1640         PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
1641 }
1642
1643 /*
1644  *              icregexeqjoinsel        - Join selectivity of case-insensitive regex match.
1645  */
1646 Datum
1647 icregexeqjoinsel(PG_FUNCTION_ARGS)
1648 {
1649         PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
1650 }
1651
1652 /*
1653  *              likejoinsel                     - Join selectivity of LIKE pattern match.
1654  */
1655 Datum
1656 likejoinsel(PG_FUNCTION_ARGS)
1657 {
1658         PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
1659 }
1660
1661 /*
1662  *              iclikejoinsel                   - Join selectivity of ILIKE pattern match.
1663  */
1664 Datum
1665 iclikejoinsel(PG_FUNCTION_ARGS)
1666 {
1667         PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
1668 }
1669
1670 /*
1671  *              regexnejoinsel  - Join selectivity of regex non-match.
1672  */
1673 Datum
1674 regexnejoinsel(PG_FUNCTION_ARGS)
1675 {
1676         float8          result;
1677
1678         result = DatumGetFloat8(regexeqjoinsel(fcinfo));
1679         result = 1.0 - result;
1680         PG_RETURN_FLOAT8(result);
1681 }
1682
1683 /*
1684  *              icregexnejoinsel        - Join selectivity of case-insensitive regex non-match.
1685  */
1686 Datum
1687 icregexnejoinsel(PG_FUNCTION_ARGS)
1688 {
1689         float8          result;
1690
1691         result = DatumGetFloat8(icregexeqjoinsel(fcinfo));
1692         result = 1.0 - result;
1693         PG_RETURN_FLOAT8(result);
1694 }
1695
1696 /*
1697  *              nlikejoinsel            - Join selectivity of LIKE pattern non-match.
1698  */
1699 Datum
1700 nlikejoinsel(PG_FUNCTION_ARGS)
1701 {
1702         float8          result;
1703
1704         result = DatumGetFloat8(likejoinsel(fcinfo));
1705         result = 1.0 - result;
1706         PG_RETURN_FLOAT8(result);
1707 }
1708
1709 /*
1710  *              icnlikejoinsel          - Join selectivity of ILIKE pattern non-match.
1711  */
1712 Datum
1713 icnlikejoinsel(PG_FUNCTION_ARGS)
1714 {
1715         float8          result;
1716
1717         result = DatumGetFloat8(iclikejoinsel(fcinfo));
1718         result = 1.0 - result;
1719         PG_RETURN_FLOAT8(result);
1720 }
1721
1722 /*
1723  * mergejoinscansel                     - Scan selectivity of merge join.
1724  *
1725  * A merge join will stop as soon as it exhausts either input stream.
1726  * Therefore, if we can estimate the ranges of both input variables,
1727  * we can estimate how much of the input will actually be read.  This
1728  * can have a considerable impact on the cost when using indexscans.
1729  *
1730  * clause should be a clause already known to be mergejoinable.
1731  *
1732  * *leftscan is set to the fraction of the left-hand variable expected
1733  * to be scanned (0 to 1), and similarly *rightscan for the right-hand
1734  * variable.
1735  */
1736 void
1737 mergejoinscansel(Query *root, Node *clause,
1738                                  Selectivity *leftscan,
1739                                  Selectivity *rightscan)
1740 {
1741         Var                *left,
1742                            *right;
1743         Oid                     opno,
1744                                 lsortop,
1745                                 rsortop,
1746                                 ltop,
1747                                 gtop,
1748                                 revltop;
1749         Datum           leftmax,
1750                                 rightmax;
1751         double          selec;
1752
1753         /* Set default results if we can't figure anything out. */
1754         *leftscan = *rightscan = 1.0;
1755
1756         /* Deconstruct the merge clause */
1757         if (!is_opclause(clause))
1758                 return;                                 /* shouldn't happen */
1759         opno = ((Oper *) ((Expr *) clause)->oper)->opno;
1760         left = get_leftop((Expr *) clause);
1761         right = get_rightop((Expr *) clause);
1762         if (!right)
1763                 return;                                 /* shouldn't happen */
1764
1765         /* Can't do anything if inputs are not Vars */
1766         if (!IsA(left, Var) ||!IsA(right, Var))
1767                 return;
1768
1769         /* Verify mergejoinability and get left and right "<" operators */
1770         if (!op_mergejoinable(opno,
1771                                                   left->vartype,
1772                                                   right->vartype,
1773                                                   &lsortop,
1774                                                   &rsortop))
1775                 return;                                 /* shouldn't happen */
1776
1777         /* Try to get maximum values of both vars */
1778         if (!get_var_maximum(root, left, lsortop, &leftmax))
1779                 return;                                 /* no max available from stats */
1780
1781         if (!get_var_maximum(root, right, rsortop, &rightmax))
1782                 return;                                 /* no max available from stats */
1783
1784         /* Look up the "left < right" and "left > right" operators */
1785         op_mergejoin_crossops(opno, &ltop, &gtop, NULL, NULL);
1786
1787         /* Look up the "right < left" operator */
1788         revltop = get_commutator(gtop);
1789         if (!OidIsValid(revltop))
1790                 return;                                 /* shouldn't happen */
1791
1792         /*
1793          * Now, the fraction of the left variable that will be scanned is the
1794          * fraction that's <= the right-side maximum value.  But only believe
1795          * non-default estimates, else stick with our 1.0.
1796          */
1797         selec = scalarineqsel(root, ltop, false, left,
1798                                                   rightmax, right->vartype);
1799         if (selec != DEFAULT_INEQ_SEL)
1800                 *leftscan = selec;
1801
1802         /* And similarly for the right variable. */
1803         selec = scalarineqsel(root, revltop, false, right,
1804                                                   leftmax, left->vartype);
1805         if (selec != DEFAULT_INEQ_SEL)
1806                 *rightscan = selec;
1807
1808         /*
1809          * Only one of the two fractions can really be less than 1.0; believe
1810          * the smaller estimate and reset the other one to exactly 1.0.
1811          */
1812         if (*leftscan > *rightscan)
1813                 *leftscan = 1.0;
1814         else
1815                 *rightscan = 1.0;
1816 }
1817
1818 /*
1819  * get_var_maximum
1820  *              Estimate the maximum value of the specified variable.
1821  *              If successful, store value in *max and return TRUE.
1822  *              If no data available, return FALSE.
1823  *
1824  * sortop is the "<" comparison operator to use.  (To extract the
1825  * minimum instead of the maximum, just pass the ">" operator instead.)
1826  */
1827 static bool
1828 get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max)
1829 {
1830         Datum           tmax = 0;
1831         bool            have_max = false;
1832         Oid                     relid;
1833         HeapTuple       statsTuple;
1834         Form_pg_statistic stats;
1835         int16           typLen;
1836         bool            typByVal;
1837         Datum      *values;
1838         int                     nvalues;
1839         int                     i;
1840
1841         relid = getrelid(var->varno, root->rtable);
1842         if (relid == InvalidOid)
1843                 return false;
1844
1845         /* get stats for the attribute */
1846         statsTuple = SearchSysCache(STATRELATT,
1847                                                                 ObjectIdGetDatum(relid),
1848                                                                 Int16GetDatum(var->varattno),
1849                                                                 0, 0);
1850         if (!HeapTupleIsValid(statsTuple))
1851         {
1852                 /* no stats available, so default result */
1853                 return false;
1854         }
1855         stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
1856
1857         get_typlenbyval(var->vartype, &typLen, &typByVal);
1858
1859         /*
1860          * If there is a histogram, grab the last or first value as appropriate.
1861          *
1862          * If there is a histogram that is sorted with some other operator
1863          * than the one we want, fail --- this suggests that there is data
1864          * we can't use.
1865          */
1866         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1867                                                  STATISTIC_KIND_HISTOGRAM, sortop,
1868                                                  &values, &nvalues,
1869                                                  NULL, NULL))
1870         {
1871                 if (nvalues > 0)
1872                 {
1873                         tmax = datumCopy(values[nvalues-1], typByVal, typLen);
1874                         have_max = true;
1875                 }
1876                 free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
1877         }
1878         else
1879         {
1880                 Oid             rsortop = get_commutator(sortop);
1881
1882                 if (OidIsValid(rsortop) &&
1883                         get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1884                                                          STATISTIC_KIND_HISTOGRAM, rsortop,
1885                                                          &values, &nvalues,
1886                                                          NULL, NULL))
1887                 {
1888                         if (nvalues > 0)
1889                         {
1890                                 tmax = datumCopy(values[0], typByVal, typLen);
1891                                 have_max = true;
1892                         }
1893                         free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
1894                 }
1895                 else if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1896                                                                   STATISTIC_KIND_HISTOGRAM, InvalidOid,
1897                                                                   &values, &nvalues,
1898                                                                   NULL, NULL))
1899                 {
1900                         free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
1901                         ReleaseSysCache(statsTuple);
1902                         return false;
1903                 }
1904         }
1905
1906         /*
1907          * If we have most-common-values info, look for a large MCV.  This
1908          * is needed even if we also have a histogram, since the histogram
1909          * excludes the MCVs.  However, usually the MCVs will not be the
1910          * extreme values, so avoid unnecessary data copying.
1911          */
1912         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1913                                                  STATISTIC_KIND_MCV, InvalidOid,
1914                                                  &values, &nvalues,
1915                                                  NULL, NULL))
1916         {
1917                 bool    large_mcv = false;
1918                 FmgrInfo        opproc;
1919
1920                 fmgr_info(get_opcode(sortop), &opproc);
1921
1922                 for (i = 0; i < nvalues; i++)
1923                 {
1924                         if (!have_max)
1925                         {
1926                                 tmax = values[i];
1927                                 large_mcv = have_max = true;
1928                         }
1929                         else if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
1930                         {
1931                                 tmax = values[i];
1932                                 large_mcv = true;
1933                         }
1934                 }
1935                 if (large_mcv)
1936                         tmax = datumCopy(tmax, typByVal, typLen);
1937                 free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
1938         }
1939
1940         ReleaseSysCache(statsTuple);
1941
1942         *max = tmax;
1943         return have_max;
1944 }
1945
1946 /*
1947  * convert_to_scalar
1948  *        Convert non-NULL values of the indicated types to the comparison
1949  *        scale needed by scalarltsel()/scalargtsel().
1950  *        Returns "true" if successful.
1951  *
1952  * XXX this routine is a hack: ideally we should look up the conversion
1953  * subroutines in pg_type.
1954  *
1955  * All numeric datatypes are simply converted to their equivalent
1956  * "double" values.  (NUMERIC values that are outside the range of "double"
1957  * are clamped to +/- HUGE_VAL.)
1958  *
1959  * String datatypes are converted by convert_string_to_scalar(),
1960  * which is explained below.  The reason why this routine deals with
1961  * three values at a time, not just one, is that we need it for strings.
1962  *
1963  * The bytea datatype is just enough different from strings that it has
1964  * to be treated separately.
1965  *
1966  * The several datatypes representing absolute times are all converted
1967  * to Timestamp, which is actually a double, and then we just use that
1968  * double value.  Note this will give correct results even for the "special"
1969  * values of Timestamp, since those are chosen to compare correctly;
1970  * see timestamp_cmp.
1971  *
1972  * The several datatypes representing relative times (intervals) are all
1973  * converted to measurements expressed in seconds.
1974  */
1975 static bool
1976 convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
1977                                   Datum lobound, Datum hibound, Oid boundstypid,
1978                                   double *scaledlobound, double *scaledhibound)
1979 {
1980         switch (valuetypid)
1981         {
1982                         /*
1983                          * Built-in numeric types
1984                          */
1985                 case BOOLOID:
1986                 case INT2OID:
1987                 case INT4OID:
1988                 case INT8OID:
1989                 case FLOAT4OID:
1990                 case FLOAT8OID:
1991                 case NUMERICOID:
1992                 case OIDOID:
1993                 case REGPROCOID:
1994                         *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
1995                         *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
1996                         *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
1997                         return true;
1998
1999                         /*
2000                          * Built-in string types
2001                          */
2002                 case CHAROID:
2003                 case BPCHAROID:
2004                 case VARCHAROID:
2005                 case TEXTOID:
2006                 case NAMEOID:
2007                         {
2008                                 unsigned char *valstr = convert_string_datum(value, valuetypid);
2009                                 unsigned char *lostr = convert_string_datum(lobound, boundstypid);
2010                                 unsigned char *histr = convert_string_datum(hibound, boundstypid);
2011
2012                                 convert_string_to_scalar(valstr, scaledvalue,
2013                                                                                  lostr, scaledlobound,
2014                                                                                  histr, scaledhibound);
2015                                 pfree(valstr);
2016                                 pfree(lostr);
2017                                 pfree(histr);
2018                                 return true;
2019                         }
2020
2021                         /*
2022                          * Built-in bytea type
2023                          */
2024                 case BYTEAOID:
2025                         {
2026                                 convert_bytea_to_scalar(value, scaledvalue,
2027                                                                                 lobound, scaledlobound,
2028                                                                                 hibound, scaledhibound);
2029                                 return true;
2030                         }
2031
2032                         /*
2033                          * Built-in time types
2034                          */
2035                 case TIMESTAMPOID:
2036                 case TIMESTAMPTZOID:
2037                 case ABSTIMEOID:
2038                 case DATEOID:
2039                 case INTERVALOID:
2040                 case RELTIMEOID:
2041                 case TINTERVALOID:
2042                 case TIMEOID:
2043                 case TIMETZOID:
2044                         *scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
2045                         *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
2046                         *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
2047                         return true;
2048
2049                         /*
2050                          * Built-in network types
2051                          */
2052                 case INETOID:
2053                 case CIDROID:
2054                 case MACADDROID:
2055                         *scaledvalue = convert_network_to_scalar(value, valuetypid);
2056                         *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
2057                         *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
2058                         return true;
2059         }
2060         /* Don't know how to convert */
2061         return false;
2062 }
2063
2064 /*
2065  * Do convert_to_scalar()'s work for any numeric data type.
2066  */
2067 static double
2068 convert_numeric_to_scalar(Datum value, Oid typid)
2069 {
2070         switch (typid)
2071         {
2072                 case BOOLOID:
2073                         return (double) DatumGetBool(value);
2074                 case INT2OID:
2075                         return (double) DatumGetInt16(value);
2076                 case INT4OID:
2077                         return (double) DatumGetInt32(value);
2078                 case INT8OID:
2079                         return (double) DatumGetInt64(value);
2080                 case FLOAT4OID:
2081                         return (double) DatumGetFloat4(value);
2082                 case FLOAT8OID:
2083                         return (double) DatumGetFloat8(value);
2084                 case NUMERICOID:
2085                         /* Note: out-of-range values will be clamped to +-HUGE_VAL */
2086                         return (double)
2087                                 DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
2088                                                                                                    value));
2089                 case OIDOID:
2090                 case REGPROCOID:
2091                         /* we can treat OIDs as integers... */
2092                         return (double) DatumGetObjectId(value);
2093         }
2094
2095         /*
2096          * Can't get here unless someone tries to use scalarltsel/scalargtsel
2097          * on an operator with one numeric and one non-numeric operand.
2098          */
2099         elog(ERROR, "convert_numeric_to_scalar: unsupported type %u", typid);
2100         return 0;
2101 }
2102
2103 /*
2104  * Do convert_to_scalar()'s work for any character-string data type.
2105  *
2106  * String datatypes are converted to a scale that ranges from 0 to 1,
2107  * where we visualize the bytes of the string as fractional digits.
2108  *
2109  * We do not want the base to be 256, however, since that tends to
2110  * generate inflated selectivity estimates; few databases will have
2111  * occurrences of all 256 possible byte values at each position.
2112  * Instead, use the smallest and largest byte values seen in the bounds
2113  * as the estimated range for each byte, after some fudging to deal with
2114  * the fact that we probably aren't going to see the full range that way.
2115  *
2116  * An additional refinement is that we discard any common prefix of the
2117  * three strings before computing the scaled values.  This allows us to
2118  * "zoom in" when we encounter a narrow data range.  An example is a phone
2119  * number database where all the values begin with the same area code.
2120  * (Actually, the bounds will be adjacent histogram-bin-boundary values,
2121  * so this is more likely to happen than you might think.)
2122  */
2123 static void
2124 convert_string_to_scalar(unsigned char *value,
2125                                                  double *scaledvalue,
2126                                                  unsigned char *lobound,
2127                                                  double *scaledlobound,
2128                                                  unsigned char *hibound,
2129                                                  double *scaledhibound)
2130 {
2131         int                     rangelo,
2132                                 rangehi;
2133         unsigned char *sptr;
2134
2135         rangelo = rangehi = hibound[0];
2136         for (sptr = lobound; *sptr; sptr++)
2137         {
2138                 if (rangelo > *sptr)
2139                         rangelo = *sptr;
2140                 if (rangehi < *sptr)
2141                         rangehi = *sptr;
2142         }
2143         for (sptr = hibound; *sptr; sptr++)
2144         {
2145                 if (rangelo > *sptr)
2146                         rangelo = *sptr;
2147                 if (rangehi < *sptr)
2148                         rangehi = *sptr;
2149         }
2150         /* If range includes any upper-case ASCII chars, make it include all */
2151         if (rangelo <= 'Z' && rangehi >= 'A')
2152         {
2153                 if (rangelo > 'A')
2154                         rangelo = 'A';
2155                 if (rangehi < 'Z')
2156                         rangehi = 'Z';
2157         }
2158         /* Ditto lower-case */
2159         if (rangelo <= 'z' && rangehi >= 'a')
2160         {
2161                 if (rangelo > 'a')
2162                         rangelo = 'a';
2163                 if (rangehi < 'z')
2164                         rangehi = 'z';
2165         }
2166         /* Ditto digits */
2167         if (rangelo <= '9' && rangehi >= '0')
2168         {
2169                 if (rangelo > '0')
2170                         rangelo = '0';
2171                 if (rangehi < '9')
2172                         rangehi = '9';
2173         }
2174
2175         /*
2176          * If range includes less than 10 chars, assume we have not got enough
2177          * data, and make it include regular ASCII set.
2178          */
2179         if (rangehi - rangelo < 9)
2180         {
2181                 rangelo = ' ';
2182                 rangehi = 127;
2183         }
2184
2185         /*
2186          * Now strip any common prefix of the three strings.
2187          */
2188         while (*lobound)
2189         {
2190                 if (*lobound != *hibound || *lobound != *value)
2191                         break;
2192                 lobound++, hibound++, value++;
2193         }
2194
2195         /*
2196          * Now we can do the conversions.
2197          */
2198         *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
2199         *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
2200         *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
2201 }
2202
2203 static double
2204 convert_one_string_to_scalar(unsigned char *value, int rangelo, int rangehi)
2205 {
2206         int                     slen = strlen((char *) value);
2207         double          num,
2208                                 denom,
2209                                 base;
2210
2211         if (slen <= 0)
2212                 return 0.0;                             /* empty string has scalar value 0 */
2213
2214         /*
2215          * Since base is at least 10, need not consider more than about 20
2216          * chars
2217          */
2218         if (slen > 20)
2219                 slen = 20;
2220
2221         /* Convert initial characters to fraction */
2222         base = rangehi - rangelo + 1;
2223         num = 0.0;
2224         denom = base;
2225         while (slen-- > 0)
2226         {
2227                 int                     ch = *value++;
2228
2229                 if (ch < rangelo)
2230                         ch = rangelo - 1;
2231                 else if (ch > rangehi)
2232                         ch = rangehi + 1;
2233                 num += ((double) (ch - rangelo)) / denom;
2234                 denom *= base;
2235         }
2236
2237         return num;
2238 }
2239
2240 /*
2241  * Convert a string-type Datum into a palloc'd, null-terminated string.
2242  *
2243  * If USE_LOCALE is defined, we must pass the string through strxfrm()
2244  * before continuing, so as to generate correct locale-specific results.
2245  */
2246 static unsigned char *
2247 convert_string_datum(Datum value, Oid typid)
2248 {
2249         char       *val;
2250
2251 #ifdef USE_LOCALE
2252         char       *xfrmstr;
2253         size_t          xfrmsize;
2254         size_t          xfrmlen;
2255 #endif
2256
2257         switch (typid)
2258         {
2259                 case CHAROID:
2260                         val = (char *) palloc(2);
2261                         val[0] = DatumGetChar(value);
2262                         val[1] = '\0';
2263                         break;
2264                 case BPCHAROID:
2265                 case VARCHAROID:
2266                 case TEXTOID:
2267                         {
2268                                 char       *str = (char *) VARDATA(DatumGetPointer(value));
2269                                 int                     strlength = VARSIZE(DatumGetPointer(value)) - VARHDRSZ;
2270
2271                                 val = (char *) palloc(strlength + 1);
2272                                 memcpy(val, str, strlength);
2273                                 val[strlength] = '\0';
2274                                 break;
2275                         }
2276                 case NAMEOID:
2277                         {
2278                                 NameData   *nm = (NameData *) DatumGetPointer(value);
2279
2280                                 val = pstrdup(NameStr(*nm));
2281                                 break;
2282                         }
2283                 default:
2284
2285                         /*
2286                          * Can't get here unless someone tries to use scalarltsel on
2287                          * an operator with one string and one non-string operand.
2288                          */
2289                         elog(ERROR, "convert_string_datum: unsupported type %u", typid);
2290                         return NULL;
2291         }
2292
2293 #ifdef USE_LOCALE
2294         /* Guess that transformed string is not much bigger than original */
2295         xfrmsize = strlen(val) + 32;    /* arbitrary pad value here... */
2296         xfrmstr = (char *) palloc(xfrmsize);
2297         xfrmlen = strxfrm(xfrmstr, val, xfrmsize);
2298         if (xfrmlen >= xfrmsize)
2299         {
2300                 /* Oops, didn't make it */
2301                 pfree(xfrmstr);
2302                 xfrmstr = (char *) palloc(xfrmlen + 1);
2303                 xfrmlen = strxfrm(xfrmstr, val, xfrmlen + 1);
2304         }
2305         pfree(val);
2306         val = xfrmstr;
2307 #endif
2308
2309         return (unsigned char *) val;
2310 }
2311
2312 /*
2313  * Do convert_to_scalar()'s work for any bytea data type.
2314  *
2315  * Very similar to convert_string_to_scalar except we can't assume
2316  * null-termination and therefore pass explicit lengths around.
2317  *
2318  * Also, assumptions about likely "normal" ranges of characters have been
2319  * removed - a data range of 0..255 is always used, for now.  (Perhaps
2320  * someday we will add information about actual byte data range to
2321  * pg_statistic.)
2322  */
2323 static void
2324 convert_bytea_to_scalar(Datum value,
2325                                                 double *scaledvalue,
2326                                                 Datum lobound,
2327                                                 double *scaledlobound,
2328                                                 Datum hibound,
2329                                                 double *scaledhibound)
2330 {
2331         int                     rangelo,
2332                                 rangehi,
2333                                 valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
2334                                 loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
2335                                 hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
2336                                 i,
2337                                 minlen;
2338         unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
2339                            *lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
2340                            *histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
2341
2342         /*
2343          * Assume bytea data is uniformly distributed across all byte values.
2344          */
2345         rangelo = 0;
2346         rangehi = 255;
2347
2348         /*
2349          * Now strip any common prefix of the three strings.
2350          */
2351         minlen = Min(Min(valuelen, loboundlen), hiboundlen);
2352         for (i = 0; i < minlen; i++)
2353         {
2354                 if (*lostr != *histr || *lostr != *valstr)
2355                         break;
2356                 lostr++, histr++, valstr++;
2357                 loboundlen--, hiboundlen--, valuelen--;
2358         }
2359
2360         /*
2361          * Now we can do the conversions.
2362          */
2363         *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
2364         *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
2365         *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
2366 }
2367
2368 static double
2369 convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
2370                                                         int rangelo, int rangehi)
2371 {
2372         double          num,
2373                                 denom,
2374                                 base;
2375
2376         if (valuelen <= 0)
2377                 return 0.0;                             /* empty string has scalar value 0 */
2378
2379         /*
2380          * Since base is 256, need not consider more than about 10 chars (even
2381          * this many seems like overkill)
2382          */
2383         if (valuelen > 10)
2384                 valuelen = 10;
2385
2386         /* Convert initial characters to fraction */
2387         base = rangehi - rangelo + 1;
2388         num = 0.0;
2389         denom = base;
2390         while (valuelen-- > 0)
2391         {
2392                 int                     ch = *value++;
2393
2394                 if (ch < rangelo)
2395                         ch = rangelo - 1;
2396                 else if (ch > rangehi)
2397                         ch = rangehi + 1;
2398                 num += ((double) (ch - rangelo)) / denom;
2399                 denom *= base;
2400         }
2401
2402         return num;
2403 }
2404
2405 /*
2406  * Do convert_to_scalar()'s work for any timevalue data type.
2407  */
2408 static double
2409 convert_timevalue_to_scalar(Datum value, Oid typid)
2410 {
2411         switch (typid)
2412         {
2413                 case TIMESTAMPOID:
2414                         return DatumGetTimestamp(value);
2415                 case TIMESTAMPTZOID:
2416                         return DatumGetTimestampTz(value);
2417                 case ABSTIMEOID:
2418                         return DatumGetTimestamp(DirectFunctionCall1(abstime_timestamp,
2419                                                                                                                  value));
2420                 case DATEOID:
2421                         return DatumGetTimestamp(DirectFunctionCall1(date_timestamp,
2422                                                                                                                  value));
2423                 case INTERVALOID:
2424                         {
2425                                 Interval   *interval = DatumGetIntervalP(value);
2426
2427                                 /*
2428                                  * Convert the month part of Interval to days using
2429                                  * assumed average month length of 365.25/12.0 days.  Not
2430                                  * too accurate, but plenty good enough for our purposes.
2431                                  */
2432                                 return interval->time +
2433                                         interval->month * (365.25 / 12.0 * 24.0 * 60.0 * 60.0);
2434                         }
2435                 case RELTIMEOID:
2436                         return DatumGetRelativeTime(value);
2437                 case TINTERVALOID:
2438                         {
2439                                 TimeInterval interval = DatumGetTimeInterval(value);
2440
2441                                 if (interval->status != 0)
2442                                         return interval->data[1] - interval->data[0];
2443                                 return 0;               /* for lack of a better idea */
2444                         }
2445                 case TIMEOID:
2446                         return DatumGetTimeADT(value);
2447                 case TIMETZOID:
2448                         {
2449                                 TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
2450
2451                                 /* use GMT-equivalent time */
2452                                 return (double) (timetz->time + timetz->zone);
2453                         }
2454         }
2455
2456         /*
2457          * Can't get here unless someone tries to use scalarltsel/scalargtsel
2458          * on an operator with one timevalue and one non-timevalue operand.
2459          */
2460         elog(ERROR, "convert_timevalue_to_scalar: unsupported type %u", typid);
2461         return 0;
2462 }
2463
2464
2465 /*
2466  * get_att_numdistinct
2467  *        Estimate the number of distinct values of an attribute.
2468  *
2469  * var: identifies the attribute to examine.
2470  * stats: pg_statistic tuple for attribute, or NULL if not available.
2471  *
2472  * NB: be careful to produce an integral result, since callers may compare
2473  * the result to exact integer counts.
2474  */
2475 static double
2476 get_att_numdistinct(Query *root, Var *var, Form_pg_statistic stats)
2477 {
2478         RelOptInfo *rel;
2479         double          ntuples;
2480
2481         /*
2482          * Special-case boolean columns: presumably, two distinct values.
2483          *
2484          * Are there any other cases we should wire in special estimates for?
2485          */
2486         if (var->vartype == BOOLOID)
2487                 return 2.0;
2488
2489         /*
2490          * Otherwise we need to get the relation size.
2491          */
2492         rel = find_base_rel(root, var->varno);
2493         ntuples = rel->tuples;
2494
2495         if (ntuples <= 0.0)
2496                 return DEFAULT_NUM_DISTINCT;    /* no data available; return a
2497                                                                                  * default */
2498
2499         /*
2500          * Look to see if there is a unique index on the attribute. If so, we
2501          * assume it's distinct, ignoring pg_statistic info which could be out
2502          * of date.
2503          */
2504         if (has_unique_index(rel, var->varattno))
2505                 return ntuples;
2506
2507         /*
2508          * If ANALYZE determined a fixed or scaled estimate, use it.
2509          */
2510         if (stats)
2511         {
2512                 if (stats->stadistinct > 0.0)
2513                         return stats->stadistinct;
2514                 if (stats->stadistinct < 0.0)
2515                         return floor((-stats->stadistinct * ntuples) + 0.5);
2516         }
2517
2518         /*
2519          * ANALYZE does not compute stats for system attributes, but some of
2520          * them can reasonably be assumed unique anyway.
2521          */
2522         switch (var->varattno)
2523         {
2524                 case ObjectIdAttributeNumber:
2525                 case SelfItemPointerAttributeNumber:
2526                         return ntuples;
2527                 case TableOidAttributeNumber:
2528                         return 1.0;
2529         }
2530
2531         /*
2532          * Estimate ndistinct = ntuples if the table is small, else use
2533          * default.
2534          */
2535         if (ntuples < DEFAULT_NUM_DISTINCT)
2536                 return ntuples;
2537
2538         return DEFAULT_NUM_DISTINCT;
2539 }
2540
2541 /*
2542  * get_restriction_var
2543  *              Examine the args of a restriction clause to see if it's of the
2544  *              form (var op something) or (something op var).  If so, extract
2545  *              and return the var and the other argument.
2546  *
2547  * Inputs:
2548  *      args: clause argument list
2549  *      varRelid: see specs for restriction selectivity functions
2550  *
2551  * Outputs: (these are set only if TRUE is returned)
2552  *      *var: gets Var node
2553  *      *other: gets other clause argument
2554  *      *varonleft: set TRUE if var is on the left, FALSE if on the right
2555  *
2556  * Returns TRUE if a Var is identified, otherwise FALSE.
2557  */
2558 static bool
2559 get_restriction_var(List *args,
2560                                         int varRelid,
2561                                         Var **var,
2562                                         Node **other,
2563                                         bool *varonleft)
2564 {
2565         Node       *left,
2566                            *right;
2567
2568         if (length(args) != 2)
2569                 return false;
2570
2571         left = (Node *) lfirst(args);
2572         right = (Node *) lsecond(args);
2573
2574         /* Ignore any binary-compatible relabeling */
2575
2576         if (IsA(left, RelabelType))
2577                 left = ((RelabelType *) left)->arg;
2578         if (IsA(right, RelabelType))
2579                 right = ((RelabelType *) right)->arg;
2580
2581         /* Look for the var */
2582
2583         if (IsA(left, Var) &&
2584                 (varRelid == 0 || varRelid == ((Var *) left)->varno))
2585         {
2586                 *var = (Var *) left;
2587                 *other = right;
2588                 *varonleft = true;
2589         }
2590         else if (IsA(right, Var) &&
2591                          (varRelid == 0 || varRelid == ((Var *) right)->varno))
2592         {
2593                 *var = (Var *) right;
2594                 *other = left;
2595                 *varonleft = false;
2596         }
2597         else
2598         {
2599                 /* Duh, it's too complicated for me... */
2600                 return false;
2601         }
2602
2603         return true;
2604 }
2605
2606 /*
2607  * get_join_vars
2608  *
2609  * Extract the two Vars from a join clause's argument list.  Returns
2610  * NULL for arguments that are not simple vars.
2611  */
2612 static void
2613 get_join_vars(List *args, Var **var1, Var **var2)
2614 {
2615         Node       *left,
2616                            *right;
2617
2618         if (length(args) != 2)
2619         {
2620                 *var1 = NULL;
2621                 *var2 = NULL;
2622                 return;
2623         }
2624
2625         left = (Node *) lfirst(args);
2626         right = (Node *) lsecond(args);
2627
2628         /* Ignore any binary-compatible relabeling */
2629         if (IsA(left, RelabelType))
2630                 left = ((RelabelType *) left)->arg;
2631         if (IsA(right, RelabelType))
2632                 right = ((RelabelType *) right)->arg;
2633
2634         if (IsA(left, Var))
2635                 *var1 = (Var *) left;
2636         else
2637                 *var1 = NULL;
2638
2639         if (IsA(right, Var))
2640                 *var2 = (Var *) right;
2641         else
2642                 *var2 = NULL;
2643 }
2644
2645 /*-------------------------------------------------------------------------
2646  *
2647  * Pattern analysis functions
2648  *
2649  * These routines support analysis of LIKE and regular-expression patterns
2650  * by the planner/optimizer.  It's important that they agree with the
2651  * regular-expression code in backend/regex/ and the LIKE code in
2652  * backend/utils/adt/like.c.
2653  *
2654  * Note that the prefix-analysis functions are called from
2655  * backend/optimizer/path/indxpath.c as well as from routines in this file.
2656  *
2657  *-------------------------------------------------------------------------
2658  */
2659
2660 /*
2661  * Extract the fixed prefix, if any, for a pattern.
2662  * *prefix is set to a palloc'd prefix string,
2663  * or to NULL if no fixed prefix exists for the pattern.
2664  * *rest is set to point to the remainder of the pattern after the
2665  * portion describing the fixed prefix.
2666  * The return value distinguishes no fixed prefix, a partial prefix,
2667  * or an exact-match-only pattern.
2668  */
2669
2670 static Pattern_Prefix_Status
2671 like_fixed_prefix(char *patt, bool case_insensitive,
2672                                   char **prefix, char **rest)
2673 {
2674         char       *match;
2675         int                     pos,
2676                                 match_pos;
2677
2678         *prefix = match = palloc(strlen(patt) + 1);
2679         match_pos = 0;
2680
2681         for (pos = 0; patt[pos]; pos++)
2682         {
2683                 /* % and _ are wildcard characters in LIKE */
2684                 if (patt[pos] == '%' ||
2685                         patt[pos] == '_')
2686                         break;
2687                 /* Backslash quotes the next character */
2688                 if (patt[pos] == '\\')
2689                 {
2690                         pos++;
2691                         if (patt[pos] == '\0')
2692                                 break;
2693                 }
2694
2695                 /*
2696                  * XXX I suspect isalpha() is not an adequately locale-sensitive
2697                  * test for characters that can vary under case folding?
2698                  */
2699                 if (case_insensitive && isalpha((unsigned char) patt[pos]))
2700                         break;
2701
2702                 /*
2703                  * NOTE: this code used to think that %% meant a literal %, but
2704                  * textlike() itself does not think that, and the SQL92 spec
2705                  * doesn't say any such thing either.
2706                  */
2707                 match[match_pos++] = patt[pos];
2708         }
2709
2710         match[match_pos] = '\0';
2711         *rest = &patt[pos];
2712
2713         /* in LIKE, an empty pattern is an exact match! */
2714         if (patt[pos] == '\0')
2715                 return Pattern_Prefix_Exact;    /* reached end of pattern, so
2716                                                                                  * exact */
2717
2718         if (match_pos > 0)
2719                 return Pattern_Prefix_Partial;
2720
2721         pfree(match);
2722         *prefix = NULL;
2723         return Pattern_Prefix_None;
2724 }
2725
2726 static Pattern_Prefix_Status
2727 regex_fixed_prefix(char *patt, bool case_insensitive,
2728                                    char **prefix, char **rest)
2729 {
2730         char       *match;
2731         int                     pos,
2732                                 match_pos,
2733                                 paren_depth;
2734
2735         /* Pattern must be anchored left */
2736         if (patt[0] != '^')
2737         {
2738                 *prefix = NULL;
2739                 *rest = patt;
2740                 return Pattern_Prefix_None;
2741         }
2742
2743         /*
2744          * If unquoted | is present at paren level 0 in pattern, then there
2745          * are multiple alternatives for the start of the string.
2746          */
2747         paren_depth = 0;
2748         for (pos = 1; patt[pos]; pos++)
2749         {
2750                 if (patt[pos] == '|' && paren_depth == 0)
2751                 {
2752                         *prefix = NULL;
2753                         *rest = patt;
2754                         return Pattern_Prefix_None;
2755                 }
2756                 else if (patt[pos] == '(')
2757                         paren_depth++;
2758                 else if (patt[pos] == ')' && paren_depth > 0)
2759                         paren_depth--;
2760                 else if (patt[pos] == '\\')
2761                 {
2762                         /* backslash quotes the next character */
2763                         pos++;
2764                         if (patt[pos] == '\0')
2765                                 break;
2766                 }
2767         }
2768
2769         /* OK, allocate space for pattern */
2770         *prefix = match = palloc(strlen(patt) + 1);
2771         match_pos = 0;
2772
2773         /* note start at pos 1 to skip leading ^ */
2774         for (pos = 1; patt[pos]; pos++)
2775         {
2776                 /*
2777                  * Check for characters that indicate multiple possible matches
2778                  * here. XXX I suspect isalpha() is not an adequately
2779                  * locale-sensitive test for characters that can vary under case
2780                  * folding?
2781                  */
2782                 if (patt[pos] == '.' ||
2783                         patt[pos] == '(' ||
2784                         patt[pos] == '[' ||
2785                         patt[pos] == '$' ||
2786                         (case_insensitive && isalpha((unsigned char) patt[pos])))
2787                         break;
2788
2789                 /*
2790                  * Check for quantifiers.  Except for +, this means the preceding
2791                  * character is optional, so we must remove it from the prefix
2792                  * too!
2793                  */
2794                 if (patt[pos] == '*' ||
2795                         patt[pos] == '?' ||
2796                         patt[pos] == '{')
2797                 {
2798                         if (match_pos > 0)
2799                                 match_pos--;
2800                         pos--;
2801                         break;
2802                 }
2803                 if (patt[pos] == '+')
2804                 {
2805                         pos--;
2806                         break;
2807                 }
2808                 if (patt[pos] == '\\')
2809                 {
2810                         /* backslash quotes the next character */
2811                         pos++;
2812                         if (patt[pos] == '\0')
2813                                 break;
2814                 }
2815                 match[match_pos++] = patt[pos];
2816         }
2817
2818         match[match_pos] = '\0';
2819         *rest = &patt[pos];
2820
2821         if (patt[pos] == '$' && patt[pos + 1] == '\0')
2822         {
2823                 *rest = &patt[pos + 1];
2824                 return Pattern_Prefix_Exact;    /* pattern specifies exact match */
2825         }
2826
2827         if (match_pos > 0)
2828                 return Pattern_Prefix_Partial;
2829
2830         pfree(match);
2831         *prefix = NULL;
2832         return Pattern_Prefix_None;
2833 }
2834
2835 Pattern_Prefix_Status
2836 pattern_fixed_prefix(char *patt, Pattern_Type ptype,
2837                                          char **prefix, char **rest)
2838 {
2839         Pattern_Prefix_Status result;
2840
2841         switch (ptype)
2842         {
2843                 case Pattern_Type_Like:
2844                         result = like_fixed_prefix(patt, false, prefix, rest);
2845                         break;
2846                 case Pattern_Type_Like_IC:
2847                         result = like_fixed_prefix(patt, true, prefix, rest);
2848                         break;
2849                 case Pattern_Type_Regex:
2850                         result = regex_fixed_prefix(patt, false, prefix, rest);
2851                         break;
2852                 case Pattern_Type_Regex_IC:
2853                         result = regex_fixed_prefix(patt, true, prefix, rest);
2854                         break;
2855                 default:
2856                         elog(ERROR, "pattern_fixed_prefix: bogus ptype");
2857                         result = Pattern_Prefix_None;           /* keep compiler quiet */
2858                         break;
2859         }
2860         return result;
2861 }
2862
2863 /*
2864  * Estimate the selectivity of a fixed prefix for a pattern match.
2865  *
2866  * A fixed prefix "foo" is estimated as the selectivity of the expression
2867  * "var >= 'foo' AND var < 'fop'" (see also indxqual.c).
2868  *
2869  * XXX Note: we make use of the upper bound to estimate operator selectivity
2870  * even if the locale is such that we cannot rely on the upper-bound string.
2871  * The selectivity only needs to be approximately right anyway, so it seems
2872  * more useful to use the upper-bound code than not.
2873  */
2874 static Selectivity
2875 prefix_selectivity(Query *root, Var *var, char *prefix)
2876 {
2877         Selectivity prefixsel;
2878         Oid                     cmpopr;
2879         Const      *prefixcon;
2880         List       *cmpargs;
2881         char       *greaterstr;
2882
2883         cmpopr = find_operator(">=", var->vartype);
2884         if (cmpopr == InvalidOid)
2885                 elog(ERROR, "prefix_selectivity: no >= operator for type %u",
2886                          var->vartype);
2887         prefixcon = string_to_const(prefix, var->vartype);
2888         cmpargs = makeList2(var, prefixcon);
2889         /* Assume scalargtsel is appropriate for all supported types */
2890         prefixsel = DatumGetFloat8(DirectFunctionCall4(scalargtsel,
2891                                                                                                    PointerGetDatum(root),
2892                                                                                                 ObjectIdGetDatum(cmpopr),
2893                                                                                                 PointerGetDatum(cmpargs),
2894                                                                                                    Int32GetDatum(0)));
2895
2896         /*-------
2897          * If we can create a string larger than the prefix, say
2898          *      "x < greaterstr".
2899          *-------
2900          */
2901         greaterstr = make_greater_string(prefix, var->vartype);
2902         if (greaterstr)
2903         {
2904                 Selectivity topsel;
2905
2906                 cmpopr = find_operator("<", var->vartype);
2907                 if (cmpopr == InvalidOid)
2908                         elog(ERROR, "prefix_selectivity: no < operator for type %u",
2909                                  var->vartype);
2910                 prefixcon = string_to_const(greaterstr, var->vartype);
2911                 cmpargs = makeList2(var, prefixcon);
2912                 /* Assume scalarltsel is appropriate for all supported types */
2913                 topsel = DatumGetFloat8(DirectFunctionCall4(scalarltsel,
2914                                                                                                         PointerGetDatum(root),
2915                                                                                                 ObjectIdGetDatum(cmpopr),
2916                                                                                                 PointerGetDatum(cmpargs),
2917                                                                                                         Int32GetDatum(0)));
2918
2919                 /*
2920                  * Merge the two selectivities in the same way as for a range
2921                  * query (see clauselist_selectivity()).
2922                  */
2923                 prefixsel = topsel + prefixsel - 1.0;
2924
2925                 /*
2926                  * A zero or slightly negative prefixsel should be converted into
2927                  * a small positive value; we probably are dealing with a very
2928                  * tight range and got a bogus result due to roundoff errors.
2929                  * However, if prefixsel is very negative, then we probably have
2930                  * default selectivity estimates on one or both sides of the
2931                  * range.  In that case, insert a not-so-wildly-optimistic default
2932                  * estimate.
2933                  */
2934                 if (prefixsel <= 0.0)
2935                 {
2936                         if (prefixsel < -0.01)
2937                         {
2938                                 /*
2939                                  * No data available --- use a default estimate that is
2940                                  * small, but not real small.
2941                                  */
2942                                 prefixsel = 0.005;
2943                         }
2944                         else
2945                         {
2946                                 /*
2947                                  * It's just roundoff error; use a small positive value
2948                                  */
2949                                 prefixsel = 1.0e-10;
2950                         }
2951                 }
2952         }
2953
2954         return prefixsel;
2955 }
2956
2957
2958 /*
2959  * Estimate the selectivity of a pattern of the specified type.
2960  * Note that any fixed prefix of the pattern will have been removed already.
2961  *
2962  * For now, we use a very simplistic approach: fixed characters reduce the
2963  * selectivity a good deal, character ranges reduce it a little,
2964  * wildcards (such as % for LIKE or .* for regex) increase it.
2965  */
2966
2967 #define FIXED_CHAR_SEL  0.04    /* about 1/25 */
2968 #define CHAR_RANGE_SEL  0.25
2969 #define ANY_CHAR_SEL    0.9             /* not 1, since it won't match
2970                                                                  * end-of-string */
2971 #define FULL_WILDCARD_SEL 5.0
2972 #define PARTIAL_WILDCARD_SEL 2.0
2973
2974 static Selectivity
2975 like_selectivity(char *patt, bool case_insensitive)
2976 {
2977         Selectivity sel = 1.0;
2978         int                     pos;
2979
2980         /* Skip any leading %; it's already factored into initial sel */
2981         pos = (*patt == '%') ? 1 : 0;
2982         for (; patt[pos]; pos++)
2983         {
2984                 /* % and _ are wildcard characters in LIKE */
2985                 if (patt[pos] == '%')
2986                         sel *= FULL_WILDCARD_SEL;
2987                 else if (patt[pos] == '_')
2988                         sel *= ANY_CHAR_SEL;
2989                 else if (patt[pos] == '\\')
2990                 {
2991                         /* Backslash quotes the next character */
2992                         pos++;
2993                         if (patt[pos] == '\0')
2994                                 break;
2995                         sel *= FIXED_CHAR_SEL;
2996                 }
2997                 else
2998                         sel *= FIXED_CHAR_SEL;
2999         }
3000         /* Could get sel > 1 if multiple wildcards */
3001         if (sel > 1.0)
3002                 sel = 1.0;
3003         return sel;
3004 }
3005
3006 static Selectivity
3007 regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
3008 {
3009         Selectivity sel = 1.0;
3010         int                     paren_depth = 0;
3011         int                     paren_pos = 0;  /* dummy init to keep compiler quiet */
3012         int                     pos;
3013
3014         for (pos = 0; pos < pattlen; pos++)
3015         {
3016                 if (patt[pos] == '(')
3017                 {
3018                         if (paren_depth == 0)
3019                                 paren_pos = pos;        /* remember start of parenthesized item */
3020                         paren_depth++;
3021                 }
3022                 else if (patt[pos] == ')' && paren_depth > 0)
3023                 {
3024                         paren_depth--;
3025                         if (paren_depth == 0)
3026                                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
3027                                                                                          pos - (paren_pos + 1),
3028                                                                                          case_insensitive);
3029                 }
3030                 else if (patt[pos] == '|' && paren_depth == 0)
3031                 {
3032                         /*
3033                          * If unquoted | is present at paren level 0 in pattern, we
3034                          * have multiple alternatives; sum their probabilities.
3035                          */
3036                         sel += regex_selectivity_sub(patt + (pos + 1),
3037                                                                                  pattlen - (pos + 1),
3038                                                                                  case_insensitive);
3039                         break;                          /* rest of pattern is now processed */
3040                 }
3041                 else if (patt[pos] == '[')
3042                 {
3043                         bool            negclass = false;
3044
3045                         if (patt[++pos] == '^')
3046                         {
3047                                 negclass = true;
3048                                 pos++;
3049                         }
3050                         if (patt[pos] == ']')           /* ']' at start of class is not
3051                                                                                  * special */
3052                                 pos++;
3053                         while (pos < pattlen && patt[pos] != ']')
3054                                 pos++;
3055                         if (paren_depth == 0)
3056                                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
3057                 }
3058                 else if (patt[pos] == '.')
3059                 {
3060                         if (paren_depth == 0)
3061                                 sel *= ANY_CHAR_SEL;
3062                 }
3063                 else if (patt[pos] == '*' ||
3064                                  patt[pos] == '?' ||
3065                                  patt[pos] == '+')
3066                 {
3067                         /* Ought to be smarter about quantifiers... */
3068                         if (paren_depth == 0)
3069                                 sel *= PARTIAL_WILDCARD_SEL;
3070                 }
3071                 else if (patt[pos] == '{')
3072                 {
3073                         while (pos < pattlen && patt[pos] != '}')
3074                                 pos++;
3075                         if (paren_depth == 0)
3076                                 sel *= PARTIAL_WILDCARD_SEL;
3077                 }
3078                 else if (patt[pos] == '\\')
3079                 {
3080                         /* backslash quotes the next character */
3081                         pos++;
3082                         if (pos >= pattlen)
3083                                 break;
3084                         if (paren_depth == 0)
3085                                 sel *= FIXED_CHAR_SEL;
3086                 }
3087                 else
3088                 {
3089                         if (paren_depth == 0)
3090                                 sel *= FIXED_CHAR_SEL;
3091                 }
3092         }
3093         /* Could get sel > 1 if multiple wildcards */
3094         if (sel > 1.0)
3095                 sel = 1.0;
3096         return sel;
3097 }
3098
3099 static Selectivity
3100 regex_selectivity(char *patt, bool case_insensitive)
3101 {
3102         Selectivity sel;
3103         int                     pattlen = strlen(patt);
3104
3105         /* If patt doesn't end with $, consider it to have a trailing wildcard */
3106         if (pattlen > 0 && patt[pattlen - 1] == '$' &&
3107                 (pattlen == 1 || patt[pattlen - 2] != '\\'))
3108         {
3109                 /* has trailing $ */
3110                 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
3111         }
3112         else
3113         {
3114                 /* no trailing $ */
3115                 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
3116                 sel *= FULL_WILDCARD_SEL;
3117                 if (sel > 1.0)
3118                         sel = 1.0;
3119         }
3120         return sel;
3121 }
3122
3123 static Selectivity
3124 pattern_selectivity(char *patt, Pattern_Type ptype)
3125 {
3126         Selectivity result;
3127
3128         switch (ptype)
3129         {
3130                 case Pattern_Type_Like:
3131                         result = like_selectivity(patt, false);
3132                         break;
3133                 case Pattern_Type_Like_IC:
3134                         result = like_selectivity(patt, true);
3135                         break;
3136                 case Pattern_Type_Regex:
3137                         result = regex_selectivity(patt, false);
3138                         break;
3139                 case Pattern_Type_Regex_IC:
3140                         result = regex_selectivity(patt, true);
3141                         break;
3142                 default:
3143                         elog(ERROR, "pattern_selectivity: bogus ptype");
3144                         result = 1.0;           /* keep compiler quiet */
3145                         break;
3146         }
3147         return result;
3148 }
3149
3150 /*
3151  * Test whether the database's LOCALE setting is safe for LIKE/regexp index
3152  * optimization.  The key requirement here is that given a prefix string,
3153  * say "foo", we must be able to generate another string "fop" that is
3154  * greater than all strings "foobar" starting with "foo".  Unfortunately,
3155  * many non-C locales have bizarre collation rules in which "fop" > "foo"
3156  * is not sufficient to ensure "fop" > "foobar".  Until we can come up
3157  * with a more bulletproof way of generating the upper-bound string,
3158  * disable the optimization in locales where it is not known to be safe.
3159  */
3160 bool
3161 locale_is_like_safe(void)
3162 {
3163 #ifdef USE_LOCALE
3164         /* Cache result so we only have to compute it once */
3165         static int      result = -1;
3166         char       *localeptr;
3167
3168         if (result >= 0)
3169                 return (bool) result;
3170         localeptr = setlocale(LC_COLLATE, NULL);
3171         if (!localeptr)
3172                 elog(STOP, "Invalid LC_COLLATE setting");
3173
3174         /*
3175          * Currently we accept only "C" and "POSIX" (do any systems still
3176          * return "POSIX"?).  Which other locales allow safe optimization?
3177          */
3178         if (strcmp(localeptr, "C") == 0)
3179                 result = true;
3180         else if (strcmp(localeptr, "POSIX") == 0)
3181                 result = true;
3182         else
3183                 result = false;
3184         return (bool) result;
3185 #else                                                   /* not USE_LOCALE */
3186         return true;                            /* We must be in C locale, which is OK */
3187 #endif   /* USE_LOCALE */
3188 }
3189
3190 /*
3191  * Try to generate a string greater than the given string or any string it is
3192  * a prefix of.  If successful, return a palloc'd string; else return NULL.
3193  *
3194  * To work correctly in non-ASCII locales with weird collation orders,
3195  * we cannot simply increment "foo" to "fop" --- we have to check whether
3196  * we actually produced a string greater than the given one.  If not,
3197  * increment the righthand byte again and repeat.  If we max out the righthand
3198  * byte, truncate off the last character and start incrementing the next.
3199  * For example, if "z" were the last character in the sort order, then we
3200  * could produce "foo" as a string greater than "fonz".
3201  *
3202  * This could be rather slow in the worst case, but in most cases we won't
3203  * have to try more than one or two strings before succeeding.
3204  *
3205  * XXX this is actually not sufficient, since it only copes with the case
3206  * where individual characters collate in an order different from their
3207  * numeric code assignments.  It does not handle cases where there are
3208  * cross-character effects, such as specially sorted digraphs, multiple
3209  * sort passes, etc.  For now, we just shut down the whole thing in locales
3210  * that do such things :-(
3211  */
3212 char *
3213 make_greater_string(const char *str, Oid datatype)
3214 {
3215         char       *workstr;
3216         int                     len;
3217
3218         /*
3219          * Make a modifiable copy, which will be our return value if
3220          * successful
3221          */
3222         workstr = pstrdup((char *) str);
3223
3224         while ((len = strlen(workstr)) > 0)
3225         {
3226                 unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
3227
3228                 /*
3229                  * Try to generate a larger string by incrementing the last byte.
3230                  */
3231                 while (*lastchar < (unsigned char) 255)
3232                 {
3233                         (*lastchar)++;
3234                         if (string_lessthan(str, workstr, datatype))
3235                                 return workstr; /* Success! */
3236                 }
3237
3238                 /*
3239                  * Truncate off the last character, which might be more than 1
3240                  * byte in MULTIBYTE case.
3241                  */
3242 #ifdef MULTIBYTE
3243                 len = pg_mbcliplen((const unsigned char *) workstr, len, len - 1);
3244                 workstr[len] = '\0';
3245 #else
3246                 *lastchar = '\0';
3247 #endif
3248         }
3249
3250         /* Failed... */
3251         pfree(workstr);
3252         return NULL;
3253 }
3254
3255 /*
3256  * Test whether two strings are "<" according to the rules of the given
3257  * datatype.  We do this the hard way, ie, actually calling the type's
3258  * "<" operator function, to ensure we get the right result...
3259  */
3260 static bool
3261 string_lessthan(const char *str1, const char *str2, Oid datatype)
3262 {
3263         Datum           datum1 = string_to_datum(str1, datatype);
3264         Datum           datum2 = string_to_datum(str2, datatype);
3265         bool            result;
3266
3267         switch (datatype)
3268         {
3269                 case TEXTOID:
3270                         result = DatumGetBool(DirectFunctionCall2(text_lt,
3271                                                                                                           datum1, datum2));
3272                         break;
3273
3274                 case BPCHAROID:
3275                         result = DatumGetBool(DirectFunctionCall2(bpcharlt,
3276                                                                                                           datum1, datum2));
3277                         break;
3278
3279                 case VARCHAROID:
3280                         result = DatumGetBool(DirectFunctionCall2(varcharlt,
3281                                                                                                           datum1, datum2));
3282                         break;
3283
3284                 case NAMEOID:
3285                         result = DatumGetBool(DirectFunctionCall2(namelt,
3286                                                                                                           datum1, datum2));
3287                         break;
3288
3289                 case BYTEAOID:
3290                         result = DatumGetBool(DirectFunctionCall2(bytealt,
3291                                                                                                           datum1, datum2));
3292                         break;
3293
3294                 default:
3295                         elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
3296                         result = false;
3297                         break;
3298         }
3299
3300         pfree(DatumGetPointer(datum1));
3301         pfree(DatumGetPointer(datum2));
3302
3303         return result;
3304 }
3305
3306 /* See if there is a binary op of the given name for the given datatype */
3307 static Oid
3308 find_operator(const char *opname, Oid datatype)
3309 {
3310         return GetSysCacheOid(OPERNAME,
3311                                                   PointerGetDatum(opname),
3312                                                   ObjectIdGetDatum(datatype),
3313                                                   ObjectIdGetDatum(datatype),
3314                                                   CharGetDatum('b'));
3315 }
3316
3317 /*
3318  * Generate a Datum of the appropriate type from a C string.
3319  * Note that all of the supported types are pass-by-ref, so the
3320  * returned value should be pfree'd if no longer needed.
3321  */
3322 static Datum
3323 string_to_datum(const char *str, Oid datatype)
3324 {
3325         /*
3326          * We cheat a little by assuming that textin() will do for bpchar and
3327          * varchar constants too...
3328          */
3329         if (datatype == NAMEOID)
3330                 return DirectFunctionCall1(namein, CStringGetDatum(str));
3331         else
3332                 return DirectFunctionCall1(textin, CStringGetDatum(str));
3333 }
3334
3335 /*
3336  * Generate a Const node of the appropriate type from a C string.
3337  */
3338 static Const *
3339 string_to_const(const char *str, Oid datatype)
3340 {
3341         Datum           conval = string_to_datum(str, datatype);
3342
3343         return makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
3344                                          conval, false, false, false, false);
3345 }
3346
3347 /*-------------------------------------------------------------------------
3348  *
3349  * Index cost estimation functions
3350  *
3351  * genericcostestimate is a general-purpose estimator for use when we
3352  * don't have any better idea about how to estimate.  Index-type-specific
3353  * knowledge can be incorporated in the type-specific routines.
3354  *
3355  *-------------------------------------------------------------------------
3356  */
3357
3358 static void
3359 genericcostestimate(Query *root, RelOptInfo *rel,
3360                                         IndexOptInfo *index, List *indexQuals,
3361                                         Cost *indexStartupCost,
3362                                         Cost *indexTotalCost,
3363                                         Selectivity *indexSelectivity,
3364                                         double *indexCorrelation)
3365 {
3366         double          numIndexTuples;
3367         double          numIndexPages;
3368         List       *selectivityQuals = indexQuals;
3369
3370         /*
3371          * If the index is partial, AND the index predicate with the
3372          * explicitly given indexquals to produce a more accurate idea of the
3373          * index restriction.  This may produce redundant clauses, which we
3374          * hope that cnfify and clauselist_selectivity will deal with
3375          * intelligently.
3376          *
3377          * Note that index->indpred and indexQuals are both in implicit-AND form
3378          * to start with, which we have to make explicit to hand to
3379          * canonicalize_qual, and then we get back implicit-AND form again.
3380          */
3381         if (index->indpred != NIL)
3382         {
3383                 Expr       *andedQuals;
3384
3385                 andedQuals = make_ands_explicit(nconc(listCopy(index->indpred),
3386                                                                                           indexQuals));
3387                 selectivityQuals = canonicalize_qual(andedQuals, true);
3388         }
3389
3390         /* Estimate the fraction of main-table tuples that will be visited */
3391         *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
3392                                                                                            lfirsti(rel->relids));
3393
3394         /*
3395          * Estimate the number of tuples that will be visited.  We do it in
3396          * this rather peculiar-looking way in order to get the right answer
3397          * for partial indexes.  We can bound the number of tuples by the
3398          * index size, in any case.
3399          */
3400         numIndexTuples = *indexSelectivity * rel->tuples;
3401
3402         if (numIndexTuples > index->tuples)
3403                 numIndexTuples = index->tuples;
3404
3405         /*
3406          * Always estimate at least one tuple is touched, even when
3407          * indexSelectivity estimate is tiny.
3408          */
3409         if (numIndexTuples < 1.0)
3410                 numIndexTuples = 1.0;
3411
3412         /*
3413          * Estimate the number of index pages that will be retrieved.
3414          *
3415          * For all currently-supported index types, the first page of the index
3416          * is a metadata page, and we should figure on fetching that plus a
3417          * pro-rated fraction of the remaining pages.
3418          */
3419         if (index->pages > 1 && index->tuples > 0)
3420         {
3421                 numIndexPages = (numIndexTuples / index->tuples) * (index->pages - 1);
3422                 numIndexPages += 1;             /* count the metapage too */
3423                 numIndexPages = ceil(numIndexPages);
3424         }
3425         else
3426                 numIndexPages = 1.0;
3427
3428         /*
3429          * Compute the index access cost.
3430          *
3431          * Our generic assumption is that the index pages will be read
3432          * sequentially, so they have cost 1.0 each, not random_page_cost.
3433          * Also, we charge for evaluation of the indexquals at each index
3434          * tuple. All the costs are assumed to be paid incrementally during
3435          * the scan.
3436          */
3437         *indexStartupCost = 0;
3438         *indexTotalCost = numIndexPages +
3439                 (cpu_index_tuple_cost + cost_qual_eval(indexQuals)) * numIndexTuples;
3440
3441         /*
3442          * Generic assumption about index correlation: there isn't any.
3443          */
3444         *indexCorrelation = 0.0;
3445 }
3446
3447
3448 Datum
3449 btcostestimate(PG_FUNCTION_ARGS)
3450 {
3451         Query      *root = (Query *) PG_GETARG_POINTER(0);
3452         RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
3453         IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
3454         List       *indexQuals = (List *) PG_GETARG_POINTER(3);
3455         Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
3456         Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
3457         Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
3458         double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
3459
3460         genericcostestimate(root, rel, index, indexQuals,
3461                                                 indexStartupCost, indexTotalCost,
3462                                                 indexSelectivity, indexCorrelation);
3463
3464         /*
3465          * If it's a functional index, leave the default zero-correlation
3466          * estimate in place.  If not, and if we can get an estimate for the
3467          * first variable's ordering correlation C from pg_statistic, estimate
3468          * the index correlation as C / number-of-columns. (The idea here is
3469          * that multiple columns dilute the importance of the first column's
3470          * ordering, but don't negate it entirely.)
3471          */
3472         if (index->indproc == InvalidOid)
3473         {
3474                 Oid                     relid;
3475                 HeapTuple       tuple;
3476
3477                 relid = getrelid(lfirsti(rel->relids), root->rtable);
3478                 Assert(relid != InvalidOid);
3479                 tuple = SearchSysCache(STATRELATT,
3480                                                            ObjectIdGetDatum(relid),
3481                                                            Int16GetDatum(index->indexkeys[0]),
3482                                                            0, 0);
3483                 if (HeapTupleIsValid(tuple))
3484                 {
3485                         Oid                     typid;
3486                         int32           typmod;
3487                         float4     *numbers;
3488                         int                     nnumbers;
3489
3490                         get_atttypetypmod(relid, index->indexkeys[0],
3491                                                           &typid, &typmod);
3492                         if (get_attstatsslot(tuple, typid, typmod,
3493                                                                  STATISTIC_KIND_CORRELATION,
3494                                                                  index->ordering[0],
3495                                                                  NULL, NULL, &numbers, &nnumbers))
3496                         {
3497                                 double          varCorrelation;
3498                                 int                     nKeys;
3499
3500                                 Assert(nnumbers == 1);
3501                                 varCorrelation = numbers[0];
3502                                 for (nKeys = 1; index->indexkeys[nKeys] != 0; nKeys++)
3503                                          /* skip */ ;
3504
3505                                 *indexCorrelation = varCorrelation / nKeys;
3506
3507                                 free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
3508                         }
3509                         ReleaseSysCache(tuple);
3510                 }
3511         }
3512
3513         PG_RETURN_VOID();
3514 }
3515
3516 Datum
3517 rtcostestimate(PG_FUNCTION_ARGS)
3518 {
3519         Query      *root = (Query *) PG_GETARG_POINTER(0);
3520         RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
3521         IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
3522         List       *indexQuals = (List *) PG_GETARG_POINTER(3);
3523         Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
3524         Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
3525         Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
3526         double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
3527
3528         genericcostestimate(root, rel, index, indexQuals,
3529                                                 indexStartupCost, indexTotalCost,
3530                                                 indexSelectivity, indexCorrelation);
3531
3532         PG_RETURN_VOID();
3533 }
3534
3535 Datum
3536 hashcostestimate(PG_FUNCTION_ARGS)
3537 {
3538         Query      *root = (Query *) PG_GETARG_POINTER(0);
3539         RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
3540         IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
3541         List       *indexQuals = (List *) PG_GETARG_POINTER(3);
3542         Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
3543         Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
3544         Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
3545         double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
3546
3547         genericcostestimate(root, rel, index, indexQuals,
3548                                                 indexStartupCost, indexTotalCost,
3549                                                 indexSelectivity, indexCorrelation);
3550
3551         PG_RETURN_VOID();
3552 }
3553
3554 Datum
3555 gistcostestimate(PG_FUNCTION_ARGS)
3556 {
3557         Query      *root = (Query *) PG_GETARG_POINTER(0);
3558         RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
3559         IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
3560         List       *indexQuals = (List *) PG_GETARG_POINTER(3);
3561         Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
3562         Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
3563         Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
3564         double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
3565
3566         genericcostestimate(root, rel, index, indexQuals,
3567                                                 indexStartupCost, indexTotalCost,
3568                                                 indexSelectivity, indexCorrelation);
3569
3570         PG_RETURN_VOID();
3571 }