granicus.if.org Git - postgresql/blob - src/backend/utils/adt/selfuncs.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * selfuncs.c
   4  *        Selectivity functions and index cost estimation functions for
   5  *        standard operators and index access methods.
   6  *
   7  *        Selectivity routines are registered in the pg_operator catalog
   8  *        in the "oprrest" and "oprjoin" attributes.
   9  *
  10  *        Index cost functions are located via the index AM's API struct,
  11  *        which is obtained from the handler function registered in pg_am.
  12  *
  13  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  14  * Portions Copyright (c) 1994, Regents of the University of California
  15  *
  16  *
  17  * IDENTIFICATION
  18  *        src/backend/utils/adt/selfuncs.c
  19  *
  20  *-------------------------------------------------------------------------
  21  */
  22
  23 /*----------
  24  * Operator selectivity estimation functions are called to estimate the
  25  * selectivity of WHERE clauses whose top-level operator is their operator.
  26  * We divide the problem into two cases:
  27  *              Restriction clause estimation: the clause involves vars of just
  28  *                      one relation.
  29  *              Join clause estimation: the clause involves vars of multiple rels.
  30  * Join selectivity estimation is far more difficult and usually less accurate
  31  * than restriction estimation.
  32  *
  33  * When dealing with the inner scan of a nestloop join, we consider the
  34  * join's joinclauses as restriction clauses for the inner relation, and
  35  * treat vars of the outer relation as parameters (a/k/a constants of unknown
  36  * values).  So, restriction estimators need to be able to accept an argument
  37  * telling which relation is to be treated as the variable.
  38  *
  39  * The call convention for a restriction estimator (oprrest function) is
  40  *
  41  *              Selectivity oprrest (PlannerInfo *root,
  42  *                                                       Oid operator,
  43  *                                                       List *args,
  44  *                                                       int varRelid);
  45  *
  46  * root: general information about the query (rtable and RelOptInfo lists
  47  * are particularly important for the estimator).
  48  * operator: OID of the specific operator in question.
  49  * args: argument list from the operator clause.
  50  * varRelid: if not zero, the relid (rtable index) of the relation to
  51  * be treated as the variable relation.  May be zero if the args list
  52  * is known to contain vars of only one relation.
  53  *
  54  * This is represented at the SQL level (in pg_proc) as
  55  *
  56  *              float8 oprrest (internal, oid, internal, int4);
  57  *
  58  * The result is a selectivity, that is, a fraction (0 to 1) of the rows
  59  * of the relation that are expected to produce a TRUE result for the
  60  * given operator.
  61  *
  62  * The call convention for a join estimator (oprjoin function) is similar
  63  * except that varRelid is not needed, and instead join information is
  64  * supplied:
  65  *
  66  *              Selectivity oprjoin (PlannerInfo *root,
  67  *                                                       Oid operator,
  68  *                                                       List *args,
  69  *                                                       JoinType jointype,
  70  *                                                       SpecialJoinInfo *sjinfo);
  71  *
  72  *              float8 oprjoin (internal, oid, internal, int2, internal);
  73  *
  74  * (Before Postgres 8.4, join estimators had only the first four of these
  75  * parameters.  That signature is still allowed, but deprecated.)  The
  76  * relationship between jointype and sjinfo is explained in the comments for
  77  * clause_selectivity() --- the short version is that jointype is usually
  78  * best ignored in favor of examining sjinfo.
  79  *
  80  * Join selectivity for regular inner and outer joins is defined as the
  81  * fraction (0 to 1) of the cross product of the relations that is expected
  82  * to produce a TRUE result for the given operator.  For both semi and anti
  83  * joins, however, the selectivity is defined as the fraction of the left-hand
  84  * side relation's rows that are expected to have a match (ie, at least one
  85  * row with a TRUE result) in the right-hand side.
  86  *
  87  * For both oprrest and oprjoin functions, the operator's input collation OID
  88  * (if any) is passed using the standard fmgr mechanism, so that the estimator
  89  * function can fetch it with PG_GET_COLLATION().  Note, however, that all
  90  * statistics in pg_statistic are currently built using the database's default
  91  * collation.  Thus, in most cases where we are looking at statistics, we
  92  * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
  93  * We expect that the error induced by doing this is usually not large enough
  94  * to justify complicating matters.
  95  *----------
  96  */
  97
  98 #include "postgres.h"
  99
 100 #include <ctype.h>
 101 #include <float.h>
 102 #include <math.h>
 103
 104 #include "access/brin.h"
 105 #include "access/gin.h"
 106 #include "access/htup_details.h"
 107 #include "access/sysattr.h"
 108 #include "catalog/index.h"
 109 #include "catalog/pg_am.h"
 110 #include "catalog/pg_collation.h"
 111 #include "catalog/pg_operator.h"
 112 #include "catalog/pg_opfamily.h"
 113 #include "catalog/pg_statistic.h"
 114 #include "catalog/pg_statistic_ext.h"
 115 #include "catalog/pg_type.h"
 116 #include "executor/executor.h"
 117 #include "mb/pg_wchar.h"
 118 #include "miscadmin.h"
 119 #include "nodes/makefuncs.h"
 120 #include "nodes/nodeFuncs.h"
 121 #include "optimizer/clauses.h"
 122 #include "optimizer/cost.h"
 123 #include "optimizer/pathnode.h"
 124 #include "optimizer/paths.h"
 125 #include "optimizer/plancat.h"
 126 #include "optimizer/predtest.h"
 127 #include "optimizer/restrictinfo.h"
 128 #include "optimizer/var.h"
 129 #include "parser/parse_clause.h"
 130 #include "parser/parse_coerce.h"
 131 #include "parser/parsetree.h"
 132 #include "statistics/statistics.h"
 133 #include "utils/acl.h"
 134 #include "utils/builtins.h"
 135 #include "utils/bytea.h"
 136 #include "utils/date.h"
 137 #include "utils/datum.h"
 138 #include "utils/fmgroids.h"
 139 #include "utils/index_selfuncs.h"
 140 #include "utils/lsyscache.h"
 141 #include "utils/nabstime.h"
 142 #include "utils/pg_locale.h"
 143 #include "utils/rel.h"
 144 #include "utils/selfuncs.h"
 145 #include "utils/spccache.h"
 146 #include "utils/syscache.h"
 147 #include "utils/timestamp.h"
 148 #include "utils/tqual.h"
 149 #include "utils/typcache.h"
 150 #include "utils/varlena.h"
 151
 152
  153 /* Hooks for plugins to get control when we ask for stats; both are initialized to NULL */
  154 get_relation_stats_hook_type get_relation_stats_hook = NULL;
  155 get_index_stats_hook_type get_index_stats_hook = NULL;
  156 /* Prototypes for file-local (static) helper routines */
  157 static double var_eq_const(VariableStatData *vardata, Oid operator,
  158                          Datum constval, bool constisnull,
  159                          bool varonleft);
  160 static double var_eq_non_const(VariableStatData *vardata, Oid operator,
  161                                  Node *other,
  162                                  bool varonleft);
  163 static double ineq_histogram_selectivity(PlannerInfo *root,
  164                                                    VariableStatData *vardata,
  165                                                    FmgrInfo *opproc, bool isgt,
  166                                                    Datum constval, Oid consttype);
  167 static double eqjoinsel_inner(Oid operator,
  168                                 VariableStatData *vardata1, VariableStatData *vardata2);
  169 static double eqjoinsel_semi(Oid operator,
  170                            VariableStatData *vardata1, VariableStatData *vardata2,
  171                            RelOptInfo *inner_rel);
  172 static bool estimate_multivariate_ndistinct(PlannerInfo *root,
  173                                                 RelOptInfo *rel, List **varinfos, double *ndistinct);
  174 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
  175                                   Datum lobound, Datum hibound, Oid boundstypid,
  176                                   double *scaledlobound, double *scaledhibound);
  177 static double convert_numeric_to_scalar(Datum value, Oid typid);
  178 static void convert_string_to_scalar(char *value,
  179                                                  double *scaledvalue,
  180                                                  char *lobound,
  181                                                  double *scaledlobound,
  182                                                  char *hibound,
  183                                                  double *scaledhibound);
  184 static void convert_bytea_to_scalar(Datum value,
  185                                                 double *scaledvalue,
  186                                                 Datum lobound,
  187                                                 double *scaledlobound,
  188                                                 Datum hibound,
  189                                                 double *scaledhibound);
  190 static double convert_one_string_to_scalar(char *value,
  191                                                          int rangelo, int rangehi);
  192 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
  193                                                         int rangelo, int rangehi);
  194 static char *convert_string_datum(Datum value, Oid typid);
  195 static double convert_timevalue_to_scalar(Datum value, Oid typid);
  196 static void examine_simple_variable(PlannerInfo *root, Var *var,
  197                                                 VariableStatData *vardata);
  198 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
  199                                    Oid sortop, Datum *min, Datum *max);
  200 static bool get_actual_variable_range(PlannerInfo *root,
  201                                                   VariableStatData *vardata,
  202                                                   Oid sortop,
  203                                                   Datum *min, Datum *max);
  204 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
  205 static Selectivity prefix_selectivity(PlannerInfo *root,
  206                                    VariableStatData *vardata,
  207                                    Oid vartype, Oid opfamily, Const *prefixcon);
  208 static Selectivity like_selectivity(const char *patt, int pattlen,
  209                                  bool case_insensitive);
  210 static Selectivity regex_selectivity(const char *patt, int pattlen,
  211                                   bool case_insensitive,
  212                                   int fixed_prefix_len);
  213 static Datum string_to_datum(const char *str, Oid datatype);
  214 static Const *string_to_const(const char *str, Oid datatype);
  215 static Const *string_to_bytea_const(const char *str, size_t str_len);
  216 static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
 217
 218
 219 /*
 220  *              eqsel                   - Selectivity of "=" for any data types.
 221  *
 222  * Note: this routine is also used to estimate selectivity for some
 223  * operators that are not "=" but have comparable selectivity behavior,
 224  * such as "~=" (geometric approximate-match).  Even for "=", we must
 225  * keep in mind that the left and right datatypes may differ.
 226  */
 227 Datum
 228 eqsel(PG_FUNCTION_ARGS)
 229 {
 230         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 231         Oid                     operator = PG_GETARG_OID(1);
 232         List       *args = (List *) PG_GETARG_POINTER(2);
 233         int                     varRelid = PG_GETARG_INT32(3);
 234         VariableStatData vardata;
 235         Node       *other;
 236         bool            varonleft;
 237         double          selec;
 238
 239         /*
 240          * If expression is not variable = something or something = variable, then
 241          * punt and return a default estimate.
 242          */
 243         if (!get_restriction_variable(root, args, varRelid,
 244                                                                   &vardata, &other, &varonleft))
 245                 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
 246
 247         /*
 248          * We can do a lot better if the something is a constant.  (Note: the
 249          * Const might result from estimation rather than being a simple constant
 250          * in the query.)
 251          */
 252         if (IsA(other, Const))
 253                 selec = var_eq_const(&vardata, operator,
 254                                                          ((Const *) other)->constvalue,
 255                                                          ((Const *) other)->constisnull,
 256                                                          varonleft);
 257         else
 258                 selec = var_eq_non_const(&vardata, operator, other,
 259                                                                  varonleft);
 260
 261         ReleaseVariableStats(vardata);
 262
 263         PG_RETURN_FLOAT8((float8) selec);
 264 }
 265
 266 /*
 267  * var_eq_const --- eqsel for var = const case
 268  *
 269  * This is split out so that some other estimation functions can use it.
 270  */
 271 static double
 272 var_eq_const(VariableStatData *vardata, Oid operator,
 273                          Datum constval, bool constisnull,
 274                          bool varonleft)
 275 {
 276         double          selec;
 277         bool            isdefault;
 278         Oid                     opfuncoid;
 279
 280         /*
 281          * If the constant is NULL, assume operator is strict and return zero, ie,
 282          * operator will never return TRUE.
 283          */
 284         if (constisnull)
 285                 return 0.0;
 286
 287         /*
 288          * If we matched the var to a unique index or DISTINCT clause, assume
 289          * there is exactly one match regardless of anything else.  (This is
 290          * slightly bogus, since the index or clause's equality operator might be
 291          * different from ours, but it's much more likely to be right than
 292          * ignoring the information.)
 293          */
 294         if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
 295                 return 1.0 / vardata->rel->tuples;
 296
 297         if (HeapTupleIsValid(vardata->statsTuple) &&
 298                 statistic_proc_security_check(vardata,
 299                                                                           (opfuncoid = get_opcode(operator))))
 300         {
 301                 Form_pg_statistic stats;
 302                 AttStatsSlot sslot;
 303                 bool            match = false;
 304                 int                     i;
 305
 306                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
 307
 308                 /*
 309                  * Is the constant "=" to any of the column's most common values?
 310                  * (Although the given operator may not really be "=", we will assume
 311                  * that seeing whether it returns TRUE is an appropriate test.  If you
 312                  * don't like this, maybe you shouldn't be using eqsel for your
 313                  * operator...)
 314                  */
 315                 if (get_attstatsslot(&sslot, vardata->statsTuple,
 316                                                          STATISTIC_KIND_MCV, InvalidOid,
 317                                                          ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
 318                 {
 319                         FmgrInfo        eqproc;
 320
 321                         fmgr_info(opfuncoid, &eqproc);
 322
 323                         for (i = 0; i < sslot.nvalues; i++)
 324                         {
 325                                 /* be careful to apply operator right way 'round */
 326                                 if (varonleft)
 327                                         match = DatumGetBool(FunctionCall2Coll(&eqproc,
 328                                                                                                            DEFAULT_COLLATION_OID,
 329                                                                                                                    sslot.values[i],
 330                                                                                                                    constval));
 331                                 else
 332                                         match = DatumGetBool(FunctionCall2Coll(&eqproc,
 333                                                                                                            DEFAULT_COLLATION_OID,
 334                                                                                                                    constval,
 335                                                                                                                    sslot.values[i]));
 336                                 if (match)
 337                                         break;
 338                         }
 339                 }
 340                 else
 341                 {
 342                         /* no most-common-value info available */
 343                         i = 0;                          /* keep compiler quiet */
 344                 }
 345
 346                 if (match)
 347                 {
 348                         /*
 349                          * Constant is "=" to this common value.  We know selectivity
 350                          * exactly (or as exactly as ANALYZE could calculate it, anyway).
 351                          */
 352                         selec = sslot.numbers[i];
 353                 }
 354                 else
 355                 {
 356                         /*
 357                          * Comparison is against a constant that is neither NULL nor any
 358                          * of the common values.  Its selectivity cannot be more than
 359                          * this:
 360                          */
 361                         double          sumcommon = 0.0;
 362                         double          otherdistinct;
 363
 364                         for (i = 0; i < sslot.nnumbers; i++)
 365                                 sumcommon += sslot.numbers[i];
 366                         selec = 1.0 - sumcommon - stats->stanullfrac;
 367                         CLAMP_PROBABILITY(selec);
 368
 369                         /*
 370                          * and in fact it's probably a good deal less. We approximate that
 371                          * all the not-common values share this remaining fraction
 372                          * equally, so we divide by the number of other distinct values.
 373                          */
 374                         otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
 375                                 sslot.nnumbers;
 376                         if (otherdistinct > 1)
 377                                 selec /= otherdistinct;
 378
 379                         /*
 380                          * Another cross-check: selectivity shouldn't be estimated as more
 381                          * than the least common "most common value".
 382                          */
 383                         if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
 384                                 selec = sslot.numbers[sslot.nnumbers - 1];
 385                 }
 386
 387                 free_attstatsslot(&sslot);
 388         }
 389         else
 390         {
 391                 /*
 392                  * No ANALYZE stats available, so make a guess using estimated number
 393                  * of distinct values and assuming they are equally common. (The guess
 394                  * is unlikely to be very good, but we do know a few special cases.)
 395                  */
 396                 selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
 397         }
 398
 399         /* result should be in range, but make sure... */
 400         CLAMP_PROBABILITY(selec);
 401
 402         return selec;
 403 }
 404
 405 /*
 406  * var_eq_non_const --- eqsel for var = something-other-than-const case
 407  */
 408 static double
 409 var_eq_non_const(VariableStatData *vardata, Oid operator,
 410                                  Node *other,
 411                                  bool varonleft)
 412 {
 413         double          selec;
 414         bool            isdefault;
 415
 416         /*
 417          * If we matched the var to a unique index or DISTINCT clause, assume
 418          * there is exactly one match regardless of anything else.  (This is
 419          * slightly bogus, since the index or clause's equality operator might be
 420          * different from ours, but it's much more likely to be right than
 421          * ignoring the information.)
 422          */
 423         if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
 424                 return 1.0 / vardata->rel->tuples;
 425
 426         if (HeapTupleIsValid(vardata->statsTuple))
 427         {
 428                 Form_pg_statistic stats;
 429                 double          ndistinct;
 430                 AttStatsSlot sslot;
 431
 432                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
 433
 434                 /*
 435                  * Search is for a value that we do not know a priori, but we will
 436                  * assume it is not NULL.  Estimate the selectivity as non-null
 437                  * fraction divided by number of distinct values, so that we get a
 438                  * result averaged over all possible values whether common or
 439                  * uncommon.  (Essentially, we are assuming that the not-yet-known
 440                  * comparison value is equally likely to be any of the possible
 441                  * values, regardless of their frequency in the table.  Is that a good
 442                  * idea?)
 443                  */
 444                 selec = 1.0 - stats->stanullfrac;
 445                 ndistinct = get_variable_numdistinct(vardata, &isdefault);
 446                 if (ndistinct > 1)
 447                         selec /= ndistinct;
 448
 449                 /*
 450                  * Cross-check: selectivity should never be estimated as more than the
 451                  * most common value's.
 452                  */
 453                 if (get_attstatsslot(&sslot, vardata->statsTuple,
 454                                                          STATISTIC_KIND_MCV, InvalidOid,
 455                                                          ATTSTATSSLOT_NUMBERS))
 456                 {
 457                         if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
 458                                 selec = sslot.numbers[0];
 459                         free_attstatsslot(&sslot);
 460                 }
 461         }
 462         else
 463         {
 464                 /*
 465                  * No ANALYZE stats available, so make a guess using estimated number
 466                  * of distinct values and assuming they are equally common. (The guess
 467                  * is unlikely to be very good, but we do know a few special cases.)
 468                  */
 469                 selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
 470         }
 471
 472         /* result should be in range, but make sure... */
 473         CLAMP_PROBABILITY(selec);
 474
 475         return selec;
 476 }
 477
 478 /*
 479  *              neqsel                  - Selectivity of "!=" for any data types.
 480  *
 481  * This routine is also used for some operators that are not "!="
 482  * but have comparable selectivity behavior.  See above comments
 483  * for eqsel().
 484  */
 485 Datum
 486 neqsel(PG_FUNCTION_ARGS)
 487 {
 488         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 489         Oid                     operator = PG_GETARG_OID(1);
 490         List       *args = (List *) PG_GETARG_POINTER(2);
 491         int                     varRelid = PG_GETARG_INT32(3);
 492         Oid                     eqop;
 493         float8          result;
 494
 495         /*
 496          * We want 1 - eqsel() where the equality operator is the one associated
 497          * with this != operator, that is, its negator.
 498          */
 499         eqop = get_negator(operator);
 500         if (eqop)
 501         {
 502                 result = DatumGetFloat8(DirectFunctionCall4(eqsel,
 503                                                                                                         PointerGetDatum(root),
 504                                                                                                         ObjectIdGetDatum(eqop),
 505                                                                                                         PointerGetDatum(args),
 506                                                                                                         Int32GetDatum(varRelid)));
 507         }
 508         else
 509         {
 510                 /* Use default selectivity (should we raise an error instead?) */
 511                 result = DEFAULT_EQ_SEL;
 512         }
 513         result = 1.0 - result;
 514         PG_RETURN_FLOAT8(result);
 515 }
 516
 517 /*
 518  *      scalarineqsel           - Selectivity of "<", "<=", ">", ">=" for scalars.
 519  *
 520  * This is the guts of both scalarltsel and scalargtsel.  The caller has
 521  * commuted the clause, if necessary, so that we can treat the variable as
 522  * being on the left.  The caller must also make sure that the other side
 523  * of the clause is a non-null Const, and dissect same into a value and
 524  * datatype.
 525  *
 526  * This routine works for any datatype (or pair of datatypes) known to
 527  * convert_to_scalar().  If it is applied to some other datatype,
 528  * it will return a default estimate.
 529  */
 530 static double
 531 scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
 532                           VariableStatData *vardata, Datum constval, Oid consttype)
 533 {
 534         Form_pg_statistic stats;
 535         FmgrInfo        opproc;
 536         double          mcv_selec,
 537                                 hist_selec,
 538                                 sumcommon;
 539         double          selec;
 540
 541         if (!HeapTupleIsValid(vardata->statsTuple))
 542         {
 543                 /* no stats available, so default result */
 544                 return DEFAULT_INEQ_SEL;
 545         }
 546         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
 547
 548         fmgr_info(get_opcode(operator), &opproc);
 549
 550         /*
 551          * If we have most-common-values info, add up the fractions of the MCV
 552          * entries that satisfy MCV OP CONST.  These fractions contribute directly
 553          * to the result selectivity.  Also add up the total fraction represented
 554          * by MCV entries.
 555          */
 556         mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
 557                                                                 &sumcommon);
 558
 559         /*
 560          * If there is a histogram, determine which bin the constant falls in, and
 561          * compute the resulting contribution to selectivity.
 562          */
 563         hist_selec = ineq_histogram_selectivity(root, vardata, &opproc, isgt,
 564                                                                                         constval, consttype);
 565
 566         /*
 567          * Now merge the results from the MCV and histogram calculations,
 568          * realizing that the histogram covers only the non-null values that are
 569          * not listed in MCV.
 570          */
 571         selec = 1.0 - stats->stanullfrac - sumcommon;
 572
 573         if (hist_selec >= 0.0)
 574                 selec *= hist_selec;
 575         else
 576         {
 577                 /*
 578                  * If no histogram but there are values not accounted for by MCV,
 579                  * arbitrarily assume half of them will match.
 580                  */
 581                 selec *= 0.5;
 582         }
 583
 584         selec += mcv_selec;
 585
 586         /* result should be in range, but make sure... */
 587         CLAMP_PROBABILITY(selec);
 588
 589         return selec;
 590 }
 591
 592 /*
 593  *      mcv_selectivity                 - Examine the MCV list for selectivity estimates
 594  *
 595  * Determine the fraction of the variable's MCV population that satisfies
 596  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.  Also
 597  * compute the fraction of the total column population represented by the MCV
 598  * list.  This code will work for any boolean-returning predicate operator.
 599  *
 600  * The function result is the MCV selectivity, and the fraction of the
 601  * total population is returned into *sumcommonp.  Zeroes are returned
 602  * if there is no MCV list.
 603  */
 604 double
 605 mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
 606                                 Datum constval, bool varonleft,
 607                                 double *sumcommonp)
 608 {
 609         double          mcv_selec,
 610                                 sumcommon;
 611         AttStatsSlot sslot;
 612         int                     i;
 613
 614         mcv_selec = 0.0;
 615         sumcommon = 0.0;
 616
 617         if (HeapTupleIsValid(vardata->statsTuple) &&
 618                 statistic_proc_security_check(vardata, opproc->fn_oid) &&
 619                 get_attstatsslot(&sslot, vardata->statsTuple,
 620                                                  STATISTIC_KIND_MCV, InvalidOid,
 621                                                  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
 622         {
 623                 for (i = 0; i < sslot.nvalues; i++)
 624                 {
 625                         if (varonleft ?
 626                                 DatumGetBool(FunctionCall2Coll(opproc,
 627                                                                                            DEFAULT_COLLATION_OID,
 628                                                                                            sslot.values[i],
 629                                                                                            constval)) :
 630                                 DatumGetBool(FunctionCall2Coll(opproc,
 631                                                                                            DEFAULT_COLLATION_OID,
 632                                                                                            constval,
 633                                                                                            sslot.values[i])))
 634                                 mcv_selec += sslot.numbers[i];
 635                         sumcommon += sslot.numbers[i];
 636                 }
 637                 free_attstatsslot(&sslot);
 638         }
 639
 640         *sumcommonp = sumcommon;
 641         return mcv_selec;
 642 }
 643
 644 /*
 645  *      histogram_selectivity   - Examine the histogram for selectivity estimates
 646  *
 647  * Determine the fraction of the variable's histogram entries that satisfy
 648  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
 649  *
 650  * This code will work for any boolean-returning predicate operator, whether
 651  * or not it has anything to do with the histogram sort operator.  We are
 652  * essentially using the histogram just as a representative sample.  However,
 653  * small histograms are unlikely to be all that representative, so the caller
 654  * should be prepared to fall back on some other estimation approach when the
 655  * histogram is missing or very small.  It may also be prudent to combine this
 656  * approach with another one when the histogram is small.
 657  *
 658  * If the actual histogram size is not at least min_hist_size, we won't bother
 659  * to do the calculation at all.  Also, if the n_skip parameter is > 0, we
 660  * ignore the first and last n_skip histogram elements, on the grounds that
 661  * they are outliers and hence not very representative.  Typical values for
 662  * these parameters are 10 and 1.
 663  *
 664  * The function result is the selectivity, or -1 if there is no histogram
 665  * or it's smaller than min_hist_size.
 666  *
 667  * The output parameter *hist_size receives the actual histogram size,
 668  * or zero if no histogram.  Callers may use this number to decide how
 669  * much faith to put in the function result.
 670  *
 671  * Note that the result disregards both the most-common-values (if any) and
 672  * null entries.  The caller is expected to combine this result with
 673  * statistics for those portions of the column population.  It may also be
 674  * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
 675  */
 676 double
 677 histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
 678                                           Datum constval, bool varonleft,
 679                                           int min_hist_size, int n_skip,
 680                                           int *hist_size)
 681 {
 682         double          result;
 683         AttStatsSlot sslot;
 684
 685         /* check sanity of parameters */
 686         Assert(n_skip >= 0);
 687         Assert(min_hist_size > 2 * n_skip);
 688
 689         if (HeapTupleIsValid(vardata->statsTuple) &&
 690                 statistic_proc_security_check(vardata, opproc->fn_oid) &&
 691                 get_attstatsslot(&sslot, vardata->statsTuple,
 692                                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
 693                                                  ATTSTATSSLOT_VALUES))
 694         {
 695                 *hist_size = sslot.nvalues;
 696                 if (sslot.nvalues >= min_hist_size)
 697                 {
 698                         int                     nmatch = 0;
 699                         int                     i;
 700
 701                         for (i = n_skip; i < sslot.nvalues - n_skip; i++)
 702                         {
 703                                 if (varonleft ?
 704                                         DatumGetBool(FunctionCall2Coll(opproc,
 705                                                                                                    DEFAULT_COLLATION_OID,
 706                                                                                                    sslot.values[i],
 707                                                                                                    constval)) :
 708                                         DatumGetBool(FunctionCall2Coll(opproc,
 709                                                                                                    DEFAULT_COLLATION_OID,
 710                                                                                                    constval,
 711                                                                                                    sslot.values[i])))
 712                                         nmatch++;
 713                         }
 714                         result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
 715                 }
 716                 else
 717                         result = -1;
 718                 free_attstatsslot(&sslot);
 719         }
 720         else
 721         {
 722                 *hist_size = 0;
 723                 result = -1;
 724         }
 725
 726         return result;
 727 }
 728
 729 /*
 730  *      ineq_histogram_selectivity      - Examine the histogram for scalarineqsel
 731  *
 732  * Determine the fraction of the variable's histogram population that
 733  * satisfies the inequality condition, ie, VAR < CONST or VAR > CONST.
 734  *
      * root is only handed on to get_actual_variable_range().  vardata supplies
      * the statistics tuple and the variable's type.  opproc is the comparison
      * function; it is always invoked as opproc(histogram value, constval), and
      * its boolean result is inverted when isgt is true.  constval/consttype
      * describe the comparison constant and are passed to convert_to_scalar()
      * for interpolation within a histogram bin.
      *
 735  * Returns -1 if there is no usable histogram (valid results will always be
      * >= 0).  That covers a missing stats tuple, a failed
      * statistic_proc_security_check(), a missing histogram slot, and a
      * histogram with fewer than two entries.
 736  *
 737  * Note that the result disregards both the most-common-values (if any) and
 738  * null entries.  The caller is expected to combine this result with
 739  * statistics for those portions of the column population.
 740  */
 741 static double
 742 ineq_histogram_selectivity(PlannerInfo *root,
 743                                                    VariableStatData *vardata,
 744                                                    FmgrInfo *opproc, bool isgt,
 745                                                    Datum constval, Oid consttype)
 746 {
 747         double          hist_selec;
 748         AttStatsSlot sslot;
 749
 750         hist_selec = -1.0;             /* returned as-is if no usable histogram */
 751
 752         /*
 753          * Someday, ANALYZE might store more than one histogram per rel/att,
 754          * corresponding to more than one possible sort ordering defined for the
 755          * column type.  However, to make that work we will need to figure out
 756          * which staop to search for --- it's not necessarily the one we have at
 757          * hand!  (For example, we might have a '<=' operator rather than the '<'
 758          * operator that will appear in staop.)  For now, assume that whatever
 759          * appears in pg_statistic is sorted the same way our operator sorts, or
 760          * the reverse way if isgt is TRUE.
 761          */
 762         if (HeapTupleIsValid(vardata->statsTuple) &&
 763                 statistic_proc_security_check(vardata, opproc->fn_oid) &&
 764                 get_attstatsslot(&sslot, vardata->statsTuple,
 765                                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
 766                                                  ATTSTATSSLOT_VALUES))
 767         {
                     /*
                      * The computation below divides by (nvalues - 1), so a histogram
                      * with fewer than two entries is skipped and hist_selec stays -1.
                      */
 768                 if (sslot.nvalues > 1)
 769                 {
 770                         /*
 771                          * Use binary search to find proper location, ie, the first slot
 772                          * at which the comparison fails.  (If the given operator isn't
 773                          * actually sort-compatible with the histogram, you'll get garbage
 774                          * results ... but probably not any more garbage-y than you would
 775                          * from the old linear search.)
 776                          *
 777                          * If the binary search accesses the first or last histogram
 778                          * entry, we try to replace that endpoint with the true column min
 779                          * or max as found by get_actual_variable_range().  This
 780                          * ameliorates misestimates when the min or max is moving as a
 781                          * result of changes since the last ANALYZE.  Note that this could
 782                          * result in effectively including MCVs into the histogram that
 783                          * weren't there before, but we don't try to correct for that.
 784                          */
 785                         double          histfrac;
 786                         int                     lobound = 0;    /* first possible slot to search */
 787                         int                     hibound = sslot.nvalues;                /* last+1 slot to search */
 788                         bool            have_end = false;
 789
 790                         /*
 791                          * If there are only two histogram entries, we'll want up-to-date
 792                          * values for both.  (If there are more than two, we need at most
 793                          * one of them to be updated, so we deal with that within the
 794                          * loop.)
 795                          */
 796                         if (sslot.nvalues == 2)
 797                                 have_end = get_actual_variable_range(root,
 798                                                                                                          vardata,
 799                                                                                                          sslot.staop,
 800                                                                                                          &sslot.values[0],
 801                                                                                                          &sslot.values[1]);
 802
 803                         while (lobound < hibound)
 804                         {
 805                                 int                     probe = (lobound + hibound) / 2;
 806                                 bool            ltcmp;
 807
 808                                 /*
 809                                  * If we find ourselves about to compare to the first or last
 810                                  * histogram entry, first try to replace it with the actual
 811                                  * current min or max (unless we already did so above).
                                      * The replacement is written into sslot.values[] itself, so
                                      * it must happen before the comparison just below.
 812                                  */
 813                                 if (probe == 0 && sslot.nvalues > 2)
 814                                         have_end = get_actual_variable_range(root,
 815                                                                                                                  vardata,
 816                                                                                                                  sslot.staop,
 817                                                                                                                  &sslot.values[0],
 818                                                                                                                  NULL);
 819                                 else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
 820                                         have_end = get_actual_variable_range(root,
 821                                                                                                                  vardata,
 822                                                                                                                  sslot.staop,
 823                                                                                                                  NULL,
 824                                                                                                            &sslot.values[probe]);
 825
 826                                 ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
 827                                                                                                            DEFAULT_COLLATION_OID,
 828                                                                                                            sslot.values[probe],
 829                                                                                                            constval));
                                     /*
                                      * For a ">" style operator, invert the result so the same
                                      * search steps below serve both directions.
                                      */
 830                                 if (isgt)
 831                                         ltcmp = !ltcmp;
 832                                 if (ltcmp)
 833                                         lobound = probe + 1;
 834                                 else
 835                                         hibound = probe;
 836                         }
 837
                             /*
                              * The loop exits with lobound == hibound.  Assuming the operator
                              * is sort-compatible with the histogram, lobound is the index of
                              * the first entry for which the (possibly inverted) comparison
                              * failed.
                              */
 838                         if (lobound <= 0)
 839                         {
 840                                 /* Constant is below lower histogram boundary. */
 841                                 histfrac = 0.0;
 842                         }
 843                         else if (lobound >= sslot.nvalues)
 844                         {
 845                                 /* Constant is above upper histogram boundary. */
 846                                 histfrac = 1.0;
 847                         }
 848                         else
 849                         {
 850                                 int                     i = lobound;
 851                                 double          val,
 852                                                         high,
 853                                                         low;
 854                                 double          binfrac;
 855
 856                                 /*
 857                                  * We have values[i-1] <= constant <= values[i].
 858                                  *
 859                                  * Convert the constant and the two nearest bin boundary
 860                                  * values to a uniform comparison scale, and do a linear
 861                                  * interpolation within this bin.
 862                                  */
 863                                 if (convert_to_scalar(constval, consttype, &val,
 864                                                                           sslot.values[i - 1], sslot.values[i],
 865                                                                           vardata->vartype,
 866                                                                           &low, &high))
 867                                 {
 868                                         if (high <= low)
 869                                         {
 870                                                 /* cope if bin boundaries appear identical */
 871                                                 binfrac = 0.5;
 872                                         }
 873                                         else if (val <= low)
 874                                                 binfrac = 0.0;
 875                                         else if (val >= high)
 876                                                 binfrac = 1.0;
 877                                         else
 878                                         {
 879                                                 binfrac = (val - low) / (high - low);
 880
 881                                                 /*
 882                                                  * Watch out for the possibility that we got a NaN or
 883                                                  * Infinity from the division.  This can happen
 884                                                  * despite the previous checks, if for example "low"
 885                                                  * is -Infinity.
 886                                                  */
 887                                                 if (isnan(binfrac) ||
 888                                                         binfrac < 0.0 || binfrac > 1.0)
 889                                                         binfrac = 0.5;
 890                                         }
 891                                 }
 892                                 else
 893                                 {
 894                                         /*
 895                                          * Ideally we'd produce an error here, on the grounds that
 896                                          * the given operator shouldn't have scalarXXsel
 897                                          * registered as its selectivity func unless we can deal
 898                                          * with its operand types.  But currently, all manner of
 899                                          * stuff is invoking scalarXXsel, so give a default
 900                                          * estimate until that can be fixed.
 901                                          */
 902                                         binfrac = 0.5;
 903                                 }
 904
 905                                 /*
 906                                  * Now, compute the overall selectivity across the values
 907                                  * represented by the histogram.  We have i-1 full bins and
 908                                  * binfrac partial bin below the constant.
                                      * There are nvalues - 1 bins in total, hence the divisor.
 909                                  */
 910                                 histfrac = (double) (i - 1) + binfrac;
 911                                 histfrac /= (double) (sslot.nvalues - 1);
 912                         }
 913
 914                         /*
 915                          * Now histfrac = fraction of histogram entries below the
 916                          * constant.
 917                          *
 918                          * Account for "<" vs ">"
 919                          */
 920                         hist_selec = isgt ? (1.0 - histfrac) : histfrac;
 921
 922                         /*
 923                          * The histogram boundaries are only approximate to begin with,
 924                          * and may well be out of date anyway.  Therefore, don't believe
 925                          * extremely small or large selectivity estimates --- unless we
 926                          * got actual current endpoint values from the table.
                              *
                              * NOTE(review): CLAMP_PROBABILITY is defined outside this
                              * section; presumably it confines the value to [0, 1] ---
                              * confirm.  Without fresh endpoints the estimate is kept
                              * within [0.0001, 0.9999] instead.
 927                          */
 928                         if (have_end)
 929                                 CLAMP_PROBABILITY(hist_selec);
 930                         else
 931                         {
 932                                 if (hist_selec < 0.0001)
 933                                         hist_selec = 0.0001;
 934                                 else if (hist_selec > 0.9999)
 935                                         hist_selec = 0.9999;
 936                         }
 937                 }
 938
 939                 free_attstatsslot(&sslot);
 940         }
 941
 942         return hist_selec;
 943 }
 944
 945 /*
 946  *              scalarltsel             - Selectivity of "<" (also "<=") for scalars.
 947  */
 948 Datum
 949 scalarltsel(PG_FUNCTION_ARGS)
 950 {
 951         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 952         Oid                     operator = PG_GETARG_OID(1);
 953         List       *args = (List *) PG_GETARG_POINTER(2);
 954         int                     varRelid = PG_GETARG_INT32(3);
 955         VariableStatData vardata;
 956         Node       *other;
 957         bool            varonleft;
 958         Datum           constval;
 959         Oid                     consttype;
 960         bool            isgt;
 961         double          selec;
 962
 963         /*
 964          * If expression is not variable op something or something op variable,
 965          * then punt and return a default estimate.
 966          */
 967         if (!get_restriction_variable(root, args, varRelid,
 968                                                                   &vardata, &other, &varonleft))
 969                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 970
 971         /*
 972          * Can't do anything useful if the something is not a constant, either.
 973          */
 974         if (!IsA(other, Const))
 975         {
 976                 ReleaseVariableStats(vardata);
 977                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 978         }
 979
 980         /*
 981          * If the constant is NULL, assume operator is strict and return zero, ie,
 982          * operator will never return TRUE.
 983          */
 984         if (((Const *) other)->constisnull)
 985         {
 986                 ReleaseVariableStats(vardata);
 987                 PG_RETURN_FLOAT8(0.0);
 988         }
 989         constval = ((Const *) other)->constvalue;
 990         consttype = ((Const *) other)->consttype;
 991
 992         /*
 993          * Force the var to be on the left to simplify logic in scalarineqsel.
 994          */
 995         if (varonleft)
 996         {
 997                 /* we have var < other */
 998                 isgt = false;
 999         }
1000         else
1001         {
1002                 /* we have other < var, commute to make var > other */
1003                 operator = get_commutator(operator);
1004                 if (!operator)
1005                 {
1006                         /* Use default selectivity (should we raise an error instead?) */
1007                         ReleaseVariableStats(vardata);
1008                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1009                 }
1010                 isgt = true;
1011         }
1012
1013         selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
1014
1015         ReleaseVariableStats(vardata);
1016
1017         PG_RETURN_FLOAT8((float8) selec);
1018 }
1019
1020 /*
1021  *              scalargtsel             - Selectivity of ">" (also ">=") for integers.
1022  */
1023 Datum
1024 scalargtsel(PG_FUNCTION_ARGS)
1025 {
1026         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1027         Oid                     operator = PG_GETARG_OID(1);
1028         List       *args = (List *) PG_GETARG_POINTER(2);
1029         int                     varRelid = PG_GETARG_INT32(3);
1030         VariableStatData vardata;
1031         Node       *other;
1032         bool            varonleft;
1033         Datum           constval;
1034         Oid                     consttype;
1035         bool            isgt;
1036         double          selec;
1037
1038         /*
1039          * If expression is not variable op something or something op variable,
1040          * then punt and return a default estimate.
1041          */
1042         if (!get_restriction_variable(root, args, varRelid,
1043                                                                   &vardata, &other, &varonleft))
1044                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1045
1046         /*
1047          * Can't do anything useful if the something is not a constant, either.
1048          */
1049         if (!IsA(other, Const))
1050         {
1051                 ReleaseVariableStats(vardata);
1052                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1053         }
1054
1055         /*
1056          * If the constant is NULL, assume operator is strict and return zero, ie,
1057          * operator will never return TRUE.
1058          */
1059         if (((Const *) other)->constisnull)
1060         {
1061                 ReleaseVariableStats(vardata);
1062                 PG_RETURN_FLOAT8(0.0);
1063         }
1064         constval = ((Const *) other)->constvalue;
1065         consttype = ((Const *) other)->consttype;
1066
1067         /*
1068          * Force the var to be on the left to simplify logic in scalarineqsel.
1069          */
1070         if (varonleft)
1071         {
1072                 /* we have var > other */
1073                 isgt = true;
1074         }
1075         else
1076         {
1077                 /* we have other > var, commute to make var < other */
1078                 operator = get_commutator(operator);
1079                 if (!operator)
1080                 {
1081                         /* Use default selectivity (should we raise an error instead?) */
1082                         ReleaseVariableStats(vardata);
1083                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1084                 }
1085                 isgt = false;
1086         }
1087
1088         selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
1089
1090         ReleaseVariableStats(vardata);
1091
1092         PG_RETURN_FLOAT8((float8) selec);
1093 }
1094
1095 /*
1096  * patternsel                   - Generic code for pattern-match selectivity.
1097  */
1098 static double
1099 patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
1100 {
1101         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1102         Oid                     operator = PG_GETARG_OID(1);
1103         List       *args = (List *) PG_GETARG_POINTER(2);
1104         int                     varRelid = PG_GETARG_INT32(3);
1105         Oid                     collation = PG_GET_COLLATION();
1106         VariableStatData vardata;
1107         Node       *other;
1108         bool            varonleft;
1109         Datum           constval;
1110         Oid                     consttype;
1111         Oid                     vartype;
1112         Oid                     opfamily;
1113         Pattern_Prefix_Status pstatus;
1114         Const      *patt;
1115         Const      *prefix = NULL;
1116         Selectivity rest_selec = 0;
1117         double          result;
1118
1119         /*
1120          * If this is for a NOT LIKE or similar operator, get the corresponding
1121          * positive-match operator and work with that.  Set result to the correct
1122          * default estimate, too.
1123          */
1124         if (negate)
1125         {
1126                 operator = get_negator(operator);
1127                 if (!OidIsValid(operator))
1128                         elog(ERROR, "patternsel called for operator without a negator");
1129                 result = 1.0 - DEFAULT_MATCH_SEL;
1130         }
1131         else
1132         {
1133                 result = DEFAULT_MATCH_SEL;
1134         }
1135
1136         /*
1137          * If expression is not variable op constant, then punt and return a
1138          * default estimate.
1139          */
1140         if (!get_restriction_variable(root, args, varRelid,
1141                                                                   &vardata, &other, &varonleft))
1142                 return result;
1143         if (!varonleft || !IsA(other, Const))
1144         {
1145                 ReleaseVariableStats(vardata);
1146                 return result;
1147         }
1148
1149         /*
1150          * If the constant is NULL, assume operator is strict and return zero, ie,
1151          * operator will never return TRUE.  (It's zero even for a negator op.)
1152          */
1153         if (((Const *) other)->constisnull)
1154         {
1155                 ReleaseVariableStats(vardata);
1156                 return 0.0;
1157         }
1158         constval = ((Const *) other)->constvalue;
1159         consttype = ((Const *) other)->consttype;
1160
1161         /*
1162          * The right-hand const is type text or bytea for all supported operators.
1163          * We do not expect to see binary-compatible types here, since
1164          * const-folding should have relabeled the const to exactly match the
1165          * operator's declared type.
1166          */
1167         if (consttype != TEXTOID && consttype != BYTEAOID)
1168         {
1169                 ReleaseVariableStats(vardata);
1170                 return result;
1171         }
1172
1173         /*
1174          * Similarly, the exposed type of the left-hand side should be one of
1175          * those we know.  (Do not look at vardata.atttype, which might be
1176          * something binary-compatible but different.)  We can use it to choose
1177          * the index opfamily from which we must draw the comparison operators.
1178          *
1179          * NOTE: It would be more correct to use the PATTERN opfamilies than the
1180          * simple ones, but at the moment ANALYZE will not generate statistics for
1181          * the PATTERN operators.  But our results are so approximate anyway that
1182          * it probably hardly matters.
1183          */
1184         vartype = vardata.vartype;
1185
1186         switch (vartype)
1187         {
1188                 case TEXTOID:
1189                         opfamily = TEXT_BTREE_FAM_OID;
1190                         break;
1191                 case BPCHAROID:
1192                         opfamily = BPCHAR_BTREE_FAM_OID;
1193                         break;
1194                 case NAMEOID:
1195                         opfamily = NAME_BTREE_FAM_OID;
1196                         break;
1197                 case BYTEAOID:
1198                         opfamily = BYTEA_BTREE_FAM_OID;
1199                         break;
1200                 default:
1201                         ReleaseVariableStats(vardata);
1202                         return result;
1203         }
1204
1205         /*
1206          * Pull out any fixed prefix implied by the pattern, and estimate the
1207          * fractional selectivity of the remainder of the pattern.  Unlike many of
1208          * the other functions in this file, we use the pattern operator's actual
1209          * collation for this step.  This is not because we expect the collation
1210          * to make a big difference in the selectivity estimate (it seldom would),
1211          * but because we want to be sure we cache compiled regexps under the
1212          * right cache key, so that they can be re-used at runtime.
1213          */
1214         patt = (Const *) other;
1215         pstatus = pattern_fixed_prefix(patt, ptype, collation,
1216                                                                    &prefix, &rest_selec);
1217
1218         /*
1219          * If necessary, coerce the prefix constant to the right type.
1220          */
1221         if (prefix && prefix->consttype != vartype)
1222         {
1223                 char       *prefixstr;
1224
1225                 switch (prefix->consttype)
1226                 {
1227                         case TEXTOID:
1228                                 prefixstr = TextDatumGetCString(prefix->constvalue);
1229                                 break;
1230                         case BYTEAOID:
1231                                 prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
1232                                                                                                                 prefix->constvalue));
1233                                 break;
1234                         default:
1235                                 elog(ERROR, "unrecognized consttype: %u",
1236                                          prefix->consttype);
1237                                 ReleaseVariableStats(vardata);
1238                                 return result;
1239                 }
1240                 prefix = string_to_const(prefixstr, vartype);
1241                 pfree(prefixstr);
1242         }
1243
1244         if (pstatus == Pattern_Prefix_Exact)
1245         {
1246                 /*
1247                  * Pattern specifies an exact match, so pretend operator is '='
1248                  */
1249                 Oid                     eqopr = get_opfamily_member(opfamily, vartype, vartype,
1250                                                                                                 BTEqualStrategyNumber);
1251
1252                 if (eqopr == InvalidOid)
1253                         elog(ERROR, "no = operator for opfamily %u", opfamily);
1254                 result = var_eq_const(&vardata, eqopr, prefix->constvalue,
1255                                                           false, true);
1256         }
1257         else
1258         {
1259                 /*
1260                  * Not exact-match pattern.  If we have a sufficiently large
1261                  * histogram, estimate selectivity for the histogram part of the
1262                  * population by counting matches in the histogram.  If not, estimate
1263                  * selectivity of the fixed prefix and remainder of pattern
1264                  * separately, then combine the two to get an estimate of the
1265                  * selectivity for the part of the column population represented by
1266                  * the histogram.  (For small histograms, we combine these
1267                  * approaches.)
1268                  *
1269                  * We then add up data for any most-common-values values; these are
1270                  * not in the histogram population, and we can get exact answers for
1271                  * them by applying the pattern operator, so there's no reason to
1272                  * approximate.  (If the MCVs cover a significant part of the total
1273                  * population, this gives us a big leg up in accuracy.)
1274                  */
1275                 Selectivity selec;
1276                 int                     hist_size;
1277                 FmgrInfo        opproc;
1278                 double          nullfrac,
1279                                         mcv_selec,
1280                                         sumcommon;
1281
1282                 /* Try to use the histogram entries to get selectivity */
1283                 fmgr_info(get_opcode(operator), &opproc);
1284
1285                 selec = histogram_selectivity(&vardata, &opproc, constval, true,
1286                                                                           10, 1, &hist_size);
1287
1288                 /* If not at least 100 entries, use the heuristic method */
1289                 if (hist_size < 100)
1290                 {
1291                         Selectivity heursel;
1292                         Selectivity prefixsel;
1293
1294                         if (pstatus == Pattern_Prefix_Partial)
1295                                 prefixsel = prefix_selectivity(root, &vardata, vartype,
1296                                                                                            opfamily, prefix);
1297                         else
1298                                 prefixsel = 1.0;
1299                         heursel = prefixsel * rest_selec;
1300
1301                         if (selec < 0)          /* fewer than 10 histogram entries? */
1302                                 selec = heursel;
1303                         else
1304                         {
1305                                 /*
1306                                  * For histogram sizes from 10 to 100, we combine the
1307                                  * histogram and heuristic selectivities, putting increasingly
1308                                  * more trust in the histogram for larger sizes.
1309                                  */
1310                                 double          hist_weight = hist_size / 100.0;
1311
1312                                 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
1313                         }
1314                 }
1315
1316                 /* In any case, don't believe extremely small or large estimates. */
1317                 if (selec < 0.0001)
1318                         selec = 0.0001;
1319                 else if (selec > 0.9999)
1320                         selec = 0.9999;
1321
1322                 /*
1323                  * If we have most-common-values info, add up the fractions of the MCV
1324                  * entries that satisfy MCV OP PATTERN.  These fractions contribute
1325                  * directly to the result selectivity.  Also add up the total fraction
1326                  * represented by MCV entries.
1327                  */
1328                 mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
1329                                                                         &sumcommon);
1330
1331                 if (HeapTupleIsValid(vardata.statsTuple))
1332                         nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
1333                 else
1334                         nullfrac = 0.0;
1335
1336                 /*
1337                  * Now merge the results from the MCV and histogram calculations,
1338                  * realizing that the histogram covers only the non-null values that
1339                  * are not listed in MCV.
1340                  */
1341                 selec *= 1.0 - nullfrac - sumcommon;
1342                 selec += mcv_selec;
1343
1344                 /* result should be in range, but make sure... */
1345                 CLAMP_PROBABILITY(selec);
1346                 result = selec;
1347         }
1348
1349         if (prefix)
1350         {
1351                 pfree(DatumGetPointer(prefix->constvalue));
1352                 pfree(prefix);
1353         }
1354
1355         ReleaseVariableStats(vardata);
1356
1357         return negate ? (1.0 - result) : result;
1358 }
1359
1360 /*
1361  *              regexeqsel              - Selectivity of regular-expression pattern match.
1362  */
1363 Datum
1364 regexeqsel(PG_FUNCTION_ARGS)
1365 {
1366         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
1367 }
1368
1369 /*
1370  *              icregexeqsel    - Selectivity of case-insensitive regex match.
1371  */
1372 Datum
1373 icregexeqsel(PG_FUNCTION_ARGS)
1374 {
1375         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
1376 }
1377
1378 /*
1379  *              likesel                 - Selectivity of LIKE pattern match.
1380  */
1381 Datum
1382 likesel(PG_FUNCTION_ARGS)
1383 {
1384         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
1385 }
1386
1387 /*
1388  *              iclikesel                       - Selectivity of ILIKE pattern match.
1389  */
1390 Datum
1391 iclikesel(PG_FUNCTION_ARGS)
1392 {
1393         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
1394 }
1395
1396 /*
1397  *              regexnesel              - Selectivity of regular-expression pattern non-match.
1398  */
1399 Datum
1400 regexnesel(PG_FUNCTION_ARGS)
1401 {
1402         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
1403 }
1404
1405 /*
1406  *              icregexnesel    - Selectivity of case-insensitive regex non-match.
1407  */
1408 Datum
1409 icregexnesel(PG_FUNCTION_ARGS)
1410 {
1411         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
1412 }
1413
1414 /*
1415  *              nlikesel                - Selectivity of LIKE pattern non-match.
1416  */
1417 Datum
1418 nlikesel(PG_FUNCTION_ARGS)
1419 {
1420         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
1421 }
1422
1423 /*
1424  *              icnlikesel              - Selectivity of ILIKE pattern non-match.
1425  */
1426 Datum
1427 icnlikesel(PG_FUNCTION_ARGS)
1428 {
1429         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
1430 }
1431
1432 /*
1433  *              boolvarsel              - Selectivity of Boolean variable.
1434  *
1435  * This can actually be called on any boolean-valued expression.  If it
1436  * involves only Vars of the specified relation, and if there are statistics
1437  * about the Var or expression (the latter is possible if it's indexed) then
1438  * we'll produce a real estimate; otherwise it's just a default.
1439  */
1440 Selectivity
1441 boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
1442 {
1443         VariableStatData vardata;
1444         double          selec;
1445
1446         examine_variable(root, arg, varRelid, &vardata);
1447         if (HeapTupleIsValid(vardata.statsTuple))
1448         {
1449                 /*
1450                  * A boolean variable V is equivalent to the clause V = 't', so we
1451                  * compute the selectivity as if that is what we have.
1452                  */
1453                 selec = var_eq_const(&vardata, BooleanEqualOperator,
1454                                                          BoolGetDatum(true), false, true);
1455         }
1456         else if (is_funcclause(arg))
1457         {
1458                 /*
1459                  * If we have no stats and it's a function call, estimate 0.3333333.
1460                  * This seems a pretty unprincipled choice, but Postgres has been
1461                  * using that estimate for function calls since 1992.  The hoariness
1462                  * of this behavior suggests that we should not be in too much hurry
1463                  * to use another value.
1464                  */
1465                 selec = 0.3333333;
1466         }
1467         else
1468         {
1469                 /* Otherwise, the default estimate is 0.5 */
1470                 selec = 0.5;
1471         }
1472         ReleaseVariableStats(vardata);
1473         return selec;
1474 }
1475
1476 /*
1477  *              booltestsel             - Selectivity of BooleanTest Node.
1478  */
1479 Selectivity
1480 booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
1481                         int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1482 {
1483         VariableStatData vardata;
1484         double          selec;
1485
1486         examine_variable(root, arg, varRelid, &vardata);
1487
1488         if (HeapTupleIsValid(vardata.statsTuple))
1489         {
1490                 Form_pg_statistic stats;
1491                 double          freq_null;
1492                 AttStatsSlot sslot;
1493
1494                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1495                 freq_null = stats->stanullfrac;
1496
1497                 if (get_attstatsslot(&sslot, vardata.statsTuple,
1498                                                          STATISTIC_KIND_MCV, InvalidOid,
1499                                                          ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
1500                         && sslot.nnumbers > 0)
1501                 {
1502                         double          freq_true;
1503                         double          freq_false;
1504
1505                         /*
1506                          * Get first MCV frequency and derive frequency for true.
1507                          */
1508                         if (DatumGetBool(sslot.values[0]))
1509                                 freq_true = sslot.numbers[0];
1510                         else
1511                                 freq_true = 1.0 - sslot.numbers[0] - freq_null;
1512
1513                         /*
1514                          * Next derive frequency for false. Then use these as appropriate
1515                          * to derive frequency for each case.
1516                          */
1517                         freq_false = 1.0 - freq_true - freq_null;
1518
1519                         switch (booltesttype)
1520                         {
1521                                 case IS_UNKNOWN:
1522                                         /* select only NULL values */
1523                                         selec = freq_null;
1524                                         break;
1525                                 case IS_NOT_UNKNOWN:
1526                                         /* select non-NULL values */
1527                                         selec = 1.0 - freq_null;
1528                                         break;
1529                                 case IS_TRUE:
1530                                         /* select only TRUE values */
1531                                         selec = freq_true;
1532                                         break;
1533                                 case IS_NOT_TRUE:
1534                                         /* select non-TRUE values */
1535                                         selec = 1.0 - freq_true;
1536                                         break;
1537                                 case IS_FALSE:
1538                                         /* select only FALSE values */
1539                                         selec = freq_false;
1540                                         break;
1541                                 case IS_NOT_FALSE:
1542                                         /* select non-FALSE values */
1543                                         selec = 1.0 - freq_false;
1544                                         break;
1545                                 default:
1546                                         elog(ERROR, "unrecognized booltesttype: %d",
1547                                                  (int) booltesttype);
1548                                         selec = 0.0;    /* Keep compiler quiet */
1549                                         break;
1550                         }
1551
1552                         free_attstatsslot(&sslot);
1553                 }
1554                 else
1555                 {
1556                         /*
1557                          * No most-common-value info available. Still have null fraction
1558                          * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1559                          * for null fraction and assume a 50-50 split of TRUE and FALSE.
1560                          */
1561                         switch (booltesttype)
1562                         {
1563                                 case IS_UNKNOWN:
1564                                         /* select only NULL values */
1565                                         selec = freq_null;
1566                                         break;
1567                                 case IS_NOT_UNKNOWN:
1568                                         /* select non-NULL values */
1569                                         selec = 1.0 - freq_null;
1570                                         break;
1571                                 case IS_TRUE:
1572                                 case IS_FALSE:
1573                                         /* Assume we select half of the non-NULL values */
1574                                         selec = (1.0 - freq_null) / 2.0;
1575                                         break;
1576                                 case IS_NOT_TRUE:
1577                                 case IS_NOT_FALSE:
1578                                         /* Assume we select NULLs plus half of the non-NULLs */
1579                                         /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1580                                         selec = (freq_null + 1.0) / 2.0;
1581                                         break;
1582                                 default:
1583                                         elog(ERROR, "unrecognized booltesttype: %d",
1584                                                  (int) booltesttype);
1585                                         selec = 0.0;    /* Keep compiler quiet */
1586                                         break;
1587                         }
1588                 }
1589         }
1590         else
1591         {
1592                 /*
1593                  * If we can't get variable statistics for the argument, perhaps
1594                  * clause_selectivity can do something with it.  We ignore the
1595                  * possibility of a NULL value when using clause_selectivity, and just
1596                  * assume the value is either TRUE or FALSE.
1597                  */
1598                 switch (booltesttype)
1599                 {
1600                         case IS_UNKNOWN:
1601                                 selec = DEFAULT_UNK_SEL;
1602                                 break;
1603                         case IS_NOT_UNKNOWN:
1604                                 selec = DEFAULT_NOT_UNK_SEL;
1605                                 break;
1606                         case IS_TRUE:
1607                         case IS_NOT_FALSE:
1608                                 selec = (double) clause_selectivity(root, arg,
1609                                                                                                         varRelid,
1610                                                                                                         jointype, sjinfo);
1611                                 break;
1612                         case IS_FALSE:
1613                         case IS_NOT_TRUE:
1614                                 selec = 1.0 - (double) clause_selectivity(root, arg,
1615                                                                                                                   varRelid,
1616                                                                                                                   jointype, sjinfo);
1617                                 break;
1618                         default:
1619                                 elog(ERROR, "unrecognized booltesttype: %d",
1620                                          (int) booltesttype);
1621                                 selec = 0.0;    /* Keep compiler quiet */
1622                                 break;
1623                 }
1624         }
1625
1626         ReleaseVariableStats(vardata);
1627
1628         /* result should be in range, but make sure... */
1629         CLAMP_PROBABILITY(selec);
1630
1631         return (Selectivity) selec;
1632 }
1633
1634 /*
1635  *              nulltestsel             - Selectivity of NullTest Node.
1636  */
1637 Selectivity
1638 nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
1639                         int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1640 {
1641         VariableStatData vardata;
1642         double          selec;
1643
1644         examine_variable(root, arg, varRelid, &vardata);
1645
1646         if (HeapTupleIsValid(vardata.statsTuple))
1647         {
1648                 Form_pg_statistic stats;
1649                 double          freq_null;
1650
1651                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1652                 freq_null = stats->stanullfrac;
1653
1654                 switch (nulltesttype)
1655                 {
1656                         case IS_NULL:
1657
1658                                 /*
1659                                  * Use freq_null directly.
1660                                  */
1661                                 selec = freq_null;
1662                                 break;
1663                         case IS_NOT_NULL:
1664
1665                                 /*
1666                                  * Select not unknown (not null) values. Calculate from
1667                                  * freq_null.
1668                                  */
1669                                 selec = 1.0 - freq_null;
1670                                 break;
1671                         default:
1672                                 elog(ERROR, "unrecognized nulltesttype: %d",
1673                                          (int) nulltesttype);
1674                                 return (Selectivity) 0; /* keep compiler quiet */
1675                 }
1676         }
1677         else
1678         {
1679                 /*
1680                  * No ANALYZE stats available, so make a guess
1681                  */
1682                 switch (nulltesttype)
1683                 {
1684                         case IS_NULL:
1685                                 selec = DEFAULT_UNK_SEL;
1686                                 break;
1687                         case IS_NOT_NULL:
1688                                 selec = DEFAULT_NOT_UNK_SEL;
1689                                 break;
1690                         default:
1691                                 elog(ERROR, "unrecognized nulltesttype: %d",
1692                                          (int) nulltesttype);
1693                                 return (Selectivity) 0; /* keep compiler quiet */
1694                 }
1695         }
1696
1697         ReleaseVariableStats(vardata);
1698
1699         /* result should be in range, but make sure... */
1700         CLAMP_PROBABILITY(selec);
1701
1702         return (Selectivity) selec;
1703 }
1704
1705 /*
1706  * strip_array_coercion - strip binary-compatible relabeling from an array expr
1707  *
1708  * For array values, the parser normally generates ArrayCoerceExpr conversions,
1709  * but it seems possible that RelabelType might show up.  Also, the planner
1710  * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1711  * so we need to be ready to deal with more than one level.
1712  */
1713 static Node *
1714 strip_array_coercion(Node *node)
1715 {
1716         for (;;)
1717         {
1718                 if (node && IsA(node, ArrayCoerceExpr) &&
1719                         ((ArrayCoerceExpr *) node)->elemfuncid == InvalidOid)
1720                 {
1721                         node = (Node *) ((ArrayCoerceExpr *) node)->arg;
1722                 }
1723                 else if (node && IsA(node, RelabelType))
1724                 {
1725                         /* We don't really expect this case, but may as well cope */
1726                         node = (Node *) ((RelabelType *) node)->arg;
1727                 }
1728                 else
1729                         break;
1730         }
1731         return node;
1732 }
1733
1734 /*
1735  *              scalararraysel          - Selectivity of ScalarArrayOpExpr Node.
1736  */
1737 Selectivity
1738 scalararraysel(PlannerInfo *root,
1739                            ScalarArrayOpExpr *clause,
1740                            bool is_join_clause,
1741                            int varRelid,
1742                            JoinType jointype,
1743                            SpecialJoinInfo *sjinfo)
1744 {
1745         Oid                     operator = clause->opno;
1746         bool            useOr = clause->useOr;
1747         bool            isEquality = false;
1748         bool            isInequality = false;
1749         Node       *leftop;
1750         Node       *rightop;
1751         Oid                     nominal_element_type;
1752         Oid                     nominal_element_collation;
1753         TypeCacheEntry *typentry;
1754         RegProcedure oprsel;
1755         FmgrInfo        oprselproc;
1756         Selectivity s1;
1757         Selectivity s1disjoint;
1758
1759         /* First, deconstruct the expression */
1760         Assert(list_length(clause->args) == 2);
1761         leftop = (Node *) linitial(clause->args);
1762         rightop = (Node *) lsecond(clause->args);
1763
1764         /* aggressively reduce both sides to constants */
1765         leftop = estimate_expression_value(root, leftop);
1766         rightop = estimate_expression_value(root, rightop);
1767
1768         /* get nominal (after relabeling) element type of rightop */
1769         nominal_element_type = get_base_element_type(exprType(rightop));
1770         if (!OidIsValid(nominal_element_type))
1771                 return (Selectivity) 0.5;               /* probably shouldn't happen */
1772         /* get nominal collation, too, for generating constants */
1773         nominal_element_collation = exprCollation(rightop);
1774
1775         /* look through any binary-compatible relabeling of rightop */
1776         rightop = strip_array_coercion(rightop);
1777
1778         /*
1779          * Detect whether the operator is the default equality or inequality
1780          * operator of the array element type.
1781          */
1782         typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1783         if (OidIsValid(typentry->eq_opr))
1784         {
1785                 if (operator == typentry->eq_opr)
1786                         isEquality = true;
1787                 else if (get_negator(operator) == typentry->eq_opr)
1788                         isInequality = true;
1789         }
1790
1791         /*
1792          * If it is equality or inequality, we might be able to estimate this as a
1793          * form of array containment; for instance "const = ANY(column)" can be
1794          * treated as "ARRAY[const] <@ column".  scalararraysel_containment tries
1795          * that, and returns the selectivity estimate if successful, or -1 if not.
1796          */
1797         if ((isEquality || isInequality) && !is_join_clause)
1798         {
1799                 s1 = scalararraysel_containment(root, leftop, rightop,
1800                                                                                 nominal_element_type,
1801                                                                                 isEquality, useOr, varRelid);
1802                 if (s1 >= 0.0)
1803                         return s1;
1804         }
1805
1806         /*
1807          * Look up the underlying operator's selectivity estimator. Punt if it
1808          * hasn't got one.
1809          */
1810         if (is_join_clause)
1811                 oprsel = get_oprjoin(operator);
1812         else
1813                 oprsel = get_oprrest(operator);
1814         if (!oprsel)
1815                 return (Selectivity) 0.5;
1816         fmgr_info(oprsel, &oprselproc);
1817
1818         /*
1819          * In the array-containment check above, we must only believe that an
1820          * operator is equality or inequality if it is the default btree equality
1821          * operator (or its negator) for the element type, since those are the
1822          * operators that array containment will use.  But in what follows, we can
1823          * be a little laxer, and also believe that any operators using eqsel() or
1824          * neqsel() as selectivity estimator act like equality or inequality.
1825          */
1826         if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1827                 isEquality = true;
1828         else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1829                 isInequality = true;
1830
1831         /*
1832          * We consider three cases:
1833          *
1834          * 1. rightop is an Array constant: deconstruct the array, apply the
1835          * operator's selectivity function for each array element, and merge the
1836          * results in the same way that clausesel.c does for AND/OR combinations.
1837          *
1838          * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1839          * function for each element of the ARRAY[] construct, and merge.
1840          *
1841          * 3. otherwise, make a guess ...
1842          */
1843         if (rightop && IsA(rightop, Const))
1844         {
1845                 Datum           arraydatum = ((Const *) rightop)->constvalue;
1846                 bool            arrayisnull = ((Const *) rightop)->constisnull;
1847                 ArrayType  *arrayval;
1848                 int16           elmlen;
1849                 bool            elmbyval;
1850                 char            elmalign;
1851                 int                     num_elems;
1852                 Datum      *elem_values;
1853                 bool       *elem_nulls;
1854                 int                     i;
1855
1856                 if (arrayisnull)                /* qual can't succeed if null array */
1857                         return (Selectivity) 0.0;
1858                 arrayval = DatumGetArrayTypeP(arraydatum);
1859                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1860                                                          &elmlen, &elmbyval, &elmalign);
1861                 deconstruct_array(arrayval,
1862                                                   ARR_ELEMTYPE(arrayval),
1863                                                   elmlen, elmbyval, elmalign,
1864                                                   &elem_values, &elem_nulls, &num_elems);
1865
1866                 /*
1867                  * For generic operators, we assume the probability of success is
1868                  * independent for each array element.  But for "= ANY" or "<> ALL",
1869                  * if the array elements are distinct (which'd typically be the case)
1870                  * then the probabilities are disjoint, and we should just sum them.
1871                  *
1872                  * If we were being really tense we would try to confirm that the
1873                  * elements are all distinct, but that would be expensive and it
1874                  * doesn't seem to be worth the cycles; it would amount to penalizing
1875                  * well-written queries in favor of poorly-written ones.  However, we
1876                  * do protect ourselves a little bit by checking whether the
1877                  * disjointness assumption leads to an impossible (out of range)
1878                  * probability; if so, we fall back to the normal calculation.
1879                  */
1880                 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1881
1882                 for (i = 0; i < num_elems; i++)
1883                 {
1884                         List       *args;
1885                         Selectivity s2;
1886
1887                         args = list_make2(leftop,
1888                                                           makeConst(nominal_element_type,
1889                                                                                 -1,
1890                                                                                 nominal_element_collation,
1891                                                                                 elmlen,
1892                                                                                 elem_values[i],
1893                                                                                 elem_nulls[i],
1894                                                                                 elmbyval));
1895                         if (is_join_clause)
1896                                 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1897                                                                                                           clause->inputcollid,
1898                                                                                                           PointerGetDatum(root),
1899                                                                                                   ObjectIdGetDatum(operator),
1900                                                                                                           PointerGetDatum(args),
1901                                                                                                           Int16GetDatum(jointype),
1902                                                                                                    PointerGetDatum(sjinfo)));
1903                         else
1904                                 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1905                                                                                                           clause->inputcollid,
1906                                                                                                           PointerGetDatum(root),
1907                                                                                                   ObjectIdGetDatum(operator),
1908                                                                                                           PointerGetDatum(args),
1909                                                                                                    Int32GetDatum(varRelid)));
1910
1911                         if (useOr)
1912                         {
1913                                 s1 = s1 + s2 - s1 * s2;
1914                                 if (isEquality)
1915                                         s1disjoint += s2;
1916                         }
1917                         else
1918                         {
1919                                 s1 = s1 * s2;
1920                                 if (isInequality)
1921                                         s1disjoint += s2 - 1.0;
1922                         }
1923                 }
1924
1925                 /* accept disjoint-probability estimate if in range */
1926                 if ((useOr ? isEquality : isInequality) &&
1927                         s1disjoint >= 0.0 && s1disjoint <= 1.0)
1928                         s1 = s1disjoint;
1929         }
1930         else if (rightop && IsA(rightop, ArrayExpr) &&
1931                          !((ArrayExpr *) rightop)->multidims)
1932         {
1933                 ArrayExpr  *arrayexpr = (ArrayExpr *) rightop;
1934                 int16           elmlen;
1935                 bool            elmbyval;
1936                 ListCell   *l;
1937
1938                 get_typlenbyval(arrayexpr->element_typeid,
1939                                                 &elmlen, &elmbyval);
1940
1941                 /*
1942                  * We use the assumption of disjoint probabilities here too, although
1943                  * the odds of equal array elements are rather higher if the elements
1944                  * are not all constants (which they won't be, else constant folding
1945                  * would have reduced the ArrayExpr to a Const).  In this path it's
1946                  * critical to have the sanity check on the s1disjoint estimate.
1947                  */
1948                 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1949
1950                 foreach(l, arrayexpr->elements)
1951                 {
1952                         Node       *elem = (Node *) lfirst(l);
1953                         List       *args;
1954                         Selectivity s2;
1955
1956                         /*
1957                          * Theoretically, if elem isn't of nominal_element_type we should
1958                          * insert a RelabelType, but it seems unlikely that any operator
1959                          * estimation function would really care ...
1960                          */
1961                         args = list_make2(leftop, elem);
1962                         if (is_join_clause)
1963                                 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1964                                                                                                           clause->inputcollid,
1965                                                                                                           PointerGetDatum(root),
1966                                                                                                   ObjectIdGetDatum(operator),
1967                                                                                                           PointerGetDatum(args),
1968                                                                                                           Int16GetDatum(jointype),
1969                                                                                                    PointerGetDatum(sjinfo)));
1970                         else
1971                                 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1972                                                                                                           clause->inputcollid,
1973                                                                                                           PointerGetDatum(root),
1974                                                                                                   ObjectIdGetDatum(operator),
1975                                                                                                           PointerGetDatum(args),
1976                                                                                                    Int32GetDatum(varRelid)));
1977
1978                         if (useOr)
1979                         {
1980                                 s1 = s1 + s2 - s1 * s2;
1981                                 if (isEquality)
1982                                         s1disjoint += s2;
1983                         }
1984                         else
1985                         {
1986                                 s1 = s1 * s2;
1987                                 if (isInequality)
1988                                         s1disjoint += s2 - 1.0;
1989                         }
1990                 }
1991
1992                 /* accept disjoint-probability estimate if in range */
1993                 if ((useOr ? isEquality : isInequality) &&
1994                         s1disjoint >= 0.0 && s1disjoint <= 1.0)
1995                         s1 = s1disjoint;
1996         }
1997         else
1998         {
1999                 CaseTestExpr *dummyexpr;
2000                 List       *args;
2001                 Selectivity s2;
2002                 int                     i;
2003
2004                 /*
2005                  * We need a dummy rightop to pass to the operator selectivity
2006                  * routine.  It can be pretty much anything that doesn't look like a
2007                  * constant; CaseTestExpr is a convenient choice.
2008                  */
2009                 dummyexpr = makeNode(CaseTestExpr);
2010                 dummyexpr->typeId = nominal_element_type;
2011                 dummyexpr->typeMod = -1;
2012                 dummyexpr->collation = clause->inputcollid;
2013                 args = list_make2(leftop, dummyexpr);
2014                 if (is_join_clause)
2015                         s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2016                                                                                                   clause->inputcollid,
2017                                                                                                   PointerGetDatum(root),
2018                                                                                                   ObjectIdGetDatum(operator),
2019                                                                                                   PointerGetDatum(args),
2020                                                                                                   Int16GetDatum(jointype),
2021                                                                                                   PointerGetDatum(sjinfo)));
2022                 else
2023                         s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2024                                                                                                   clause->inputcollid,
2025                                                                                                   PointerGetDatum(root),
2026                                                                                                   ObjectIdGetDatum(operator),
2027                                                                                                   PointerGetDatum(args),
2028                                                                                                   Int32GetDatum(varRelid)));
2029                 s1 = useOr ? 0.0 : 1.0;
2030
2031                 /*
2032                  * Arbitrarily assume 10 elements in the eventual array value (see
2033                  * also estimate_array_length).  We don't risk an assumption of
2034                  * disjoint probabilities here.
2035                  */
2036                 for (i = 0; i < 10; i++)
2037                 {
2038                         if (useOr)
2039                                 s1 = s1 + s2 - s1 * s2;
2040                         else
2041                                 s1 = s1 * s2;
2042                 }
2043         }
2044
2045         /* result should be in range, but make sure... */
2046         CLAMP_PROBABILITY(s1);
2047
2048         return s1;
2049 }
2050
2051 /*
2052  * Estimate number of elements in the array yielded by an expression.
2053  *
2054  * It's important that this agree with scalararraysel.
2055  */
2056 int
2057 estimate_array_length(Node *arrayexpr)
2058 {
2059         /* look through any binary-compatible relabeling of arrayexpr */
2060         arrayexpr = strip_array_coercion(arrayexpr);
2061
2062         if (arrayexpr && IsA(arrayexpr, Const))
2063         {
2064                 Datum           arraydatum = ((Const *) arrayexpr)->constvalue;
2065                 bool            arrayisnull = ((Const *) arrayexpr)->constisnull;
2066                 ArrayType  *arrayval;
2067
2068                 if (arrayisnull)
2069                         return 0;
2070                 arrayval = DatumGetArrayTypeP(arraydatum);
2071                 return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2072         }
2073         else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2074                          !((ArrayExpr *) arrayexpr)->multidims)
2075         {
2076                 return list_length(((ArrayExpr *) arrayexpr)->elements);
2077         }
2078         else
2079         {
2080                 /* default guess --- see also scalararraysel */
2081                 return 10;
2082         }
2083 }
2084
2085 /*
2086  *              rowcomparesel           - Selectivity of RowCompareExpr Node.
2087  *
2088  * We estimate RowCompare selectivity by considering just the first (high
2089  * order) columns, which makes it equivalent to an ordinary OpExpr.  While
2090  * this estimate could be refined by considering additional columns, it
2091  * seems unlikely that we could do a lot better without multi-column
2092  * statistics.
2093  */
2094 Selectivity
2095 rowcomparesel(PlannerInfo *root,
2096                           RowCompareExpr *clause,
2097                           int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
2098 {
2099         Selectivity s1;
2100         Oid                     opno = linitial_oid(clause->opnos);
2101         Oid                     inputcollid = linitial_oid(clause->inputcollids);
2102         List       *opargs;
2103         bool            is_join_clause;
2104
2105         /* Build equivalent arg list for single operator */
2106         opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2107
2108         /*
2109          * Decide if it's a join clause.  This should match clausesel.c's
2110          * treat_as_join_clause(), except that we intentionally consider only the
2111          * leading columns and not the rest of the clause.
2112          */
2113         if (varRelid != 0)
2114         {
2115                 /*
2116                  * Caller is forcing restriction mode (eg, because we are examining an
2117                  * inner indexscan qual).
2118                  */
2119                 is_join_clause = false;
2120         }
2121         else if (sjinfo == NULL)
2122         {
2123                 /*
2124                  * It must be a restriction clause, since it's being evaluated at a
2125                  * scan node.
2126                  */
2127                 is_join_clause = false;
2128         }
2129         else
2130         {
2131                 /*
2132                  * Otherwise, it's a join if there's more than one relation used.
2133                  */
2134                 is_join_clause = (NumRelids((Node *) opargs) > 1);
2135         }
2136
2137         if (is_join_clause)
2138         {
2139                 /* Estimate selectivity for a join clause. */
2140                 s1 = join_selectivity(root, opno,
2141                                                           opargs,
2142                                                           inputcollid,
2143                                                           jointype,
2144                                                           sjinfo);
2145         }
2146         else
2147         {
2148                 /* Estimate selectivity for a restriction clause. */
2149                 s1 = restriction_selectivity(root, opno,
2150                                                                          opargs,
2151                                                                          inputcollid,
2152                                                                          varRelid);
2153         }
2154
2155         return s1;
2156 }
2157
2158 /*
2159  *              eqjoinsel               - Join selectivity of "="
2160  */
2161 Datum
2162 eqjoinsel(PG_FUNCTION_ARGS)
2163 {
2164         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2165         Oid                     operator = PG_GETARG_OID(1);
2166         List       *args = (List *) PG_GETARG_POINTER(2);
2167
2168 #ifdef NOT_USED
2169         JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
2170 #endif
2171         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2172         double          selec;
2173         VariableStatData vardata1;
2174         VariableStatData vardata2;
2175         bool            join_is_reversed;
2176         RelOptInfo *inner_rel;
2177
2178         get_join_variables(root, args, sjinfo,
2179                                            &vardata1, &vardata2, &join_is_reversed);
2180
2181         switch (sjinfo->jointype)
2182         {
2183                 case JOIN_INNER:
2184                 case JOIN_LEFT:
2185                 case JOIN_FULL:
2186                         selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2187                         break;
2188                 case JOIN_SEMI:
2189                 case JOIN_ANTI:
2190
2191                         /*
2192                          * Look up the join's inner relation.  min_righthand is sufficient
2193                          * information because neither SEMI nor ANTI joins permit any
2194                          * reassociation into or out of their RHS, so the righthand will
2195                          * always be exactly that set of rels.
2196                          */
2197                         inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2198
2199                         if (!join_is_reversed)
2200                                 selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2201                                                                            inner_rel);
2202                         else
2203                                 selec = eqjoinsel_semi(get_commutator(operator),
2204                                                                            &vardata2, &vardata1,
2205                                                                            inner_rel);
2206                         break;
2207                 default:
2208                         /* other values not expected here */
2209                         elog(ERROR, "unrecognized join type: %d",
2210                                  (int) sjinfo->jointype);
2211                         selec = 0;                      /* keep compiler quiet */
2212                         break;
2213         }
2214
2215         ReleaseVariableStats(vardata1);
2216         ReleaseVariableStats(vardata2);
2217
2218         CLAMP_PROBABILITY(selec);
2219
2220         PG_RETURN_FLOAT8((float8) selec);
2221 }
2222
2223 /*
2224  * eqjoinsel_inner --- eqjoinsel for normal inner join
2225  *
2226  * We also use this for LEFT/FULL outer joins; it's not presently clear
2227  * that it's worth trying to distinguish them here.
2228  */
2229 static double
2230 eqjoinsel_inner(Oid operator,
2231                                 VariableStatData *vardata1, VariableStatData *vardata2)
2232 {
2233         double          selec;
2234         double          nd1;
2235         double          nd2;
2236         bool            isdefault1;
2237         bool            isdefault2;
2238         Oid                     opfuncoid;
2239         Form_pg_statistic stats1 = NULL;
2240         Form_pg_statistic stats2 = NULL;
2241         bool            have_mcvs1 = false;
2242         bool            have_mcvs2 = false;
2243         AttStatsSlot sslot1;
2244         AttStatsSlot sslot2;
2245
2246         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2247         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2248
2249         opfuncoid = get_opcode(operator);
2250
2251         memset(&sslot1, 0, sizeof(sslot1));
2252         memset(&sslot2, 0, sizeof(sslot2));
2253
2254         if (HeapTupleIsValid(vardata1->statsTuple))
2255         {
2256                 /* note we allow use of nullfrac regardless of security check */
2257                 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2258                 if (statistic_proc_security_check(vardata1, opfuncoid))
2259                         have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
2260                                                                                   STATISTIC_KIND_MCV, InvalidOid,
2261                                                                  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2262         }
2263
2264         if (HeapTupleIsValid(vardata2->statsTuple))
2265         {
2266                 /* note we allow use of nullfrac regardless of security check */
2267                 stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
2268                 if (statistic_proc_security_check(vardata2, opfuncoid))
2269                         have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
2270                                                                                   STATISTIC_KIND_MCV, InvalidOid,
2271                                                                  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2272         }
2273
2274         if (have_mcvs1 && have_mcvs2)
2275         {
2276                 /*
2277                  * We have most-common-value lists for both relations.  Run through
2278                  * the lists to see which MCVs actually join to each other with the
2279                  * given operator.  This allows us to determine the exact join
2280                  * selectivity for the portion of the relations represented by the MCV
2281                  * lists.  We still have to estimate for the remaining population, but
2282                  * in a skewed distribution this gives us a big leg up in accuracy.
2283                  * For motivation see the analysis in Y. Ioannidis and S.
2284                  * Christodoulakis, "On the propagation of errors in the size of join
2285                  * results", Technical Report 1018, Computer Science Dept., University
2286                  * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2287                  */
2288                 FmgrInfo        eqproc;
2289                 bool       *hasmatch1;
2290                 bool       *hasmatch2;
2291                 double          nullfrac1 = stats1->stanullfrac;
2292                 double          nullfrac2 = stats2->stanullfrac;
2293                 double          matchprodfreq,
2294                                         matchfreq1,
2295                                         matchfreq2,
2296                                         unmatchfreq1,
2297                                         unmatchfreq2,
2298                                         otherfreq1,
2299                                         otherfreq2,
2300                                         totalsel1,
2301                                         totalsel2;
2302                 int                     i,
2303                                         nmatches;
2304
2305                 fmgr_info(opfuncoid, &eqproc);
2306                 hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
2307                 hasmatch2 = (bool *) palloc0(sslot2.nvalues * sizeof(bool));
2308
2309                 /*
2310                  * Note we assume that each MCV will match at most one member of the
2311                  * other MCV list.  If the operator isn't really equality, there could
2312                  * be multiple matches --- but we don't look for them, both for speed
2313                  * and because the math wouldn't add up...
2314                  */
2315                 matchprodfreq = 0.0;
2316                 nmatches = 0;
2317                 for (i = 0; i < sslot1.nvalues; i++)
2318                 {
2319                         int                     j;
2320
2321                         for (j = 0; j < sslot2.nvalues; j++)
2322                         {
2323                                 if (hasmatch2[j])
2324                                         continue;
2325                                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
2326                                                                                                    DEFAULT_COLLATION_OID,
2327                                                                                                    sslot1.values[i],
2328                                                                                                    sslot2.values[j])))
2329                                 {
2330                                         hasmatch1[i] = hasmatch2[j] = true;
2331                                         matchprodfreq += sslot1.numbers[i] * sslot2.numbers[j];
2332                                         nmatches++;
2333                                         break;
2334                                 }
2335                         }
2336                 }
2337                 CLAMP_PROBABILITY(matchprodfreq);
2338                 /* Sum up frequencies of matched and unmatched MCVs */
2339                 matchfreq1 = unmatchfreq1 = 0.0;
2340                 for (i = 0; i < sslot1.nvalues; i++)
2341                 {
2342                         if (hasmatch1[i])
2343                                 matchfreq1 += sslot1.numbers[i];
2344                         else
2345                                 unmatchfreq1 += sslot1.numbers[i];
2346                 }
2347                 CLAMP_PROBABILITY(matchfreq1);
2348                 CLAMP_PROBABILITY(unmatchfreq1);
2349                 matchfreq2 = unmatchfreq2 = 0.0;
2350                 for (i = 0; i < sslot2.nvalues; i++)
2351                 {
2352                         if (hasmatch2[i])
2353                                 matchfreq2 += sslot2.numbers[i];
2354                         else
2355                                 unmatchfreq2 += sslot2.numbers[i];
2356                 }
2357                 CLAMP_PROBABILITY(matchfreq2);
2358                 CLAMP_PROBABILITY(unmatchfreq2);
2359                 pfree(hasmatch1);
2360                 pfree(hasmatch2);
2361
2362                 /*
2363                  * Compute total frequency of non-null values that are not in the MCV
2364                  * lists.
2365                  */
2366                 otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2367                 otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2368                 CLAMP_PROBABILITY(otherfreq1);
2369                 CLAMP_PROBABILITY(otherfreq2);
2370
2371                 /*
2372                  * We can estimate the total selectivity from the point of view of
2373                  * relation 1 as: the known selectivity for matched MCVs, plus
2374                  * unmatched MCVs that are assumed to match against random members of
2375                  * relation 2's non-MCV population, plus non-MCV values that are
2376                  * assumed to match against random members of relation 2's unmatched
2377                  * MCVs plus non-MCV values.
2378                  */
2379                 totalsel1 = matchprodfreq;
2380                 if (nd2 > sslot2.nvalues)
2381                         totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2.nvalues);
2382                 if (nd2 > nmatches)
2383                         totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2384                                 (nd2 - nmatches);
2385                 /* Same estimate from the point of view of relation 2. */
2386                 totalsel2 = matchprodfreq;
2387                 if (nd1 > sslot1.nvalues)
2388                         totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1.nvalues);
2389                 if (nd1 > nmatches)
2390                         totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2391                                 (nd1 - nmatches);
2392
2393                 /*
2394                  * Use the smaller of the two estimates.  This can be justified in
2395                  * essentially the same terms as given below for the no-stats case: to
2396                  * a first approximation, we are estimating from the point of view of
2397                  * the relation with smaller nd.
2398                  */
2399                 selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2400         }
2401         else
2402         {
2403                 /*
2404                  * We do not have MCV lists for both sides.  Estimate the join
2405                  * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2406                  * is plausible if we assume that the join operator is strict and the
2407                  * non-null values are about equally distributed: a given non-null
2408                  * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2409                  * of rel2, so total join rows are at most
2410                  * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2411                  * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2412                  * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2413                  * with MIN() is an upper bound.  Using the MIN() means we estimate
2414                  * from the point of view of the relation with smaller nd (since the
2415                  * larger nd is determining the MIN).  It is reasonable to assume that
2416                  * most tuples in this rel will have join partners, so the bound is
2417                  * probably reasonably tight and should be taken as-is.
2418                  *
2419                  * XXX Can we be smarter if we have an MCV list for just one side? It
2420                  * seems that if we assume equal distribution for the other side, we
2421                  * end up with the same answer anyway.
2422                  */
2423                 double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2424                 double          nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2425
2426                 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2427                 if (nd1 > nd2)
2428                         selec /= nd1;
2429                 else
2430                         selec /= nd2;
2431         }
2432
2433         free_attstatsslot(&sslot1);
2434         free_attstatsslot(&sslot2);
2435
2436         return selec;
2437 }
2438
2439 /*
2440  * eqjoinsel_semi --- eqjoinsel for semi join
2441  *
2442  * (Also used for anti join, which we are supposed to estimate the same way.)
2443  * Caller has ensured that vardata1 is the LHS variable.
2444  * Unlike eqjoinsel_inner, we have to cope with operator being InvalidOid.
2445  */
2446 static double
2447 eqjoinsel_semi(Oid operator,
2448                            VariableStatData *vardata1, VariableStatData *vardata2,
2449                            RelOptInfo *inner_rel)
2450 {
2451         double          selec;
2452         double          nd1;
2453         double          nd2;
2454         bool            isdefault1;
2455         bool            isdefault2;
2456         Oid                     opfuncoid;
2457         Form_pg_statistic stats1 = NULL;
2458         bool            have_mcvs1 = false;
2459         bool            have_mcvs2 = false;
2460         AttStatsSlot sslot1;
2461         AttStatsSlot sslot2;
2462
2463         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2464         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2465
2466         opfuncoid = OidIsValid(operator) ? get_opcode(operator) : InvalidOid;
2467
2468         memset(&sslot1, 0, sizeof(sslot1));
2469         memset(&sslot2, 0, sizeof(sslot2));
2470
2471         /*
2472          * We clamp nd2 to be not more than what we estimate the inner relation's
2473          * size to be.  This is intuitively somewhat reasonable since obviously
2474          * there can't be more than that many distinct values coming from the
2475          * inner rel.  The reason for the asymmetry (ie, that we don't clamp nd1
2476          * likewise) is that this is the only pathway by which restriction clauses
2477          * applied to the inner rel will affect the join result size estimate,
2478          * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2479          * only the outer rel's size.  If we clamped nd1 we'd be double-counting
2480          * the selectivity of outer-rel restrictions.
2481          *
2482          * We can apply this clamping both with respect to the base relation from
2483          * which the join variable comes (if there is just one), and to the
2484          * immediate inner input relation of the current join.
2485          *
2486          * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2487          * great, maybe, but it didn't come out of nowhere either.  This is most
2488          * helpful when the inner relation is empty and consequently has no stats.
2489          */
2490         if (vardata2->rel)
2491         {
2492                 if (nd2 >= vardata2->rel->rows)
2493                 {
2494                         nd2 = vardata2->rel->rows;
2495                         isdefault2 = false;
2496                 }
2497         }
2498         if (nd2 >= inner_rel->rows)
2499         {
2500                 nd2 = inner_rel->rows;
2501                 isdefault2 = false;
2502         }
2503
2504         if (HeapTupleIsValid(vardata1->statsTuple))
2505         {
2506                 /* note we allow use of nullfrac regardless of security check */
2507                 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2508                 if (statistic_proc_security_check(vardata1, opfuncoid))
2509                         have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
2510                                                                                   STATISTIC_KIND_MCV, InvalidOid,
2511                                                                  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2512         }
2513
2514         if (HeapTupleIsValid(vardata2->statsTuple) &&
2515                 statistic_proc_security_check(vardata2, opfuncoid))
2516         {
2517                 have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
2518                                                                           STATISTIC_KIND_MCV, InvalidOid,
2519                                                                           ATTSTATSSLOT_VALUES);
2520                 /* note: currently don't need stanumbers from RHS */
2521         }
2522
2523         if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
2524         {
2525                 /*
2526                  * We have most-common-value lists for both relations.  Run through
2527                  * the lists to see which MCVs actually join to each other with the
2528                  * given operator.  This allows us to determine the exact join
2529                  * selectivity for the portion of the relations represented by the MCV
2530                  * lists.  We still have to estimate for the remaining population, but
2531                  * in a skewed distribution this gives us a big leg up in accuracy.
2532                  */
2533                 FmgrInfo        eqproc;
2534                 bool       *hasmatch1;
2535                 bool       *hasmatch2;
2536                 double          nullfrac1 = stats1->stanullfrac;
2537                 double          matchfreq1,
2538                                         uncertainfrac,
2539                                         uncertain;
2540                 int                     i,
2541                                         nmatches,
2542                                         clamped_nvalues2;
2543
2544                 /*
2545                  * The clamping above could have resulted in nd2 being less than
2546                  * sslot2.nvalues; in which case, we assume that precisely the nd2
2547                  * most common values in the relation will appear in the join input,
2548                  * and so compare to only the first nd2 members of the MCV list.  Of
2549                  * course this is frequently wrong, but it's the best bet we can make.
2550                  */
2551                 clamped_nvalues2 = Min(sslot2.nvalues, nd2);
2552
2553                 fmgr_info(opfuncoid, &eqproc);
2554                 hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
2555                 hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2556
2557                 /*
2558                  * Note we assume that each MCV will match at most one member of the
2559                  * other MCV list.  If the operator isn't really equality, there could
2560                  * be multiple matches --- but we don't look for them, both for speed
2561                  * and because the math wouldn't add up...
2562                  */
2563                 nmatches = 0;
2564                 for (i = 0; i < sslot1.nvalues; i++)
2565                 {
2566                         int                     j;
2567
2568                         for (j = 0; j < clamped_nvalues2; j++)
2569                         {
2570                                 if (hasmatch2[j])
2571                                         continue;
2572                                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
2573                                                                                                    DEFAULT_COLLATION_OID,
2574                                                                                                    sslot1.values[i],
2575                                                                                                    sslot2.values[j])))
2576                                 {
2577                                         hasmatch1[i] = hasmatch2[j] = true;
2578                                         nmatches++;
2579                                         break;
2580                                 }
2581                         }
2582                 }
2583                 /* Sum up frequencies of matched MCVs */
2584                 matchfreq1 = 0.0;
2585                 for (i = 0; i < sslot1.nvalues; i++)
2586                 {
2587                         if (hasmatch1[i])
2588                                 matchfreq1 += sslot1.numbers[i];
2589                 }
2590                 CLAMP_PROBABILITY(matchfreq1);
2591                 pfree(hasmatch1);
2592                 pfree(hasmatch2);
2593
2594                 /*
2595                  * Now we need to estimate the fraction of relation 1 that has at
2596                  * least one join partner.  We know for certain that the matched MCVs
2597                  * do, so that gives us a lower bound, but we're really in the dark
2598                  * about everything else.  Our crude approach is: if nd1 <= nd2 then
2599                  * assume all non-null rel1 rows have join partners, else assume for
2600                  * the uncertain rows that a fraction nd2/nd1 have join partners. We
2601                  * can discount the known-matched MCVs from the distinct-values counts
2602                  * before doing the division.
2603                  *
2604                  * Crude as the above is, it's completely useless if we don't have
2605                  * reliable ndistinct values for both sides.  Hence, if either nd1 or
2606                  * nd2 is default, punt and assume half of the uncertain rows have
2607                  * join partners.
2608                  */
2609                 if (!isdefault1 && !isdefault2)
2610                 {
2611                         nd1 -= nmatches;
2612                         nd2 -= nmatches;
2613                         if (nd1 <= nd2 || nd2 < 0)
2614                                 uncertainfrac = 1.0;
2615                         else
2616                                 uncertainfrac = nd2 / nd1;
2617                 }
2618                 else
2619                         uncertainfrac = 0.5;
2620                 uncertain = 1.0 - matchfreq1 - nullfrac1;
2621                 CLAMP_PROBABILITY(uncertain);
2622                 selec = matchfreq1 + uncertainfrac * uncertain;
2623         }
2624         else
2625         {
2626                 /*
2627                  * Without MCV lists for both sides, we can only use the heuristic
2628                  * about nd1 vs nd2.
2629                  */
2630                 double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2631
2632                 if (!isdefault1 && !isdefault2)
2633                 {
2634                         if (nd1 <= nd2 || nd2 < 0)
2635                                 selec = 1.0 - nullfrac1;
2636                         else
2637                                 selec = (nd2 / nd1) * (1.0 - nullfrac1);
2638                 }
2639                 else
2640                         selec = 0.5 * (1.0 - nullfrac1);
2641         }
2642
2643         free_attstatsslot(&sslot1);
2644         free_attstatsslot(&sslot2);
2645
2646         return selec;
2647 }
2648
2649 /*
2650  *              neqjoinsel              - Join selectivity of "!="
2651  */
2652 Datum
2653 neqjoinsel(PG_FUNCTION_ARGS)
2654 {
2655         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2656         Oid                     operator = PG_GETARG_OID(1);
2657         List       *args = (List *) PG_GETARG_POINTER(2);
2658         JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
2659         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2660         Oid                     eqop;
2661         float8          result;
2662
2663         /*
2664          * We want 1 - eqjoinsel() where the equality operator is the one
2665          * associated with this != operator, that is, its negator.
2666          */
2667         eqop = get_negator(operator);
2668         if (eqop)
2669         {
2670                 result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
2671                                                                                                         PointerGetDatum(root),
2672                                                                                                         ObjectIdGetDatum(eqop),
2673                                                                                                         PointerGetDatum(args),
2674                                                                                                         Int16GetDatum(jointype),
2675                                                                                                         PointerGetDatum(sjinfo)));
2676         }
2677         else
2678         {
2679                 /* Use default selectivity (should we raise an error instead?) */
2680                 result = DEFAULT_EQ_SEL;
2681         }
2682         result = 1.0 - result;
2683         PG_RETURN_FLOAT8(result);
2684 }
2685
2686 /*
2687  *              scalarltjoinsel - Join selectivity of "<" and "<=" for scalars
2688  */
2689 Datum
2690 scalarltjoinsel(PG_FUNCTION_ARGS)
2691 {
2692         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2693 }
2694
2695 /*
2696  *              scalargtjoinsel - Join selectivity of ">" and ">=" for scalars
2697  */
2698 Datum
2699 scalargtjoinsel(PG_FUNCTION_ARGS)
2700 {
2701         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2702 }
2703
2704 /*
2705  * patternjoinsel               - Generic code for pattern-match join selectivity.
2706  */
2707 static double
2708 patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
2709 {
2710         /* For the moment we just punt. */
2711         return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
2712 }
2713
2714 /*
2715  *              regexeqjoinsel  - Join selectivity of regular-expression pattern match.
2716  */
2717 Datum
2718 regexeqjoinsel(PG_FUNCTION_ARGS)
2719 {
2720         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
2721 }
2722
2723 /*
2724  *              icregexeqjoinsel        - Join selectivity of case-insensitive regex match.
2725  */
2726 Datum
2727 icregexeqjoinsel(PG_FUNCTION_ARGS)
2728 {
2729         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
2730 }
2731
2732 /*
2733  *              likejoinsel                     - Join selectivity of LIKE pattern match.
2734  */
2735 Datum
2736 likejoinsel(PG_FUNCTION_ARGS)
2737 {
2738         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
2739 }
2740
2741 /*
2742  *              iclikejoinsel                   - Join selectivity of ILIKE pattern match.
2743  */
2744 Datum
2745 iclikejoinsel(PG_FUNCTION_ARGS)
2746 {
2747         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
2748 }
2749
2750 /*
2751  *              regexnejoinsel  - Join selectivity of regex non-match.
2752  */
2753 Datum
2754 regexnejoinsel(PG_FUNCTION_ARGS)
2755 {
2756         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
2757 }
2758
2759 /*
2760  *              icregexnejoinsel        - Join selectivity of case-insensitive regex non-match.
2761  */
2762 Datum
2763 icregexnejoinsel(PG_FUNCTION_ARGS)
2764 {
2765         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
2766 }
2767
2768 /*
2769  *              nlikejoinsel            - Join selectivity of LIKE pattern non-match.
2770  */
2771 Datum
2772 nlikejoinsel(PG_FUNCTION_ARGS)
2773 {
2774         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
2775 }
2776
2777 /*
2778  *              icnlikejoinsel          - Join selectivity of ILIKE pattern non-match.
2779  */
2780 Datum
2781 icnlikejoinsel(PG_FUNCTION_ARGS)
2782 {
2783         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
2784 }
2785
2786 /*
2787  * mergejoinscansel                     - Scan selectivity of merge join.
2788  *
2789  * A merge join will stop as soon as it exhausts either input stream.
2790  * Therefore, if we can estimate the ranges of both input variables,
2791  * we can estimate how much of the input will actually be read.  This
2792  * can have a considerable impact on the cost when using indexscans.
2793  *
2794  * Also, we can estimate how much of each input has to be read before the
2795  * first join pair is found, which will affect the join's startup time.
2796  *
2797  * clause should be a clause already known to be mergejoinable.  opfamily,
2798  * strategy, and nulls_first specify the sort ordering being used.
2799  *
2800  * The outputs are:
2801  *              *leftstart is set to the fraction of the left-hand variable expected
2802  *               to be scanned before the first join pair is found (0 to 1).
2803  *              *leftend is set to the fraction of the left-hand variable expected
2804  *               to be scanned before the join terminates (0 to 1).
2805  *              *rightstart, *rightend similarly for the right-hand variable.
2806  */
2807 void
2808 mergejoinscansel(PlannerInfo *root, Node *clause,
2809                                  Oid opfamily, int strategy, bool nulls_first,
2810                                  Selectivity *leftstart, Selectivity *leftend,
2811                                  Selectivity *rightstart, Selectivity *rightend)
2812 {
2813         Node       *left,
2814                            *right;
2815         VariableStatData leftvar,
2816                                 rightvar;
2817         int                     op_strategy;
2818         Oid                     op_lefttype;
2819         Oid                     op_righttype;
2820         Oid                     opno,
2821                                 lsortop,
2822                                 rsortop,
2823                                 lstatop,
2824                                 rstatop,
2825                                 ltop,
2826                                 leop,
2827                                 revltop,
2828                                 revleop;
2829         bool            isgt;
2830         Datum           leftmin,
2831                                 leftmax,
2832                                 rightmin,
2833                                 rightmax;
2834         double          selec;
2835
2836         /* Set default results if we can't figure anything out. */
2837         /* XXX should default "start" fraction be a bit more than 0? */
2838         *leftstart = *rightstart = 0.0;
2839         *leftend = *rightend = 1.0;
2840
2841         /* Deconstruct the merge clause */
2842         if (!is_opclause(clause))
2843                 return;                                 /* shouldn't happen */
2844         opno = ((OpExpr *) clause)->opno;
2845         left = get_leftop((Expr *) clause);
2846         right = get_rightop((Expr *) clause);
2847         if (!right)
2848                 return;                                 /* shouldn't happen */
2849
2850         /* Look for stats for the inputs */
2851         examine_variable(root, left, 0, &leftvar);
2852         examine_variable(root, right, 0, &rightvar);
2853
2854         /* Extract the operator's declared left/right datatypes */
2855         get_op_opfamily_properties(opno, opfamily, false,
2856                                                            &op_strategy,
2857                                                            &op_lefttype,
2858                                                            &op_righttype);
2859         Assert(op_strategy == BTEqualStrategyNumber);
2860
2861         /*
2862          * Look up the various operators we need.  If we don't find them all, it
2863          * probably means the opfamily is broken, but we just fail silently.
2864          *
2865          * Note: we expect that pg_statistic histograms will be sorted by the '<'
2866          * operator, regardless of which sort direction we are considering.
2867          */
2868         switch (strategy)
2869         {
2870                 case BTLessStrategyNumber:
2871                         isgt = false;
2872                         if (op_lefttype == op_righttype)
2873                         {
2874                                 /* easy case */
2875                                 ltop = get_opfamily_member(opfamily,
2876                                                                                    op_lefttype, op_righttype,
2877                                                                                    BTLessStrategyNumber);
2878                                 leop = get_opfamily_member(opfamily,
2879                                                                                    op_lefttype, op_righttype,
2880                                                                                    BTLessEqualStrategyNumber);
2881                                 lsortop = ltop;
2882                                 rsortop = ltop;
2883                                 lstatop = lsortop;
2884                                 rstatop = rsortop;
2885                                 revltop = ltop;
2886                                 revleop = leop;
2887                         }
2888                         else
2889                         {
2890                                 ltop = get_opfamily_member(opfamily,
2891                                                                                    op_lefttype, op_righttype,
2892                                                                                    BTLessStrategyNumber);
2893                                 leop = get_opfamily_member(opfamily,
2894                                                                                    op_lefttype, op_righttype,
2895                                                                                    BTLessEqualStrategyNumber);
2896                                 lsortop = get_opfamily_member(opfamily,
2897                                                                                           op_lefttype, op_lefttype,
2898                                                                                           BTLessStrategyNumber);
2899                                 rsortop = get_opfamily_member(opfamily,
2900                                                                                           op_righttype, op_righttype,
2901                                                                                           BTLessStrategyNumber);
2902                                 lstatop = lsortop;
2903                                 rstatop = rsortop;
2904                                 revltop = get_opfamily_member(opfamily,
2905                                                                                           op_righttype, op_lefttype,
2906                                                                                           BTLessStrategyNumber);
2907                                 revleop = get_opfamily_member(opfamily,
2908                                                                                           op_righttype, op_lefttype,
2909                                                                                           BTLessEqualStrategyNumber);
2910                         }
2911                         break;
2912                 case BTGreaterStrategyNumber:
2913                         /* descending-order case */
2914                         isgt = true;
2915                         if (op_lefttype == op_righttype)
2916                         {
2917                                 /* easy case */
2918                                 ltop = get_opfamily_member(opfamily,
2919                                                                                    op_lefttype, op_righttype,
2920                                                                                    BTGreaterStrategyNumber);
2921                                 leop = get_opfamily_member(opfamily,
2922                                                                                    op_lefttype, op_righttype,
2923                                                                                    BTGreaterEqualStrategyNumber);
2924                                 lsortop = ltop;
2925                                 rsortop = ltop;
2926                                 lstatop = get_opfamily_member(opfamily,
2927                                                                                           op_lefttype, op_lefttype,
2928                                                                                           BTLessStrategyNumber);
2929                                 rstatop = lstatop;
2930                                 revltop = ltop;
2931                                 revleop = leop;
2932                         }
2933                         else
2934                         {
2935                                 ltop = get_opfamily_member(opfamily,
2936                                                                                    op_lefttype, op_righttype,
2937                                                                                    BTGreaterStrategyNumber);
2938                                 leop = get_opfamily_member(opfamily,
2939                                                                                    op_lefttype, op_righttype,
2940                                                                                    BTGreaterEqualStrategyNumber);
2941                                 lsortop = get_opfamily_member(opfamily,
2942                                                                                           op_lefttype, op_lefttype,
2943                                                                                           BTGreaterStrategyNumber);
2944                                 rsortop = get_opfamily_member(opfamily,
2945                                                                                           op_righttype, op_righttype,
2946                                                                                           BTGreaterStrategyNumber);
2947                                 lstatop = get_opfamily_member(opfamily,
2948                                                                                           op_lefttype, op_lefttype,
2949                                                                                           BTLessStrategyNumber);
2950                                 rstatop = get_opfamily_member(opfamily,
2951                                                                                           op_righttype, op_righttype,
2952                                                                                           BTLessStrategyNumber);
2953                                 revltop = get_opfamily_member(opfamily,
2954                                                                                           op_righttype, op_lefttype,
2955                                                                                           BTGreaterStrategyNumber);
2956                                 revleop = get_opfamily_member(opfamily,
2957                                                                                           op_righttype, op_lefttype,
2958                                                                                           BTGreaterEqualStrategyNumber);
2959                         }
2960                         break;
2961                 default:
2962                         goto fail;                      /* shouldn't get here */
2963         }
2964
2965         if (!OidIsValid(lsortop) ||
2966                 !OidIsValid(rsortop) ||
2967                 !OidIsValid(lstatop) ||
2968                 !OidIsValid(rstatop) ||
2969                 !OidIsValid(ltop) ||
2970                 !OidIsValid(leop) ||
2971                 !OidIsValid(revltop) ||
2972                 !OidIsValid(revleop))
2973                 goto fail;                              /* insufficient info in catalogs */
2974
2975         /* Try to get ranges of both inputs */
2976         if (!isgt)
2977         {
2978                 if (!get_variable_range(root, &leftvar, lstatop,
2979                                                                 &leftmin, &leftmax))
2980                         goto fail;                      /* no range available from stats */
2981                 if (!get_variable_range(root, &rightvar, rstatop,
2982                                                                 &rightmin, &rightmax))
2983                         goto fail;                      /* no range available from stats */
2984         }
2985         else
2986         {
2987                 /* need to swap the max and min */
2988                 if (!get_variable_range(root, &leftvar, lstatop,
2989                                                                 &leftmax, &leftmin))
2990                         goto fail;                      /* no range available from stats */
2991                 if (!get_variable_range(root, &rightvar, rstatop,
2992                                                                 &rightmax, &rightmin))
2993                         goto fail;                      /* no range available from stats */
2994         }
2995
2996         /*
2997          * Now, the fraction of the left variable that will be scanned is the
2998          * fraction that's <= the right-side maximum value.  But only believe
2999          * non-default estimates, else stick with our 1.0.
3000          */
3001         selec = scalarineqsel(root, leop, isgt, &leftvar,
3002                                                   rightmax, op_righttype);
3003         if (selec != DEFAULT_INEQ_SEL)
3004                 *leftend = selec;
3005
3006         /* And similarly for the right variable. */
3007         selec = scalarineqsel(root, revleop, isgt, &rightvar,
3008                                                   leftmax, op_lefttype);
3009         if (selec != DEFAULT_INEQ_SEL)
3010                 *rightend = selec;
3011
3012         /*
3013          * Only one of the two "end" fractions can really be less than 1.0;
3014          * believe the smaller estimate and reset the other one to exactly 1.0. If
3015          * we get exactly equal estimates (as can easily happen with self-joins),
3016          * believe neither.
3017          */
3018         if (*leftend > *rightend)
3019                 *leftend = 1.0;
3020         else if (*leftend < *rightend)
3021                 *rightend = 1.0;
3022         else
3023                 *leftend = *rightend = 1.0;
3024
3025         /*
3026          * Also, the fraction of the left variable that will be scanned before the
3027          * first join pair is found is the fraction that's < the right-side
3028          * minimum value.  But only believe non-default estimates, else stick with
3029          * our own default.
3030          */
3031         selec = scalarineqsel(root, ltop, isgt, &leftvar,
3032                                                   rightmin, op_righttype);
3033         if (selec != DEFAULT_INEQ_SEL)
3034                 *leftstart = selec;
3035
3036         /* And similarly for the right variable. */
3037         selec = scalarineqsel(root, revltop, isgt, &rightvar,
3038                                                   leftmin, op_lefttype);
3039         if (selec != DEFAULT_INEQ_SEL)
3040                 *rightstart = selec;
3041
3042         /*
3043          * Only one of the two "start" fractions can really be more than zero;
3044          * believe the larger estimate and reset the other one to exactly 0.0. If
3045          * we get exactly equal estimates (as can easily happen with self-joins),
3046          * believe neither.
3047          */
3048         if (*leftstart < *rightstart)
3049                 *leftstart = 0.0;
3050         else if (*leftstart > *rightstart)
3051                 *rightstart = 0.0;
3052         else
3053                 *leftstart = *rightstart = 0.0;
3054
3055         /*
3056          * If the sort order is nulls-first, we're going to have to skip over any
3057          * nulls too.  These would not have been counted by scalarineqsel, and we
3058          * can safely add in this fraction regardless of whether we believe
3059          * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
3060          */
3061         if (nulls_first)
3062         {
3063                 Form_pg_statistic stats;
3064
3065                 if (HeapTupleIsValid(leftvar.statsTuple))
3066                 {
3067                         stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
3068                         *leftstart += stats->stanullfrac;
3069                         CLAMP_PROBABILITY(*leftstart);
3070                         *leftend += stats->stanullfrac;
3071                         CLAMP_PROBABILITY(*leftend);
3072                 }
3073                 if (HeapTupleIsValid(rightvar.statsTuple))
3074                 {
3075                         stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
3076                         *rightstart += stats->stanullfrac;
3077                         CLAMP_PROBABILITY(*rightstart);
3078                         *rightend += stats->stanullfrac;
3079                         CLAMP_PROBABILITY(*rightend);
3080                 }
3081         }
3082
3083         /* Disbelieve start >= end, just in case that can happen */
3084         if (*leftstart >= *leftend)
3085         {
3086                 *leftstart = 0.0;
3087                 *leftend = 1.0;
3088         }
3089         if (*rightstart >= *rightend)
3090         {
3091                 *rightstart = 0.0;
3092                 *rightend = 1.0;
3093         }
3094
3095 fail:
3096         ReleaseVariableStats(leftvar);
3097         ReleaseVariableStats(rightvar);
3098 }
3099
3100
3101 /*
3102  * Helper routine for estimate_num_groups: add an item to a list of
3103  * GroupVarInfos, but only if it's not known equal to any of the existing
3104  * entries.
3105  */
3106 typedef struct
3107 {
3108         Node       *var;                        /* might be an expression, not just a Var */
3109         RelOptInfo *rel;                        /* relation it belongs to */
3110         double          ndistinct;              /* # distinct values */
3111 } GroupVarInfo;
3112
3113 static List *
3114 add_unique_group_var(PlannerInfo *root, List *varinfos,
3115                                          Node *var, VariableStatData *vardata)
3116 {
3117         GroupVarInfo *varinfo;
3118         double          ndistinct;
3119         bool            isdefault;
3120         ListCell   *lc;
3121
3122         ndistinct = get_variable_numdistinct(vardata, &isdefault);
3123
3124         /* cannot use foreach here because of possible list_delete */
3125         lc = list_head(varinfos);
3126         while (lc)
3127         {
3128                 varinfo = (GroupVarInfo *) lfirst(lc);
3129
3130                 /* must advance lc before list_delete possibly pfree's it */
3131                 lc = lnext(lc);
3132
3133                 /* Drop exact duplicates */
3134                 if (equal(var, varinfo->var))
3135                         return varinfos;
3136
3137                 /*
3138                  * Drop known-equal vars, but only if they belong to different
3139                  * relations (see comments for estimate_num_groups)
3140                  */
3141                 if (vardata->rel != varinfo->rel &&
3142                         exprs_known_equal(root, var, varinfo->var))
3143                 {
3144                         if (varinfo->ndistinct <= ndistinct)
3145                         {
3146                                 /* Keep older item, forget new one */
3147                                 return varinfos;
3148                         }
3149                         else
3150                         {
3151                                 /* Delete the older item */
3152                                 varinfos = list_delete_ptr(varinfos, varinfo);
3153                         }
3154                 }
3155         }
3156
3157         varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3158
3159         varinfo->var = var;
3160         varinfo->rel = vardata->rel;
3161         varinfo->ndistinct = ndistinct;
3162         varinfos = lappend(varinfos, varinfo);
3163         return varinfos;
3164 }
3165
3166 /*
3167  * estimate_num_groups          - Estimate number of groups in a grouped query
3168  *
3169  * Given a query having a GROUP BY clause, estimate how many groups there
3170  * will be --- ie, the number of distinct combinations of the GROUP BY
3171  * expressions.
3172  *
3173  * This routine is also used to estimate the number of rows emitted by
3174  * a DISTINCT filtering step; that is an isomorphic problem.  (Note:
3175  * actually, we only use it for DISTINCT when there's no grouping or
3176  * aggregation ahead of the DISTINCT.)
3177  *
3178  * Inputs:
3179  *      root - the query
3180  *      groupExprs - list of expressions being grouped by
3181  *      input_rows - number of rows estimated to arrive at the group/unique
3182  *              filter step
3183  *      pgset - NULL, or a List** pointing to a grouping set to filter the
3184  *              groupExprs against
3185  *
3186  * Given the lack of any cross-correlation statistics in the system, it's
3187  * impossible to do anything really trustworthy with GROUP BY conditions
3188  * involving multiple Vars.  We should however avoid assuming the worst
3189  * case (all possible cross-product terms actually appear as groups) since
3190  * very often the grouped-by Vars are highly correlated.  Our current approach
3191  * is as follows:
3192  *      1.  Expressions yielding boolean are assumed to contribute two groups,
3193  *              independently of their content, and are ignored in the subsequent
3194  *              steps.  This is mainly because tests like "col IS NULL" break the
3195  *              heuristic used in step 2 especially badly.
3196  *      2.  Reduce the given expressions to a list of unique Vars used.  For
3197  *              example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3198  *              It is clearly correct not to count the same Var more than once.
3199  *              It is also reasonable to treat f(x) the same as x: f() cannot
3200  *              increase the number of distinct values (unless it is volatile,
3201  *              which we consider unlikely for grouping), but it probably won't
3202  *              reduce the number of distinct values much either.
3203  *              As a special case, if a GROUP BY expression can be matched to an
3204  *              expressional index for which we have statistics, then we treat the
3205  *              whole expression as though it were just a Var.
3206  *      3.  If the list contains Vars of different relations that are known equal
3207  *              due to equivalence classes, then drop all but one of the Vars from each
3208  *              known-equal set, keeping the one with smallest estimated # of values
3209  *              (since the extra values of the others can't appear in joined rows).
3210  *              Note the reason we only consider Vars of different relations is that
3211  *              if we considered ones of the same rel, we'd be double-counting the
3212  *              restriction selectivity of the equality in the next step.
3213  *      4.  For Vars within a single source rel, we multiply together the numbers
3214  *              of values, clamp to the number of rows in the rel (divided by 10 if
3215  *              more than one Var), and then multiply by a factor based on the
3216  *              selectivity of the restriction clauses for that rel.  When there's
3217  *              more than one Var, the initial product is probably too high (it's the
3218  *              worst case) but clamping to a fraction of the rel's rows seems to be a
3219  *              helpful heuristic for not letting the estimate get out of hand.  (The
3220  *              factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
3221  *              we multiply by to adjust for the restriction selectivity assumes that
3222  *              the restriction clauses are independent of the grouping, which may not
3223  *              be a valid assumption, but it's hard to do better.
3224  *      5.  If there are Vars from multiple rels, we repeat step 4 for each such
3225  *              rel, and multiply the results together.
3226  * Note that rels not containing grouped Vars are ignored completely, as are
3227  * join clauses.  Such rels cannot increase the number of groups, and we
3228  * assume such clauses do not reduce the number either (somewhat bogus,
3229  * but we don't have the info to do better).
3230  */
3231 double
3232 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3233                                         List **pgset)
3234 {
3235         List       *varinfos = NIL;
3236         double          numdistinct;
3237         ListCell   *l;
3238         int                     i;
3239
3240         /*
3241          * We don't ever want to return an estimate of zero groups, as that tends
3242          * to lead to division-by-zero and other unpleasantness.  The input_rows
3243          * estimate is usually already at least 1, but clamp it just in case it
3244          * isn't.
3245          */
3246         input_rows = clamp_row_est(input_rows);
3247
3248         /*
3249          * If no grouping columns, there's exactly one group.  (This can't happen
3250          * for normal cases with GROUP BY or DISTINCT, but it is possible for
3251          * corner cases with set operations.)
3252          */
3253         if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3254                 return 1.0;
3255
3256         /*
3257          * Count groups derived from boolean grouping expressions.  For other
3258          * expressions, find the unique Vars used, treating an expression as a Var
3259          * if we can find stats for it.  For each one, record the statistical
3260          * estimate of number of distinct values (total in its table, without
3261          * regard for filtering).
3262          */
3263         numdistinct = 1.0;
3264
3265         i = 0;
3266         foreach(l, groupExprs)
3267         {
3268                 Node       *groupexpr = (Node *) lfirst(l);
3269                 VariableStatData vardata;
3270                 List       *varshere;
3271                 ListCell   *l2;
3272
3273                 /* is expression in this grouping set? */
3274                 if (pgset && !list_member_int(*pgset, i++))
3275                         continue;
3276
3277                 /* Short-circuit for expressions returning boolean */
3278                 if (exprType(groupexpr) == BOOLOID)
3279                 {
3280                         numdistinct *= 2.0;
3281                         continue;
3282                 }
3283
3284                 /*
3285                  * If examine_variable is able to deduce anything about the GROUP BY
3286                  * expression, treat it as a single variable even if it's really more
3287                  * complicated.
3288                  */
3289                 examine_variable(root, groupexpr, 0, &vardata);
3290                 if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3291                 {
3292                         varinfos = add_unique_group_var(root, varinfos,
3293                                                                                         groupexpr, &vardata);
3294                         ReleaseVariableStats(vardata);
3295                         continue;
3296                 }
3297                 ReleaseVariableStats(vardata);
3298
3299                 /*
3300                  * Else pull out the component Vars.  Handle PlaceHolderVars by
3301                  * recursing into their arguments (effectively assuming that the
3302                  * PlaceHolderVar doesn't change the number of groups, which boils
3303                  * down to ignoring the possible addition of nulls to the result set).
3304                  */
3305                 varshere = pull_var_clause(groupexpr,
3306                                                                    PVC_RECURSE_AGGREGATES |
3307                                                                    PVC_RECURSE_WINDOWFUNCS |
3308                                                                    PVC_RECURSE_PLACEHOLDERS);
3309
3310                 /*
3311                  * If we find any variable-free GROUP BY item, then either it is a
3312                  * constant (and we can ignore it) or it contains a volatile function;
3313                  * in the latter case we punt and assume that each input row will
3314                  * yield a distinct group.
3315                  */
3316                 if (varshere == NIL)
3317                 {
3318                         if (contain_volatile_functions(groupexpr))
3319                                 return input_rows;
3320                         continue;
3321                 }
3322
3323                 /*
3324                  * Else add variables to varinfos list
3325                  */
3326                 foreach(l2, varshere)
3327                 {
3328                         Node       *var = (Node *) lfirst(l2);
3329
3330                         examine_variable(root, var, 0, &vardata);
3331                         varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3332                         ReleaseVariableStats(vardata);
3333                 }
3334         }
3335
3336         /*
3337          * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3338          * list.
3339          */
3340         if (varinfos == NIL)
3341         {
3342                 /* Guard against out-of-range answers */
3343                 if (numdistinct > input_rows)
3344                         numdistinct = input_rows;
3345                 return numdistinct;
3346         }
3347
3348         /*
3349          * Group Vars by relation and estimate total numdistinct.
3350          *
3351          * For each iteration of the outer loop, we process the frontmost Var in
3352          * varinfos, plus all other Vars in the same relation.  We remove these
3353          * Vars from the newvarinfos list for the next iteration. This is the
3354          * easiest way to group Vars of same rel together.
3355          */
3356         do
3357         {
3358                 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3359                 RelOptInfo *rel = varinfo1->rel;
3360                 double          reldistinct = 1;
3361                 double          relmaxndistinct = reldistinct;
3362                 int                     relvarcount = 0;
3363                 List       *newvarinfos = NIL;
3364                 List       *relvarinfos = NIL;
3365
3366                 /*
3367                  * Split the list of varinfos in two - one for the current rel, one
3368                  * for remaining Vars on other rels.
3369                  */
3370                 relvarinfos = lcons(varinfo1, relvarinfos);
3371                 for_each_cell(l, lnext(list_head(varinfos)))
3372                 {
3373                         GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3374
3375                         if (varinfo2->rel == varinfo1->rel)
3376                         {
3377                                 /* varinfos on current rel */
3378                                 relvarinfos = lcons(varinfo2, relvarinfos);
3379                         }
3380                         else
3381                         {
3382                                 /* not time to process varinfo2 yet */
3383                                 newvarinfos = lcons(varinfo2, newvarinfos);
3384                         }
3385                 }
3386
3387                 /*
3388                  * Get the numdistinct estimate for the Vars of this rel.  We
3389                  * iteratively search for multivariate n-distinct with maximum number
3390                  * of vars; assuming that each var group is independent of the others,
3391                  * we multiply them together.  Any remaining relvarinfos after no more
3392                  * multivariate matches are found are assumed independent too, so
3393                  * their individual ndistinct estimates are multiplied also.
3394                  *
3395                  * While iterating, count how many separate numdistinct values we
3396                  * apply.  We apply a fudge factor below, but only if we multiplied
3397                  * more than one such values.
3398                  */
3399                 while (relvarinfos)
3400                 {
3401                         double          mvndistinct;
3402
3403                         if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3404                                                                                                 &mvndistinct))
3405                         {
3406                                 reldistinct *= mvndistinct;
3407                                 if (relmaxndistinct < mvndistinct)
3408                                         relmaxndistinct = mvndistinct;
3409                                 relvarcount++;
3410                         }
3411                         else
3412                         {
3413                                 foreach(l, relvarinfos)
3414                                 {
3415                                         GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3416
3417                                         reldistinct *= varinfo2->ndistinct;
3418                                         if (relmaxndistinct < varinfo2->ndistinct)
3419                                                 relmaxndistinct = varinfo2->ndistinct;
3420                                         relvarcount++;
3421                                 }
3422
3423                                 /* we're done with this relation */
3424                                 relvarinfos = NIL;
3425                         }
3426                 }
3427
3428                 /*
3429                  * Sanity check --- don't divide by zero if empty relation.
3430                  */
3431                 Assert(IS_SIMPLE_REL(rel));
3432                 if (rel->tuples > 0)
3433                 {
3434                         /*
3435                          * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3436                          * fudge factor is because the Vars are probably correlated but we
3437                          * don't know by how much.  We should never clamp to less than the
3438                          * largest ndistinct value for any of the Vars, though, since
3439                          * there will surely be at least that many groups.
3440                          */
3441                         double          clamp = rel->tuples;
3442
3443                         if (relvarcount > 1)
3444                         {
3445                                 clamp *= 0.1;
3446                                 if (clamp < relmaxndistinct)
3447                                 {
3448                                         clamp = relmaxndistinct;
3449                                         /* for sanity in case some ndistinct is too large: */
3450                                         if (clamp > rel->tuples)
3451                                                 clamp = rel->tuples;
3452                                 }
3453                         }
3454                         if (reldistinct > clamp)
3455                                 reldistinct = clamp;
3456
3457                         /*
3458                          * Update the estimate based on the restriction selectivity,
3459                          * guarding against division by zero when reldistinct is zero.
3460                          * Also skip this if we know that we are returning all rows.
3461                          */
3462                         if (reldistinct > 0 && rel->rows < rel->tuples)
3463                         {
3464                                 /*
3465                                  * Given a table containing N rows with n distinct values in a
3466                                  * uniform distribution, if we select p rows at random then
3467                                  * the expected number of distinct values selected is
3468                                  *
3469                                  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3470                                  *
3471                                  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3472                                  *
3473                                  * See "Approximating block accesses in database
3474                                  * organizations", S. B. Yao, Communications of the ACM,
3475                                  * Volume 20 Issue 4, April 1977 Pages 260-261.
3476                                  *
3477                                  * Alternatively, re-arranging the terms from the factorials,
3478                                  * this may be written as
3479                                  *
3480                                  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3481                                  *
3482                                  * This form of the formula is more efficient to compute in
3483                                  * the common case where p is larger than N/n.  Additionally,
3484                                  * as pointed out by Dell'Era, if i << N for all terms in the
3485                                  * product, it can be approximated by
3486                                  *
3487                                  * n * (1 - ((N-p)/N)^(N/n))
3488                                  *
3489                                  * See "Expected distinct values when selecting from a bag
3490                                  * without replacement", Alberto Dell'Era,
3491                                  * http://www.adellera.it/investigations/distinct_balls/.
3492                                  *
3493                                  * The condition i << N is equivalent to n >> 1, so this is a
3494                                  * good approximation when the number of distinct values in
3495                                  * the table is large.  It turns out that this formula also
3496                                  * works well even when n is small.
3497                                  */
3498                                 reldistinct *=
3499                                         (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3500                                                          rel->tuples / reldistinct));
3501                         }
3502                         reldistinct = clamp_row_est(reldistinct);
3503
3504                         /*
3505                          * Update estimate of total distinct groups.
3506                          */
3507                         numdistinct *= reldistinct;
3508                 }
3509
3510                 varinfos = newvarinfos;
3511         } while (varinfos != NIL);
3512
3513         numdistinct = ceil(numdistinct);
3514
3515         /* Guard against out-of-range answers */
3516         if (numdistinct > input_rows)
3517                 numdistinct = input_rows;
3518         if (numdistinct < 1.0)
3519                 numdistinct = 1.0;
3520
3521         return numdistinct;
3522 }
3523
3524 /*
3525  * Estimate hash bucketsize fraction (ie, number of entries in a bucket
3526  * divided by total tuples in relation) if the specified expression is used
3527  * as a hash key.
3528  *
3529  * XXX This is really pretty bogus since we're effectively assuming that the
3530  * distribution of hash keys will be the same after applying restriction
3531  * clauses as it was in the underlying relation.  However, we are not nearly
3532  * smart enough to figure out how the restrict clauses might change the
3533  * distribution, so this will have to do for now.
3534  *
3535  * We are passed the number of buckets the executor will use for the given
3536  * input relation.  If the data were perfectly distributed, with the same
3537  * number of tuples going into each available bucket, then the bucketsize
3538  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
3539  * only if (a) there are at least nbuckets distinct data values, and (b)
3540  * we have a not-too-skewed data distribution.  Otherwise the buckets will
3541  * be nonuniformly occupied.  If the other relation in the join has a key
3542  * distribution similar to this one's, then the most-loaded buckets are
3543  * exactly those that will be probed most often.  Therefore, the "average"
3544  * bucket size for costing purposes should really be taken as something close
3545  * to the "worst case" bucket size.  We try to estimate this by adjusting the
3546  * fraction if there are too few distinct data values, and then scaling up
3547  * by the ratio of the most common value's frequency to the average frequency.
3548  *
3549  * If no statistics are available, use a default estimate of 0.1.  This will
3550  * discourage use of a hash rather strongly if the inner relation is large,
3551  * which is what we want.  We do not want to hash unless we know that the
3552  * inner rel is well-dispersed (or the alternatives seem much worse).
3553  */
3554 Selectivity
3555 estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
3556 {
3557         VariableStatData vardata;
3558         double          estfract,
3559                                 ndistinct,
3560                                 stanullfrac,
3561                                 mcvfreq,
3562                                 avgfreq;
3563         bool            isdefault;
3564         AttStatsSlot sslot;
3565
3566         examine_variable(root, hashkey, 0, &vardata);
3567
3568         /* Get number of distinct values */
3569         ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3570
3571         /* If ndistinct isn't real, punt and return 0.1, per comments above */
3572         if (isdefault)
3573         {
3574                 ReleaseVariableStats(vardata);
3575                 return (Selectivity) 0.1;
3576         }
3577
3578         /* Get fraction that are null */
3579         if (HeapTupleIsValid(vardata.statsTuple))
3580         {
3581                 Form_pg_statistic stats;
3582
3583                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3584                 stanullfrac = stats->stanullfrac;
3585         }
3586         else
3587                 stanullfrac = 0.0;
3588
3589         /* Compute avg freq of all distinct data values in raw relation */
3590         avgfreq = (1.0 - stanullfrac) / ndistinct;
3591
3592         /*
3593          * Adjust ndistinct to account for restriction clauses.  Observe we are
3594          * assuming that the data distribution is affected uniformly by the
3595          * restriction clauses!
3596          *
3597          * XXX Possibly better way, but much more expensive: multiply by
3598          * selectivity of rel's restriction clauses that mention the target Var.
3599          */
3600         if (vardata.rel && vardata.rel->tuples > 0)
3601         {
3602                 ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3603                 ndistinct = clamp_row_est(ndistinct);
3604         }
3605
3606         /*
3607          * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3608          * number of buckets is less than the expected number of distinct values;
3609          * otherwise it is 1/ndistinct.
3610          */
3611         if (ndistinct > nbuckets)
3612                 estfract = 1.0 / nbuckets;
3613         else
3614                 estfract = 1.0 / ndistinct;
3615
3616         /*
3617          * Look up the frequency of the most common value, if available.
3618          */
3619         mcvfreq = 0.0;
3620
3621         if (HeapTupleIsValid(vardata.statsTuple))
3622         {
3623                 if (get_attstatsslot(&sslot, vardata.statsTuple,
3624                                                          STATISTIC_KIND_MCV, InvalidOid,
3625                                                          ATTSTATSSLOT_NUMBERS))
3626                 {
3627                         /*
3628                          * The first MCV stat is for the most common value.
3629                          */
3630                         if (sslot.nnumbers > 0)
3631                                 mcvfreq = sslot.numbers[0];
3632                         free_attstatsslot(&sslot);
3633                 }
3634         }
3635
3636         /*
3637          * Adjust estimated bucketsize upward to account for skewed distribution.
3638          */
3639         if (avgfreq > 0.0 && mcvfreq > avgfreq)
3640                 estfract *= mcvfreq / avgfreq;
3641
3642         /*
3643          * Clamp bucketsize to sane range (the above adjustment could easily
3644          * produce an out-of-range result).  We set the lower bound a little above
3645          * zero, since zero isn't a very sane result.
3646          */
3647         if (estfract < 1.0e-6)
3648                 estfract = 1.0e-6;
3649         else if (estfract > 1.0)
3650                 estfract = 1.0;
3651
3652         ReleaseVariableStats(vardata);
3653
3654         return (Selectivity) estfract;
3655 }
3656
3657
3658 /*-------------------------------------------------------------------------
3659  *
3660  * Support routines
3661  *
3662  *-------------------------------------------------------------------------
3663  */
3664
3665 /*
3666  * Find applicable ndistinct statistics for the given list of VarInfos (which
3667  * must all belong to the given rel), and update *ndistinct to the estimate of
3668  * the MVNDistinctItem that best matches.  If a match it found, *varinfos is
3669  * updated to remove the list of matched varinfos.
3670  *
3671  * Varinfos that aren't for simple Vars are ignored.
3672  *
3673  * Return TRUE if we're able to find a match, FALSE otherwise.
3674  */
3675 static bool
3676 estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
3677                                                                 List **varinfos, double *ndistinct)
3678 {
3679         ListCell   *lc;
3680         Bitmapset  *attnums = NULL;
3681         int                     nmatches;
3682         Oid                     statOid = InvalidOid;
3683         MVNDistinct *stats;
3684         Bitmapset  *matched = NULL;
3685
3686         /* bail out immediately if the table has no extended statistics */
3687         if (!rel->statlist)
3688                 return false;
3689
3690         /* Determine the attnums we're looking for */
3691         foreach(lc, *varinfos)
3692         {
3693                 GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3694
3695                 Assert(varinfo->rel == rel);
3696
3697                 if (IsA(varinfo->var, Var))
3698                 {
3699                         attnums = bms_add_member(attnums,
3700                                                                          ((Var *) varinfo->var)->varattno);
3701                 }
3702         }
3703
3704         /* look for the ndistinct statistics matching the most vars */
3705         nmatches = 1;                           /* we require at least two matches */
3706         foreach(lc, rel->statlist)
3707         {
3708                 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
3709                 Bitmapset  *shared;
3710                 int                     nshared;
3711
3712                 /* skip statistics of other kinds */
3713                 if (info->kind != STATS_EXT_NDISTINCT)
3714                         continue;
3715
3716                 /* compute attnums shared by the vars and the statistics object */
3717                 shared = bms_intersect(info->keys, attnums);
3718                 nshared = bms_num_members(shared);
3719
3720                 /*
3721                  * Does this statistics object match more columns than the currently
3722                  * best object?  If so, use this one instead.
3723                  *
3724                  * XXX This should break ties using name of the object, or something
3725                  * like that, to make the outcome stable.
3726                  */
3727                 if (nshared > nmatches)
3728                 {
3729                         statOid = info->statOid;
3730                         nmatches = nshared;
3731                         matched = shared;
3732                 }
3733         }
3734
3735         /* No match? */
3736         if (statOid == InvalidOid)
3737                 return false;
3738         Assert(nmatches > 1 && matched != NULL);
3739
3740         stats = statext_ndistinct_load(statOid);
3741
3742         /*
3743          * If we have a match, search it for the specific item that matches (there
3744          * must be one), and construct the output values.
3745          */
3746         if (stats)
3747         {
3748                 int                     i;
3749                 List       *newlist = NIL;
3750                 MVNDistinctItem *item = NULL;
3751
3752                 /* Find the specific item that exactly matches the combination */
3753                 for (i = 0; i < stats->nitems; i++)
3754                 {
3755                         MVNDistinctItem *tmpitem = &stats->items[i];
3756
3757                         if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
3758                         {
3759                                 item = tmpitem;
3760                                 break;
3761                         }
3762                 }
3763
3764                 /* make sure we found an item */
3765                 if (!item)
3766                         elog(ERROR, "corrupt MVNDistinct entry");
3767
3768                 /* Form the output varinfo list, keeping only unmatched ones */
3769                 foreach(lc, *varinfos)
3770                 {
3771                         GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3772                         AttrNumber      attnum;
3773
3774                         if (!IsA(varinfo->var, Var))
3775                         {
3776                                 newlist = lappend(newlist, varinfo);
3777                                 continue;
3778                         }
3779
3780                         attnum = ((Var *) varinfo->var)->varattno;
3781                         if (!bms_is_member(attnum, matched))
3782                                 newlist = lappend(newlist, varinfo);
3783                 }
3784
3785                 *varinfos = newlist;
3786                 *ndistinct = item->ndistinct;
3787                 return true;
3788         }
3789
3790         return false;
3791 }
3792
3793 /*
3794  * convert_to_scalar
3795  *        Convert non-NULL values of the indicated types to the comparison
3796  *        scale needed by scalarineqsel().
3797  *        Returns "true" if successful.
3798  *
3799  * XXX this routine is a hack: ideally we should look up the conversion
3800  * subroutines in pg_type.
3801  *
3802  * All numeric datatypes are simply converted to their equivalent
3803  * "double" values.  (NUMERIC values that are outside the range of "double"
3804  * are clamped to +/- HUGE_VAL.)
3805  *
3806  * String datatypes are converted by convert_string_to_scalar(),
3807  * which is explained below.  The reason why this routine deals with
3808  * three values at a time, not just one, is that we need it for strings.
3809  *
3810  * The bytea datatype is just enough different from strings that it has
3811  * to be treated separately.
3812  *
3813  * The several datatypes representing absolute times are all converted
3814  * to Timestamp, which is actually a double, and then we just use that
3815  * double value.  Note this will give correct results even for the "special"
3816  * values of Timestamp, since those are chosen to compare correctly;
3817  * see timestamp_cmp.
3818  *
3819  * The several datatypes representing relative times (intervals) are all
3820  * converted to measurements expressed in seconds.
3821  */
3822 static bool
3823 convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
3824                                   Datum lobound, Datum hibound, Oid boundstypid,
3825                                   double *scaledlobound, double *scaledhibound)
3826 {
3827         /*
3828          * Both the valuetypid and the boundstypid should exactly match the
3829          * declared input type(s) of the operator we are invoked for, so we just
3830          * error out if either is not recognized.
3831          *
3832          * XXX The histogram we are interpolating between points of could belong
3833          * to a column that's only binary-compatible with the declared type. In
3834          * essence we are assuming that the semantics of binary-compatible types
3835          * are enough alike that we can use a histogram generated with one type's
3836          * operators to estimate selectivity for the other's.  This is outright
3837          * wrong in some cases --- in particular signed versus unsigned
3838          * interpretation could trip us up.  But it's useful enough in the
3839          * majority of cases that we do it anyway.  Should think about more
3840          * rigorous ways to do it.
3841          */
3842         switch (valuetypid)
3843         {
3844                         /*
3845                          * Built-in numeric types
3846                          */
3847                 case BOOLOID:
3848                 case INT2OID:
3849                 case INT4OID:
3850                 case INT8OID:
3851                 case FLOAT4OID:
3852                 case FLOAT8OID:
3853                 case NUMERICOID:
3854                 case OIDOID:
3855                 case REGPROCOID:
3856                 case REGPROCEDUREOID:
3857                 case REGOPEROID:
3858                 case REGOPERATOROID:
3859                 case REGCLASSOID:
3860                 case REGTYPEOID:
3861                 case REGCONFIGOID:
3862                 case REGDICTIONARYOID:
3863                 case REGROLEOID:
3864                 case REGNAMESPACEOID:
3865                         *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
3866                         *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
3867                         *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
3868                         return true;
3869
3870                         /*
3871                          * Built-in string types
3872                          */
3873                 case CHAROID:
3874                 case BPCHAROID:
3875                 case VARCHAROID:
3876                 case TEXTOID:
3877                 case NAMEOID:
3878                         {
3879                                 char       *valstr = convert_string_datum(value, valuetypid);
3880                                 char       *lostr = convert_string_datum(lobound, boundstypid);
3881                                 char       *histr = convert_string_datum(hibound, boundstypid);
3882
3883                                 convert_string_to_scalar(valstr, scaledvalue,
3884                                                                                  lostr, scaledlobound,
3885                                                                                  histr, scaledhibound);
3886                                 pfree(valstr);
3887                                 pfree(lostr);
3888                                 pfree(histr);
3889                                 return true;
3890                         }
3891
3892                         /*
3893                          * Built-in bytea type
3894                          */
3895                 case BYTEAOID:
3896                         {
3897                                 convert_bytea_to_scalar(value, scaledvalue,
3898                                                                                 lobound, scaledlobound,
3899                                                                                 hibound, scaledhibound);
3900                                 return true;
3901                         }
3902
3903                         /*
3904                          * Built-in time types
3905                          */
3906                 case TIMESTAMPOID:
3907                 case TIMESTAMPTZOID:
3908                 case ABSTIMEOID:
3909                 case DATEOID:
3910                 case INTERVALOID:
3911                 case RELTIMEOID:
3912                 case TINTERVALOID:
3913                 case TIMEOID:
3914                 case TIMETZOID:
3915                         *scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
3916                         *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
3917                         *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
3918                         return true;
3919
3920                         /*
3921                          * Built-in network types
3922                          */
3923                 case INETOID:
3924                 case CIDROID:
3925                 case MACADDROID:
3926                 case MACADDR8OID:
3927                         *scaledvalue = convert_network_to_scalar(value, valuetypid);
3928                         *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
3929                         *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
3930                         return true;
3931         }
3932         /* Don't know how to convert */
3933         *scaledvalue = *scaledlobound = *scaledhibound = 0;
3934         return false;
3935 }
3936
3937 /*
3938  * Do convert_to_scalar()'s work for any numeric data type.
3939  */
3940 static double
3941 convert_numeric_to_scalar(Datum value, Oid typid)
3942 {
3943         switch (typid)
3944         {
3945                 case BOOLOID:
3946                         return (double) DatumGetBool(value);
3947                 case INT2OID:
3948                         return (double) DatumGetInt16(value);
3949                 case INT4OID:
3950                         return (double) DatumGetInt32(value);
3951                 case INT8OID:
3952                         return (double) DatumGetInt64(value);
3953                 case FLOAT4OID:
3954                         return (double) DatumGetFloat4(value);
3955                 case FLOAT8OID:
3956                         return (double) DatumGetFloat8(value);
3957                 case NUMERICOID:
3958                         /* Note: out-of-range values will be clamped to +-HUGE_VAL */
3959                         return (double)
3960                                 DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
3961                                                                                                    value));
3962                 case OIDOID:
3963                 case REGPROCOID:
3964                 case REGPROCEDUREOID:
3965                 case REGOPEROID:
3966                 case REGOPERATOROID:
3967                 case REGCLASSOID:
3968                 case REGTYPEOID:
3969                 case REGCONFIGOID:
3970                 case REGDICTIONARYOID:
3971                 case REGROLEOID:
3972                 case REGNAMESPACEOID:
3973                         /* we can treat OIDs as integers... */
3974                         return (double) DatumGetObjectId(value);
3975         }
3976
3977         /*
3978          * Can't get here unless someone tries to use scalarltsel/scalargtsel on
3979          * an operator with one numeric and one non-numeric operand.
3980          */
3981         elog(ERROR, "unsupported type: %u", typid);
3982         return 0;
3983 }
3984
/*
 * convert_string_to_scalar
 *		String-family worker for convert_to_scalar().
 *
 * Each string is mapped into the range 0..1 by reading its bytes as the
 * digits of a fraction.
 *
 * Base 256 would be a poor choice of radix: hardly any column contains all
 * 256 byte values in every position, so the resulting selectivity estimates
 * would be inflated.  We instead take the smallest and largest byte values
 * that occur in the two bounds as the per-byte range, and then fudge that
 * range outward since the bounds alone are unlikely to show the whole range.
 *
 * Before scaling, whatever prefix the three strings have in common is
 * dropped, which lets us "zoom in" on a narrow data range --- think of a
 * table of phone numbers that all share one area code.  (Since the bounds
 * are adjacent histogram bin boundaries, this happens more often than one
 * might expect.)
 */
static void
convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound)
{
	/* character classes that are widened to their full extent, in order */
	static const char classes[3][2] = {
		{'A', 'Z'},				/* upper-case ASCII */
		{'a', 'z'},				/* lower-case ASCII */
		{'0', '9'}				/* digits */
	};
	const char *bounds[2];
	int			lo,
				hi;
	int			k;

	/* Find the extreme byte values appearing in either bound */
	lo = hi = (unsigned char) hibound[0];
	bounds[0] = lobound;
	bounds[1] = hibound;
	for (k = 0; k < 2; k++)
	{
		const unsigned char *p;

		for (p = (const unsigned char *) bounds[k]; *p != '\0'; p++)
		{
			if (*p < lo)
				lo = *p;
			if (*p > hi)
				hi = *p;
		}
	}

	/* If the range touches a character class, stretch it over all of it */
	for (k = 0; k < 3; k++)
	{
		int			first = classes[k][0];
		int			last = classes[k][1];

		if (lo <= last && hi >= first)
		{
			if (lo > first)
				lo = first;
			if (hi < last)
				hi = last;
		}
	}

	/*
	 * A range of fewer than 10 characters means we have too little to go on;
	 * substitute the regular ASCII set.
	 */
	if (hi - lo < 9)
	{
		lo = ' ';
		hi = 127;
	}

	/* Skip over the prefix shared by all three strings */
	while (*lobound != '\0' &&
		   *lobound == *hibound &&
		   *lobound == *value)
	{
		lobound++;
		hibound++;
		value++;
	}

	/* Scale what is left of each string */
	*scaledvalue = convert_one_string_to_scalar(value, lo, hi);
	*scaledlobound = convert_one_string_to_scalar(lobound, lo, hi);
	*scaledhibound = convert_one_string_to_scalar(hibound, lo, hi);
}
4084
/*
 * Scale one NUL-terminated string to a fraction, treating each byte as a
 * digit in base (rangehi - rangelo + 1).  Bytes outside rangelo..rangehi
 * are clamped to one step beyond the nearer end of the range.
 */
static double
convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
{
	const unsigned char *bytes = (const unsigned char *) value;
	int			nbytes = strlen(value);
	double		base = rangehi - rangelo + 1;
	double		scale = base;
	double		result = 0.0;	/* an empty string scales to 0 */
	int			i;

	/*
	 * Looking at more than a dozen bytes buys nothing.  With a base of at
	 * least 10 that is already a nominal 12 decimal digits of resolution,
	 * far beyond what this estimation method can justify (all the more so in
	 * non-C locales).  It also bounds the divisor: even at the largest
	 * possible base of 256 it stays below 256^13 = 2.03e31, which cannot
	 * overflow on any known machine.
	 */
	if (nbytes > 12)
		nbytes = 12;

	/* Accumulate the leading bytes as successive fractional digits */
	for (i = 0; i < nbytes; i++)
	{
		int			digit = bytes[i];

		if (digit < rangelo)
			digit = rangelo - 1;
		else if (digit > rangehi)
			digit = rangehi + 1;
		result += ((double) (digit - rangelo)) / scale;
		scale *= base;
	}

	return result;
}
4126
4127 /*
4128  * Convert a string-type Datum into a palloc'd, null-terminated string.
4129  *
4130  * When using a non-C locale, we must pass the string through strxfrm()
4131  * before continuing, so as to generate correct locale-specific results.
4132  */
4133 static char *
4134 convert_string_datum(Datum value, Oid typid)
4135 {
4136         char       *val;
4137
4138         switch (typid)
4139         {
4140                 case CHAROID:
4141                         val = (char *) palloc(2);
4142                         val[0] = DatumGetChar(value);
4143                         val[1] = '\0';
4144                         break;
4145                 case BPCHAROID:
4146                 case VARCHAROID:
4147                 case TEXTOID:
4148                         val = TextDatumGetCString(value);
4149                         break;
4150                 case NAMEOID:
4151                         {
4152                                 NameData   *nm = (NameData *) DatumGetPointer(value);
4153
4154                                 val = pstrdup(NameStr(*nm));
4155                                 break;
4156                         }
4157                 default:
4158
4159                         /*
4160                          * Can't get here unless someone tries to use scalarltsel on an
4161                          * operator with one string and one non-string operand.
4162                          */
4163                         elog(ERROR, "unsupported type: %u", typid);
4164                         return NULL;
4165         }
4166
4167         if (!lc_collate_is_c(DEFAULT_COLLATION_OID))
4168         {
4169                 char       *xfrmstr;
4170                 size_t          xfrmlen;
4171                 size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4172
4173                 /*
4174                  * XXX: We could guess at a suitable output buffer size and only call
4175                  * strxfrm twice if our guess is too small.
4176                  *
4177                  * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
4178                  * bogus data or set an error. This is not really a problem unless it
4179                  * crashes since it will only give an estimation error and nothing
4180                  * fatal.
4181                  */
4182 #if _MSC_VER == 1400                    /* VS.Net 2005 */
4183
4184                 /*
4185                  *
4186                  * http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?
4187                  * FeedbackID=99694 */
4188                 {
4189                         char            x[1];
4190
4191                         xfrmlen = strxfrm(x, val, 0);
4192                 }
4193 #else
4194                 xfrmlen = strxfrm(NULL, val, 0);
4195 #endif
4196 #ifdef WIN32
4197
4198                 /*
4199                  * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4200                  * of trying to allocate this much memory (and fail), just return the
4201                  * original string unmodified as if we were in the C locale.
4202                  */
4203                 if (xfrmlen == INT_MAX)
4204                         return val;
4205 #endif
4206                 xfrmstr = (char *) palloc(xfrmlen + 1);
4207                 xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4208
4209                 /*
4210                  * Some systems (e.g., glibc) can return a smaller value from the
4211                  * second call than the first; thus the Assert must be <= not ==.
4212                  */
4213                 Assert(xfrmlen2 <= xfrmlen);
4214                 pfree(val);
4215                 val = xfrmstr;
4216         }
4217
4218         return val;
4219 }
4220
4221 /*
4222  * Do convert_to_scalar()'s work for any bytea data type.
4223  *
4224  * Very similar to convert_string_to_scalar except we can't assume
4225  * null-termination and therefore pass explicit lengths around.
4226  *
4227  * Also, assumptions about likely "normal" ranges of characters have been
4228  * removed - a data range of 0..255 is always used, for now.  (Perhaps
4229  * someday we will add information about actual byte data range to
4230  * pg_statistic.)
4231  */
4232 static void
4233 convert_bytea_to_scalar(Datum value,
4234                                                 double *scaledvalue,
4235                                                 Datum lobound,
4236                                                 double *scaledlobound,
4237                                                 Datum hibound,
4238                                                 double *scaledhibound)
4239 {
4240         int                     rangelo,
4241                                 rangehi,
4242                                 valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
4243                                 loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
4244                                 hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
4245                                 i,
4246                                 minlen;
4247         unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
4248                            *lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
4249                            *histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
4250
4251         /*
4252          * Assume bytea data is uniformly distributed across all byte values.
4253          */
4254         rangelo = 0;
4255         rangehi = 255;
4256
4257         /*
4258          * Now strip any common prefix of the three strings.
4259          */
4260         minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4261         for (i = 0; i < minlen; i++)
4262         {
4263                 if (*lostr != *histr || *lostr != *valstr)
4264                         break;
4265                 lostr++, histr++, valstr++;
4266                 loboundlen--, hiboundlen--, valuelen--;
4267         }
4268
4269         /*
4270          * Now we can do the conversions.
4271          */
4272         *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4273         *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4274         *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4275 }
4276
/*
 * Scale one counted byte string to a fraction, treating each byte as a
 * digit in base (rangehi - rangelo + 1).  Bytes outside rangelo..rangehi
 * are clamped to one step beyond the nearer end of the range.
 */
static double
convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi)
{
	const unsigned char *stop;
	double		base;
	double		scale;
	double		result;

	/* an empty string scales to 0 */
	if (valuelen <= 0)
		return 0.0;

	/*
	 * With a base of 256 there is no need to look at more than about 10
	 * bytes (and even that is probably overkill).
	 */
	if (valuelen > 10)
		valuelen = 10;

	/* Accumulate the leading bytes as successive fractional digits */
	base = rangehi - rangelo + 1;
	scale = base;
	result = 0.0;
	for (stop = value + valuelen; value < stop; value++)
	{
		int			digit = *value;

		if (digit < rangelo)
			digit = rangelo - 1;
		else if (digit > rangehi)
			digit = rangehi + 1;
		result += ((double) (digit - rangelo)) / scale;
		scale *= base;
	}

	return result;
}
4313
4314 /*
4315  * Do convert_to_scalar()'s work for any timevalue data type.
4316  */
4317 static double
4318 convert_timevalue_to_scalar(Datum value, Oid typid)
4319 {
4320         switch (typid)
4321         {
4322                 case TIMESTAMPOID:
4323                         return DatumGetTimestamp(value);
4324                 case TIMESTAMPTZOID:
4325                         return DatumGetTimestampTz(value);
4326                 case ABSTIMEOID:
4327                         return DatumGetTimestamp(DirectFunctionCall1(abstime_timestamp,
4328                                                                                                                  value));
4329                 case DATEOID:
4330                         return date2timestamp_no_overflow(DatumGetDateADT(value));
4331                 case INTERVALOID:
4332                         {
4333                                 Interval   *interval = DatumGetIntervalP(value);
4334
4335                                 /*
4336                                  * Convert the month part of Interval to days using assumed
4337                                  * average month length of 365.25/12.0 days.  Not too
4338                                  * accurate, but plenty good enough for our purposes.
4339                                  */
4340                                 return interval->time + interval->day * (double) USECS_PER_DAY +
4341                                         interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4342                         }
4343                 case RELTIMEOID:
4344                         return (DatumGetRelativeTime(value) * 1000000.0);
4345                 case TINTERVALOID:
4346                         {
4347                                 TimeInterval tinterval = DatumGetTimeInterval(value);
4348
4349                                 if (tinterval->status != 0)
4350                                         return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
4351                                 return 0;               /* for lack of a better idea */
4352                         }
4353                 case TIMEOID:
4354                         return DatumGetTimeADT(value);
4355                 case TIMETZOID:
4356                         {
4357                                 TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
4358
4359                                 /* use GMT-equivalent time */
4360                                 return (double) (timetz->time + (timetz->zone * 1000000.0));
4361                         }
4362         }
4363
4364         /*
4365          * Can't get here unless someone tries to use scalarltsel/scalargtsel on
4366          * an operator with one timevalue and one non-timevalue operand.
4367          */
4368         elog(ERROR, "unsupported type: %u", typid);
4369         return 0;
4370 }
4371
4372
4373 /*
4374  * get_restriction_variable
4375  *              Examine the args of a restriction clause to see if it's of the
4376  *              form (variable op pseudoconstant) or (pseudoconstant op variable),
4377  *              where "variable" could be either a Var or an expression in vars of a
4378  *              single relation.  If so, extract information about the variable,
4379  *              and also indicate which side it was on and the other argument.
4380  *
4381  * Inputs:
4382  *      root: the planner info
4383  *      args: clause argument list
4384  *      varRelid: see specs for restriction selectivity functions
4385  *
4386  * Outputs: (these are valid only if TRUE is returned)
4387  *      *vardata: gets information about variable (see examine_variable)
4388  *      *other: gets other clause argument, aggressively reduced to a constant
4389  *      *varonleft: set TRUE if variable is on the left, FALSE if on the right
4390  *
4391  * Returns TRUE if a variable is identified, otherwise FALSE.
4392  *
4393  * Note: if there are Vars on both sides of the clause, we must fail, because
4394  * callers are expecting that the other side will act like a pseudoconstant.
4395  */
4396 bool
4397 get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
4398                                                  VariableStatData *vardata, Node **other,
4399                                                  bool *varonleft)
4400 {
4401         Node       *left,
4402                            *right;
4403         VariableStatData rdata;
4404
4405         /* Fail if not a binary opclause (probably shouldn't happen) */
4406         if (list_length(args) != 2)
4407                 return false;
4408
4409         left = (Node *) linitial(args);
4410         right = (Node *) lsecond(args);
4411
4412         /*
4413          * Examine both sides.  Note that when varRelid is nonzero, Vars of other
4414          * relations will be treated as pseudoconstants.
4415          */
4416         examine_variable(root, left, varRelid, vardata);
4417         examine_variable(root, right, varRelid, &rdata);
4418
4419         /*
4420          * If one side is a variable and the other not, we win.
4421          */
4422         if (vardata->rel && rdata.rel == NULL)
4423         {
4424                 *varonleft = true;
4425                 *other = estimate_expression_value(root, rdata.var);
4426                 /* Assume we need no ReleaseVariableStats(rdata) here */
4427                 return true;
4428         }
4429
4430         if (vardata->rel == NULL && rdata.rel)
4431         {
4432                 *varonleft = false;
4433                 *other = estimate_expression_value(root, vardata->var);
4434                 /* Assume we need no ReleaseVariableStats(*vardata) here */
4435                 *vardata = rdata;
4436                 return true;
4437         }
4438
4439         /* Oops, clause has wrong structure (probably var op var) */
4440         ReleaseVariableStats(*vardata);
4441         ReleaseVariableStats(rdata);
4442
4443         return false;
4444 }
4445
4446 /*
4447  * get_join_variables
4448  *              Apply examine_variable() to each side of a join clause.
4449  *              Also, attempt to identify whether the join clause has the same
4450  *              or reversed sense compared to the SpecialJoinInfo.
4451  *
4452  * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
4453  * or "reversed" if it is "rhs_var OP lhs_var".  In complicated cases
4454  * where we can't tell for sure, we default to assuming it's normal.
4455  */
4456 void
4457 get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
4458                                    VariableStatData *vardata1, VariableStatData *vardata2,
4459                                    bool *join_is_reversed)
4460 {
4461         Node       *left,
4462                            *right;
4463
4464         if (list_length(args) != 2)
4465                 elog(ERROR, "join operator should take two arguments");
4466
4467         left = (Node *) linitial(args);
4468         right = (Node *) lsecond(args);
4469
4470         examine_variable(root, left, 0, vardata1);
4471         examine_variable(root, right, 0, vardata2);
4472
4473         if (vardata1->rel &&
4474                 bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4475                 *join_is_reversed = true;               /* var1 is on RHS */
4476         else if (vardata2->rel &&
4477                          bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4478                 *join_is_reversed = true;               /* var2 is on LHS */
4479         else
4480                 *join_is_reversed = false;
4481 }
4482
4483 /*
4484  * examine_variable
4485  *              Try to look up statistical data about an expression.
4486  *              Fill in a VariableStatData struct to describe the expression.
4487  *
4488  * Inputs:
4489  *      root: the planner info
4490  *      node: the expression tree to examine
4491  *      varRelid: see specs for restriction selectivity functions
4492  *
4493  * Outputs: *vardata is filled as follows:
4494  *      var: the input expression (with any binary relabeling stripped, if
4495  *              it is or contains a variable; but otherwise the type is preserved)
4496  *      rel: RelOptInfo for relation containing variable; NULL if expression
4497  *              contains no Vars (NOTE this could point to a RelOptInfo of a
4498  *              subquery, not one in the current query).
4499  *      statsTuple: the pg_statistic entry for the variable, if one exists;
4500  *              otherwise NULL.
4501  *      freefunc: pointer to a function to release statsTuple with.
4502  *      vartype: exposed type of the expression; this should always match
4503  *              the declared input type of the operator we are estimating for.
4504  *      atttype, atttypmod: actual type/typmod of the "var" expression.  This is
4505  *              commonly the same as the exposed type of the variable argument,
4506  *              but can be different in binary-compatible-type cases.
4507  *      isunique: TRUE if we were able to match the var to a unique index or a
4508  *              single-column DISTINCT clause, implying its values are unique for
4509  *              this query.  (Caution: this should be trusted for statistical
4510  *              purposes only, since we do not check indimmediate nor verify that
4511  *              the exact same definition of equality applies.)
4512  *      acl_ok: TRUE if current user has permission to read the column(s)
4513  *              underlying the pg_statistic entry.  This is consulted by
4514  *              statistic_proc_security_check().
4515  *
4516  * Caller is responsible for doing ReleaseVariableStats() before exiting.
4517  */
4518 void
4519 examine_variable(PlannerInfo *root, Node *node, int varRelid,
4520                                  VariableStatData *vardata)
4521 {
4522         Node       *basenode;
4523         Relids          varnos;
4524         RelOptInfo *onerel;
4525
4526         /* Make sure we don't return dangling pointers in vardata */
4527         MemSet(vardata, 0, sizeof(VariableStatData));
4528
4529         /* Save the exposed type of the expression */
4530         vardata->vartype = exprType(node);
4531
4532         /* Look inside any binary-compatible relabeling */
4533
4534         if (IsA(node, RelabelType))
4535                 basenode = (Node *) ((RelabelType *) node)->arg;
4536         else
4537                 basenode = node;
4538
4539         /* Fast path for a simple Var */
4540
4541         if (IsA(basenode, Var) &&
4542                 (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4543         {
4544                 Var                *var = (Var *) basenode;
4545
4546                 /* Set up result fields other than the stats tuple */
4547                 vardata->var = basenode;        /* return Var without relabeling */
4548                 vardata->rel = find_base_rel(root, var->varno);
4549                 vardata->atttype = var->vartype;
4550                 vardata->atttypmod = var->vartypmod;
4551                 vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4552
4553                 /* Try to locate some stats */
4554                 examine_simple_variable(root, var, vardata);
4555
4556                 return;
4557         }
4558
4559         /*
4560          * Okay, it's a more complicated expression.  Determine variable
4561          * membership.  Note that when varRelid isn't zero, only vars of that
4562          * relation are considered "real" vars.
4563          */
4564         varnos = pull_varnos(basenode);
4565
4566         onerel = NULL;
4567
4568         switch (bms_membership(varnos))
4569         {
4570                 case BMS_EMPTY_SET:
4571                         /* No Vars at all ... must be pseudo-constant clause */
4572                         break;
4573                 case BMS_SINGLETON:
4574                         if (varRelid == 0 || bms_is_member(varRelid, varnos))
4575                         {
4576                                 onerel = find_base_rel(root,
4577                                            (varRelid ? varRelid : bms_singleton_member(varnos)));
4578                                 vardata->rel = onerel;
4579                                 node = basenode;        /* strip any relabeling */
4580                         }
4581                         /* else treat it as a constant */
4582                         break;
4583                 case BMS_MULTIPLE:
4584                         if (varRelid == 0)
4585                         {
4586                                 /* treat it as a variable of a join relation */
4587                                 vardata->rel = find_join_rel(root, varnos);
4588                                 node = basenode;        /* strip any relabeling */
4589                         }
4590                         else if (bms_is_member(varRelid, varnos))
4591                         {
4592                                 /* ignore the vars belonging to other relations */
4593                                 vardata->rel = find_base_rel(root, varRelid);
4594                                 node = basenode;        /* strip any relabeling */
4595                                 /* note: no point in expressional-index search here */
4596                         }
4597                         /* else treat it as a constant */
4598                         break;
4599         }
4600
4601         bms_free(varnos);
4602
4603         vardata->var = node;
4604         vardata->atttype = exprType(node);
4605         vardata->atttypmod = exprTypmod(node);
4606
4607         if (onerel)
4608         {
4609                 /*
4610                  * We have an expression in vars of a single relation.  Try to match
4611                  * it to expressional index columns, in hopes of finding some
4612                  * statistics.
4613                  *
4614                  * XXX it's conceivable that there are multiple matches with different
4615                  * index opfamilies; if so, we need to pick one that matches the
4616                  * operator we are estimating for.  FIXME later.
4617                  */
4618                 ListCell   *ilist;
4619
4620                 foreach(ilist, onerel->indexlist)
4621                 {
4622                         IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4623                         ListCell   *indexpr_item;
4624                         int                     pos;
4625
4626                         indexpr_item = list_head(index->indexprs);
4627                         if (indexpr_item == NULL)
4628                                 continue;               /* no expressions here... */
4629
4630                         for (pos = 0; pos < index->ncolumns; pos++)
4631                         {
4632                                 if (index->indexkeys[pos] == 0)
4633                                 {
4634                                         Node       *indexkey;
4635
4636                                         if (indexpr_item == NULL)
4637                                                 elog(ERROR, "too few entries in indexprs list");
4638                                         indexkey = (Node *) lfirst(indexpr_item);
4639                                         if (indexkey && IsA(indexkey, RelabelType))
4640                                                 indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4641                                         if (equal(node, indexkey))
4642                                         {
4643                                                 /*
4644                                                  * Found a match ... is it a unique index? Tests here
4645                                                  * should match has_unique_index().
4646                                                  */
4647                                                 if (index->unique &&
4648                                                         index->ncolumns == 1 &&
4649                                                         (index->indpred == NIL || index->predOK))
4650                                                         vardata->isunique = true;
4651
4652                                                 /*
4653                                                  * Has it got stats?  We only consider stats for
4654                                                  * non-partial indexes, since partial indexes probably
4655                                                  * don't reflect whole-relation statistics; the above
4656                                                  * check for uniqueness is the only info we take from
4657                                                  * a partial index.
4658                                                  *
4659                                                  * An index stats hook, however, must make its own
4660                                                  * decisions about what to do with partial indexes.
4661                                                  */
4662                                                 if (get_index_stats_hook &&
4663                                                         (*get_index_stats_hook) (root, index->indexoid,
4664                                                                                                          pos + 1, vardata))
4665                                                 {
4666                                                         /*
4667                                                          * The hook took control of acquiring a stats
4668                                                          * tuple.  If it did supply a tuple, it'd better
4669                                                          * have supplied a freefunc.
4670                                                          */
4671                                                         if (HeapTupleIsValid(vardata->statsTuple) &&
4672                                                                 !vardata->freefunc)
4673                                                                 elog(ERROR, "no function provided to release variable stats with");
4674                                                 }
4675                                                 else if (index->indpred == NIL)
4676                                                 {
4677                                                         vardata->statsTuple =
4678                                                                 SearchSysCache3(STATRELATTINH,
4679                                                                                    ObjectIdGetDatum(index->indexoid),
4680                                                                                                 Int16GetDatum(pos + 1),
4681                                                                                                 BoolGetDatum(false));
4682                                                         vardata->freefunc = ReleaseSysCache;
4683
4684                                                         if (HeapTupleIsValid(vardata->statsTuple))
4685                                                         {
4686                                                                 /* Get index's table for permission check */
4687                                                                 RangeTblEntry *rte;
4688
4689                                                                 rte = planner_rt_fetch(index->rel->relid, root);
4690                                                                 Assert(rte->rtekind == RTE_RELATION);
4691
4692                                                                 /*
4693                                                                  * For simplicity, we insist on the whole
4694                                                                  * table being selectable, rather than trying
4695                                                                  * to identify which column(s) the index
4696                                                                  * depends on.
4697                                                                  */
4698                                                                 vardata->acl_ok =
4699                                                                         (pg_class_aclcheck(rte->relid, GetUserId(),
4700                                                                                                  ACL_SELECT) == ACLCHECK_OK);
4701                                                         }
4702                                                         else
4703                                                         {
4704                                                                 /* suppress leakproofness checks later */
4705                                                                 vardata->acl_ok = true;
4706                                                         }
4707                                                 }
4708                                                 if (vardata->statsTuple)
4709                                                         break;
4710                                         }
4711                                         indexpr_item = lnext(indexpr_item);
4712                                 }
4713                         }
4714                         if (vardata->statsTuple)
4715                                 break;
4716                 }
4717         }
4718 }
4719
4720 /*
4721  * examine_simple_variable
4722  *              Handle a simple Var for examine_variable
4723  *
4724  * This is split out as a subroutine so that we can recurse to deal with
4725  * Vars referencing subqueries.
4726  *
4727  * We already filled in all the fields of *vardata except for the stats tuple.
4728  */
4729 static void
4730 examine_simple_variable(PlannerInfo *root, Var *var,
4731                                                 VariableStatData *vardata)
4732 {
4733         RangeTblEntry *rte = root->simple_rte_array[var->varno];
4734
4735         Assert(IsA(rte, RangeTblEntry));
4736
4737         if (get_relation_stats_hook &&
4738                 (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
4739         {
4740                 /*
4741                  * The hook took control of acquiring a stats tuple.  If it did supply
4742                  * a tuple, it'd better have supplied a freefunc.
4743                  */
4744                 if (HeapTupleIsValid(vardata->statsTuple) &&
4745                         !vardata->freefunc)
4746                         elog(ERROR, "no function provided to release variable stats with");
4747         }
4748         else if (rte->rtekind == RTE_RELATION)
4749         {
4750                 /*
4751                  * Plain table or parent of an inheritance appendrel, so look up the
4752                  * column in pg_statistic
4753                  */
4754                 vardata->statsTuple = SearchSysCache3(STATRELATTINH,
4755                                                                                           ObjectIdGetDatum(rte->relid),
4756                                                                                           Int16GetDatum(var->varattno),
4757                                                                                           BoolGetDatum(rte->inh));
4758                 vardata->freefunc = ReleaseSysCache;
4759
4760                 if (HeapTupleIsValid(vardata->statsTuple))
4761                 {
4762                         /* check if user has permission to read this column */
4763                         vardata->acl_ok =
4764                                 (pg_class_aclcheck(rte->relid, GetUserId(),
4765                                                                    ACL_SELECT) == ACLCHECK_OK) ||
4766                                 (pg_attribute_aclcheck(rte->relid, var->varattno, GetUserId(),
4767                                                                            ACL_SELECT) == ACLCHECK_OK);
4768                 }
4769                 else
4770                 {
4771                         /* suppress any possible leakproofness checks later */
4772                         vardata->acl_ok = true;
4773                 }
4774         }
4775         else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
4776         {
4777                 /*
4778                  * Plain subquery (not one that was converted to an appendrel).
4779                  */
4780                 Query      *subquery = rte->subquery;
4781                 RelOptInfo *rel;
4782                 TargetEntry *ste;
4783
4784                 /*
4785                  * Punt if it's a whole-row var rather than a plain column reference.
4786                  */
4787                 if (var->varattno == InvalidAttrNumber)
4788                         return;
4789
4790                 /*
4791                  * Punt if subquery uses set operations or GROUP BY, as these will
4792                  * mash underlying columns' stats beyond recognition.  (Set ops are
4793                  * particularly nasty; if we forged ahead, we would return stats
4794                  * relevant to only the leftmost subselect...)  DISTINCT is also
4795                  * problematic, but we check that later because there is a possibility
4796                  * of learning something even with it.
4797                  */
4798                 if (subquery->setOperations ||
4799                         subquery->groupClause)
4800                         return;
4801
4802                 /*
4803                  * OK, fetch RelOptInfo for subquery.  Note that we don't change the
4804                  * rel returned in vardata, since caller expects it to be a rel of the
4805                  * caller's query level.  Because we might already be recursing, we
4806                  * can't use that rel pointer either, but have to look up the Var's
4807                  * rel afresh.
4808                  */
4809                 rel = find_base_rel(root, var->varno);
4810
4811                 /* If the subquery hasn't been planned yet, we have to punt */
4812                 if (rel->subroot == NULL)
4813                         return;
4814                 Assert(IsA(rel->subroot, PlannerInfo));
4815
4816                 /*
4817                  * Switch our attention to the subquery as mangled by the planner. It
4818                  * was okay to look at the pre-planning version for the tests above,
4819                  * but now we need a Var that will refer to the subroot's live
4820                  * RelOptInfos.  For instance, if any subquery pullup happened during
4821                  * planning, Vars in the targetlist might have gotten replaced, and we
4822                  * need to see the replacement expressions.
4823                  */
4824                 subquery = rel->subroot->parse;
4825                 Assert(IsA(subquery, Query));
4826
4827                 /* Get the subquery output expression referenced by the upper Var */
4828                 ste = get_tle_by_resno(subquery->targetList, var->varattno);
4829                 if (ste == NULL || ste->resjunk)
4830                         elog(ERROR, "subquery %s does not have attribute %d",
4831                                  rte->eref->aliasname, var->varattno);
4832                 var = (Var *) ste->expr;
4833
4834                 /*
4835                  * If subquery uses DISTINCT, we can't make use of any stats for the
4836                  * variable ... but, if it's the only DISTINCT column, we are entitled
4837                  * to consider it unique.  We do the test this way so that it works
4838                  * for cases involving DISTINCT ON.
4839                  */
4840                 if (subquery->distinctClause)
4841                 {
4842                         if (list_length(subquery->distinctClause) == 1 &&
4843                                 targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
4844                                 vardata->isunique = true;
4845                         /* cannot go further */
4846                         return;
4847                 }
4848
4849                 /*
4850                  * If the sub-query originated from a view with the security_barrier
4851                  * attribute, we must not look at the variable's statistics, though it
4852                  * seems all right to notice the existence of a DISTINCT clause. So
4853                  * stop here.
4854                  *
4855                  * This is probably a harsher restriction than necessary; it's
4856                  * certainly OK for the selectivity estimator (which is a C function,
4857                  * and therefore omnipotent anyway) to look at the statistics.  But
4858                  * many selectivity estimators will happily *invoke the operator
4859                  * function* to try to work out a good estimate - and that's not OK.
4860                  * So for now, don't dig down for stats.
4861                  */
4862                 if (rte->security_barrier)
4863                         return;
4864
4865                 /* Can only handle a simple Var of subquery's query level */
4866                 if (var && IsA(var, Var) &&
4867                         var->varlevelsup == 0)
4868                 {
4869                         /*
4870                          * OK, recurse into the subquery.  Note that the original setting
4871                          * of vardata->isunique (which will surely be false) is left
4872                          * unchanged in this situation.  That's what we want, since even
4873                          * if the underlying column is unique, the subquery may have
4874                          * joined to other tables in a way that creates duplicates.
4875                          */
4876                         examine_simple_variable(rel->subroot, var, vardata);
4877                 }
4878         }
4879         else
4880         {
4881                 /*
4882                  * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE.  (We
4883                  * won't see RTE_JOIN here because join alias Vars have already been
4884                  * flattened.)  There's not much we can do with function outputs, but
4885                  * maybe someday try to be smarter about VALUES and/or CTEs.
4886                  */
4887         }
4888 }
4889
4890 /*
4891  * Check whether it is permitted to call func_oid passing some of the
4892  * pg_statistic data in vardata.  We allow this either if the user has SELECT
4893  * privileges on the table or column underlying the pg_statistic data or if
4894  * the function is marked leak-proof.
4895  */
4896 bool
4897 statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
4898 {
4899         if (vardata->acl_ok)
4900                 return true;
4901
4902         if (!OidIsValid(func_oid))
4903                 return false;
4904
4905         if (get_func_leakproof(func_oid))
4906                 return true;
4907
4908         ereport(DEBUG2,
4909                         (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
4910                                                          get_func_name(func_oid))));
4911         return false;
4912 }
4913
4914 /*
4915  * get_variable_numdistinct
4916  *        Estimate the number of distinct values of a variable.
4917  *
4918  * vardata: results of examine_variable
4919  * *isdefault: set to TRUE if the result is a default rather than based on
4920  * anything meaningful.
4921  *
4922  * NB: be careful to produce a positive integral result, since callers may
4923  * compare the result to exact integer counts, or might divide by it.
4924  */
4925 double
4926 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
4927 {
4928         double          stadistinct;
4929         double          stanullfrac = 0.0;
4930         double          ntuples;
4931
4932         *isdefault = false;
4933
4934         /*
4935          * Determine the stadistinct value to use.  There are cases where we can
4936          * get an estimate even without a pg_statistic entry, or can get a better
4937          * value than is in pg_statistic.  Grab stanullfrac too if we can find it
4938          * (otherwise, assume no nulls, for lack of any better idea).
4939          */
4940         if (HeapTupleIsValid(vardata->statsTuple))
4941         {
4942                 /* Use the pg_statistic entry */
4943                 Form_pg_statistic stats;
4944
4945                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
4946                 stadistinct = stats->stadistinct;
4947                 stanullfrac = stats->stanullfrac;
4948         }
4949         else if (vardata->vartype == BOOLOID)
4950         {
4951                 /*
4952                  * Special-case boolean columns: presumably, two distinct values.
4953                  *
4954                  * Are there any other datatypes we should wire in special estimates
4955                  * for?
4956                  */
4957                 stadistinct = 2.0;
4958         }
4959         else
4960         {
4961                 /*
4962                  * We don't keep statistics for system columns, but in some cases we
4963                  * can infer distinctness anyway.
4964                  */
4965                 if (vardata->var && IsA(vardata->var, Var))
4966                 {
4967                         switch (((Var *) vardata->var)->varattno)
4968                         {
4969                                 case ObjectIdAttributeNumber:
4970                                 case SelfItemPointerAttributeNumber:
4971                                         stadistinct = -1.0; /* unique (and all non null) */
4972                                         break;
4973                                 case TableOidAttributeNumber:
4974                                         stadistinct = 1.0;      /* only 1 value */
4975                                         break;
4976                                 default:
4977                                         stadistinct = 0.0;      /* means "unknown" */
4978                                         break;
4979                         }
4980                 }
4981                 else
4982                         stadistinct = 0.0;      /* means "unknown" */
4983
4984                 /*
4985                  * XXX consider using estimate_num_groups on expressions?
4986                  */
4987         }
4988
4989         /*
4990          * If there is a unique index or DISTINCT clause for the variable, assume
4991          * it is unique no matter what pg_statistic says; the statistics could be
4992          * out of date, or we might have found a partial unique index that proves
4993          * the var is unique for this query.  However, we'd better still believe
4994          * the null-fraction statistic.
4995          */
4996         if (vardata->isunique)
4997                 stadistinct = -1.0 * (1.0 - stanullfrac);
4998
4999         /*
5000          * If we had an absolute estimate, use that.
5001          */
5002         if (stadistinct > 0.0)
5003                 return clamp_row_est(stadistinct);
5004
5005         /*
5006          * Otherwise we need to get the relation size; punt if not available.
5007          */
5008         if (vardata->rel == NULL)
5009         {
5010                 *isdefault = true;
5011                 return DEFAULT_NUM_DISTINCT;
5012         }
5013         ntuples = vardata->rel->tuples;
5014         if (ntuples <= 0.0)
5015         {
5016                 *isdefault = true;
5017                 return DEFAULT_NUM_DISTINCT;
5018         }
5019
5020         /*
5021          * If we had a relative estimate, use that.
5022          */
5023         if (stadistinct < 0.0)
5024                 return clamp_row_est(-stadistinct * ntuples);
5025
5026         /*
5027          * With no data, estimate ndistinct = ntuples if the table is small, else
5028          * use default.  We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5029          * that the behavior isn't discontinuous.
5030          */
5031         if (ntuples < DEFAULT_NUM_DISTINCT)
5032                 return clamp_row_est(ntuples);
5033
5034         *isdefault = true;
5035         return DEFAULT_NUM_DISTINCT;
5036 }
5037
5038 /*
5039  * get_variable_range
5040  *              Estimate the minimum and maximum value of the specified variable.
5041  *              If successful, store values in *min and *max, and return TRUE.
5042  *              If no data available, return FALSE.
5043  *
5044  * sortop is the "<" comparison operator to use.  This should generally
5045  * be "<" not ">", as only the former is likely to be found in pg_statistic.
5046  */
5047 static bool
5048 get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
5049                                    Datum *min, Datum *max)
5050 {
5051         Datum           tmin = 0;
5052         Datum           tmax = 0;
5053         bool            have_data = false;
5054         int16           typLen;
5055         bool            typByVal;
5056         Oid                     opfuncoid;
5057         AttStatsSlot sslot;
5058         int                     i;
5059
5060         /*
5061          * XXX It's very tempting to try to use the actual column min and max, if
5062          * we can get them relatively-cheaply with an index probe.  However, since
5063          * this function is called many times during join planning, that could
5064          * have unpleasant effects on planning speed.  Need more investigation
5065          * before enabling this.
5066          */
5067 #ifdef NOT_USED
5068         if (get_actual_variable_range(root, vardata, sortop, min, max))
5069                 return true;
5070 #endif
5071
5072         if (!HeapTupleIsValid(vardata->statsTuple))
5073         {
5074                 /* no stats available, so default result */
5075                 return false;
5076         }
5077
5078         /*
5079          * If we can't apply the sortop to the stats data, just fail.  In
5080          * principle, if there's a histogram and no MCVs, we could return the
5081          * histogram endpoints without ever applying the sortop ... but it's
5082          * probably not worth trying, because whatever the caller wants to do with
5083          * the endpoints would likely fail the security check too.
5084          */
5085         if (!statistic_proc_security_check(vardata,
5086                                                                            (opfuncoid = get_opcode(sortop))))
5087                 return false;
5088
5089         get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5090
5091         /*
5092          * If there is a histogram, grab the first and last values.
5093          *
5094          * If there is a histogram that is sorted with some other operator than
5095          * the one we want, fail --- this suggests that there is data we can't
5096          * use.
5097          */
5098         if (get_attstatsslot(&sslot, vardata->statsTuple,
5099                                                  STATISTIC_KIND_HISTOGRAM, sortop,
5100                                                  ATTSTATSSLOT_VALUES))
5101         {
5102                 if (sslot.nvalues > 0)
5103                 {
5104                         tmin = datumCopy(sslot.values[0], typByVal, typLen);
5105                         tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
5106                         have_data = true;
5107                 }
5108                 free_attstatsslot(&sslot);
5109         }
5110         else if (get_attstatsslot(&sslot, vardata->statsTuple,
5111                                                           STATISTIC_KIND_HISTOGRAM, InvalidOid,
5112                                                           0))
5113         {
5114                 free_attstatsslot(&sslot);
5115                 return false;
5116         }
5117
5118         /*
5119          * If we have most-common-values info, look for extreme MCVs.  This is
5120          * needed even if we also have a histogram, since the histogram excludes
5121          * the MCVs.  However, usually the MCVs will not be the extreme values, so
5122          * avoid unnecessary data copying.
5123          */
5124         if (get_attstatsslot(&sslot, vardata->statsTuple,
5125                                                  STATISTIC_KIND_MCV, InvalidOid,
5126                                                  ATTSTATSSLOT_VALUES))
5127         {
5128                 bool            tmin_is_mcv = false;
5129                 bool            tmax_is_mcv = false;
5130                 FmgrInfo        opproc;
5131
5132                 fmgr_info(opfuncoid, &opproc);
5133
5134                 for (i = 0; i < sslot.nvalues; i++)
5135                 {
5136                         if (!have_data)
5137                         {
5138                                 tmin = tmax = sslot.values[i];
5139                                 tmin_is_mcv = tmax_is_mcv = have_data = true;
5140                                 continue;
5141                         }
5142                         if (DatumGetBool(FunctionCall2Coll(&opproc,
5143                                                                                            DEFAULT_COLLATION_OID,
5144                                                                                            sslot.values[i], tmin)))
5145                         {
5146                                 tmin = sslot.values[i];
5147                                 tmin_is_mcv = true;
5148                         }
5149                         if (DatumGetBool(FunctionCall2Coll(&opproc,
5150                                                                                            DEFAULT_COLLATION_OID,
5151                                                                                            tmax, sslot.values[i])))
5152                         {
5153                                 tmax = sslot.values[i];
5154                                 tmax_is_mcv = true;
5155                         }
5156                 }
5157                 if (tmin_is_mcv)
5158                         tmin = datumCopy(tmin, typByVal, typLen);
5159                 if (tmax_is_mcv)
5160                         tmax = datumCopy(tmax, typByVal, typLen);
5161                 free_attstatsslot(&sslot);
5162         }
5163
5164         *min = tmin;
5165         *max = tmax;
5166         return have_data;
5167 }
5168
5169
5170 /*
5171  * get_actual_variable_range
5172  *              Attempt to identify the current *actual* minimum and/or maximum
5173  *              of the specified variable, by looking for a suitable btree index
5174  *              and fetching its low and/or high values.
5175  *              If successful, store values in *min and *max, and return TRUE.
5176  *              (Either pointer can be NULL if that endpoint isn't needed.)
5177  *              If no data available, return FALSE.
5178  *
5179  * sortop is the "<" comparison operator to use.
5180  */
5181 static bool
5182 get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5183                                                   Oid sortop,
5184                                                   Datum *min, Datum *max)
5185 {
5186         bool            have_data = false;
5187         RelOptInfo *rel = vardata->rel;
5188         RangeTblEntry *rte;
5189         ListCell   *lc;
5190
5191         /* No hope if no relation or it doesn't have indexes */
5192         if (rel == NULL || rel->indexlist == NIL)
5193                 return false;
5194         /* If it has indexes it must be a plain relation */
5195         rte = root->simple_rte_array[rel->relid];
5196         Assert(rte->rtekind == RTE_RELATION);
5197
5198         /* Search through the indexes to see if any match our problem */
5199         foreach(lc, rel->indexlist)
5200         {
5201                 IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
5202                 ScanDirection indexscandir;
5203
5204                 /* Ignore non-btree indexes */
5205                 if (index->relam != BTREE_AM_OID)
5206                         continue;
5207
5208                 /*
5209                  * Ignore partial indexes --- we only want stats that cover the entire
5210                  * relation.
5211                  */
5212                 if (index->indpred != NIL)
5213                         continue;
5214
5215                 /*
5216                  * The index list might include hypothetical indexes inserted by a
5217                  * get_relation_info hook --- don't try to access them.
5218                  */
5219                 if (index->hypothetical)
5220                         continue;
5221
5222                 /*
5223                  * The first index column must match the desired variable and sort
5224                  * operator --- but we can use a descending-order index.
5225                  */
5226                 if (!match_index_to_operand(vardata->var, 0, index))
5227                         continue;
5228                 switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
5229                 {
5230                         case BTLessStrategyNumber:
5231                                 if (index->reverse_sort[0])
5232                                         indexscandir = BackwardScanDirection;
5233                                 else
5234                                         indexscandir = ForwardScanDirection;
5235                                 break;
5236                         case BTGreaterStrategyNumber:
5237                                 if (index->reverse_sort[0])
5238                                         indexscandir = ForwardScanDirection;
5239                                 else
5240                                         indexscandir = BackwardScanDirection;
5241                                 break;
5242                         default:
5243                                 /* index doesn't match the sortop */
5244                                 continue;
5245                 }
5246
5247                 /*
5248                  * Found a suitable index to extract data from.  We'll need an EState
5249                  * and a bunch of other infrastructure.
5250                  */
5251                 {
5252                         EState     *estate;
5253                         ExprContext *econtext;
5254                         MemoryContext tmpcontext;
5255                         MemoryContext oldcontext;
5256                         Relation        heapRel;
5257                         Relation        indexRel;
5258                         IndexInfo  *indexInfo;
5259                         TupleTableSlot *slot;
5260                         int16           typLen;
5261                         bool            typByVal;
5262                         ScanKeyData scankeys[1];
5263                         IndexScanDesc index_scan;
5264                         HeapTuple       tup;
5265                         Datum           values[INDEX_MAX_KEYS];
5266                         bool            isnull[INDEX_MAX_KEYS];
5267                         SnapshotData SnapshotDirty;
5268
5269                         estate = CreateExecutorState();
5270                         econtext = GetPerTupleExprContext(estate);
5271                         /* Make sure any cruft is generated in the econtext's memory */
5272                         tmpcontext = econtext->ecxt_per_tuple_memory;
5273                         oldcontext = MemoryContextSwitchTo(tmpcontext);
5274
5275                         /*
5276                          * Open the table and index so we can read from them.  We should
5277                          * already have at least AccessShareLock on the table, but not
5278                          * necessarily on the index.
5279                          */
5280                         heapRel = heap_open(rte->relid, NoLock);
5281                         indexRel = index_open(index->indexoid, AccessShareLock);
5282
5283                         /* extract index key information from the index's pg_index info */
5284                         indexInfo = BuildIndexInfo(indexRel);
5285
5286                         /* some other stuff */
5287                         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
5288                         econtext->ecxt_scantuple = slot;
5289                         get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5290                         InitDirtySnapshot(SnapshotDirty);
5291
5292                         /* set up an IS NOT NULL scan key so that we ignore nulls */
5293                         ScanKeyEntryInitialize(&scankeys[0],
5294                                                                    SK_ISNULL | SK_SEARCHNOTNULL,
5295                                                                    1,   /* index col to scan */
5296                                                                    InvalidStrategy,             /* no strategy */
5297                                                                    InvalidOid,  /* no strategy subtype */
5298                                                                    InvalidOid,  /* no collation */
5299                                                                    InvalidOid,  /* no reg proc for this */
5300                                                                    (Datum) 0);  /* constant */
5301
5302                         have_data = true;
5303
5304                         /* If min is requested ... */
5305                         if (min)
5306                         {
5307                                 /*
5308                                  * In principle, we should scan the index with our current
5309                                  * active snapshot, which is the best approximation we've got
5310                                  * to what the query will see when executed.  But that won't
5311                                  * be exact if a new snap is taken before running the query,
5312                                  * and it can be very expensive if a lot of uncommitted rows
5313                                  * exist at the end of the index (because we'll laboriously
5314                                  * fetch each one and reject it).  What seems like a good
5315                                  * compromise is to use SnapshotDirty.  That will accept
5316                                  * uncommitted rows, and thus avoid fetching multiple heap
5317                                  * tuples in this scenario.  On the other hand, it will reject
5318                                  * known-dead rows, and thus not give a bogus answer when the
5319                                  * extreme value has been deleted; that case motivates not
5320                                  * using SnapshotAny here.
5321                                  */
5322                                 index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
5323                                                                                          1, 0);
5324                                 index_rescan(index_scan, scankeys, 1, NULL, 0);
5325
5326                                 /* Fetch first tuple in sortop's direction */
5327                                 if ((tup = index_getnext(index_scan,
5328                                                                                  indexscandir)) != NULL)
5329                                 {
5330                                         /* Extract the index column values from the heap tuple */
5331                                         ExecStoreTuple(tup, slot, InvalidBuffer, false);
5332                                         FormIndexDatum(indexInfo, slot, estate,
5333                                                                    values, isnull);
5334
5335                                         /* Shouldn't have got a null, but be careful */
5336                                         if (isnull[0])
5337                                                 elog(ERROR, "found unexpected null value in index \"%s\"",
5338                                                          RelationGetRelationName(indexRel));
5339
5340                                         /* Copy the index column value out to caller's context */
5341                                         MemoryContextSwitchTo(oldcontext);
5342                                         *min = datumCopy(values[0], typByVal, typLen);
5343                                         MemoryContextSwitchTo(tmpcontext);
5344                                 }
5345                                 else
5346                                         have_data = false;
5347
5348                                 index_endscan(index_scan);
5349                         }
5350
5351                         /* If max is requested, and we didn't find the index is empty */
5352                         if (max && have_data)
5353                         {
5354                                 index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
5355                                                                                          1, 0);
5356                                 index_rescan(index_scan, scankeys, 1, NULL, 0);
5357
5358                                 /* Fetch first tuple in reverse direction */
5359                                 if ((tup = index_getnext(index_scan,
5360                                                                                  -indexscandir)) != NULL)
5361                                 {
5362                                         /* Extract the index column values from the heap tuple */
5363                                         ExecStoreTuple(tup, slot, InvalidBuffer, false);
5364                                         FormIndexDatum(indexInfo, slot, estate,
5365                                                                    values, isnull);
5366
5367                                         /* Shouldn't have got a null, but be careful */
5368                                         if (isnull[0])
5369                                                 elog(ERROR, "found unexpected null value in index \"%s\"",
5370                                                          RelationGetRelationName(indexRel));
5371
5372                                         /* Copy the index column value out to caller's context */
5373                                         MemoryContextSwitchTo(oldcontext);
5374                                         *max = datumCopy(values[0], typByVal, typLen);
5375                                         MemoryContextSwitchTo(tmpcontext);
5376                                 }
5377                                 else
5378                                         have_data = false;
5379
5380                                 index_endscan(index_scan);
5381                         }
5382
5383                         /* Clean everything up */
5384                         ExecDropSingleTupleTableSlot(slot);
5385
5386                         index_close(indexRel, AccessShareLock);
5387                         heap_close(heapRel, NoLock);
5388
5389                         MemoryContextSwitchTo(oldcontext);
5390                         FreeExecutorState(estate);
5391
5392                         /* And we're done */
5393                         break;
5394                 }
5395         }
5396
5397         return have_data;
5398 }
5399
5400 /*
5401  * find_join_input_rel
5402  *              Look up the input relation for a join.
5403  *
5404  * We assume that the input relation's RelOptInfo must have been constructed
5405  * already.
5406  */
5407 static RelOptInfo *
5408 find_join_input_rel(PlannerInfo *root, Relids relids)
5409 {
5410         RelOptInfo *rel = NULL;
5411
5412         switch (bms_membership(relids))
5413         {
5414                 case BMS_EMPTY_SET:
5415                         /* should not happen */
5416                         break;
5417                 case BMS_SINGLETON:
5418                         rel = find_base_rel(root, bms_singleton_member(relids));
5419                         break;
5420                 case BMS_MULTIPLE:
5421                         rel = find_join_rel(root, relids);
5422                         break;
5423         }
5424
5425         if (rel == NULL)
5426                 elog(ERROR, "could not find RelOptInfo for given relids");
5427
5428         return rel;
5429 }
5430
5431
5432 /*-------------------------------------------------------------------------
5433  *
5434  * Pattern analysis functions
5435  *
5436  * These routines support analysis of LIKE and regular-expression patterns
5437  * by the planner/optimizer.  It's important that they agree with the
5438  * regular-expression code in backend/regex/ and the LIKE code in
5439  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
5440  * must be conservative: if we report a string longer than the true fixed
5441  * prefix, the query may produce actually wrong answers, rather than just
5442  * getting a bad selectivity estimate!
5443  *
5444  * Note that the prefix-analysis functions are called from
5445  * backend/optimizer/path/indxpath.c as well as from routines in this file.
5446  *
5447  *-------------------------------------------------------------------------
5448  */
5449
5450 /*
5451  * Check whether char is a letter (and, hence, subject to case-folding)
5452  *
5453  * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem
5454  * worth trying to convert to wchar_t to use iswalpha.  Instead, just assume
5455  * any multibyte char is potentially case-varying.
5456  */
5457 static int
5458 pattern_char_isalpha(char c, bool is_multibyte,
5459                                          pg_locale_t locale, bool locale_is_c)
5460 {
5461         if (locale_is_c)
5462                 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
5463         else if (is_multibyte && IS_HIGHBIT_SET(c))
5464                 return true;
5465         else if (locale && locale->provider == COLLPROVIDER_ICU)
5466                 return IS_HIGHBIT_SET(c) ? true : false;
5467 #ifdef HAVE_LOCALE_T
5468         else if (locale && locale->provider == COLLPROVIDER_LIBC)
5469                 return isalpha_l((unsigned char) c, locale->info.lt);
5470 #endif
5471         else
5472                 return isalpha((unsigned char) c);
5473 }
5474
5475 /*
5476  * Extract the fixed prefix, if any, for a pattern.
5477  *
5478  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
5479  *      or to NULL if no fixed prefix exists for the pattern.
5480  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
5481  *      selectivity of the remainder of the pattern (without any fixed prefix).
5482  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
5483  *
5484  * The return value distinguishes no fixed prefix, a partial prefix,
5485  * or an exact-match-only pattern.
5486  */
5487
5488 static Pattern_Prefix_Status
5489 like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
5490                                   Const **prefix_const, Selectivity *rest_selec)
5491 {
5492         char       *match;
5493         char       *patt;
5494         int                     pattlen;
5495         Oid                     typeid = patt_const->consttype;
5496         int                     pos,
5497                                 match_pos;
5498         bool            is_multibyte = (pg_database_encoding_max_length() > 1);
5499         pg_locale_t locale = 0;
5500         bool            locale_is_c = false;
5501
5502         /* the right-hand const is type text or bytea */
5503         Assert(typeid == BYTEAOID || typeid == TEXTOID);
5504
5505         if (case_insensitive)
5506         {
5507                 if (typeid == BYTEAOID)
5508                         ereport(ERROR,
5509                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5510                         errmsg("case insensitive matching not supported on type bytea")));
5511
5512                 /* If case-insensitive, we need locale info */
5513                 if (lc_ctype_is_c(collation))
5514                         locale_is_c = true;
5515                 else if (collation != DEFAULT_COLLATION_OID)
5516                 {
5517                         if (!OidIsValid(collation))
5518                         {
5519                                 /*
5520                                  * This typically means that the parser could not resolve a
5521                                  * conflict of implicit collations, so report it that way.
5522                                  */
5523                                 ereport(ERROR,
5524                                                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
5525                                                  errmsg("could not determine which collation to use for ILIKE"),
5526                                                  errhint("Use the COLLATE clause to set the collation explicitly.")));
5527                         }
5528                         locale = pg_newlocale_from_collation(collation);
5529                 }
5530         }
5531
5532         if (typeid != BYTEAOID)
5533         {
5534                 patt = TextDatumGetCString(patt_const->constvalue);
5535                 pattlen = strlen(patt);
5536         }
5537         else
5538         {
5539                 bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
5540
5541                 pattlen = VARSIZE_ANY_EXHDR(bstr);
5542                 patt = (char *) palloc(pattlen);
5543                 memcpy(patt, VARDATA_ANY(bstr), pattlen);
5544                 Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
5545         }
5546
5547         match = palloc(pattlen + 1);
5548         match_pos = 0;
5549         for (pos = 0; pos < pattlen; pos++)
5550         {
5551                 /* % and _ are wildcard characters in LIKE */
5552                 if (patt[pos] == '%' ||
5553                         patt[pos] == '_')
5554                         break;
5555
5556                 /* Backslash escapes the next character */
5557                 if (patt[pos] == '\\')
5558                 {
5559                         pos++;
5560                         if (pos >= pattlen)
5561                                 break;
5562                 }
5563
5564                 /* Stop if case-varying character (it's sort of a wildcard) */
5565                 if (case_insensitive &&
5566                   pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
5567                         break;
5568
5569                 match[match_pos++] = patt[pos];
5570         }
5571
5572         match[match_pos] = '\0';
5573
5574         if (typeid != BYTEAOID)
5575                 *prefix_const = string_to_const(match, typeid);
5576         else
5577                 *prefix_const = string_to_bytea_const(match, match_pos);
5578
5579         if (rest_selec != NULL)
5580                 *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
5581                                                                            case_insensitive);
5582
5583         pfree(patt);
5584         pfree(match);
5585
5586         /* in LIKE, an empty pattern is an exact match! */
5587         if (pos == pattlen)
5588                 return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
5589
5590         if (match_pos > 0)
5591                 return Pattern_Prefix_Partial;
5592
5593         return Pattern_Prefix_None;
5594 }
5595
5596 static Pattern_Prefix_Status
5597 regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
5598                                    Const **prefix_const, Selectivity *rest_selec)
5599 {
5600         Oid                     typeid = patt_const->consttype;
5601         char       *prefix;
5602         bool            exact;
5603
5604         /*
5605          * Should be unnecessary, there are no bytea regex operators defined. As
5606          * such, it should be noted that the rest of this function has *not* been
5607          * made safe for binary (possibly NULL containing) strings.
5608          */
5609         if (typeid == BYTEAOID)
5610                 ereport(ERROR,
5611                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5612                  errmsg("regular-expression matching not supported on type bytea")));
5613
5614         /* Use the regexp machinery to extract the prefix, if any */
5615         prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
5616                                                                  case_insensitive, collation,
5617                                                                  &exact);
5618
5619         if (prefix == NULL)
5620         {
5621                 *prefix_const = NULL;
5622
5623                 if (rest_selec != NULL)
5624                 {
5625                         char       *patt = TextDatumGetCString(patt_const->constvalue);
5626
5627                         *rest_selec = regex_selectivity(patt, strlen(patt),
5628                                                                                         case_insensitive,
5629                                                                                         0);
5630                         pfree(patt);
5631                 }
5632
5633                 return Pattern_Prefix_None;
5634         }
5635
5636         *prefix_const = string_to_const(prefix, typeid);
5637
5638         if (rest_selec != NULL)
5639         {
5640                 if (exact)
5641                 {
5642                         /* Exact match, so there's no additional selectivity */
5643                         *rest_selec = 1.0;
5644                 }
5645                 else
5646                 {
5647                         char       *patt = TextDatumGetCString(patt_const->constvalue);
5648
5649                         *rest_selec = regex_selectivity(patt, strlen(patt),
5650                                                                                         case_insensitive,
5651                                                                                         strlen(prefix));
5652                         pfree(patt);
5653                 }
5654         }
5655
5656         pfree(prefix);
5657
5658         if (exact)
5659                 return Pattern_Prefix_Exact;    /* pattern specifies exact match */
5660         else
5661                 return Pattern_Prefix_Partial;
5662 }
5663
5664 Pattern_Prefix_Status
5665 pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
5666                                          Const **prefix, Selectivity *rest_selec)
5667 {
5668         Pattern_Prefix_Status result;
5669
5670         switch (ptype)
5671         {
5672                 case Pattern_Type_Like:
5673                         result = like_fixed_prefix(patt, false, collation,
5674                                                                            prefix, rest_selec);
5675                         break;
5676                 case Pattern_Type_Like_IC:
5677                         result = like_fixed_prefix(patt, true, collation,
5678                                                                            prefix, rest_selec);
5679                         break;
5680                 case Pattern_Type_Regex:
5681                         result = regex_fixed_prefix(patt, false, collation,
5682                                                                                 prefix, rest_selec);
5683                         break;
5684                 case Pattern_Type_Regex_IC:
5685                         result = regex_fixed_prefix(patt, true, collation,
5686                                                                                 prefix, rest_selec);
5687                         break;
5688                 default:
5689                         elog(ERROR, "unrecognized ptype: %d", (int) ptype);
5690                         result = Pattern_Prefix_None;           /* keep compiler quiet */
5691                         break;
5692         }
5693         return result;
5694 }
5695
5696 /*
5697  * Estimate the selectivity of a fixed prefix for a pattern match.
5698  *
5699  * A fixed prefix "foo" is estimated as the selectivity of the expression
5700  * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
5701  *
5702  * The selectivity estimate is with respect to the portion of the column
5703  * population represented by the histogram --- the caller must fold this
5704  * together with info about MCVs and NULLs.
5705  *
5706  * We use the >= and < operators from the specified btree opfamily to do the
5707  * estimation.  The given variable and Const must be of the associated
5708  * datatype.
5709  *
5710  * XXX Note: we make use of the upper bound to estimate operator selectivity
5711  * even if the locale is such that we cannot rely on the upper-bound string.
5712  * The selectivity only needs to be approximately right anyway, so it seems
5713  * more useful to use the upper-bound code than not.
5714  */
5715 static Selectivity
5716 prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
5717                                    Oid vartype, Oid opfamily, Const *prefixcon)
5718 {
5719         Selectivity prefixsel;
5720         Oid                     cmpopr;
5721         FmgrInfo        opproc;
5722         Const      *greaterstrcon;
5723         Selectivity eq_sel;
5724
5725         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5726                                                                  BTGreaterEqualStrategyNumber);
5727         if (cmpopr == InvalidOid)
5728                 elog(ERROR, "no >= operator for opfamily %u", opfamily);
5729         fmgr_info(get_opcode(cmpopr), &opproc);
5730
5731         prefixsel = ineq_histogram_selectivity(root, vardata, &opproc, true,
5732                                                                                    prefixcon->constvalue,
5733                                                                                    prefixcon->consttype);
5734
5735         if (prefixsel < 0.0)
5736         {
5737                 /* No histogram is present ... return a suitable default estimate */
5738                 return DEFAULT_MATCH_SEL;
5739         }
5740
5741         /*-------
5742          * If we can create a string larger than the prefix, say
5743          *      "x < greaterstr".
5744          *-------
5745          */
5746         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5747                                                                  BTLessStrategyNumber);
5748         if (cmpopr == InvalidOid)
5749                 elog(ERROR, "no < operator for opfamily %u", opfamily);
5750         fmgr_info(get_opcode(cmpopr), &opproc);
5751         greaterstrcon = make_greater_string(prefixcon, &opproc,
5752                                                                                 DEFAULT_COLLATION_OID);
5753         if (greaterstrcon)
5754         {
5755                 Selectivity topsel;
5756
5757                 topsel = ineq_histogram_selectivity(root, vardata, &opproc, false,
5758                                                                                         greaterstrcon->constvalue,
5759                                                                                         greaterstrcon->consttype);
5760
5761                 /* ineq_histogram_selectivity worked before, it shouldn't fail now */
5762                 Assert(topsel >= 0.0);
5763
5764                 /*
5765                  * Merge the two selectivities in the same way as for a range query
5766                  * (see clauselist_selectivity()).  Note that we don't need to worry
5767                  * about double-exclusion of nulls, since ineq_histogram_selectivity
5768                  * doesn't count those anyway.
5769                  */
5770                 prefixsel = topsel + prefixsel - 1.0;
5771         }
5772
5773         /*
5774          * If the prefix is long then the two bounding values might be too close
5775          * together for the histogram to distinguish them usefully, resulting in a
5776          * zero estimate (plus or minus roundoff error). To avoid returning a
5777          * ridiculously small estimate, compute the estimated selectivity for
5778          * "variable = 'foo'", and clamp to that. (Obviously, the resultant
5779          * estimate should be at least that.)
5780          *
5781          * We apply this even if we couldn't make a greater string.  That case
5782          * suggests that the prefix is near the maximum possible, and thus
5783          * probably off the end of the histogram, and thus we probably got a very
5784          * small estimate from the >= condition; so we still need to clamp.
5785          */
5786         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5787                                                                  BTEqualStrategyNumber);
5788         if (cmpopr == InvalidOid)
5789                 elog(ERROR, "no = operator for opfamily %u", opfamily);
5790         eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
5791                                                   false, true);
5792
5793         prefixsel = Max(prefixsel, eq_sel);
5794
5795         return prefixsel;
5796 }
5797
5798
5799 /*
5800  * Estimate the selectivity of a pattern of the specified type.
5801  * Note that any fixed prefix of the pattern will have been removed already,
5802  * so actually we may be looking at just a fragment of the pattern.
5803  *
5804  * For now, we use a very simplistic approach: fixed characters reduce the
5805  * selectivity a good deal, character ranges reduce it a little,
5806  * wildcards (such as % for LIKE or .* for regex) increase it.
5807  */
5808
5809 #define FIXED_CHAR_SEL  0.20    /* about 1/5 */
5810 #define CHAR_RANGE_SEL  0.25
5811 #define ANY_CHAR_SEL    0.9             /* not 1, since it won't match end-of-string */
5812 #define FULL_WILDCARD_SEL 5.0
5813 #define PARTIAL_WILDCARD_SEL 2.0
5814
5815 static Selectivity
5816 like_selectivity(const char *patt, int pattlen, bool case_insensitive)
5817 {
5818         Selectivity sel = 1.0;
5819         int                     pos;
5820
5821         /* Skip any leading wildcard; it's already factored into initial sel */
5822         for (pos = 0; pos < pattlen; pos++)
5823         {
5824                 if (patt[pos] != '%' && patt[pos] != '_')
5825                         break;
5826         }
5827
5828         for (; pos < pattlen; pos++)
5829         {
5830                 /* % and _ are wildcard characters in LIKE */
5831                 if (patt[pos] == '%')
5832                         sel *= FULL_WILDCARD_SEL;
5833                 else if (patt[pos] == '_')
5834                         sel *= ANY_CHAR_SEL;
5835                 else if (patt[pos] == '\\')
5836                 {
5837                         /* Backslash quotes the next character */
5838                         pos++;
5839                         if (pos >= pattlen)
5840                                 break;
5841                         sel *= FIXED_CHAR_SEL;
5842                 }
5843                 else
5844                         sel *= FIXED_CHAR_SEL;
5845         }
5846         /* Could get sel > 1 if multiple wildcards */
5847         if (sel > 1.0)
5848                 sel = 1.0;
5849         return sel;
5850 }
5851
5852 static Selectivity
5853 regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
5854 {
5855         Selectivity sel = 1.0;
5856         int                     paren_depth = 0;
5857         int                     paren_pos = 0;  /* dummy init to keep compiler quiet */
5858         int                     pos;
5859
5860         for (pos = 0; pos < pattlen; pos++)
5861         {
5862                 if (patt[pos] == '(')
5863                 {
5864                         if (paren_depth == 0)
5865                                 paren_pos = pos;        /* remember start of parenthesized item */
5866                         paren_depth++;
5867                 }
5868                 else if (patt[pos] == ')' && paren_depth > 0)
5869                 {
5870                         paren_depth--;
5871                         if (paren_depth == 0)
5872                                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
5873                                                                                          pos - (paren_pos + 1),
5874                                                                                          case_insensitive);
5875                 }
5876                 else if (patt[pos] == '|' && paren_depth == 0)
5877                 {
5878                         /*
5879                          * If unquoted | is present at paren level 0 in pattern, we have
5880                          * multiple alternatives; sum their probabilities.
5881                          */
5882                         sel += regex_selectivity_sub(patt + (pos + 1),
5883                                                                                  pattlen - (pos + 1),
5884                                                                                  case_insensitive);
5885                         break;                          /* rest of pattern is now processed */
5886                 }
5887                 else if (patt[pos] == '[')
5888                 {
5889                         bool            negclass = false;
5890
5891                         if (patt[++pos] == '^')
5892                         {
5893                                 negclass = true;
5894                                 pos++;
5895                         }
5896                         if (patt[pos] == ']')           /* ']' at start of class is not
5897                                                                                  * special */
5898                                 pos++;
5899                         while (pos < pattlen && patt[pos] != ']')
5900                                 pos++;
5901                         if (paren_depth == 0)
5902                                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
5903                 }
5904                 else if (patt[pos] == '.')
5905                 {
5906                         if (paren_depth == 0)
5907                                 sel *= ANY_CHAR_SEL;
5908                 }
5909                 else if (patt[pos] == '*' ||
5910                                  patt[pos] == '?' ||
5911                                  patt[pos] == '+')
5912                 {
5913                         /* Ought to be smarter about quantifiers... */
5914                         if (paren_depth == 0)
5915                                 sel *= PARTIAL_WILDCARD_SEL;
5916                 }
5917                 else if (patt[pos] == '{')
5918                 {
5919                         while (pos < pattlen && patt[pos] != '}')
5920                                 pos++;
5921                         if (paren_depth == 0)
5922                                 sel *= PARTIAL_WILDCARD_SEL;
5923                 }
5924                 else if (patt[pos] == '\\')
5925                 {
5926                         /* backslash quotes the next character */
5927                         pos++;
5928                         if (pos >= pattlen)
5929                                 break;
5930                         if (paren_depth == 0)
5931                                 sel *= FIXED_CHAR_SEL;
5932                 }
5933                 else
5934                 {
5935                         if (paren_depth == 0)
5936                                 sel *= FIXED_CHAR_SEL;
5937                 }
5938         }
5939         /* Could get sel > 1 if multiple wildcards */
5940         if (sel > 1.0)
5941                 sel = 1.0;
5942         return sel;
5943 }
5944
5945 static Selectivity
5946 regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
5947                                   int fixed_prefix_len)
5948 {
5949         Selectivity sel;
5950
5951         /* If patt doesn't end with $, consider it to have a trailing wildcard */
5952         if (pattlen > 0 && patt[pattlen - 1] == '$' &&
5953                 (pattlen == 1 || patt[pattlen - 2] != '\\'))
5954         {
5955                 /* has trailing $ */
5956                 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
5957         }
5958         else
5959         {
5960                 /* no trailing $ */
5961                 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
5962                 sel *= FULL_WILDCARD_SEL;
5963         }
5964
5965         /* If there's a fixed prefix, discount its selectivity */
5966         if (fixed_prefix_len > 0)
5967                 sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
5968
5969         /* Make sure result stays in range */
5970         CLAMP_PROBABILITY(sel);
5971         return sel;
5972 }
5973
5974
/*
 * Character incrementer used for bytea: bump the byte at *ptr by one.
 *
 * Returns false, leaving the byte untouched, when it already holds the
 * maximum value 255; otherwise increments it and returns true.  Since bytea
 * has no multibyte characters, len is not consulted.
 */
static bool
byte_increment(unsigned char *ptr, int len)
{
	bool		can_bump = (*ptr < 255);

	if (can_bump)
		*ptr += 1;
	return can_bump;
}
5987
5988 /*
5989  * Try to generate a string greater than the given string or any
5990  * string it is a prefix of.  If successful, return a palloc'd string
5991  * in the form of a Const node; else return NULL.
5992  *
5993  * The caller must provide the appropriate "less than" comparison function
5994  * for testing the strings, along with the collation to use.
5995  *
5996  * The key requirement here is that given a prefix string, say "foo",
5997  * we must be able to generate another string "fop" that is greater than
5998  * all strings "foobar" starting with "foo".  We can test that we have
5999  * generated a string greater than the prefix string, but in non-C collations
6000  * that is not a bulletproof guarantee that an extension of the string might
6001  * not sort after it; an example is that "foo " is less than "foo!", but it
6002  * is not clear that a "dictionary" sort ordering will consider "foo!" less
6003  * than "foo bar".  CAUTION: Therefore, this function should be used only for
6004  * estimation purposes when working in a non-C collation.
6005  *
6006  * To try to catch most cases where an extended string might otherwise sort
6007  * before the result value, we determine which of the strings "Z", "z", "y",
6008  * and "9" is seen as largest by the collation, and append that to the given
6009  * prefix before trying to find a string that compares as larger.
6010  *
6011  * To search for a greater string, we repeatedly "increment" the rightmost
6012  * character, using an encoding-specific character incrementer function.
6013  * When it's no longer possible to increment the last character, we truncate
6014  * off that character and start incrementing the next-to-rightmost.
6015  * For example, if "z" were the last character in the sort order, then we
6016  * could produce "foo" as a string greater than "fonz".
6017  *
6018  * This could be rather slow in the worst case, but in most cases we
6019  * won't have to try more than one or two strings before succeeding.
6020  *
6021  * Note that it's important for the character incrementer not to be too anal
6022  * about producing every possible character code, since in some cases the only
6023  * way to get a larger string is to increment a previous character position.
6024  * So we don't want to spend too much time trying every possible character
6025  * code at the last position.  A good rule of thumb is to be sure that we
6026  * don't try more than 256*K values for a K-byte character (and definitely
6027  * not 256^K, which is what an exhaustive search would approach).
6028  */
6029 Const *
6030 make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
6031 {
6032         Oid                     datatype = str_const->consttype;
6033         char       *workstr;
6034         int                     len;
6035         Datum           cmpstr;
6036         text       *cmptxt = NULL;
6037         mbcharacter_incrementer charinc;
6038
6039         /*
6040          * Get a modifiable copy of the prefix string in C-string format, and set
6041          * up the string we will compare to as a Datum.  In C locale this can just
6042          * be the given prefix string, otherwise we need to add a suffix.  Types
6043          * NAME and BYTEA sort bytewise so they don't need a suffix either.
6044          */
6045         if (datatype == NAMEOID)
6046         {
6047                 workstr = DatumGetCString(DirectFunctionCall1(nameout,
6048                                                                                                           str_const->constvalue));
6049                 len = strlen(workstr);
6050                 cmpstr = str_const->constvalue;
6051         }
6052         else if (datatype == BYTEAOID)
6053         {
6054                 bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
6055
6056                 len = VARSIZE_ANY_EXHDR(bstr);
6057                 workstr = (char *) palloc(len);
6058                 memcpy(workstr, VARDATA_ANY(bstr), len);
6059                 Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
6060                 cmpstr = str_const->constvalue;
6061         }
6062         else
6063         {
6064                 workstr = TextDatumGetCString(str_const->constvalue);
6065                 len = strlen(workstr);
6066                 if (lc_collate_is_c(collation) || len == 0)
6067                         cmpstr = str_const->constvalue;
6068                 else
6069                 {
6070                         /* If first time through, determine the suffix to use */
6071                         static char suffixchar = 0;
6072                         static Oid      suffixcollation = 0;
6073
6074                         if (!suffixchar || suffixcollation != collation)
6075                         {
6076                                 char       *best;
6077
6078                                 best = "Z";
6079                                 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
6080                                         best = "z";
6081                                 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
6082                                         best = "y";
6083                                 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
6084                                         best = "9";
6085                                 suffixchar = *best;
6086                                 suffixcollation = collation;
6087                         }
6088
6089                         /* And build the string to compare to */
6090                         cmptxt = (text *) palloc(VARHDRSZ + len + 1);
6091                         SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
6092                         memcpy(VARDATA(cmptxt), workstr, len);
6093                         *(VARDATA(cmptxt) + len) = suffixchar;
6094                         cmpstr = PointerGetDatum(cmptxt);
6095                 }
6096         }
6097
6098         /* Select appropriate character-incrementer function */
6099         if (datatype == BYTEAOID)
6100                 charinc = byte_increment;
6101         else
6102                 charinc = pg_database_encoding_character_incrementer();
6103
6104         /* And search ... */
6105         while (len > 0)
6106         {
6107                 int                     charlen;
6108                 unsigned char *lastchar;
6109
6110                 /* Identify the last character --- for bytea, just the last byte */
6111                 if (datatype == BYTEAOID)
6112                         charlen = 1;
6113                 else
6114                         charlen = len - pg_mbcliplen(workstr, len, len - 1);
6115                 lastchar = (unsigned char *) (workstr + len - charlen);
6116
6117                 /*
6118                  * Try to generate a larger string by incrementing the last character
6119                  * (for BYTEA, we treat each byte as a character).
6120                  *
6121                  * Note: the incrementer function is expected to return true if it's
6122                  * generated a valid-per-the-encoding new character, otherwise false.
6123                  * The contents of the character on false return are unspecified.
6124                  */
6125                 while (charinc(lastchar, charlen))
6126                 {
6127                         Const      *workstr_const;
6128
6129                         if (datatype == BYTEAOID)
6130                                 workstr_const = string_to_bytea_const(workstr, len);
6131                         else
6132                                 workstr_const = string_to_const(workstr, datatype);
6133
6134                         if (DatumGetBool(FunctionCall2Coll(ltproc,
6135                                                                                            collation,
6136                                                                                            cmpstr,
6137                                                                                            workstr_const->constvalue)))
6138                         {
6139                                 /* Successfully made a string larger than cmpstr */
6140                                 if (cmptxt)
6141                                         pfree(cmptxt);
6142                                 pfree(workstr);
6143                                 return workstr_const;
6144                         }
6145
6146                         /* No good, release unusable value and try again */
6147                         pfree(DatumGetPointer(workstr_const->constvalue));
6148                         pfree(workstr_const);
6149                 }
6150
6151                 /*
6152                  * No luck here, so truncate off the last character and try to
6153                  * increment the next one.
6154                  */
6155                 len -= charlen;
6156                 workstr[len] = '\0';
6157         }
6158
6159         /* Failed... */
6160         if (cmptxt)
6161                 pfree(cmptxt);
6162         pfree(workstr);
6163
6164         return NULL;
6165 }
6166
6167 /*
6168  * Generate a Datum of the appropriate type from a C string.
6169  * Note that all of the supported types are pass-by-ref, so the
6170  * returned value should be pfree'd if no longer needed.
6171  */
6172 static Datum
6173 string_to_datum(const char *str, Oid datatype)
6174 {
6175         Assert(str != NULL);
6176
6177         /*
6178          * We cheat a little by assuming that CStringGetTextDatum() will do for
6179          * bpchar and varchar constants too...
6180          */
6181         if (datatype == NAMEOID)
6182                 return DirectFunctionCall1(namein, CStringGetDatum(str));
6183         else if (datatype == BYTEAOID)
6184                 return DirectFunctionCall1(byteain, CStringGetDatum(str));
6185         else
6186                 return CStringGetTextDatum(str);
6187 }
6188
6189 /*
6190  * Generate a Const node of the appropriate type from a C string.
6191  */
6192 static Const *
6193 string_to_const(const char *str, Oid datatype)
6194 {
6195         Datum           conval = string_to_datum(str, datatype);
6196         Oid                     collation;
6197         int                     constlen;
6198
6199         /*
6200          * We only need to support a few datatypes here, so hard-wire properties
6201          * instead of incurring the expense of catalog lookups.
6202          */
6203         switch (datatype)
6204         {
6205                 case TEXTOID:
6206                 case VARCHAROID:
6207                 case BPCHAROID:
6208                         collation = DEFAULT_COLLATION_OID;
6209                         constlen = -1;
6210                         break;
6211
6212                 case NAMEOID:
6213                         collation = InvalidOid;
6214                         constlen = NAMEDATALEN;
6215                         break;
6216
6217                 case BYTEAOID:
6218                         collation = InvalidOid;
6219                         constlen = -1;
6220                         break;
6221
6222                 default:
6223                         elog(ERROR, "unexpected datatype in string_to_const: %u",
6224                                  datatype);
6225                         return NULL;
6226         }
6227
6228         return makeConst(datatype, -1, collation, constlen,
6229                                          conval, false, false);
6230 }
6231
6232 /*
6233  * Generate a Const node of bytea type from a binary C string and a length.
6234  */
6235 static Const *
6236 string_to_bytea_const(const char *str, size_t str_len)
6237 {
6238         bytea      *bstr = palloc(VARHDRSZ + str_len);
6239         Datum           conval;
6240
6241         memcpy(VARDATA(bstr), str, str_len);
6242         SET_VARSIZE(bstr, VARHDRSZ + str_len);
6243         conval = PointerGetDatum(bstr);
6244
6245         return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
6246 }
6247
6248 /*-------------------------------------------------------------------------
6249  *
6250  * Index cost estimation functions
6251  *
6252  *-------------------------------------------------------------------------
6253  */
6254
6255 List *
6256 deconstruct_indexquals(IndexPath *path)
6257 {
6258         List       *result = NIL;
6259         IndexOptInfo *index = path->indexinfo;
6260         ListCell   *lcc,
6261                            *lci;
6262
6263         forboth(lcc, path->indexquals, lci, path->indexqualcols)
6264         {
6265                 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lcc);
6266                 int                     indexcol = lfirst_int(lci);
6267                 Expr       *clause;
6268                 Node       *leftop,
6269                                    *rightop;
6270                 IndexQualInfo *qinfo;
6271
6272                 clause = rinfo->clause;
6273
6274                 qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
6275                 qinfo->rinfo = rinfo;
6276                 qinfo->indexcol = indexcol;
6277
6278                 if (IsA(clause, OpExpr))
6279                 {
6280                         qinfo->clause_op = ((OpExpr *) clause)->opno;
6281                         leftop = get_leftop(clause);
6282                         rightop = get_rightop(clause);
6283                         if (match_index_to_operand(leftop, indexcol, index))
6284                         {
6285                                 qinfo->varonleft = true;
6286                                 qinfo->other_operand = rightop;
6287                         }
6288                         else
6289                         {
6290                                 Assert(match_index_to_operand(rightop, indexcol, index));
6291                                 qinfo->varonleft = false;
6292                                 qinfo->other_operand = leftop;
6293                         }
6294                 }
6295                 else if (IsA(clause, RowCompareExpr))
6296                 {
6297                         RowCompareExpr *rc = (RowCompareExpr *) clause;
6298
6299                         qinfo->clause_op = linitial_oid(rc->opnos);
6300                         /* Examine only first columns to determine left/right sides */
6301                         if (match_index_to_operand((Node *) linitial(rc->largs),
6302                                                                            indexcol, index))
6303                         {
6304                                 qinfo->varonleft = true;
6305                                 qinfo->other_operand = (Node *) rc->rargs;
6306                         }
6307                         else
6308                         {
6309                                 Assert(match_index_to_operand((Node *) linitial(rc->rargs),
6310                                                                                           indexcol, index));
6311                                 qinfo->varonleft = false;
6312                                 qinfo->other_operand = (Node *) rc->largs;
6313                         }
6314                 }
6315                 else if (IsA(clause, ScalarArrayOpExpr))
6316                 {
6317                         ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6318
6319                         qinfo->clause_op = saop->opno;
6320                         /* index column is always on the left in this case */
6321                         Assert(match_index_to_operand((Node *) linitial(saop->args),
6322                                                                                   indexcol, index));
6323                         qinfo->varonleft = true;
6324                         qinfo->other_operand = (Node *) lsecond(saop->args);
6325                 }
6326                 else if (IsA(clause, NullTest))
6327                 {
6328                         qinfo->clause_op = InvalidOid;
6329                         Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
6330                                                                                   indexcol, index));
6331                         qinfo->varonleft = true;
6332                         qinfo->other_operand = NULL;
6333                 }
6334                 else
6335                 {
6336                         elog(ERROR, "unsupported indexqual type: %d",
6337                                  (int) nodeTag(clause));
6338                 }
6339
6340                 result = lappend(result, qinfo);
6341         }
6342         return result;
6343 }
6344
6345 /*
6346  * Simple function to compute the total eval cost of the "other operands"
6347  * in an IndexQualInfo list.  Since we know these will be evaluated just
6348  * once per scan, there's no need to distinguish startup from per-row cost.
6349  */
6350 static Cost
6351 other_operands_eval_cost(PlannerInfo *root, List *qinfos)
6352 {
6353         Cost            qual_arg_cost = 0;
6354         ListCell   *lc;
6355
6356         foreach(lc, qinfos)
6357         {
6358                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
6359                 QualCost        index_qual_cost;
6360
6361                 cost_qual_eval_node(&index_qual_cost, qinfo->other_operand, root);
6362                 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6363         }
6364         return qual_arg_cost;
6365 }
6366
6367 /*
6368  * Get other-operand eval cost for an index orderby list.
6369  *
6370  * Index orderby expressions aren't represented as RestrictInfos (since they
6371  * aren't boolean, usually).  So we can't apply deconstruct_indexquals to
6372  * them.  However, they are much simpler to deal with since they are always
6373  * OpExprs and the index column is always on the left.
6374  */
6375 static Cost
6376 orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
6377 {
6378         Cost            qual_arg_cost = 0;
6379         ListCell   *lc;
6380
6381         foreach(lc, path->indexorderbys)
6382         {
6383                 Expr       *clause = (Expr *) lfirst(lc);
6384                 Node       *other_operand;
6385                 QualCost        index_qual_cost;
6386
6387                 if (IsA(clause, OpExpr))
6388                 {
6389                         other_operand = get_rightop(clause);
6390                 }
6391                 else
6392                 {
6393                         elog(ERROR, "unsupported indexorderby type: %d",
6394                                  (int) nodeTag(clause));
6395                         other_operand = NULL;           /* keep compiler quiet */
6396                 }
6397
6398                 cost_qual_eval_node(&index_qual_cost, other_operand, root);
6399                 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6400         }
6401         return qual_arg_cost;
6402 }
6403
6404 void
6405 genericcostestimate(PlannerInfo *root,
6406                                         IndexPath *path,
6407                                         double loop_count,
6408                                         List *qinfos,
6409                                         GenericCosts *costs)
6410 {
6411         IndexOptInfo *index = path->indexinfo;
6412         List       *indexQuals = path->indexquals;
6413         List       *indexOrderBys = path->indexorderbys;
6414         Cost            indexStartupCost;
6415         Cost            indexTotalCost;
6416         Selectivity indexSelectivity;
6417         double          indexCorrelation;
6418         double          numIndexPages;
6419         double          numIndexTuples;
6420         double          spc_random_page_cost;
6421         double          num_sa_scans;
6422         double          num_outer_scans;
6423         double          num_scans;
6424         double          qual_op_cost;
6425         double          qual_arg_cost;
6426         List       *selectivityQuals;
6427         ListCell   *l;
6428
6429         /*
6430          * If the index is partial, AND the index predicate with the explicitly
6431          * given indexquals to produce a more accurate idea of the index
6432          * selectivity.
6433          */
6434         selectivityQuals = add_predicate_to_quals(index, indexQuals);
6435
6436         /*
6437          * Check for ScalarArrayOpExpr index quals, and estimate the number of
6438          * index scans that will be performed.
6439          */
6440         num_sa_scans = 1;
6441         foreach(l, indexQuals)
6442         {
6443                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6444
6445                 if (IsA(rinfo->clause, ScalarArrayOpExpr))
6446                 {
6447                         ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6448                         int                     alength = estimate_array_length(lsecond(saop->args));
6449
6450                         if (alength > 1)
6451                                 num_sa_scans *= alength;
6452                 }
6453         }
6454
6455         /* Estimate the fraction of main-table tuples that will be visited */
6456         indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6457                                                                                           index->rel->relid,
6458                                                                                           JOIN_INNER,
6459                                                                                           NULL);
6460
6461         /*
6462          * If caller didn't give us an estimate, estimate the number of index
6463          * tuples that will be visited.  We do it in this rather peculiar-looking
6464          * way in order to get the right answer for partial indexes.
6465          */
6466         numIndexTuples = costs->numIndexTuples;
6467         if (numIndexTuples <= 0.0)
6468         {
6469                 numIndexTuples = indexSelectivity * index->rel->tuples;
6470
6471                 /*
6472                  * The above calculation counts all the tuples visited across all
6473                  * scans induced by ScalarArrayOpExpr nodes.  We want to consider the
6474                  * average per-indexscan number, so adjust.  This is a handy place to
6475                  * round to integer, too.  (If caller supplied tuple estimate, it's
6476                  * responsible for handling these considerations.)
6477                  */
6478                 numIndexTuples = rint(numIndexTuples / num_sa_scans);
6479         }
6480
6481         /*
6482          * We can bound the number of tuples by the index size in any case. Also,
6483          * always estimate at least one tuple is touched, even when
6484          * indexSelectivity estimate is tiny.
6485          */
6486         if (numIndexTuples > index->tuples)
6487                 numIndexTuples = index->tuples;
6488         if (numIndexTuples < 1.0)
6489                 numIndexTuples = 1.0;
6490
6491         /*
6492          * Estimate the number of index pages that will be retrieved.
6493          *
6494          * We use the simplistic method of taking a pro-rata fraction of the total
6495          * number of index pages.  In effect, this counts only leaf pages and not
6496          * any overhead such as index metapage or upper tree levels.
6497          *
6498          * In practice access to upper index levels is often nearly free because
6499          * those tend to stay in cache under load; moreover, the cost involved is
6500          * highly dependent on index type.  We therefore ignore such costs here
6501          * and leave it to the caller to add a suitable charge if needed.
6502          */
6503         if (index->pages > 1 && index->tuples > 1)
6504                 numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6505         else
6506                 numIndexPages = 1.0;
6507
6508         /* fetch estimated page cost for tablespace containing index */
6509         get_tablespace_page_costs(index->reltablespace,
6510                                                           &spc_random_page_cost,
6511                                                           NULL);
6512
6513         /*
6514          * Now compute the disk access costs.
6515          *
6516          * The above calculations are all per-index-scan.  However, if we are in a
6517          * nestloop inner scan, we can expect the scan to be repeated (with
6518          * different search keys) for each row of the outer relation.  Likewise,
6519          * ScalarArrayOpExpr quals result in multiple index scans.  This creates
6520          * the potential for cache effects to reduce the number of disk page
6521          * fetches needed.  We want to estimate the average per-scan I/O cost in
6522          * the presence of caching.
6523          *
6524          * We use the Mackert-Lohman formula (see costsize.c for details) to
6525          * estimate the total number of page fetches that occur.  While this
6526          * wasn't what it was designed for, it seems a reasonable model anyway.
6527          * Note that we are counting pages not tuples anymore, so we take N = T =
6528          * index size, as if there were one "tuple" per page.
6529          */
6530         num_outer_scans = loop_count;
6531         num_scans = num_sa_scans * num_outer_scans;
6532
6533         if (num_scans > 1)
6534         {
6535                 double          pages_fetched;
6536
6537                 /* total page fetches ignoring cache effects */
6538                 pages_fetched = numIndexPages * num_scans;
6539
6540                 /* use Mackert and Lohman formula to adjust for cache effects */
6541                 pages_fetched = index_pages_fetched(pages_fetched,
6542                                                                                         index->pages,
6543                                                                                         (double) index->pages,
6544                                                                                         root);
6545
6546                 /*
6547                  * Now compute the total disk access cost, and then report a pro-rated
6548                  * share for each outer scan.  (Don't pro-rate for ScalarArrayOpExpr,
6549                  * since that's internal to the indexscan.)
6550                  */
6551                 indexTotalCost = (pages_fetched * spc_random_page_cost)
6552                         / num_outer_scans;
6553         }
6554         else
6555         {
6556                 /*
6557                  * For a single index scan, we just charge spc_random_page_cost per
6558                  * page touched.
6559                  */
6560                 indexTotalCost = numIndexPages * spc_random_page_cost;
6561         }
6562
6563         /*
6564          * CPU cost: any complex expressions in the indexquals will need to be
6565          * evaluated once at the start of the scan to reduce them to runtime keys
6566          * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
6567          * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6568          * indexqual operator.  Because we have numIndexTuples as a per-scan
6569          * number, we have to multiply by num_sa_scans to get the correct result
6570          * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
6571          * ORDER BY expressions.
6572          *
6573          * Note: this neglects the possible costs of rechecking lossy operators.
6574          * Detecting that that might be needed seems more expensive than it's
6575          * worth, though, considering all the other inaccuracies here ...
6576          */
6577         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
6578                 orderby_operands_eval_cost(root, path);
6579         qual_op_cost = cpu_operator_cost *
6580                 (list_length(indexQuals) + list_length(indexOrderBys));
6581
6582         indexStartupCost = qual_arg_cost;
6583         indexTotalCost += qual_arg_cost;
6584         indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6585
6586         /*
6587          * Generic assumption about index correlation: there isn't any.
6588          */
6589         indexCorrelation = 0.0;
6590
6591         /*
6592          * Return everything to caller.
6593          */
6594         costs->indexStartupCost = indexStartupCost;
6595         costs->indexTotalCost = indexTotalCost;
6596         costs->indexSelectivity = indexSelectivity;
6597         costs->indexCorrelation = indexCorrelation;
6598         costs->numIndexPages = numIndexPages;
6599         costs->numIndexTuples = numIndexTuples;
6600         costs->spc_random_page_cost = spc_random_page_cost;
6601         costs->num_sa_scans = num_sa_scans;
6602 }
6603
6604 /*
6605  * If the index is partial, add its predicate to the given qual list.
6606  *
6607  * ANDing the index predicate with the explicitly given indexquals produces
6608  * a more accurate idea of the index's selectivity.  However, we need to be
6609  * careful not to insert redundant clauses, because clauselist_selectivity()
6610  * is easily fooled into computing a too-low selectivity estimate.  Our
6611  * approach is to add only the predicate clause(s) that cannot be proven to
6612  * be implied by the given indexquals.  This successfully handles cases such
6613  * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
6614  * There are many other cases where we won't detect redundancy, leading to a
6615  * too-low selectivity estimate, which will bias the system in favor of using
6616  * partial indexes where possible.  That is not necessarily bad though.
6617  *
6618  * Note that indexQuals contains RestrictInfo nodes while the indpred
6619  * does not, so the output list will be mixed.  This is OK for both
6620  * predicate_implied_by() and clauselist_selectivity(), but might be
6621  * problematic if the result were passed to other things.
6622  */
6623 static List *
6624 add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
6625 {
6626         List       *predExtraQuals = NIL;
6627         ListCell   *lc;
6628
6629         if (index->indpred == NIL)
6630                 return indexQuals;
6631
6632         foreach(lc, index->indpred)
6633         {
6634                 Node       *predQual = (Node *) lfirst(lc);
6635                 List       *oneQual = list_make1(predQual);
6636
6637                 if (!predicate_implied_by(oneQual, indexQuals))
6638                         predExtraQuals = list_concat(predExtraQuals, oneQual);
6639         }
6640         /* list_concat avoids modifying the passed-in indexQuals list */
6641         return list_concat(predExtraQuals, indexQuals);
6642 }
6643
6644
6645 void
6646 btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6647                            Cost *indexStartupCost, Cost *indexTotalCost,
6648                            Selectivity *indexSelectivity, double *indexCorrelation,
6649                            double *indexPages)
6650 {
6651         IndexOptInfo *index = path->indexinfo;
6652         List       *qinfos;
6653         GenericCosts costs;
6654         Oid                     relid;
6655         AttrNumber      colnum;
6656         VariableStatData vardata;
6657         double          numIndexTuples;
6658         Cost            descentCost;
6659         List       *indexBoundQuals;
6660         int                     indexcol;
6661         bool            eqQualHere;
6662         bool            found_saop;
6663         bool            found_is_null_op;
6664         double          num_sa_scans;
6665         ListCell   *lc;
6666
6667         /* Do preliminary analysis of indexquals */
6668         qinfos = deconstruct_indexquals(path);
6669
6670         /*
6671          * For a btree scan, only leading '=' quals plus inequality quals for the
6672          * immediately next attribute contribute to index selectivity (these are
6673          * the "boundary quals" that determine the starting and stopping points of
6674          * the index scan).  Additional quals can suppress visits to the heap, so
6675          * it's OK to count them in indexSelectivity, but they should not count
6676          * for estimating numIndexTuples.  So we must examine the given indexquals
6677          * to find out which ones count as boundary quals.  We rely on the
6678          * knowledge that they are given in index column order.
6679          *
6680          * For a RowCompareExpr, we consider only the first column, just as
6681          * rowcomparesel() does.
6682          *
6683          * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
6684          * index scans not one, but the ScalarArrayOpExpr's operator can be
6685          * considered to act the same as it normally does.
6686          */
6687         indexBoundQuals = NIL;
6688         indexcol = 0;
6689         eqQualHere = false;
6690         found_saop = false;
6691         found_is_null_op = false;
6692         num_sa_scans = 1;
6693         foreach(lc, qinfos)
6694         {
6695                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
6696                 RestrictInfo *rinfo = qinfo->rinfo;
6697                 Expr       *clause = rinfo->clause;
6698                 Oid                     clause_op;
6699                 int                     op_strategy;
6700
6701                 if (indexcol != qinfo->indexcol)
6702                 {
6703                         /* Beginning of a new column's quals */
6704                         if (!eqQualHere)
6705                                 break;                  /* done if no '=' qual for indexcol */
6706                         eqQualHere = false;
6707                         indexcol++;
6708                         if (indexcol != qinfo->indexcol)
6709                                 break;                  /* no quals at all for indexcol */
6710                 }
6711
6712                 if (IsA(clause, ScalarArrayOpExpr))
6713                 {
6714                         int                     alength = estimate_array_length(qinfo->other_operand);
6715
6716                         found_saop = true;
6717                         /* count up number of SA scans induced by indexBoundQuals only */
6718                         if (alength > 1)
6719                                 num_sa_scans *= alength;
6720                 }
6721                 else if (IsA(clause, NullTest))
6722                 {
6723                         NullTest   *nt = (NullTest *) clause;
6724
6725                         if (nt->nulltesttype == IS_NULL)
6726                         {
6727                                 found_is_null_op = true;
6728                                 /* IS NULL is like = for selectivity determination purposes */
6729                                 eqQualHere = true;
6730                         }
6731                 }
6732
6733                 /*
6734                  * We would need to commute the clause_op if not varonleft, except
6735                  * that we only care if it's equality or not, so that refinement is
6736                  * unnecessary.
6737                  */
6738                 clause_op = qinfo->clause_op;
6739
6740                 /* check for equality operator */
6741                 if (OidIsValid(clause_op))
6742                 {
6743                         op_strategy = get_op_opfamily_strategy(clause_op,
6744                                                                                                    index->opfamily[indexcol]);
6745                         Assert(op_strategy != 0);       /* not a member of opfamily?? */
6746                         if (op_strategy == BTEqualStrategyNumber)
6747                                 eqQualHere = true;
6748                 }
6749
6750                 indexBoundQuals = lappend(indexBoundQuals, rinfo);
6751         }
6752
6753         /*
6754          * If index is unique and we found an '=' clause for each column, we can
6755          * just assume numIndexTuples = 1 and skip the expensive
6756          * clauselist_selectivity calculations.  However, a ScalarArrayOp or
6757          * NullTest invalidates that theory, even though it sets eqQualHere.
6758          */
6759         if (index->unique &&
6760                 indexcol == index->ncolumns - 1 &&
6761                 eqQualHere &&
6762                 !found_saop &&
6763                 !found_is_null_op)
6764                 numIndexTuples = 1.0;
6765         else
6766         {
6767                 List       *selectivityQuals;
6768                 Selectivity btreeSelectivity;
6769
6770                 /*
6771                  * If the index is partial, AND the index predicate with the
6772                  * index-bound quals to produce a more accurate idea of the number of
6773                  * rows covered by the bound conditions.
6774                  */
6775                 selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
6776
6777                 btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
6778                                                                                                   index->rel->relid,
6779                                                                                                   JOIN_INNER,
6780                                                                                                   NULL);
6781                 numIndexTuples = btreeSelectivity * index->rel->tuples;
6782
6783                 /*
6784                  * As in genericcostestimate(), we have to adjust for any
6785                  * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
6786                  * to integer.
6787                  */
6788                 numIndexTuples = rint(numIndexTuples / num_sa_scans);
6789         }
6790
6791         /*
6792          * Now do generic index cost estimation.
6793          */
6794         MemSet(&costs, 0, sizeof(costs));
6795         costs.numIndexTuples = numIndexTuples;
6796
6797         genericcostestimate(root, path, loop_count, qinfos, &costs);
6798
6799         /*
6800          * Add a CPU-cost component to represent the costs of initial btree
6801          * descent.  We don't charge any I/O cost for touching upper btree levels,
6802          * since they tend to stay in cache, but we still have to do about log2(N)
6803          * comparisons to descend a btree of N leaf tuples.  We charge one
6804          * cpu_operator_cost per comparison.
6805          *
6806          * If there are ScalarArrayOpExprs, charge this once per SA scan.  The
6807          * ones after the first one are not startup cost so far as the overall
6808          * plan is concerned, so add them only to "total" cost.
6809          */
6810         if (index->tuples > 1)          /* avoid computing log(0) */
6811         {
6812                 descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
6813                 costs.indexStartupCost += descentCost;
6814                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
6815         }
6816
6817         /*
6818          * Even though we're not charging I/O cost for touching upper btree pages,
6819          * it's still reasonable to charge some CPU cost per page descended
6820          * through.  Moreover, if we had no such charge at all, bloated indexes
6821          * would appear to have the same search cost as unbloated ones, at least
6822          * in cases where only a single leaf page is expected to be visited.  This
6823          * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
6824          * touched.  The number of such pages is btree tree height plus one (ie,
6825          * we charge for the leaf page too).  As above, charge once per SA scan.
6826          */
6827         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
6828         costs.indexStartupCost += descentCost;
6829         costs.indexTotalCost += costs.num_sa_scans * descentCost;
6830
6831         /*
6832          * If we can get an estimate of the first column's ordering correlation C
6833          * from pg_statistic, estimate the index correlation as C for a
6834          * single-column index, or C * 0.75 for multiple columns. (The idea here
6835          * is that multiple columns dilute the importance of the first column's
6836          * ordering, but don't negate it entirely.  Before 8.0 we divided the
6837          * correlation by the number of columns, but that seems too strong.)
6838          */
6839         MemSet(&vardata, 0, sizeof(vardata));
6840
6841         if (index->indexkeys[0] != 0)
6842         {
6843                 /* Simple variable --- look to stats for the underlying table */
6844                 RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
6845
6846                 Assert(rte->rtekind == RTE_RELATION);
6847                 relid = rte->relid;
6848                 Assert(relid != InvalidOid);
6849                 colnum = index->indexkeys[0];
6850
6851                 if (get_relation_stats_hook &&
6852                         (*get_relation_stats_hook) (root, rte, colnum, &vardata))
6853                 {
6854                         /*
6855                          * The hook took control of acquiring a stats tuple.  If it did
6856                          * supply a tuple, it'd better have supplied a freefunc.
6857                          */
6858                         if (HeapTupleIsValid(vardata.statsTuple) &&
6859                                 !vardata.freefunc)
6860                                 elog(ERROR, "no function provided to release variable stats with");
6861                 }
6862                 else
6863                 {
6864                         vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6865                                                                                                  ObjectIdGetDatum(relid),
6866                                                                                                  Int16GetDatum(colnum),
6867                                                                                                  BoolGetDatum(rte->inh));
6868                         vardata.freefunc = ReleaseSysCache;
6869                 }
6870         }
6871         else
6872         {
6873                 /* Expression --- maybe there are stats for the index itself */
6874                 relid = index->indexoid;
6875                 colnum = 1;
6876
6877                 if (get_index_stats_hook &&
6878                         (*get_index_stats_hook) (root, relid, colnum, &vardata))
6879                 {
6880                         /*
6881                          * The hook took control of acquiring a stats tuple.  If it did
6882                          * supply a tuple, it'd better have supplied a freefunc.
6883                          */
6884                         if (HeapTupleIsValid(vardata.statsTuple) &&
6885                                 !vardata.freefunc)
6886                                 elog(ERROR, "no function provided to release variable stats with");
6887                 }
6888                 else
6889                 {
6890                         vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6891                                                                                                  ObjectIdGetDatum(relid),
6892                                                                                                  Int16GetDatum(colnum),
6893                                                                                                  BoolGetDatum(false));
6894                         vardata.freefunc = ReleaseSysCache;
6895                 }
6896         }
6897
6898         if (HeapTupleIsValid(vardata.statsTuple))
6899         {
6900                 Oid                     sortop;
6901                 AttStatsSlot sslot;
6902
6903                 sortop = get_opfamily_member(index->opfamily[0],
6904                                                                          index->opcintype[0],
6905                                                                          index->opcintype[0],
6906                                                                          BTLessStrategyNumber);
6907                 if (OidIsValid(sortop) &&
6908                         get_attstatsslot(&sslot, vardata.statsTuple,
6909                                                          STATISTIC_KIND_CORRELATION, sortop,
6910                                                          ATTSTATSSLOT_NUMBERS))
6911                 {
6912                         double          varCorrelation;
6913
6914                         Assert(sslot.nnumbers == 1);
6915                         varCorrelation = sslot.numbers[0];
6916
6917                         if (index->reverse_sort[0])
6918                                 varCorrelation = -varCorrelation;
6919
6920                         if (index->ncolumns > 1)
6921                                 costs.indexCorrelation = varCorrelation * 0.75;
6922                         else
6923                                 costs.indexCorrelation = varCorrelation;
6924
6925                         free_attstatsslot(&sslot);
6926                 }
6927         }
6928
6929         ReleaseVariableStats(vardata);
6930
6931         *indexStartupCost = costs.indexStartupCost;
6932         *indexTotalCost = costs.indexTotalCost;
6933         *indexSelectivity = costs.indexSelectivity;
6934         *indexCorrelation = costs.indexCorrelation;
6935         *indexPages = costs.numIndexPages;
6936 }
6937
6938 void
6939 hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6940                                  Cost *indexStartupCost, Cost *indexTotalCost,
6941                                  Selectivity *indexSelectivity, double *indexCorrelation,
6942                                  double *indexPages)
6943 {
6944         List       *qinfos;
6945         GenericCosts costs;
6946
6947         /* Do preliminary analysis of indexquals */
6948         qinfos = deconstruct_indexquals(path);
6949
6950         MemSet(&costs, 0, sizeof(costs));
6951
6952         genericcostestimate(root, path, loop_count, qinfos, &costs);
6953
6954         /*
6955          * A hash index has no descent costs as such, since the index AM can go
6956          * directly to the target bucket after computing the hash value.  There
6957          * are a couple of other hash-specific costs that we could conceivably add
6958          * here, though:
6959          *
6960          * Ideally we'd charge spc_random_page_cost for each page in the target
6961          * bucket, not just the numIndexPages pages that genericcostestimate
6962          * thought we'd visit.  However in most cases we don't know which bucket
6963          * that will be.  There's no point in considering the average bucket size
6964          * because the hash AM makes sure that's always one page.
6965          *
6966          * Likewise, we could consider charging some CPU for each index tuple in
6967          * the bucket, if we knew how many there were.  But the per-tuple cost is
6968          * just a hash value comparison, not a general datatype-dependent
6969          * comparison, so any such charge ought to be quite a bit less than
6970          * cpu_operator_cost; which makes it probably not worth worrying about.
6971          *
6972          * A bigger issue is that chance hash-value collisions will result in
6973          * wasted probes into the heap.  We don't currently attempt to model this
6974          * cost on the grounds that it's rare, but maybe it's not rare enough.
6975          * (Any fix for this ought to consider the generic lossy-operator problem,
6976          * though; it's not entirely hash-specific.)
6977          */
6978
6979         *indexStartupCost = costs.indexStartupCost;
6980         *indexTotalCost = costs.indexTotalCost;
6981         *indexSelectivity = costs.indexSelectivity;
6982         *indexCorrelation = costs.indexCorrelation;
6983         *indexPages = costs.numIndexPages;
6984 }
6985
6986 void
6987 gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6988                                  Cost *indexStartupCost, Cost *indexTotalCost,
6989                                  Selectivity *indexSelectivity, double *indexCorrelation,
6990                                  double *indexPages)
6991 {
6992         IndexOptInfo *index = path->indexinfo;
6993         List       *qinfos;
6994         GenericCosts costs;
6995         Cost            descentCost;
6996
6997         /* Do preliminary analysis of indexquals */
6998         qinfos = deconstruct_indexquals(path);
6999
7000         MemSet(&costs, 0, sizeof(costs));
7001
7002         genericcostestimate(root, path, loop_count, qinfos, &costs);
7003
7004         /*
7005          * We model index descent costs similarly to those for btree, but to do
7006          * that we first need an idea of the tree height.  We somewhat arbitrarily
7007          * assume that the fanout is 100, meaning the tree height is at most
7008          * log100(index->pages).
7009          *
7010          * Although this computation isn't really expensive enough to require
7011          * caching, we might as well use index->tree_height to cache it.
7012          */
7013         if (index->tree_height < 0) /* unknown? */
7014         {
7015                 if (index->pages > 1)   /* avoid computing log(0) */
7016                         index->tree_height = (int) (log(index->pages) / log(100.0));
7017                 else
7018                         index->tree_height = 0;
7019         }
7020
7021         /*
7022          * Add a CPU-cost component to represent the costs of initial descent. We
7023          * just use log(N) here not log2(N) since the branching factor isn't
7024          * necessarily two anyway.  As for btree, charge once per SA scan.
7025          */
7026         if (index->tuples > 1)          /* avoid computing log(0) */
7027         {
7028                 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7029                 costs.indexStartupCost += descentCost;
7030                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7031         }
7032
7033         /*
7034          * Likewise add a per-page charge, calculated the same as for btrees.
7035          */
7036         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
7037         costs.indexStartupCost += descentCost;
7038         costs.indexTotalCost += costs.num_sa_scans * descentCost;
7039
7040         *indexStartupCost = costs.indexStartupCost;
7041         *indexTotalCost = costs.indexTotalCost;
7042         *indexSelectivity = costs.indexSelectivity;
7043         *indexCorrelation = costs.indexCorrelation;
7044         *indexPages = costs.numIndexPages;
7045 }
7046
7047 void
7048 spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7049                                 Cost *indexStartupCost, Cost *indexTotalCost,
7050                                 Selectivity *indexSelectivity, double *indexCorrelation,
7051                                 double *indexPages)
7052 {
7053         IndexOptInfo *index = path->indexinfo;
7054         List       *qinfos;
7055         GenericCosts costs;
7056         Cost            descentCost;
7057
7058         /* Do preliminary analysis of indexquals */
7059         qinfos = deconstruct_indexquals(path);
7060
7061         MemSet(&costs, 0, sizeof(costs));
7062
7063         genericcostestimate(root, path, loop_count, qinfos, &costs);
7064
7065         /*
7066          * We model index descent costs similarly to those for btree, but to do
7067          * that we first need an idea of the tree height.  We somewhat arbitrarily
7068          * assume that the fanout is 100, meaning the tree height is at most
7069          * log100(index->pages).
7070          *
7071          * Although this computation isn't really expensive enough to require
7072          * caching, we might as well use index->tree_height to cache it.
7073          */
7074         if (index->tree_height < 0) /* unknown? */
7075         {
7076                 if (index->pages > 1)   /* avoid computing log(0) */
7077                         index->tree_height = (int) (log(index->pages) / log(100.0));
7078                 else
7079                         index->tree_height = 0;
7080         }
7081
7082         /*
7083          * Add a CPU-cost component to represent the costs of initial descent. We
7084          * just use log(N) here not log2(N) since the branching factor isn't
7085          * necessarily two anyway.  As for btree, charge once per SA scan.
7086          */
7087         if (index->tuples > 1)          /* avoid computing log(0) */
7088         {
7089                 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7090                 costs.indexStartupCost += descentCost;
7091                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7092         }
7093
7094         /*
7095          * Likewise add a per-page charge, calculated the same as for btrees.
7096          */
7097         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
7098         costs.indexStartupCost += descentCost;
7099         costs.indexTotalCost += costs.num_sa_scans * descentCost;
7100
7101         *indexStartupCost = costs.indexStartupCost;
7102         *indexTotalCost = costs.indexTotalCost;
7103         *indexSelectivity = costs.indexSelectivity;
7104         *indexCorrelation = costs.indexCorrelation;
7105         *indexPages = costs.numIndexPages;
7106 }
7107
7108
7109 /*
7110  * Support routines for gincostestimate
7111  */
7112
7113 typedef struct
7114 {
7115         bool            haveFullScan;
7116         double          partialEntries;
7117         double          exactEntries;
7118         double          searchEntries;
7119         double          arrayScans;
7120 } GinQualCounts;
7121
7122 /*
7123  * Estimate the number of index terms that need to be searched for while
7124  * testing the given GIN query, and increment the counts in *counts
7125  * appropriately.  If the query is unsatisfiable, return false.
7126  */
7127 static bool
7128 gincost_pattern(IndexOptInfo *index, int indexcol,
7129                                 Oid clause_op, Datum query,
7130                                 GinQualCounts *counts)
7131 {
7132         Oid                     extractProcOid;
7133         Oid                     collation;
7134         int                     strategy_op;
7135         Oid                     lefttype,
7136                                 righttype;
7137         int32           nentries = 0;
7138         bool       *partial_matches = NULL;
7139         Pointer    *extra_data = NULL;
7140         bool       *nullFlags = NULL;
7141         int32           searchMode = GIN_SEARCH_MODE_DEFAULT;
7142         int32           i;
7143
7144         /*
7145          * Get the operator's strategy number and declared input data types within
7146          * the index opfamily.  (We don't need the latter, but we use
7147          * get_op_opfamily_properties because it will throw error if it fails to
7148          * find a matching pg_amop entry.)
7149          */
7150         get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
7151                                                            &strategy_op, &lefttype, &righttype);
7152
7153         /*
7154          * GIN always uses the "default" support functions, which are those with
7155          * lefttype == righttype == the opclass' opcintype (see
7156          * IndexSupportInitialize in relcache.c).
7157          */
7158         extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
7159                                                                            index->opcintype[indexcol],
7160                                                                            index->opcintype[indexcol],
7161                                                                            GIN_EXTRACTQUERY_PROC);
7162
7163         if (!OidIsValid(extractProcOid))
7164         {
7165                 /* should not happen; throw same error as index_getprocinfo */
7166                 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
7167                          GIN_EXTRACTQUERY_PROC, indexcol + 1,
7168                          get_rel_name(index->indexoid));
7169         }
7170
7171         /*
7172          * Choose collation to pass to extractProc (should match initGinState).
7173          */
7174         if (OidIsValid(index->indexcollations[indexcol]))
7175                 collation = index->indexcollations[indexcol];
7176         else
7177                 collation = DEFAULT_COLLATION_OID;
7178
7179         OidFunctionCall7Coll(extractProcOid,
7180                                                  collation,
7181                                                  query,
7182                                                  PointerGetDatum(&nentries),
7183                                                  UInt16GetDatum(strategy_op),
7184                                                  PointerGetDatum(&partial_matches),
7185                                                  PointerGetDatum(&extra_data),
7186                                                  PointerGetDatum(&nullFlags),
7187                                                  PointerGetDatum(&searchMode));
7188
7189         if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
7190         {
7191                 /* No match is possible */
7192                 return false;
7193         }
7194
7195         for (i = 0; i < nentries; i++)
7196         {
7197                 /*
7198                  * For partial match we haven't any information to estimate number of
7199                  * matched entries in index, so, we just estimate it as 100
7200                  */
7201                 if (partial_matches && partial_matches[i])
7202                         counts->partialEntries += 100;
7203                 else
7204                         counts->exactEntries++;
7205
7206                 counts->searchEntries++;
7207         }
7208
7209         if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
7210         {
7211                 /* Treat "include empty" like an exact-match item */
7212                 counts->exactEntries++;
7213                 counts->searchEntries++;
7214         }
7215         else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
7216         {
7217                 /* It's GIN_SEARCH_MODE_ALL */
7218                 counts->haveFullScan = true;
7219         }
7220
7221         return true;
7222 }
7223
7224 /*
7225  * Estimate the number of index terms that need to be searched for while
7226  * testing the given GIN index clause, and increment the counts in *counts
7227  * appropriately.  If the query is unsatisfiable, return false.
7228  */
7229 static bool
7230 gincost_opexpr(PlannerInfo *root,
7231                            IndexOptInfo *index,
7232                            IndexQualInfo *qinfo,
7233                            GinQualCounts *counts)
7234 {
7235         int                     indexcol = qinfo->indexcol;
7236         Oid                     clause_op = qinfo->clause_op;
7237         Node       *operand = qinfo->other_operand;
7238
7239         if (!qinfo->varonleft)
7240         {
7241                 /* must commute the operator */
7242                 clause_op = get_commutator(clause_op);
7243         }
7244
7245         /* aggressively reduce to a constant, and look through relabeling */
7246         operand = estimate_expression_value(root, operand);
7247
7248         if (IsA(operand, RelabelType))
7249                 operand = (Node *) ((RelabelType *) operand)->arg;
7250
7251         /*
7252          * It's impossible to call extractQuery method for unknown operand. So
7253          * unless operand is a Const we can't do much; just assume there will be
7254          * one ordinary search entry from the operand at runtime.
7255          */
7256         if (!IsA(operand, Const))
7257         {
7258                 counts->exactEntries++;
7259                 counts->searchEntries++;
7260                 return true;
7261         }
7262
7263         /* If Const is null, there can be no matches */
7264         if (((Const *) operand)->constisnull)
7265                 return false;
7266
7267         /* Otherwise, apply extractQuery and get the actual term counts */
7268         return gincost_pattern(index, indexcol, clause_op,
7269                                                    ((Const *) operand)->constvalue,
7270                                                    counts);
7271 }
7272
7273 /*
7274  * Estimate the number of index terms that need to be searched for while
7275  * testing the given GIN index clause, and increment the counts in *counts
7276  * appropriately.  If the query is unsatisfiable, return false.
7277  *
7278  * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
7279  * each of which involves one value from the RHS array, plus all the
7280  * non-array quals (if any).  To model this, we average the counts across
7281  * the RHS elements, and add the averages to the counts in *counts (which
7282  * correspond to per-indexscan costs).  We also multiply counts->arrayScans
7283  * by N, causing gincostestimate to scale up its estimates accordingly.
7284  */
7285 static bool
7286 gincost_scalararrayopexpr(PlannerInfo *root,
7287                                                   IndexOptInfo *index,
7288                                                   IndexQualInfo *qinfo,
7289                                                   double numIndexEntries,
7290                                                   GinQualCounts *counts)
7291 {
7292         int                     indexcol = qinfo->indexcol;
7293         Oid                     clause_op = qinfo->clause_op;
7294         Node       *rightop = qinfo->other_operand;
7295         ArrayType  *arrayval;
7296         int16           elmlen;
7297         bool            elmbyval;
7298         char            elmalign;
7299         int                     numElems;
7300         Datum      *elemValues;
7301         bool       *elemNulls;
7302         GinQualCounts arraycounts;
7303         int                     numPossible = 0;
7304         int                     i;
7305
7306         Assert(((ScalarArrayOpExpr *) qinfo->rinfo->clause)->useOr);
7307
7308         /* aggressively reduce to a constant, and look through relabeling */
7309         rightop = estimate_expression_value(root, rightop);
7310
7311         if (IsA(rightop, RelabelType))
7312                 rightop = (Node *) ((RelabelType *) rightop)->arg;
7313
7314         /*
7315          * It's impossible to call extractQuery method for unknown operand. So
7316          * unless operand is a Const we can't do much; just assume there will be
7317          * one ordinary search entry from each array entry at runtime, and fall
7318          * back on a probably-bad estimate of the number of array entries.
7319          */
7320         if (!IsA(rightop, Const))
7321         {
7322                 counts->exactEntries++;
7323                 counts->searchEntries++;
7324                 counts->arrayScans *= estimate_array_length(rightop);
7325                 return true;
7326         }
7327
7328         /* If Const is null, there can be no matches */
7329         if (((Const *) rightop)->constisnull)
7330                 return false;
7331
7332         /* Otherwise, extract the array elements and iterate over them */
7333         arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
7334         get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
7335                                                  &elmlen, &elmbyval, &elmalign);
7336         deconstruct_array(arrayval,
7337                                           ARR_ELEMTYPE(arrayval),
7338                                           elmlen, elmbyval, elmalign,
7339                                           &elemValues, &elemNulls, &numElems);
7340
7341         memset(&arraycounts, 0, sizeof(arraycounts));
7342
7343         for (i = 0; i < numElems; i++)
7344         {
7345                 GinQualCounts elemcounts;
7346
7347                 /* NULL can't match anything, so ignore, as the executor will */
7348                 if (elemNulls[i])
7349                         continue;
7350
7351                 /* Otherwise, apply extractQuery and get the actual term counts */
7352                 memset(&elemcounts, 0, sizeof(elemcounts));
7353
7354                 if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
7355                                                         &elemcounts))
7356                 {
7357                         /* We ignore array elements that are unsatisfiable patterns */
7358                         numPossible++;
7359
7360                         if (elemcounts.haveFullScan)
7361                         {
7362                                 /*
7363                                  * Full index scan will be required.  We treat this as if
7364                                  * every key in the index had been listed in the query; is
7365                                  * that reasonable?
7366                                  */
7367                                 elemcounts.partialEntries = 0;
7368                                 elemcounts.exactEntries = numIndexEntries;
7369                                 elemcounts.searchEntries = numIndexEntries;
7370                         }
7371                         arraycounts.partialEntries += elemcounts.partialEntries;
7372                         arraycounts.exactEntries += elemcounts.exactEntries;
7373                         arraycounts.searchEntries += elemcounts.searchEntries;
7374                 }
7375         }
7376
7377         if (numPossible == 0)
7378         {
7379                 /* No satisfiable patterns in the array */
7380                 return false;
7381         }
7382
7383         /*
7384          * Now add the averages to the global counts.  This will give us an
7385          * estimate of the average number of terms searched for in each indexscan,
7386          * including contributions from both array and non-array quals.
7387          */
7388         counts->partialEntries += arraycounts.partialEntries / numPossible;
7389         counts->exactEntries += arraycounts.exactEntries / numPossible;
7390         counts->searchEntries += arraycounts.searchEntries / numPossible;
7391
7392         counts->arrayScans *= numPossible;
7393
7394         return true;
7395 }
7396
7397 /*
7398  * GIN has search behavior completely different from other index types
7399  */
7400 void
7401 gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7402                                 Cost *indexStartupCost, Cost *indexTotalCost,
7403                                 Selectivity *indexSelectivity, double *indexCorrelation,
7404                                 double *indexPages)
7405 {
7406         IndexOptInfo *index = path->indexinfo;
7407         List       *indexQuals = path->indexquals;
7408         List       *indexOrderBys = path->indexorderbys;
7409         List       *qinfos;
7410         ListCell   *l;
7411         List       *selectivityQuals;
7412         double          numPages = index->pages,
7413                                 numTuples = index->tuples;
7414         double          numEntryPages,
7415                                 numDataPages,
7416                                 numPendingPages,
7417                                 numEntries;
7418         GinQualCounts counts;
7419         bool            matchPossible;
7420         double          partialScale;
7421         double          entryPagesFetched,
7422                                 dataPagesFetched,
7423                                 dataPagesFetchedBySel;
7424         double          qual_op_cost,
7425                                 qual_arg_cost,
7426                                 spc_random_page_cost,
7427                                 outer_scans;
7428         Relation        indexRel;
7429         GinStatsData ginStats;
7430
7431         /* Do preliminary analysis of indexquals */
7432         qinfos = deconstruct_indexquals(path);
7433
7434         /*
7435          * Obtain statistical information from the meta page, if possible.  Else
7436          * set ginStats to zeroes, and we'll cope below.
7437          */
7438         if (!index->hypothetical)
7439         {
7440                 indexRel = index_open(index->indexoid, AccessShareLock);
7441                 ginGetStats(indexRel, &ginStats);
7442                 index_close(indexRel, AccessShareLock);
7443         }
7444         else
7445         {
7446                 memset(&ginStats, 0, sizeof(ginStats));
7447         }
7448
7449         /*
7450          * Assuming we got valid (nonzero) stats at all, nPendingPages can be
7451          * trusted, but the other fields are data as of the last VACUUM.  We can
7452          * scale them up to account for growth since then, but that method only
7453          * goes so far; in the worst case, the stats might be for a completely
7454          * empty index, and scaling them will produce pretty bogus numbers.
7455          * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
7456          * it's grown more than that, fall back to estimating things only from the
7457          * assumed-accurate index size.  But we'll trust nPendingPages in any case
7458          * so long as it's not clearly insane, ie, more than the index size.
7459          */
7460         if (ginStats.nPendingPages < numPages)
7461                 numPendingPages = ginStats.nPendingPages;
7462         else
7463                 numPendingPages = 0;
7464
7465         if (numPages > 0 && ginStats.nTotalPages <= numPages &&
7466                 ginStats.nTotalPages > numPages / 4 &&
7467                 ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
7468         {
7469                 /*
7470                  * OK, the stats seem close enough to sane to be trusted.  But we
7471                  * still need to scale them by the ratio numPages / nTotalPages to
7472                  * account for growth since the last VACUUM.
7473                  */
7474                 double          scale = numPages / ginStats.nTotalPages;
7475
7476                 numEntryPages = ceil(ginStats.nEntryPages * scale);
7477                 numDataPages = ceil(ginStats.nDataPages * scale);
7478                 numEntries = ceil(ginStats.nEntries * scale);
7479                 /* ensure we didn't round up too much */
7480                 numEntryPages = Min(numEntryPages, numPages - numPendingPages);
7481                 numDataPages = Min(numDataPages,
7482                                                    numPages - numPendingPages - numEntryPages);
7483         }
7484         else
7485         {
7486                 /*
7487                  * We might get here because it's a hypothetical index, or an index
7488                  * created pre-9.1 and never vacuumed since upgrading (in which case
7489                  * its stats would read as zeroes), or just because it's grown too
7490                  * much since the last VACUUM for us to put our faith in scaling.
7491                  *
7492                  * Invent some plausible internal statistics based on the index page
7493                  * count (and clamp that to at least 10 pages, just in case).  We
7494                  * estimate that 90% of the index is entry pages, and the rest is data
7495                  * pages.  Estimate 100 entries per entry page; this is rather bogus
7496                  * since it'll depend on the size of the keys, but it's more robust
7497                  * than trying to predict the number of entries per heap tuple.
7498                  */
7499                 numPages = Max(numPages, 10);
7500                 numEntryPages = floor((numPages - numPendingPages) * 0.90);
7501                 numDataPages = numPages - numPendingPages - numEntryPages;
7502                 numEntries = floor(numEntryPages * 100);
7503         }
7504
7505         /* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
7506         if (numEntries < 1)
7507                 numEntries = 1;
7508
7509         /*
7510          * Include predicate in selectivityQuals (should match
7511          * genericcostestimate)
7512          */
7513         if (index->indpred != NIL)
7514         {
7515                 List       *predExtraQuals = NIL;
7516
7517                 foreach(l, index->indpred)
7518                 {
7519                         Node       *predQual = (Node *) lfirst(l);
7520                         List       *oneQual = list_make1(predQual);
7521
7522                         if (!predicate_implied_by(oneQual, indexQuals))
7523                                 predExtraQuals = list_concat(predExtraQuals, oneQual);
7524                 }
7525                 /* list_concat avoids modifying the passed-in indexQuals list */
7526                 selectivityQuals = list_concat(predExtraQuals, indexQuals);
7527         }
7528         else
7529                 selectivityQuals = indexQuals;
7530
7531         /* Estimate the fraction of main-table tuples that will be visited */
7532         *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
7533                                                                                            index->rel->relid,
7534                                                                                            JOIN_INNER,
7535                                                                                            NULL);
7536
7537         /* fetch estimated page cost for tablespace containing index */
7538         get_tablespace_page_costs(index->reltablespace,
7539                                                           &spc_random_page_cost,
7540                                                           NULL);
7541
7542         /*
7543          * Generic assumption about index correlation: there isn't any.
7544          */
7545         *indexCorrelation = 0.0;
7546
7547         /*
7548          * Examine quals to estimate number of search entries & partial matches
7549          */
7550         memset(&counts, 0, sizeof(counts));
7551         counts.arrayScans = 1;
7552         matchPossible = true;
7553
7554         foreach(l, qinfos)
7555         {
7556                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
7557                 Expr       *clause = qinfo->rinfo->clause;
7558
7559                 if (IsA(clause, OpExpr))
7560                 {
7561                         matchPossible = gincost_opexpr(root,
7562                                                                                    index,
7563                                                                                    qinfo,
7564                                                                                    &counts);
7565                         if (!matchPossible)
7566                                 break;
7567                 }
7568                 else if (IsA(clause, ScalarArrayOpExpr))
7569                 {
7570                         matchPossible = gincost_scalararrayopexpr(root,
7571                                                                                                           index,
7572                                                                                                           qinfo,
7573                                                                                                           numEntries,
7574                                                                                                           &counts);
7575                         if (!matchPossible)
7576                                 break;
7577                 }
7578                 else
7579                 {
7580                         /* shouldn't be anything else for a GIN index */
7581                         elog(ERROR, "unsupported GIN indexqual type: %d",
7582                                  (int) nodeTag(clause));
7583                 }
7584         }
7585
7586         /* Fall out if there were any provably-unsatisfiable quals */
7587         if (!matchPossible)
7588         {
7589                 *indexStartupCost = 0;
7590                 *indexTotalCost = 0;
7591                 *indexSelectivity = 0;
7592                 return;
7593         }
7594
7595         if (counts.haveFullScan || indexQuals == NIL)
7596         {
7597                 /*
7598                  * Full index scan will be required.  We treat this as if every key in
7599                  * the index had been listed in the query; is that reasonable?
7600                  */
7601                 counts.partialEntries = 0;
7602                 counts.exactEntries = numEntries;
7603                 counts.searchEntries = numEntries;
7604         }
7605
7606         /* Will we have more than one iteration of a nestloop scan? */
7607         outer_scans = loop_count;
7608
7609         /*
7610          * Compute cost to begin scan, first of all, pay attention to pending
7611          * list.
7612          */
7613         entryPagesFetched = numPendingPages;
7614
7615         /*
7616          * Estimate number of entry pages read.  We need to do
7617          * counts.searchEntries searches.  Use a power function as it should be,
7618          * but tuples on leaf pages usually is much greater. Here we include all
7619          * searches in entry tree, including search of first entry in partial
7620          * match algorithm
7621          */
7622         entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
7623
7624         /*
7625          * Add an estimate of entry pages read by partial match algorithm. It's a
7626          * scan over leaf pages in entry tree.  We haven't any useful stats here,
7627          * so estimate it as proportion.  Because counts.partialEntries is really
7628          * pretty bogus (see code above), it's possible that it is more than
7629          * numEntries; clamp the proportion to ensure sanity.
7630          */
7631         partialScale = counts.partialEntries / numEntries;
7632         partialScale = Min(partialScale, 1.0);
7633
7634         entryPagesFetched += ceil(numEntryPages * partialScale);
7635
7636         /*
7637          * Partial match algorithm reads all data pages before doing actual scan,
7638          * so it's a startup cost.  Again, we haven't any useful stats here, so
7639          * estimate it as proportion.
7640          */
7641         dataPagesFetched = ceil(numDataPages * partialScale);
7642
7643         /*
7644          * Calculate cache effects if more than one scan due to nestloops or array
7645          * quals.  The result is pro-rated per nestloop scan, but the array qual
7646          * factor shouldn't be pro-rated (compare genericcostestimate).
7647          */
7648         if (outer_scans > 1 || counts.arrayScans > 1)
7649         {
7650                 entryPagesFetched *= outer_scans * counts.arrayScans;
7651                 entryPagesFetched = index_pages_fetched(entryPagesFetched,
7652                                                                                                 (BlockNumber) numEntryPages,
7653                                                                                                 numEntryPages, root);
7654                 entryPagesFetched /= outer_scans;
7655                 dataPagesFetched *= outer_scans * counts.arrayScans;
7656                 dataPagesFetched = index_pages_fetched(dataPagesFetched,
7657                                                                                            (BlockNumber) numDataPages,
7658                                                                                            numDataPages, root);
7659                 dataPagesFetched /= outer_scans;
7660         }
7661
7662         /*
7663          * Here we use random page cost because logically-close pages could be far
7664          * apart on disk.
7665          */
7666         *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
7667
7668         /*
7669          * Now compute the number of data pages fetched during the scan.
7670          *
7671          * We assume every entry to have the same number of items, and that there
7672          * is no overlap between them. (XXX: tsvector and array opclasses collect
7673          * statistics on the frequency of individual keys; it would be nice to use
7674          * those here.)
7675          */
7676         dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
7677
7678         /*
7679          * If there is a lot of overlap among the entries, in particular if one of
7680          * the entries is very frequent, the above calculation can grossly
7681          * under-estimate.  As a simple cross-check, calculate a lower bound based
7682          * on the overall selectivity of the quals.  At a minimum, we must read
7683          * one item pointer for each matching entry.
7684          *
7685          * The width of each item pointer varies, based on the level of
7686          * compression.  We don't have statistics on that, but an average of
7687          * around 3 bytes per item is fairly typical.
7688          */
7689         dataPagesFetchedBySel = ceil(*indexSelectivity *
7690                                                                  (numTuples / (BLCKSZ / 3)));
7691         if (dataPagesFetchedBySel > dataPagesFetched)
7692                 dataPagesFetched = dataPagesFetchedBySel;
7693
7694         /* Account for cache effects, the same as above */
7695         if (outer_scans > 1 || counts.arrayScans > 1)
7696         {
7697                 dataPagesFetched *= outer_scans * counts.arrayScans;
7698                 dataPagesFetched = index_pages_fetched(dataPagesFetched,
7699                                                                                            (BlockNumber) numDataPages,
7700                                                                                            numDataPages, root);
7701                 dataPagesFetched /= outer_scans;
7702         }
7703
7704         /* And apply random_page_cost as the cost per page */
7705         *indexTotalCost = *indexStartupCost +
7706                 dataPagesFetched * spc_random_page_cost;
7707
7708         /*
7709          * Add on index qual eval costs, much as in genericcostestimate
7710          */
7711         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
7712                 orderby_operands_eval_cost(root, path);
7713         qual_op_cost = cpu_operator_cost *
7714                 (list_length(indexQuals) + list_length(indexOrderBys));
7715
7716         *indexStartupCost += qual_arg_cost;
7717         *indexTotalCost += qual_arg_cost;
7718         *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
7719         *indexPages = dataPagesFetched;
7720 }
7721
7722 /*
7723  * BRIN has search behavior completely different from other index types
7724  */
7725 void
7726 brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7727                                  Cost *indexStartupCost, Cost *indexTotalCost,
7728                                  Selectivity *indexSelectivity, double *indexCorrelation,
7729                                  double *indexPages)
7730 {
7731         IndexOptInfo *index = path->indexinfo;
7732         List       *indexQuals = path->indexquals;
7733         double          numPages = index->pages;
7734         RelOptInfo *baserel = index->rel;
7735         RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
7736         List       *qinfos;
7737         Cost            spc_seq_page_cost;
7738         Cost            spc_random_page_cost;
7739         double          qual_arg_cost;
7740         double          qualSelectivity;
7741         BrinStatsData statsData;
7742         double          indexRanges;
7743         double          minimalRanges;
7744         double          estimatedRanges;
7745         double          selec;
7746         Relation        indexRel;
7747         ListCell   *l;
7748         VariableStatData vardata;
7749
7750         Assert(rte->rtekind == RTE_RELATION);
7751
7752         /* fetch estimated page cost for the tablespace containing the index */
7753         get_tablespace_page_costs(index->reltablespace,
7754                                                           &spc_random_page_cost,
7755                                                           &spc_seq_page_cost);
7756
7757         /*
7758          * Obtain some data from the index itself.
7759          */
7760         indexRel = index_open(index->indexoid, AccessShareLock);
7761         brinGetStats(indexRel, &statsData);
7762         index_close(indexRel, AccessShareLock);
7763
7764         /*
7765          * Compute index correlation
7766          *
7767          * Because we can use all index quals equally when scanning, we can use
7768          * the largest correlation (in absolute value) among columns used by the
7769          * query.  Start at zero, the worst possible case.  If we cannot find any
7770          * correlation statistics, we will keep it as 0.
7771          */
7772         *indexCorrelation = 0;
7773
7774         qinfos = deconstruct_indexquals(path);
7775         foreach(l, qinfos)
7776         {
7777                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
7778                 AttrNumber      attnum = index->indexkeys[qinfo->indexcol];
7779
7780                 /* attempt to lookup stats in relation for this index column */
7781                 if (attnum != 0)
7782                 {
7783                         /* Simple variable -- look to stats for the underlying table */
7784                         if (get_relation_stats_hook &&
7785                                 (*get_relation_stats_hook) (root, rte, attnum, &vardata))
7786                         {
7787                                 /*
7788                                  * The hook took control of acquiring a stats tuple.  If it
7789                                  * did supply a tuple, it'd better have supplied a freefunc.
7790                                  */
7791                                 if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
7792                                         elog(ERROR,
7793                                           "no function provided to release variable stats with");
7794                         }
7795                         else
7796                         {
7797                                 vardata.statsTuple =
7798                                         SearchSysCache3(STATRELATTINH,
7799                                                                         ObjectIdGetDatum(rte->relid),
7800                                                                         Int16GetDatum(attnum),
7801                                                                         BoolGetDatum(false));
7802                                 vardata.freefunc = ReleaseSysCache;
7803                         }
7804                 }
7805                 else
7806                 {
7807                         /*
7808                          * Looks like we've found an expression column in the index. Let's
7809                          * see if there's any stats for it.
7810                          */
7811
7812                         /* get the attnum from the 0-based index. */
7813                         attnum = qinfo->indexcol + 1;
7814
7815                         if (get_index_stats_hook &&
7816                         (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
7817                         {
7818                                 /*
7819                                  * The hook took control of acquiring a stats tuple.  If it
7820                                  * did supply a tuple, it'd better have supplied a freefunc.
7821                                  */
7822                                 if (HeapTupleIsValid(vardata.statsTuple) &&
7823                                         !vardata.freefunc)
7824                                         elog(ERROR, "no function provided to release variable stats with");
7825                         }
7826                         else
7827                         {
7828                                 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
7829                                                                                    ObjectIdGetDatum(index->indexoid),
7830                                                                                                          Int16GetDatum(attnum),
7831                                                                                                          BoolGetDatum(false));
7832                                 vardata.freefunc = ReleaseSysCache;
7833                         }
7834                 }
7835
7836                 if (HeapTupleIsValid(vardata.statsTuple))
7837                 {
7838                         AttStatsSlot sslot;
7839
7840                         if (get_attstatsslot(&sslot, vardata.statsTuple,
7841                                                                  STATISTIC_KIND_CORRELATION, InvalidOid,
7842                                                                  ATTSTATSSLOT_NUMBERS))
7843                         {
7844                                 double          varCorrelation = 0.0;
7845
7846                                 if (sslot.nnumbers > 0)
7847                                         varCorrelation = Abs(sslot.numbers[0]);
7848
7849                                 if (varCorrelation > *indexCorrelation)
7850                                         *indexCorrelation = varCorrelation;
7851
7852                                 free_attstatsslot(&sslot);
7853                         }
7854                 }
7855
7856                 ReleaseVariableStats(vardata);
7857         }
7858
7859         qualSelectivity = clauselist_selectivity(root, indexQuals,
7860                                                                                          baserel->relid,
7861                                                                                          JOIN_INNER, NULL);
7862
7863         /* work out the actual number of ranges in the index */
7864         indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
7865                                           1.0);
7866
7867         /*
7868          * Now calculate the minimum number of ranges we could match if all of
7869          * the rows were in perfect order in the table's heap.
7870          */
7871         minimalRanges = ceil(indexRanges * qualSelectivity);
7872
7873         /*
7874          * Now estimate the number of ranges that we'll touch by using the
7875          * indexCorrelation from the stats. Careful not to divide by zero (note
7876          * we're using the absolute value of the correlation).
7877          */
7878         if (*indexCorrelation < 1.0e-10)
7879                 estimatedRanges = indexRanges;
7880         else
7881                 estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
7882
7883         /* we expect to visit this portion of the table */
7884         selec = estimatedRanges / indexRanges;
7885
7886         CLAMP_PROBABILITY(selec);
7887
7888         *indexSelectivity = selec;
7889
7890         /*
7891          * Compute the index qual costs, much as in genericcostestimate, to add to
7892          * the index costs.
7893          */
7894         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
7895                 orderby_operands_eval_cost(root, path);
7896
7897         /*
7898          * Compute the startup cost as the cost to read the whole revmap
7899          * sequentially, including the cost to execute the index quals.
7900          */
7901         *indexStartupCost =
7902                 spc_seq_page_cost * statsData.revmapNumPages * loop_count;
7903         *indexStartupCost += qual_arg_cost;
7904
7905         /*
7906          * To read a BRIN index there might be a bit of back and forth over
7907          * regular pages, as revmap might point to them out of sequential order;
7908          * calculate the total cost as reading the whole index in random order.
7909          */
7910         *indexTotalCost = *indexStartupCost +
7911                 spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
7912
7913         /*
7914          * Charge a small amount per range tuple that we expect to match. This
7915          * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
7916          * will set a bit for each page in the range when we find a matching
7917          * range, so we must multiply the charge by the number of pages in the
7918          * range.
7919          */
7920         *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
7921                 statsData.pagesPerRange;
7922
7923         *indexPages = index->pages;
7924 }