]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/selfuncs.c
Reset API of clause_selectivity()
[postgresql] / src / backend / utils / adt / selfuncs.c
1 /*-------------------------------------------------------------------------
2  *
3  * selfuncs.c
4  *        Selectivity functions and index cost estimation functions for
5  *        standard operators and index access methods.
6  *
7  *        Selectivity routines are registered in the pg_operator catalog
8  *        in the "oprrest" and "oprjoin" attributes.
9  *
10  *        Index cost functions are located via the index AM's API struct,
11  *        which is obtained from the handler function registered in pg_am.
12  *
13  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
14  * Portions Copyright (c) 1994, Regents of the University of California
15  *
16  *
17  * IDENTIFICATION
18  *        src/backend/utils/adt/selfuncs.c
19  *
20  *-------------------------------------------------------------------------
21  */
22
23 /*----------
24  * Operator selectivity estimation functions are called to estimate the
25  * selectivity of WHERE clauses whose top-level operator is their operator.
26  * We divide the problem into two cases:
27  *              Restriction clause estimation: the clause involves vars of just
28  *                      one relation.
29  *              Join clause estimation: the clause involves vars of multiple rels.
30  * Join selectivity estimation is far more difficult and usually less accurate
31  * than restriction estimation.
32  *
33  * When dealing with the inner scan of a nestloop join, we consider the
34  * join's joinclauses as restriction clauses for the inner relation, and
35  * treat vars of the outer relation as parameters (a/k/a constants of unknown
36  * values).  So, restriction estimators need to be able to accept an argument
37  * telling which relation is to be treated as the variable.
38  *
39  * The call convention for a restriction estimator (oprrest function) is
40  *
41  *              Selectivity oprrest (PlannerInfo *root,
42  *                                                       Oid operator,
43  *                                                       List *args,
44  *                                                       int varRelid);
45  *
46  * root: general information about the query (rtable and RelOptInfo lists
47  * are particularly important for the estimator).
48  * operator: OID of the specific operator in question.
49  * args: argument list from the operator clause.
50  * varRelid: if not zero, the relid (rtable index) of the relation to
51  * be treated as the variable relation.  May be zero if the args list
52  * is known to contain vars of only one relation.
53  *
54  * This is represented at the SQL level (in pg_proc) as
55  *
56  *              float8 oprrest (internal, oid, internal, int4);
57  *
58  * The result is a selectivity, that is, a fraction (0 to 1) of the rows
59  * of the relation that are expected to produce a TRUE result for the
60  * given operator.
61  *
62  * The call convention for a join estimator (oprjoin function) is similar
63  * except that varRelid is not needed, and instead join information is
64  * supplied:
65  *
66  *              Selectivity oprjoin (PlannerInfo *root,
67  *                                                       Oid operator,
68  *                                                       List *args,
69  *                                                       JoinType jointype,
70  *                                                       SpecialJoinInfo *sjinfo);
71  *
72  *              float8 oprjoin (internal, oid, internal, int2, internal);
73  *
74  * (Before Postgres 8.4, join estimators had only the first four of these
75  * parameters.  That signature is still allowed, but deprecated.)  The
76  * relationship between jointype and sjinfo is explained in the comments for
77  * clause_selectivity() --- the short version is that jointype is usually
78  * best ignored in favor of examining sjinfo.
79  *
80  * Join selectivity for regular inner and outer joins is defined as the
81  * fraction (0 to 1) of the cross product of the relations that is expected
82  * to produce a TRUE result for the given operator.  For both semi and anti
83  * joins, however, the selectivity is defined as the fraction of the left-hand
84  * side relation's rows that are expected to have a match (ie, at least one
85  * row with a TRUE result) in the right-hand side.
86  *
87  * For both oprrest and oprjoin functions, the operator's input collation OID
88  * (if any) is passed using the standard fmgr mechanism, so that the estimator
89  * function can fetch it with PG_GET_COLLATION().  Note, however, that all
90  * statistics in pg_statistic are currently built using the database's default
91  * collation.  Thus, in most cases where we are looking at statistics, we
92  * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
93  * We expect that the error induced by doing this is usually not large enough
94  * to justify complicating matters.
95  *----------
96  */
97
98 #include "postgres.h"
99
100 #include <ctype.h>
101 #include <float.h>
102 #include <math.h>
103
104 #include "access/brin.h"
105 #include "access/gin.h"
106 #include "access/htup_details.h"
107 #include "access/sysattr.h"
108 #include "catalog/index.h"
109 #include "catalog/pg_am.h"
110 #include "catalog/pg_collation.h"
111 #include "catalog/pg_operator.h"
112 #include "catalog/pg_opfamily.h"
113 #include "catalog/pg_statistic.h"
114 #include "catalog/pg_statistic_ext.h"
115 #include "catalog/pg_type.h"
116 #include "executor/executor.h"
117 #include "mb/pg_wchar.h"
118 #include "nodes/makefuncs.h"
119 #include "nodes/nodeFuncs.h"
120 #include "optimizer/clauses.h"
121 #include "optimizer/cost.h"
122 #include "optimizer/pathnode.h"
123 #include "optimizer/paths.h"
124 #include "optimizer/plancat.h"
125 #include "optimizer/predtest.h"
126 #include "optimizer/restrictinfo.h"
127 #include "optimizer/var.h"
128 #include "parser/parse_clause.h"
129 #include "parser/parse_coerce.h"
130 #include "parser/parsetree.h"
131 #include "statistics/statistics.h"
132 #include "utils/builtins.h"
133 #include "utils/bytea.h"
134 #include "utils/date.h"
135 #include "utils/datum.h"
136 #include "utils/fmgroids.h"
137 #include "utils/index_selfuncs.h"
138 #include "utils/lsyscache.h"
139 #include "utils/nabstime.h"
140 #include "utils/pg_locale.h"
141 #include "utils/rel.h"
142 #include "utils/selfuncs.h"
143 #include "utils/spccache.h"
144 #include "utils/syscache.h"
145 #include "utils/timestamp.h"
146 #include "utils/tqual.h"
147 #include "utils/typcache.h"
148 #include "utils/varlena.h"
149
150
151 /* Hooks for plugins to get control when we ask for stats */
152 get_relation_stats_hook_type get_relation_stats_hook = NULL;
153 get_index_stats_hook_type get_index_stats_hook = NULL;
154
155 static double var_eq_const(VariableStatData *vardata, Oid operator,
156                          Datum constval, bool constisnull,
157                          bool varonleft);
158 static double var_eq_non_const(VariableStatData *vardata, Oid operator,
159                                  Node *other,
160                                  bool varonleft);
161 static double ineq_histogram_selectivity(PlannerInfo *root,
162                                                    VariableStatData *vardata,
163                                                    FmgrInfo *opproc, bool isgt,
164                                                    Datum constval, Oid consttype);
165 static double eqjoinsel_inner(Oid operator,
166                                 VariableStatData *vardata1, VariableStatData *vardata2);
167 static double eqjoinsel_semi(Oid operator,
168                            VariableStatData *vardata1, VariableStatData *vardata2,
169                            RelOptInfo *inner_rel);
170 static bool estimate_multivariate_ndistinct(PlannerInfo *root,
171                            RelOptInfo *rel, List **varinfos, double *ndistinct);
172 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
173                                   Datum lobound, Datum hibound, Oid boundstypid,
174                                   double *scaledlobound, double *scaledhibound);
175 static double convert_numeric_to_scalar(Datum value, Oid typid);
176 static void convert_string_to_scalar(char *value,
177                                                  double *scaledvalue,
178                                                  char *lobound,
179                                                  double *scaledlobound,
180                                                  char *hibound,
181                                                  double *scaledhibound);
182 static void convert_bytea_to_scalar(Datum value,
183                                                 double *scaledvalue,
184                                                 Datum lobound,
185                                                 double *scaledlobound,
186                                                 Datum hibound,
187                                                 double *scaledhibound);
188 static double convert_one_string_to_scalar(char *value,
189                                                          int rangelo, int rangehi);
190 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
191                                                         int rangelo, int rangehi);
192 static char *convert_string_datum(Datum value, Oid typid);
193 static double convert_timevalue_to_scalar(Datum value, Oid typid);
194 static void examine_simple_variable(PlannerInfo *root, Var *var,
195                                                 VariableStatData *vardata);
196 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
197                                    Oid sortop, Datum *min, Datum *max);
198 static bool get_actual_variable_range(PlannerInfo *root,
199                                                   VariableStatData *vardata,
200                                                   Oid sortop,
201                                                   Datum *min, Datum *max);
202 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
203 static Selectivity prefix_selectivity(PlannerInfo *root,
204                                    VariableStatData *vardata,
205                                    Oid vartype, Oid opfamily, Const *prefixcon);
206 static Selectivity like_selectivity(const char *patt, int pattlen,
207                                  bool case_insensitive);
208 static Selectivity regex_selectivity(const char *patt, int pattlen,
209                                   bool case_insensitive,
210                                   int fixed_prefix_len);
211 static Datum string_to_datum(const char *str, Oid datatype);
212 static Const *string_to_const(const char *str, Oid datatype);
213 static Const *string_to_bytea_const(const char *str, size_t str_len);
214 static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
215
216
217 /*
218  *              eqsel                   - Selectivity of "=" for any data types.
219  *
220  * Note: this routine is also used to estimate selectivity for some
221  * operators that are not "=" but have comparable selectivity behavior,
222  * such as "~=" (geometric approximate-match).  Even for "=", we must
223  * keep in mind that the left and right datatypes may differ.
224  */
225 Datum
226 eqsel(PG_FUNCTION_ARGS)
227 {
228         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
229         Oid                     operator = PG_GETARG_OID(1);
230         List       *args = (List *) PG_GETARG_POINTER(2);
231         int                     varRelid = PG_GETARG_INT32(3);
232         VariableStatData vardata;
233         Node       *other;
234         bool            varonleft;
235         double          selec;
236
237         /*
238          * If expression is not variable = something or something = variable, then
239          * punt and return a default estimate.
240          */
241         if (!get_restriction_variable(root, args, varRelid,
242                                                                   &vardata, &other, &varonleft))
243                 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
244
245         /*
246          * We can do a lot better if the something is a constant.  (Note: the
247          * Const might result from estimation rather than being a simple constant
248          * in the query.)
249          */
250         if (IsA(other, Const))
251                 selec = var_eq_const(&vardata, operator,
252                                                          ((Const *) other)->constvalue,
253                                                          ((Const *) other)->constisnull,
254                                                          varonleft);
255         else
256                 selec = var_eq_non_const(&vardata, operator, other,
257                                                                  varonleft);
258
259         ReleaseVariableStats(vardata);
260
261         PG_RETURN_FLOAT8((float8) selec);
262 }
263
264 /*
265  * var_eq_const --- eqsel for var = const case
266  *
267  * This is split out so that some other estimation functions can use it.
268  */
269 static double
270 var_eq_const(VariableStatData *vardata, Oid operator,
271                          Datum constval, bool constisnull,
272                          bool varonleft)
273 {
274         double          selec;
275         bool            isdefault;
276
277         /*
278          * If the constant is NULL, assume operator is strict and return zero, ie,
279          * operator will never return TRUE.
280          */
281         if (constisnull)
282                 return 0.0;
283
284         /*
285          * If we matched the var to a unique index or DISTINCT clause, assume
286          * there is exactly one match regardless of anything else.  (This is
287          * slightly bogus, since the index or clause's equality operator might be
288          * different from ours, but it's much more likely to be right than
289          * ignoring the information.)
290          */
291         if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
292                 return 1.0 / vardata->rel->tuples;
293
294         if (HeapTupleIsValid(vardata->statsTuple))
295         {
296                 Form_pg_statistic stats;
297                 Datum      *values;
298                 int                     nvalues;
299                 float4     *numbers;
300                 int                     nnumbers;
301                 bool            match = false;
302                 int                     i;
303
304                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
305
306                 /*
307                  * Is the constant "=" to any of the column's most common values?
308                  * (Although the given operator may not really be "=", we will assume
309                  * that seeing whether it returns TRUE is an appropriate test.  If you
310                  * don't like this, maybe you shouldn't be using eqsel for your
311                  * operator...)
312                  */
313                 if (get_attstatsslot(vardata->statsTuple,
314                                                          vardata->atttype, vardata->atttypmod,
315                                                          STATISTIC_KIND_MCV, InvalidOid,
316                                                          NULL,
317                                                          &values, &nvalues,
318                                                          &numbers, &nnumbers))
319                 {
320                         FmgrInfo        eqproc;
321
322                         fmgr_info(get_opcode(operator), &eqproc);
323
324                         for (i = 0; i < nvalues; i++)
325                         {
326                                 /* be careful to apply operator right way 'round */
327                                 if (varonleft)
328                                         match = DatumGetBool(FunctionCall2Coll(&eqproc,
329                                                                                                            DEFAULT_COLLATION_OID,
330                                                                                                                    values[i],
331                                                                                                                    constval));
332                                 else
333                                         match = DatumGetBool(FunctionCall2Coll(&eqproc,
334                                                                                                            DEFAULT_COLLATION_OID,
335                                                                                                                    constval,
336                                                                                                                    values[i]));
337                                 if (match)
338                                         break;
339                         }
340                 }
341                 else
342                 {
343                         /* no most-common-value info available */
344                         values = NULL;
345                         numbers = NULL;
346                         i = nvalues = nnumbers = 0;
347                 }
348
349                 if (match)
350                 {
351                         /*
352                          * Constant is "=" to this common value.  We know selectivity
353                          * exactly (or as exactly as ANALYZE could calculate it, anyway).
354                          */
355                         selec = numbers[i];
356                 }
357                 else
358                 {
359                         /*
360                          * Comparison is against a constant that is neither NULL nor any
361                          * of the common values.  Its selectivity cannot be more than
362                          * this:
363                          */
364                         double          sumcommon = 0.0;
365                         double          otherdistinct;
366
367                         for (i = 0; i < nnumbers; i++)
368                                 sumcommon += numbers[i];
369                         selec = 1.0 - sumcommon - stats->stanullfrac;
370                         CLAMP_PROBABILITY(selec);
371
372                         /*
373                          * and in fact it's probably a good deal less. We approximate that
374                          * all the not-common values share this remaining fraction
375                          * equally, so we divide by the number of other distinct values.
376                          */
377                         otherdistinct = get_variable_numdistinct(vardata, &isdefault) - nnumbers;
378                         if (otherdistinct > 1)
379                                 selec /= otherdistinct;
380
381                         /*
382                          * Another cross-check: selectivity shouldn't be estimated as more
383                          * than the least common "most common value".
384                          */
385                         if (nnumbers > 0 && selec > numbers[nnumbers - 1])
386                                 selec = numbers[nnumbers - 1];
387                 }
388
389                 free_attstatsslot(vardata->atttype, values, nvalues,
390                                                   numbers, nnumbers);
391         }
392         else
393         {
394                 /*
395                  * No ANALYZE stats available, so make a guess using estimated number
396                  * of distinct values and assuming they are equally common. (The guess
397                  * is unlikely to be very good, but we do know a few special cases.)
398                  */
399                 selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
400         }
401
402         /* result should be in range, but make sure... */
403         CLAMP_PROBABILITY(selec);
404
405         return selec;
406 }
407
408 /*
409  * var_eq_non_const --- eqsel for var = something-other-than-const case
410  */
411 static double
412 var_eq_non_const(VariableStatData *vardata, Oid operator,
413                                  Node *other,
414                                  bool varonleft)
415 {
416         double          selec;
417         bool            isdefault;
418
419         /*
420          * If we matched the var to a unique index or DISTINCT clause, assume
421          * there is exactly one match regardless of anything else.  (This is
422          * slightly bogus, since the index or clause's equality operator might be
423          * different from ours, but it's much more likely to be right than
424          * ignoring the information.)
425          */
426         if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
427                 return 1.0 / vardata->rel->tuples;
428
429         if (HeapTupleIsValid(vardata->statsTuple))
430         {
431                 Form_pg_statistic stats;
432                 double          ndistinct;
433                 float4     *numbers;
434                 int                     nnumbers;
435
436                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
437
438                 /*
439                  * Search is for a value that we do not know a priori, but we will
440                  * assume it is not NULL.  Estimate the selectivity as non-null
441                  * fraction divided by number of distinct values, so that we get a
442                  * result averaged over all possible values whether common or
443                  * uncommon.  (Essentially, we are assuming that the not-yet-known
444                  * comparison value is equally likely to be any of the possible
445                  * values, regardless of their frequency in the table.  Is that a good
446                  * idea?)
447                  */
448                 selec = 1.0 - stats->stanullfrac;
449                 ndistinct = get_variable_numdistinct(vardata, &isdefault);
450                 if (ndistinct > 1)
451                         selec /= ndistinct;
452
453                 /*
454                  * Cross-check: selectivity should never be estimated as more than the
455                  * most common value's.
456                  */
457                 if (get_attstatsslot(vardata->statsTuple,
458                                                          vardata->atttype, vardata->atttypmod,
459                                                          STATISTIC_KIND_MCV, InvalidOid,
460                                                          NULL,
461                                                          NULL, NULL,
462                                                          &numbers, &nnumbers))
463                 {
464                         if (nnumbers > 0 && selec > numbers[0])
465                                 selec = numbers[0];
466                         free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
467                 }
468         }
469         else
470         {
471                 /*
472                  * No ANALYZE stats available, so make a guess using estimated number
473                  * of distinct values and assuming they are equally common. (The guess
474                  * is unlikely to be very good, but we do know a few special cases.)
475                  */
476                 selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
477         }
478
479         /* result should be in range, but make sure... */
480         CLAMP_PROBABILITY(selec);
481
482         return selec;
483 }
484
485 /*
486  *              neqsel                  - Selectivity of "!=" for any data types.
487  *
488  * This routine is also used for some operators that are not "!="
489  * but have comparable selectivity behavior.  See above comments
490  * for eqsel().
491  */
492 Datum
493 neqsel(PG_FUNCTION_ARGS)
494 {
495         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
496         Oid                     operator = PG_GETARG_OID(1);
497         List       *args = (List *) PG_GETARG_POINTER(2);
498         int                     varRelid = PG_GETARG_INT32(3);
499         Oid                     eqop;
500         float8          result;
501
502         /*
503          * We want 1 - eqsel() where the equality operator is the one associated
504          * with this != operator, that is, its negator.
505          */
506         eqop = get_negator(operator);
507         if (eqop)
508         {
509                 result = DatumGetFloat8(DirectFunctionCall4(eqsel,
510                                                                                                         PointerGetDatum(root),
511                                                                                                         ObjectIdGetDatum(eqop),
512                                                                                                         PointerGetDatum(args),
513                                                                                                         Int32GetDatum(varRelid)));
514         }
515         else
516         {
517                 /* Use default selectivity (should we raise an error instead?) */
518                 result = DEFAULT_EQ_SEL;
519         }
520         result = 1.0 - result;
521         PG_RETURN_FLOAT8(result);
522 }
523
524 /*
525  *      scalarineqsel           - Selectivity of "<", "<=", ">", ">=" for scalars.
526  *
527  * This is the guts of both scalarltsel and scalargtsel.  The caller has
528  * commuted the clause, if necessary, so that we can treat the variable as
529  * being on the left.  The caller must also make sure that the other side
530  * of the clause is a non-null Const, and dissect same into a value and
531  * datatype.
532  *
533  * This routine works for any datatype (or pair of datatypes) known to
534  * convert_to_scalar().  If it is applied to some other datatype,
535  * it will return a default estimate.
536  */
537 static double
538 scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
539                           VariableStatData *vardata, Datum constval, Oid consttype)
540 {
541         Form_pg_statistic stats;
542         FmgrInfo        opproc;
543         double          mcv_selec,
544                                 hist_selec,
545                                 sumcommon;
546         double          selec;
547
548         if (!HeapTupleIsValid(vardata->statsTuple))
549         {
550                 /* no stats available, so default result */
551                 return DEFAULT_INEQ_SEL;
552         }
553         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
554
555         fmgr_info(get_opcode(operator), &opproc);
556
557         /*
558          * If we have most-common-values info, add up the fractions of the MCV
559          * entries that satisfy MCV OP CONST.  These fractions contribute directly
560          * to the result selectivity.  Also add up the total fraction represented
561          * by MCV entries.
562          */
563         mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
564                                                                 &sumcommon);
565
566         /*
567          * If there is a histogram, determine which bin the constant falls in, and
568          * compute the resulting contribution to selectivity.
569          */
570         hist_selec = ineq_histogram_selectivity(root, vardata, &opproc, isgt,
571                                                                                         constval, consttype);
572
573         /*
574          * Now merge the results from the MCV and histogram calculations,
575          * realizing that the histogram covers only the non-null values that are
576          * not listed in MCV.
577          */
578         selec = 1.0 - stats->stanullfrac - sumcommon;
579
580         if (hist_selec >= 0.0)
581                 selec *= hist_selec;
582         else
583         {
584                 /*
585                  * If no histogram but there are values not accounted for by MCV,
586                  * arbitrarily assume half of them will match.
587                  */
588                 selec *= 0.5;
589         }
590
591         selec += mcv_selec;
592
593         /* result should be in range, but make sure... */
594         CLAMP_PROBABILITY(selec);
595
596         return selec;
597 }
598
599 /*
600  *      mcv_selectivity                 - Examine the MCV list for selectivity estimates
601  *
602  * Determine the fraction of the variable's MCV population that satisfies
603  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.  Also
604  * compute the fraction of the total column population represented by the MCV
605  * list.  This code will work for any boolean-returning predicate operator.
606  *
607  * The function result is the MCV selectivity, and the fraction of the
608  * total population is returned into *sumcommonp.  Zeroes are returned
609  * if there is no MCV list.
610  */
611 double
612 mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
613                                 Datum constval, bool varonleft,
614                                 double *sumcommonp)
615 {
616         double          mcv_selec,
617                                 sumcommon;
618         Datum      *values;
619         int                     nvalues;
620         float4     *numbers;
621         int                     nnumbers;
622         int                     i;
623
624         mcv_selec = 0.0;
625         sumcommon = 0.0;
626
627         if (HeapTupleIsValid(vardata->statsTuple) &&
628                 get_attstatsslot(vardata->statsTuple,
629                                                  vardata->atttype, vardata->atttypmod,
630                                                  STATISTIC_KIND_MCV, InvalidOid,
631                                                  NULL,
632                                                  &values, &nvalues,
633                                                  &numbers, &nnumbers))
634         {
635                 for (i = 0; i < nvalues; i++)
636                 {
637                         if (varonleft ?
638                                 DatumGetBool(FunctionCall2Coll(opproc,
639                                                                                            DEFAULT_COLLATION_OID,
640                                                                                            values[i],
641                                                                                            constval)) :
642                                 DatumGetBool(FunctionCall2Coll(opproc,
643                                                                                            DEFAULT_COLLATION_OID,
644                                                                                            constval,
645                                                                                            values[i])))
646                                 mcv_selec += numbers[i];
647                         sumcommon += numbers[i];
648                 }
649                 free_attstatsslot(vardata->atttype, values, nvalues,
650                                                   numbers, nnumbers);
651         }
652
653         *sumcommonp = sumcommon;
654         return mcv_selec;
655 }
656
657 /*
658  *      histogram_selectivity   - Examine the histogram for selectivity estimates
659  *
660  * Determine the fraction of the variable's histogram entries that satisfy
661  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
662  *
663  * This code will work for any boolean-returning predicate operator, whether
664  * or not it has anything to do with the histogram sort operator.  We are
665  * essentially using the histogram just as a representative sample.  However,
666  * small histograms are unlikely to be all that representative, so the caller
667  * should be prepared to fall back on some other estimation approach when the
668  * histogram is missing or very small.  It may also be prudent to combine this
669  * approach with another one when the histogram is small.
670  *
671  * If the actual histogram size is not at least min_hist_size, we won't bother
672  * to do the calculation at all.  Also, if the n_skip parameter is > 0, we
673  * ignore the first and last n_skip histogram elements, on the grounds that
674  * they are outliers and hence not very representative.  Typical values for
675  * these parameters are 10 and 1.
676  *
677  * The function result is the selectivity, or -1 if there is no histogram
678  * or it's smaller than min_hist_size.
679  *
680  * The output parameter *hist_size receives the actual histogram size,
681  * or zero if no histogram.  Callers may use this number to decide how
682  * much faith to put in the function result.
683  *
684  * Note that the result disregards both the most-common-values (if any) and
685  * null entries.  The caller is expected to combine this result with
686  * statistics for those portions of the column population.  It may also be
687  * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
688  */
689 double
690 histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
691                                           Datum constval, bool varonleft,
692                                           int min_hist_size, int n_skip,
693                                           int *hist_size)
694 {
695         double          result;
696         Datum      *values;
697         int                     nvalues;
698
699         /* check sanity of parameters */
700         Assert(n_skip >= 0);
701         Assert(min_hist_size > 2 * n_skip);
702
703         if (HeapTupleIsValid(vardata->statsTuple) &&
704                 get_attstatsslot(vardata->statsTuple,
705                                                  vardata->atttype, vardata->atttypmod,
706                                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
707                                                  NULL,
708                                                  &values, &nvalues,
709                                                  NULL, NULL))
710         {
711                 *hist_size = nvalues;
712                 if (nvalues >= min_hist_size)
713                 {
714                         int                     nmatch = 0;
715                         int                     i;
716
717                         for (i = n_skip; i < nvalues - n_skip; i++)
718                         {
719                                 if (varonleft ?
720                                         DatumGetBool(FunctionCall2Coll(opproc,
721                                                                                                    DEFAULT_COLLATION_OID,
722                                                                                                    values[i],
723                                                                                                    constval)) :
724                                         DatumGetBool(FunctionCall2Coll(opproc,
725                                                                                                    DEFAULT_COLLATION_OID,
726                                                                                                    constval,
727                                                                                                    values[i])))
728                                         nmatch++;
729                         }
730                         result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip));
731                 }
732                 else
733                         result = -1;
734                 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
735         }
736         else
737         {
738                 *hist_size = 0;
739                 result = -1;
740         }
741
742         return result;
743 }
744
745 /*
746  *      ineq_histogram_selectivity      - Examine the histogram for scalarineqsel
747  *
748  * Determine the fraction of the variable's histogram population that
749  * satisfies the inequality condition, ie, VAR < CONST or VAR > CONST.
750  *
751  * Returns -1 if there is no histogram (valid results will always be >= 0).
752  *
753  * Note that the result disregards both the most-common-values (if any) and
754  * null entries.  The caller is expected to combine this result with
755  * statistics for those portions of the column population.
756  */
757 static double
758 ineq_histogram_selectivity(PlannerInfo *root,
759                                                    VariableStatData *vardata,
760                                                    FmgrInfo *opproc, bool isgt,
761                                                    Datum constval, Oid consttype)
762 {
763         double          hist_selec;
764         Oid                     hist_op;
765         Datum      *values;
766         int                     nvalues;
767
768         hist_selec = -1.0;
769
770         /*
771          * Someday, ANALYZE might store more than one histogram per rel/att,
772          * corresponding to more than one possible sort ordering defined for the
773          * column type.  However, to make that work we will need to figure out
774          * which staop to search for --- it's not necessarily the one we have at
775          * hand!  (For example, we might have a '<=' operator rather than the '<'
776          * operator that will appear in staop.)  For now, assume that whatever
777          * appears in pg_statistic is sorted the same way our operator sorts, or
778          * the reverse way if isgt is TRUE.
779          */
780         if (HeapTupleIsValid(vardata->statsTuple) &&
781                 get_attstatsslot(vardata->statsTuple,
782                                                  vardata->atttype, vardata->atttypmod,
783                                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
784                                                  &hist_op,
785                                                  &values, &nvalues,
786                                                  NULL, NULL))
787         {
788                 if (nvalues > 1)
789                 {
790                         /*
791                          * Use binary search to find proper location, ie, the first slot
792                          * at which the comparison fails.  (If the given operator isn't
793                          * actually sort-compatible with the histogram, you'll get garbage
794                          * results ... but probably not any more garbage-y than you would
795                          * from the old linear search.)
796                          *
797                          * If the binary search accesses the first or last histogram
798                          * entry, we try to replace that endpoint with the true column min
799                          * or max as found by get_actual_variable_range().  This
800                          * ameliorates misestimates when the min or max is moving as a
801                          * result of changes since the last ANALYZE.  Note that this could
802                          * result in effectively including MCVs into the histogram that
803                          * weren't there before, but we don't try to correct for that.
804                          */
805                         double          histfrac;
806                         int                     lobound = 0;    /* first possible slot to search */
807                         int                     hibound = nvalues;              /* last+1 slot to search */
808                         bool            have_end = false;
809
810                         /*
811                          * If there are only two histogram entries, we'll want up-to-date
812                          * values for both.  (If there are more than two, we need at most
813                          * one of them to be updated, so we deal with that within the
814                          * loop.)
815                          */
816                         if (nvalues == 2)
817                                 have_end = get_actual_variable_range(root,
818                                                                                                          vardata,
819                                                                                                          hist_op,
820                                                                                                          &values[0],
821                                                                                                          &values[1]);
822
823                         while (lobound < hibound)
824                         {
825                                 int                     probe = (lobound + hibound) / 2;
826                                 bool            ltcmp;
827
828                                 /*
829                                  * If we find ourselves about to compare to the first or last
830                                  * histogram entry, first try to replace it with the actual
831                                  * current min or max (unless we already did so above).
832                                  */
833                                 if (probe == 0 && nvalues > 2)
834                                         have_end = get_actual_variable_range(root,
835                                                                                                                  vardata,
836                                                                                                                  hist_op,
837                                                                                                                  &values[0],
838                                                                                                                  NULL);
839                                 else if (probe == nvalues - 1 && nvalues > 2)
840                                         have_end = get_actual_variable_range(root,
841                                                                                                                  vardata,
842                                                                                                                  hist_op,
843                                                                                                                  NULL,
844                                                                                                                  &values[probe]);
845
846                                 ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
847                                                                                                            DEFAULT_COLLATION_OID,
848                                                                                                            values[probe],
849                                                                                                            constval));
850                                 if (isgt)
851                                         ltcmp = !ltcmp;
852                                 if (ltcmp)
853                                         lobound = probe + 1;
854                                 else
855                                         hibound = probe;
856                         }
857
858                         if (lobound <= 0)
859                         {
860                                 /* Constant is below lower histogram boundary. */
861                                 histfrac = 0.0;
862                         }
863                         else if (lobound >= nvalues)
864                         {
865                                 /* Constant is above upper histogram boundary. */
866                                 histfrac = 1.0;
867                         }
868                         else
869                         {
870                                 int                     i = lobound;
871                                 double          val,
872                                                         high,
873                                                         low;
874                                 double          binfrac;
875
876                                 /*
877                                  * We have values[i-1] <= constant <= values[i].
878                                  *
879                                  * Convert the constant and the two nearest bin boundary
880                                  * values to a uniform comparison scale, and do a linear
881                                  * interpolation within this bin.
882                                  */
883                                 if (convert_to_scalar(constval, consttype, &val,
884                                                                           values[i - 1], values[i],
885                                                                           vardata->vartype,
886                                                                           &low, &high))
887                                 {
888                                         if (high <= low)
889                                         {
890                                                 /* cope if bin boundaries appear identical */
891                                                 binfrac = 0.5;
892                                         }
893                                         else if (val <= low)
894                                                 binfrac = 0.0;
895                                         else if (val >= high)
896                                                 binfrac = 1.0;
897                                         else
898                                         {
899                                                 binfrac = (val - low) / (high - low);
900
901                                                 /*
902                                                  * Watch out for the possibility that we got a NaN or
903                                                  * Infinity from the division.  This can happen
904                                                  * despite the previous checks, if for example "low"
905                                                  * is -Infinity.
906                                                  */
907                                                 if (isnan(binfrac) ||
908                                                         binfrac < 0.0 || binfrac > 1.0)
909                                                         binfrac = 0.5;
910                                         }
911                                 }
912                                 else
913                                 {
914                                         /*
915                                          * Ideally we'd produce an error here, on the grounds that
916                                          * the given operator shouldn't have scalarXXsel
917                                          * registered as its selectivity func unless we can deal
918                                          * with its operand types.  But currently, all manner of
919                                          * stuff is invoking scalarXXsel, so give a default
920                                          * estimate until that can be fixed.
921                                          */
922                                         binfrac = 0.5;
923                                 }
924
925                                 /*
926                                  * Now, compute the overall selectivity across the values
927                                  * represented by the histogram.  We have i-1 full bins and
928                                  * binfrac partial bin below the constant.
929                                  */
930                                 histfrac = (double) (i - 1) + binfrac;
931                                 histfrac /= (double) (nvalues - 1);
932                         }
933
934                         /*
935                          * Now histfrac = fraction of histogram entries below the
936                          * constant.
937                          *
938                          * Account for "<" vs ">"
939                          */
940                         hist_selec = isgt ? (1.0 - histfrac) : histfrac;
941
942                         /*
943                          * The histogram boundaries are only approximate to begin with,
944                          * and may well be out of date anyway.  Therefore, don't believe
945                          * extremely small or large selectivity estimates --- unless we
946                          * got actual current endpoint values from the table.
947                          */
948                         if (have_end)
949                                 CLAMP_PROBABILITY(hist_selec);
950                         else
951                         {
952                                 if (hist_selec < 0.0001)
953                                         hist_selec = 0.0001;
954                                 else if (hist_selec > 0.9999)
955                                         hist_selec = 0.9999;
956                         }
957                 }
958
959                 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
960         }
961
962         return hist_selec;
963 }
964
965 /*
966  *              scalarltsel             - Selectivity of "<" (also "<=") for scalars.
967  */
968 Datum
969 scalarltsel(PG_FUNCTION_ARGS)
970 {
971         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
972         Oid                     operator = PG_GETARG_OID(1);
973         List       *args = (List *) PG_GETARG_POINTER(2);
974         int                     varRelid = PG_GETARG_INT32(3);
975         VariableStatData vardata;
976         Node       *other;
977         bool            varonleft;
978         Datum           constval;
979         Oid                     consttype;
980         bool            isgt;
981         double          selec;
982
983         /*
984          * If expression is not variable op something or something op variable,
985          * then punt and return a default estimate.
986          */
987         if (!get_restriction_variable(root, args, varRelid,
988                                                                   &vardata, &other, &varonleft))
989                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
990
991         /*
992          * Can't do anything useful if the something is not a constant, either.
993          */
994         if (!IsA(other, Const))
995         {
996                 ReleaseVariableStats(vardata);
997                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
998         }
999
1000         /*
1001          * If the constant is NULL, assume operator is strict and return zero, ie,
1002          * operator will never return TRUE.
1003          */
1004         if (((Const *) other)->constisnull)
1005         {
1006                 ReleaseVariableStats(vardata);
1007                 PG_RETURN_FLOAT8(0.0);
1008         }
1009         constval = ((Const *) other)->constvalue;
1010         consttype = ((Const *) other)->consttype;
1011
1012         /*
1013          * Force the var to be on the left to simplify logic in scalarineqsel.
1014          */
1015         if (varonleft)
1016         {
1017                 /* we have var < other */
1018                 isgt = false;
1019         }
1020         else
1021         {
1022                 /* we have other < var, commute to make var > other */
1023                 operator = get_commutator(operator);
1024                 if (!operator)
1025                 {
1026                         /* Use default selectivity (should we raise an error instead?) */
1027                         ReleaseVariableStats(vardata);
1028                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1029                 }
1030                 isgt = true;
1031         }
1032
1033         selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
1034
1035         ReleaseVariableStats(vardata);
1036
1037         PG_RETURN_FLOAT8((float8) selec);
1038 }
1039
1040 /*
1041  *              scalargtsel             - Selectivity of ">" (also ">=") for integers.
1042  */
1043 Datum
1044 scalargtsel(PG_FUNCTION_ARGS)
1045 {
1046         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1047         Oid                     operator = PG_GETARG_OID(1);
1048         List       *args = (List *) PG_GETARG_POINTER(2);
1049         int                     varRelid = PG_GETARG_INT32(3);
1050         VariableStatData vardata;
1051         Node       *other;
1052         bool            varonleft;
1053         Datum           constval;
1054         Oid                     consttype;
1055         bool            isgt;
1056         double          selec;
1057
1058         /*
1059          * If expression is not variable op something or something op variable,
1060          * then punt and return a default estimate.
1061          */
1062         if (!get_restriction_variable(root, args, varRelid,
1063                                                                   &vardata, &other, &varonleft))
1064                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1065
1066         /*
1067          * Can't do anything useful if the something is not a constant, either.
1068          */
1069         if (!IsA(other, Const))
1070         {
1071                 ReleaseVariableStats(vardata);
1072                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1073         }
1074
1075         /*
1076          * If the constant is NULL, assume operator is strict and return zero, ie,
1077          * operator will never return TRUE.
1078          */
1079         if (((Const *) other)->constisnull)
1080         {
1081                 ReleaseVariableStats(vardata);
1082                 PG_RETURN_FLOAT8(0.0);
1083         }
1084         constval = ((Const *) other)->constvalue;
1085         consttype = ((Const *) other)->consttype;
1086
1087         /*
1088          * Force the var to be on the left to simplify logic in scalarineqsel.
1089          */
1090         if (varonleft)
1091         {
1092                 /* we have var > other */
1093                 isgt = true;
1094         }
1095         else
1096         {
1097                 /* we have other > var, commute to make var < other */
1098                 operator = get_commutator(operator);
1099                 if (!operator)
1100                 {
1101                         /* Use default selectivity (should we raise an error instead?) */
1102                         ReleaseVariableStats(vardata);
1103                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1104                 }
1105                 isgt = false;
1106         }
1107
1108         selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);
1109
1110         ReleaseVariableStats(vardata);
1111
1112         PG_RETURN_FLOAT8((float8) selec);
1113 }
1114
1115 /*
1116  * patternsel                   - Generic code for pattern-match selectivity.
1117  */
1118 static double
1119 patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
1120 {
1121         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1122         Oid                     operator = PG_GETARG_OID(1);
1123         List       *args = (List *) PG_GETARG_POINTER(2);
1124         int                     varRelid = PG_GETARG_INT32(3);
1125         Oid                     collation = PG_GET_COLLATION();
1126         VariableStatData vardata;
1127         Node       *other;
1128         bool            varonleft;
1129         Datum           constval;
1130         Oid                     consttype;
1131         Oid                     vartype;
1132         Oid                     opfamily;
1133         Pattern_Prefix_Status pstatus;
1134         Const      *patt;
1135         Const      *prefix = NULL;
1136         Selectivity rest_selec = 0;
1137         double          result;
1138
1139         /*
1140          * If this is for a NOT LIKE or similar operator, get the corresponding
1141          * positive-match operator and work with that.  Set result to the correct
1142          * default estimate, too.
1143          */
1144         if (negate)
1145         {
1146                 operator = get_negator(operator);
1147                 if (!OidIsValid(operator))
1148                         elog(ERROR, "patternsel called for operator without a negator");
1149                 result = 1.0 - DEFAULT_MATCH_SEL;
1150         }
1151         else
1152         {
1153                 result = DEFAULT_MATCH_SEL;
1154         }
1155
1156         /*
1157          * If expression is not variable op constant, then punt and return a
1158          * default estimate.
1159          */
1160         if (!get_restriction_variable(root, args, varRelid,
1161                                                                   &vardata, &other, &varonleft))
1162                 return result;
1163         if (!varonleft || !IsA(other, Const))
1164         {
1165                 ReleaseVariableStats(vardata);
1166                 return result;
1167         }
1168
1169         /*
1170          * If the constant is NULL, assume operator is strict and return zero, ie,
1171          * operator will never return TRUE.  (It's zero even for a negator op.)
1172          */
1173         if (((Const *) other)->constisnull)
1174         {
1175                 ReleaseVariableStats(vardata);
1176                 return 0.0;
1177         }
1178         constval = ((Const *) other)->constvalue;
1179         consttype = ((Const *) other)->consttype;
1180
1181         /*
1182          * The right-hand const is type text or bytea for all supported operators.
1183          * We do not expect to see binary-compatible types here, since
1184          * const-folding should have relabeled the const to exactly match the
1185          * operator's declared type.
1186          */
1187         if (consttype != TEXTOID && consttype != BYTEAOID)
1188         {
1189                 ReleaseVariableStats(vardata);
1190                 return result;
1191         }
1192
1193         /*
1194          * Similarly, the exposed type of the left-hand side should be one of
1195          * those we know.  (Do not look at vardata.atttype, which might be
1196          * something binary-compatible but different.)  We can use it to choose
1197          * the index opfamily from which we must draw the comparison operators.
1198          *
1199          * NOTE: It would be more correct to use the PATTERN opfamilies than the
1200          * simple ones, but at the moment ANALYZE will not generate statistics for
1201          * the PATTERN operators.  But our results are so approximate anyway that
1202          * it probably hardly matters.
1203          */
1204         vartype = vardata.vartype;
1205
1206         switch (vartype)
1207         {
1208                 case TEXTOID:
1209                         opfamily = TEXT_BTREE_FAM_OID;
1210                         break;
1211                 case BPCHAROID:
1212                         opfamily = BPCHAR_BTREE_FAM_OID;
1213                         break;
1214                 case NAMEOID:
1215                         opfamily = NAME_BTREE_FAM_OID;
1216                         break;
1217                 case BYTEAOID:
1218                         opfamily = BYTEA_BTREE_FAM_OID;
1219                         break;
1220                 default:
1221                         ReleaseVariableStats(vardata);
1222                         return result;
1223         }
1224
1225         /*
1226          * Pull out any fixed prefix implied by the pattern, and estimate the
1227          * fractional selectivity of the remainder of the pattern.  Unlike many of
1228          * the other functions in this file, we use the pattern operator's actual
1229          * collation for this step.  This is not because we expect the collation
1230          * to make a big difference in the selectivity estimate (it seldom would),
1231          * but because we want to be sure we cache compiled regexps under the
1232          * right cache key, so that they can be re-used at runtime.
1233          */
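        /*
         * For example (illustrative): LIKE 'abc%' yields the fixed prefix
         * "abc" with Pattern_Prefix_Partial, while a wildcard-free pattern
         * such as LIKE 'abc' comes back as Pattern_Prefix_Exact and is
         * treated as plain equality further below.
         */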
1234         patt = (Const *) other;
1235         pstatus = pattern_fixed_prefix(patt, ptype, collation,
1236                                                                    &prefix, &rest_selec);
1237
1238         /*
1239          * If necessary, coerce the prefix constant to the right type.
1240          */
1241         if (prefix && prefix->consttype != vartype)
1242         {
1243                 char       *prefixstr;
1244
1245                 switch (prefix->consttype)
1246                 {
1247                         case TEXTOID:
1248                                 prefixstr = TextDatumGetCString(prefix->constvalue);
1249                                 break;
1250                         case BYTEAOID:
1251                                 prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
1252                                                                                                                 prefix->constvalue));
1253                                 break;
1254                         default:
1255                                 elog(ERROR, "unrecognized consttype: %u",
1256                                          prefix->consttype);
1257                                 ReleaseVariableStats(vardata);
1258                                 return result;
1259                 }
1260                 prefix = string_to_const(prefixstr, vartype);
1261                 pfree(prefixstr);
1262         }
1263
1264         if (pstatus == Pattern_Prefix_Exact)
1265         {
1266                 /*
1267                  * Pattern specifies an exact match, so pretend operator is '='
1268                  */
1269                 Oid                     eqopr = get_opfamily_member(opfamily, vartype, vartype,
1270                                                                                                 BTEqualStrategyNumber);
1271
1272                 if (eqopr == InvalidOid)
1273                         elog(ERROR, "no = operator for opfamily %u", opfamily);
1274                 result = var_eq_const(&vardata, eqopr, prefix->constvalue,
1275                                                           false, true);
1276         }
1277         else
1278         {
1279                 /*
1280                  * Not exact-match pattern.  If we have a sufficiently large
1281                  * histogram, estimate selectivity for the histogram part of the
1282                  * population by counting matches in the histogram.  If not, estimate
1283                  * selectivity of the fixed prefix and remainder of pattern
1284                  * separately, then combine the two to get an estimate of the
1285                  * selectivity for the part of the column population represented by
1286                  * the histogram.  (For small histograms, we combine these
1287                  * approaches.)
1288                  *
1289                  * We then add up data for any most-common values; these are
1290                  * not in the histogram population, and we can get exact answers for
1291                  * them by applying the pattern operator, so there's no reason to
1292                  * approximate.  (If the MCVs cover a significant part of the total
1293                  * population, this gives us a big leg up in accuracy.)
1294                  */
1295                 Selectivity selec;
1296                 int                     hist_size;
1297                 FmgrInfo        opproc;
1298                 double          nullfrac,
1299                                         mcv_selec,
1300                                         sumcommon;
1301
1302                 /* Try to use the histogram entries to get selectivity */
1303                 fmgr_info(get_opcode(operator), &opproc);
1304
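                /*
                 * The "10, 1" arguments below ask histogram_selectivity to
                 * require at least 10 histogram entries and to skip the first
                 * and last entry, which tend to be outliers.
                 */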
1305                 selec = histogram_selectivity(&vardata, &opproc, constval, true,
1306                                                                           10, 1, &hist_size);
1307
1308                 /* If not at least 100 entries, use the heuristic method */
1309                 if (hist_size < 100)
1310                 {
1311                         Selectivity heursel;
1312                         Selectivity prefixsel;
1313
1314                         if (pstatus == Pattern_Prefix_Partial)
1315                                 prefixsel = prefix_selectivity(root, &vardata, vartype,
1316                                                                                            opfamily, prefix);
1317                         else
1318                                 prefixsel = 1.0;
1319                         heursel = prefixsel * rest_selec;
1320
1321                         if (selec < 0)          /* fewer than 10 histogram entries? */
1322                                 selec = heursel;
1323                         else
1324                         {
1325                                 /*
1326                                  * For histogram sizes from 10 to 100, we combine the
1327                                  * histogram and heuristic selectivities, putting increasingly
1328                                  * more trust in the histogram for larger sizes.
1329                                  */
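                                /*
                                 * Worked example (illustrative numbers): hist_size = 40
                                 * gives hist_weight = 0.4, so the blend below is
                                 * 0.4 * histogram estimate + 0.6 * heuristic estimate.
                                 */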
1330                                 double          hist_weight = hist_size / 100.0;
1331
1332                                 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
1333                         }
1334                 }
1335
1336                 /* In any case, don't believe extremely small or large estimates. */
1337                 if (selec < 0.0001)
1338                         selec = 0.0001;
1339                 else if (selec > 0.9999)
1340                         selec = 0.9999;
1341
1342                 /*
1343                  * If we have most-common-values info, add up the fractions of the MCV
1344                  * entries that satisfy MCV OP PATTERN.  These fractions contribute
1345                  * directly to the result selectivity.  Also add up the total fraction
1346                  * represented by MCV entries.
1347                  */
1348                 mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
1349                                                                         &sumcommon);
1350
1351                 if (HeapTupleIsValid(vardata.statsTuple))
1352                         nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
1353                 else
1354                         nullfrac = 0.0;
1355
1356                 /*
1357                  * Now merge the results from the MCV and histogram calculations,
1358                  * realizing that the histogram covers only the non-null values that
1359                  * are not listed in MCV.
1360                  */
1361                 selec *= 1.0 - nullfrac - sumcommon;
1362                 selec += mcv_selec;
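                /*
                 * Illustrative numbers: with nullfrac = 0.1 and sumcommon = 0.3,
                 * the histogram-based estimate is scaled by 0.6 (the fraction of
                 * rows it actually describes) before the exact MCV fraction is
                 * added in.
                 */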
1363
1364                 /* result should be in range, but make sure... */
1365                 CLAMP_PROBABILITY(selec);
1366                 result = selec;
1367         }
1368
1369         if (prefix)
1370         {
1371                 pfree(DatumGetPointer(prefix->constvalue));
1372                 pfree(prefix);
1373         }
1374
1375         ReleaseVariableStats(vardata);
1376
1377         return negate ? (1.0 - result) : result;
1378 }
1379
1380 /*
1381  *              regexeqsel              - Selectivity of regular-expression pattern match.
1382  */
1383 Datum
1384 regexeqsel(PG_FUNCTION_ARGS)
1385 {
1386         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
1387 }
1388
1389 /*
1390  *              icregexeqsel    - Selectivity of case-insensitive regex match.
1391  */
1392 Datum
1393 icregexeqsel(PG_FUNCTION_ARGS)
1394 {
1395         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
1396 }
1397
1398 /*
1399  *              likesel                 - Selectivity of LIKE pattern match.
1400  */
1401 Datum
1402 likesel(PG_FUNCTION_ARGS)
1403 {
1404         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
1405 }
1406
1407 /*
1408  *              iclikesel                       - Selectivity of ILIKE pattern match.
1409  */
1410 Datum
1411 iclikesel(PG_FUNCTION_ARGS)
1412 {
1413         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
1414 }
1415
1416 /*
1417  *              regexnesel              - Selectivity of regular-expression pattern non-match.
1418  */
1419 Datum
1420 regexnesel(PG_FUNCTION_ARGS)
1421 {
1422         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
1423 }
1424
1425 /*
1426  *              icregexnesel    - Selectivity of case-insensitive regex non-match.
1427  */
1428 Datum
1429 icregexnesel(PG_FUNCTION_ARGS)
1430 {
1431         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
1432 }
1433
1434 /*
1435  *              nlikesel                - Selectivity of LIKE pattern non-match.
1436  */
1437 Datum
1438 nlikesel(PG_FUNCTION_ARGS)
1439 {
1440         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
1441 }
1442
1443 /*
1444  *              icnlikesel              - Selectivity of ILIKE pattern non-match.
1445  */
1446 Datum
1447 icnlikesel(PG_FUNCTION_ARGS)
1448 {
1449         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
1450 }
1451
1452 /*
1453  *              boolvarsel              - Selectivity of Boolean variable.
1454  *
1455  * This can actually be called on any boolean-valued expression.  If it
1456  * involves only Vars of the specified relation, and if there are statistics
1457  * about the Var or expression (the latter is possible if it's indexed) then
1458  * we'll produce a real estimate; otherwise it's just a default.
1459  */
1460 Selectivity
1461 boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
1462 {
1463         VariableStatData vardata;
1464         double          selec;
1465
1466         examine_variable(root, arg, varRelid, &vardata);
1467         if (HeapTupleIsValid(vardata.statsTuple))
1468         {
1469                 /*
1470                  * A boolean variable V is equivalent to the clause V = 't', so we
1471                  * compute the selectivity as if that is what we have.
1472                  */
1473                 selec = var_eq_const(&vardata, BooleanEqualOperator,
1474                                                          BoolGetDatum(true), false, true);
1475         }
1476         else if (is_funcclause(arg))
1477         {
1478                 /*
1479                  * If we have no stats and it's a function call, estimate 0.3333333.
1480                  * This seems a pretty unprincipled choice, but Postgres has been
1481                  * using that estimate for function calls since 1992.  The hoariness
1482                  * of this behavior suggests that we should not be in too much hurry
1483                  * to use another value.
1484                  */
1485                 selec = 0.3333333;
1486         }
1487         else
1488         {
1489                 /* Otherwise, the default estimate is 0.5 */
1490                 selec = 0.5;
1491         }
1492         ReleaseVariableStats(vardata);
1493         return selec;
1494 }
1495
1496 /*
1497  *              booltestsel             - Selectivity of BooleanTest Node.
1498  */
1499 Selectivity
1500 booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
1501                         int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1502 {
1503         VariableStatData vardata;
1504         double          selec;
1505
1506         examine_variable(root, arg, varRelid, &vardata);
1507
1508         if (HeapTupleIsValid(vardata.statsTuple))
1509         {
1510                 Form_pg_statistic stats;
1511                 double          freq_null;
1512                 Datum      *values;
1513                 int                     nvalues;
1514                 float4     *numbers;
1515                 int                     nnumbers;
1516
1517                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1518                 freq_null = stats->stanullfrac;
1519
1520                 if (get_attstatsslot(vardata.statsTuple,
1521                                                          vardata.atttype, vardata.atttypmod,
1522                                                          STATISTIC_KIND_MCV, InvalidOid,
1523                                                          NULL,
1524                                                          &values, &nvalues,
1525                                                          &numbers, &nnumbers)
1526                         && nnumbers > 0)
1527                 {
1528                         double          freq_true;
1529                         double          freq_false;
1530
1531                         /*
1532                          * Get first MCV frequency and derive frequency for true.
1533                          */
1534                         if (DatumGetBool(values[0]))
1535                                 freq_true = numbers[0];
1536                         else
1537                                 freq_true = 1.0 - numbers[0] - freq_null;
1538
1539                         /*
1540                          * Next derive frequency for false. Then use these as appropriate
1541                          * to derive frequency for each case.
1542                          */
1543                         freq_false = 1.0 - freq_true - freq_null;
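                        /*
                         * Illustrative numbers: if the first MCV is 'false' with
                         * frequency 0.70 and stanullfrac is 0.10, then
                         * freq_true = 0.20 and freq_false = 0.70.
                         */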
1544
1545                         switch (booltesttype)
1546                         {
1547                                 case IS_UNKNOWN:
1548                                         /* select only NULL values */
1549                                         selec = freq_null;
1550                                         break;
1551                                 case IS_NOT_UNKNOWN:
1552                                         /* select non-NULL values */
1553                                         selec = 1.0 - freq_null;
1554                                         break;
1555                                 case IS_TRUE:
1556                                         /* select only TRUE values */
1557                                         selec = freq_true;
1558                                         break;
1559                                 case IS_NOT_TRUE:
1560                                         /* select non-TRUE values */
1561                                         selec = 1.0 - freq_true;
1562                                         break;
1563                                 case IS_FALSE:
1564                                         /* select only FALSE values */
1565                                         selec = freq_false;
1566                                         break;
1567                                 case IS_NOT_FALSE:
1568                                         /* select non-FALSE values */
1569                                         selec = 1.0 - freq_false;
1570                                         break;
1571                                 default:
1572                                         elog(ERROR, "unrecognized booltesttype: %d",
1573                                                  (int) booltesttype);
1574                                         selec = 0.0;    /* Keep compiler quiet */
1575                                         break;
1576                         }
1577
1578                         free_attstatsslot(vardata.atttype, values, nvalues,
1579                                                           numbers, nnumbers);
1580                 }
1581                 else
1582                 {
1583                         /*
1584                          * No most-common-value info available. Still have null fraction
1585                          * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1586                          * for null fraction and assume a 50-50 split of TRUE and FALSE.
1587                          */
1588                         switch (booltesttype)
1589                         {
1590                                 case IS_UNKNOWN:
1591                                         /* select only NULL values */
1592                                         selec = freq_null;
1593                                         break;
1594                                 case IS_NOT_UNKNOWN:
1595                                         /* select non-NULL values */
1596                                         selec = 1.0 - freq_null;
1597                                         break;
1598                                 case IS_TRUE:
1599                                 case IS_FALSE:
1600                                         /* Assume we select half of the non-NULL values */
1601                                         selec = (1.0 - freq_null) / 2.0;
1602                                         break;
1603                                 case IS_NOT_TRUE:
1604                                 case IS_NOT_FALSE:
1605                                         /* Assume we select NULLs plus half of the non-NULLs */
1606                                         /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1607                                         selec = (freq_null + 1.0) / 2.0;
1608                                         break;
1609                                 default:
1610                                         elog(ERROR, "unrecognized booltesttype: %d",
1611                                                  (int) booltesttype);
1612                                         selec = 0.0;    /* Keep compiler quiet */
1613                                         break;
1614                         }
1615                 }
1616         }
1617         else
1618         {
1619                 /*
1620                  * If we can't get variable statistics for the argument, perhaps
1621                  * clause_selectivity can do something with it.  We ignore the
1622                  * possibility of a NULL value when using clause_selectivity, and just
1623                  * assume the value is either TRUE or FALSE.
1624                  */
1625                 switch (booltesttype)
1626                 {
1627                         case IS_UNKNOWN:
1628                                 selec = DEFAULT_UNK_SEL;
1629                                 break;
1630                         case IS_NOT_UNKNOWN:
1631                                 selec = DEFAULT_NOT_UNK_SEL;
1632                                 break;
1633                         case IS_TRUE:
1634                         case IS_NOT_FALSE:
1635                                 selec = (double) clause_selectivity(root, arg,
1636                                                                                                         varRelid,
1637                                                                                                         jointype, sjinfo);
1638                                 break;
1639                         case IS_FALSE:
1640                         case IS_NOT_TRUE:
1641                                 selec = 1.0 - (double) clause_selectivity(root, arg,
1642                                                                                                                   varRelid,
1643                                                                                                                   jointype, sjinfo);
1644                                 break;
1645                         default:
1646                                 elog(ERROR, "unrecognized booltesttype: %d",
1647                                          (int) booltesttype);
1648                                 selec = 0.0;    /* Keep compiler quiet */
1649                                 break;
1650                 }
1651         }
1652
1653         ReleaseVariableStats(vardata);
1654
1655         /* result should be in range, but make sure... */
1656         CLAMP_PROBABILITY(selec);
1657
1658         return (Selectivity) selec;
1659 }
1660
1661 /*
1662  *              nulltestsel             - Selectivity of NullTest Node.
1663  */
1664 Selectivity
1665 nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
1666                         int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1667 {
1668         VariableStatData vardata;
1669         double          selec;
1670
1671         examine_variable(root, arg, varRelid, &vardata);
1672
1673         if (HeapTupleIsValid(vardata.statsTuple))
1674         {
1675                 Form_pg_statistic stats;
1676                 double          freq_null;
1677
1678                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1679                 freq_null = stats->stanullfrac;
1680
1681                 switch (nulltesttype)
1682                 {
1683                         case IS_NULL:
1684
1685                                 /*
1686                                  * Use freq_null directly.
1687                                  */
1688                                 selec = freq_null;
1689                                 break;
1690                         case IS_NOT_NULL:
1691
1692                                 /*
1693                                  * Select not unknown (not null) values. Calculate from
1694                                  * freq_null.
1695                                  */
1696                                 selec = 1.0 - freq_null;
1697                                 break;
1698                         default:
1699                                 elog(ERROR, "unrecognized nulltesttype: %d",
1700                                          (int) nulltesttype);
1701                                 return (Selectivity) 0; /* keep compiler quiet */
1702                 }
1703         }
1704         else
1705         {
1706                 /*
1707                  * No ANALYZE stats available, so make a guess
1708                  */
1709                 switch (nulltesttype)
1710                 {
1711                         case IS_NULL:
1712                                 selec = DEFAULT_UNK_SEL;
1713                                 break;
1714                         case IS_NOT_NULL:
1715                                 selec = DEFAULT_NOT_UNK_SEL;
1716                                 break;
1717                         default:
1718                                 elog(ERROR, "unrecognized nulltesttype: %d",
1719                                          (int) nulltesttype);
1720                                 return (Selectivity) 0; /* keep compiler quiet */
1721                 }
1722         }
1723
1724         ReleaseVariableStats(vardata);
1725
1726         /* result should be in range, but make sure... */
1727         CLAMP_PROBABILITY(selec);
1728
1729         return (Selectivity) selec;
1730 }
1731
1732 /*
1733  * strip_array_coercion - strip binary-compatible relabeling from an array expr
1734  *
1735  * For array values, the parser normally generates ArrayCoerceExpr conversions,
1736  * but it seems possible that RelabelType might show up.  Also, the planner
1737  * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1738  * so we need to be ready to deal with more than one level.
1739  */
1740 static Node *
1741 strip_array_coercion(Node *node)
1742 {
1743         for (;;)
1744         {
1745                 if (node && IsA(node, ArrayCoerceExpr) &&
1746                         ((ArrayCoerceExpr *) node)->elemfuncid == InvalidOid)
1747                 {
1748                         node = (Node *) ((ArrayCoerceExpr *) node)->arg;
1749                 }
1750                 else if (node && IsA(node, RelabelType))
1751                 {
1752                         /* We don't really expect this case, but may as well cope */
1753                         node = (Node *) ((RelabelType *) node)->arg;
1754                 }
1755                 else
1756                         break;
1757         }
1758         return node;
1759 }
1760
1761 /*
1762  *              scalararraysel          - Selectivity of ScalarArrayOpExpr Node.
1763  */
1764 Selectivity
1765 scalararraysel(PlannerInfo *root,
1766                            ScalarArrayOpExpr *clause,
1767                            bool is_join_clause,
1768                            int varRelid,
1769                            JoinType jointype,
1770                            SpecialJoinInfo *sjinfo)
1771 {
1772         Oid                     operator = clause->opno;
1773         bool            useOr = clause->useOr;
1774         bool            isEquality = false;
1775         bool            isInequality = false;
1776         Node       *leftop;
1777         Node       *rightop;
1778         Oid                     nominal_element_type;
1779         Oid                     nominal_element_collation;
1780         TypeCacheEntry *typentry;
1781         RegProcedure oprsel;
1782         FmgrInfo        oprselproc;
1783         Selectivity s1;
1784         Selectivity s1disjoint;
1785
1786         /* First, deconstruct the expression */
1787         Assert(list_length(clause->args) == 2);
1788         leftop = (Node *) linitial(clause->args);
1789         rightop = (Node *) lsecond(clause->args);
1790
1791         /* aggressively reduce both sides to constants */
1792         leftop = estimate_expression_value(root, leftop);
1793         rightop = estimate_expression_value(root, rightop);
1794
1795         /* get nominal (after relabeling) element type of rightop */
1796         nominal_element_type = get_base_element_type(exprType(rightop));
1797         if (!OidIsValid(nominal_element_type))
1798                 return (Selectivity) 0.5;               /* probably shouldn't happen */
1799         /* get nominal collation, too, for generating constants */
1800         nominal_element_collation = exprCollation(rightop);
1801
1802         /* look through any binary-compatible relabeling of rightop */
1803         rightop = strip_array_coercion(rightop);
1804
1805         /*
1806          * Detect whether the operator is the default equality or inequality
1807          * operator of the array element type.
1808          */
1809         typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1810         if (OidIsValid(typentry->eq_opr))
1811         {
1812                 if (operator == typentry->eq_opr)
1813                         isEquality = true;
1814                 else if (get_negator(operator) == typentry->eq_opr)
1815                         isInequality = true;
1816         }
1817
1818         /*
1819          * If it is equality or inequality, we might be able to estimate this as a
1820          * form of array containment; for instance "const = ANY(column)" can be
1821          * treated as "ARRAY[const] <@ column".  scalararraysel_containment tries
1822          * that, and returns the selectivity estimate if successful, or -1 if not.
1823          */
1824         if ((isEquality || isInequality) && !is_join_clause)
1825         {
1826                 s1 = scalararraysel_containment(root, leftop, rightop,
1827                                                                                 nominal_element_type,
1828                                                                                 isEquality, useOr, varRelid);
1829                 if (s1 >= 0.0)
1830                         return s1;
1831         }
1832
1833         /*
1834          * Look up the underlying operator's selectivity estimator. Punt if it
1835          * hasn't got one.
1836          */
1837         if (is_join_clause)
1838                 oprsel = get_oprjoin(operator);
1839         else
1840                 oprsel = get_oprrest(operator);
1841         if (!oprsel)
1842                 return (Selectivity) 0.5;
1843         fmgr_info(oprsel, &oprselproc);
1844
1845         /*
1846          * In the array-containment check above, we must only believe that an
1847          * operator is equality or inequality if it is the default btree equality
1848          * operator (or its negator) for the element type, since those are the
1849          * operators that array containment will use.  But in what follows, we can
1850          * be a little laxer, and also believe that any operators using eqsel() or
1851          * neqsel() as selectivity estimator act like equality or inequality.
1852          */
1853         if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1854                 isEquality = true;
1855         else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1856                 isInequality = true;
1857
1858         /*
1859          * We consider three cases:
1860          *
1861          * 1. rightop is an Array constant: deconstruct the array, apply the
1862          * operator's selectivity function for each array element, and merge the
1863          * results in the same way that clausesel.c does for AND/OR combinations.
1864          *
1865          * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1866          * function for each element of the ARRAY[] construct, and merge.
1867          *
1868          * 3. otherwise, make a guess ...
1869          */
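        /*
         * Illustrative examples of the three cases: "x = ANY('{1,2,3}'::int4[])"
         * (array Const), "x = ANY(ARRAY[a, b, c])" (ARRAY[] construct), and
         * "x = ANY(f(y))" for some array-returning expression (the fallback
         * guess).
         */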
1870         if (rightop && IsA(rightop, Const))
1871         {
1872                 Datum           arraydatum = ((Const *) rightop)->constvalue;
1873                 bool            arrayisnull = ((Const *) rightop)->constisnull;
1874                 ArrayType  *arrayval;
1875                 int16           elmlen;
1876                 bool            elmbyval;
1877                 char            elmalign;
1878                 int                     num_elems;
1879                 Datum      *elem_values;
1880                 bool       *elem_nulls;
1881                 int                     i;
1882
1883                 if (arrayisnull)                /* qual can't succeed if null array */
1884                         return (Selectivity) 0.0;
1885                 arrayval = DatumGetArrayTypeP(arraydatum);
1886                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1887                                                          &elmlen, &elmbyval, &elmalign);
1888                 deconstruct_array(arrayval,
1889                                                   ARR_ELEMTYPE(arrayval),
1890                                                   elmlen, elmbyval, elmalign,
1891                                                   &elem_values, &elem_nulls, &num_elems);
1892
1893                 /*
1894                  * For generic operators, we assume the probability of success is
1895                  * independent for each array element.  But for "= ANY" or "<> ALL",
1896                  * if the array elements are distinct (which'd typically be the case)
1897                  * then the probabilities are disjoint, and we should just sum them.
1898                  *
1899                  * If we were being really tense we would try to confirm that the
1900                  * elements are all distinct, but that would be expensive and it
1901                  * doesn't seem to be worth the cycles; it would amount to penalizing
1902                  * well-written queries in favor of poorly-written ones.  However, we
1903                  * do protect ourselves a little bit by checking whether the
1904                  * disjointness assumption leads to an impossible (out of range)
1905                  * probability; if so, we fall back to the normal calculation.
1906                  */
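                /*
                 * Worked example (illustrative numbers): for "= ANY" over three
                 * distinct elements whose individual selectivities are each
                 * 0.01, the disjoint sum is 0.03, whereas the independent-OR
                 * combination below would give 1 - 0.99^3 ~= 0.0297.
                 */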
1907                 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1908
1909                 for (i = 0; i < num_elems; i++)
1910                 {
1911                         List       *args;
1912                         Selectivity s2;
1913
1914                         args = list_make2(leftop,
1915                                                           makeConst(nominal_element_type,
1916                                                                                 -1,
1917                                                                                 nominal_element_collation,
1918                                                                                 elmlen,
1919                                                                                 elem_values[i],
1920                                                                                 elem_nulls[i],
1921                                                                                 elmbyval));
1922                         if (is_join_clause)
1923                                 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1924                                                                                                           clause->inputcollid,
1925                                                                                                           PointerGetDatum(root),
1926                                                                                                           ObjectIdGetDatum(operator),
1927                                                                                                           PointerGetDatum(args),
1928                                                                                                           Int16GetDatum(jointype),
1929                                                                                                           PointerGetDatum(sjinfo)));
1930                         else
1931                                 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1932                                                                                                           clause->inputcollid,
1933                                                                                                           PointerGetDatum(root),
1934                                                                                                           ObjectIdGetDatum(operator),
1935                                                                                                           PointerGetDatum(args),
1936                                                                                                           Int32GetDatum(varRelid)));
1937
1938                         if (useOr)
1939                         {
1940                                 s1 = s1 + s2 - s1 * s2;
1941                                 if (isEquality)
1942                                         s1disjoint += s2;
1943                         }
1944                         else
1945                         {
1946                                 s1 = s1 * s2;
1947                                 if (isInequality)
1948                                         s1disjoint += s2 - 1.0;
1949                         }
1950                 }
1951
1952                 /* accept disjoint-probability estimate if in range */
1953                 if ((useOr ? isEquality : isInequality) &&
1954                         s1disjoint >= 0.0 && s1disjoint <= 1.0)
1955                         s1 = s1disjoint;
1956         }
1957         else if (rightop && IsA(rightop, ArrayExpr) &&
1958                          !((ArrayExpr *) rightop)->multidims)
1959         {
1960                 ArrayExpr  *arrayexpr = (ArrayExpr *) rightop;
1961                 int16           elmlen;
1962                 bool            elmbyval;
1963                 ListCell   *l;
1964
1965                 get_typlenbyval(arrayexpr->element_typeid,
1966                                                 &elmlen, &elmbyval);
1967
1968                 /*
1969                  * We use the assumption of disjoint probabilities here too, although
1970                  * the odds of equal array elements are rather higher if the elements
1971                  * are not all constants (which they won't be, else constant folding
1972                  * would have reduced the ArrayExpr to a Const).  In this path it's
1973                  * critical to have the sanity check on the s1disjoint estimate.
1974                  */
1975                 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1976
1977                 foreach(l, arrayexpr->elements)
1978                 {
1979                         Node       *elem = (Node *) lfirst(l);
1980                         List       *args;
1981                         Selectivity s2;
1982
1983                         /*
1984                          * Theoretically, if elem isn't of nominal_element_type we should
1985                          * insert a RelabelType, but it seems unlikely that any operator
1986                          * estimation function would really care ...
1987                          */
1988                         args = list_make2(leftop, elem);
1989                         if (is_join_clause)
1990                                 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1991                                                                                                           clause->inputcollid,
1992                                                                                                           PointerGetDatum(root),
1993                                                                                                           ObjectIdGetDatum(operator),
1994                                                                                                           PointerGetDatum(args),
1995                                                                                                           Int16GetDatum(jointype),
1996                                                                                                           PointerGetDatum(sjinfo)));
1997                         else
1998                                 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1999                                                                                                           clause->inputcollid,
2000                                                                                                           PointerGetDatum(root),
2001                                                                                                           ObjectIdGetDatum(operator),
2002                                                                                                           PointerGetDatum(args),
2003                                                                                                           Int32GetDatum(varRelid)));
2004
2005                         if (useOr)
2006                         {
2007                                 s1 = s1 + s2 - s1 * s2;
2008                                 if (isEquality)
2009                                         s1disjoint += s2;
2010                         }
2011                         else
2012                         {
2013                                 s1 = s1 * s2;
2014                                 if (isInequality)
2015                                         s1disjoint += s2 - 1.0;
2016                         }
2017                 }
2018
2019                 /* accept disjoint-probability estimate if in range */
2020                 if ((useOr ? isEquality : isInequality) &&
2021                         s1disjoint >= 0.0 && s1disjoint <= 1.0)
2022                         s1 = s1disjoint;
2023         }
2024         else
2025         {
2026                 CaseTestExpr *dummyexpr;
2027                 List       *args;
2028                 Selectivity s2;
2029                 int                     i;
2030
2031                 /*
2032                  * We need a dummy rightop to pass to the operator selectivity
2033                  * routine.  It can be pretty much anything that doesn't look like a
2034                  * constant; CaseTestExpr is a convenient choice.
2035                  */
2036                 dummyexpr = makeNode(CaseTestExpr);
2037                 dummyexpr->typeId = nominal_element_type;
2038                 dummyexpr->typeMod = -1;
2039                 dummyexpr->collation = clause->inputcollid;
2040                 args = list_make2(leftop, dummyexpr);
2041                 if (is_join_clause)
2042                         s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2043                                                                                                   clause->inputcollid,
2044                                                                                                   PointerGetDatum(root),
2045                                                                                                   ObjectIdGetDatum(operator),
2046                                                                                                   PointerGetDatum(args),
2047                                                                                                   Int16GetDatum(jointype),
2048                                                                                                   PointerGetDatum(sjinfo)));
2049                 else
2050                         s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2051                                                                                                   clause->inputcollid,
2052                                                                                                   PointerGetDatum(root),
2053                                                                                                   ObjectIdGetDatum(operator),
2054                                                                                                   PointerGetDatum(args),
2055                                                                                                   Int32GetDatum(varRelid)));
2056                 s1 = useOr ? 0.0 : 1.0;
2057
2058                 /*
2059                  * Arbitrarily assume 10 elements in the eventual array value (see
2060                  * also estimate_array_length).  We don't risk an assumption of
2061                  * disjoint probabilities here.
2062                  */
2063                 for (i = 0; i < 10; i++)
2064                 {
2065                         if (useOr)
2066                                 s1 = s1 + s2 - s1 * s2;
2067                         else
2068                                 s1 = s1 * s2;
2069                 }
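                /*
                 * Illustrative numbers: for "= ANY" (useOr) with a per-element
                 * selectivity of 0.01, ten iterations of the OR formula give
                 * s1 = 1 - 0.99^10 ~= 0.096.
                 */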
2070         }
2071
2072         /* result should be in range, but make sure... */
2073         CLAMP_PROBABILITY(s1);
2074
2075         return s1;
2076 }
2077
2078 /*
2079  * Estimate number of elements in the array yielded by an expression.
2080  *
2081  * It's important that this agree with scalararraysel.
2082  */
2083 int
2084 estimate_array_length(Node *arrayexpr)
2085 {
2086         /* look through any binary-compatible relabeling of arrayexpr */
2087         arrayexpr = strip_array_coercion(arrayexpr);
2088
2089         if (arrayexpr && IsA(arrayexpr, Const))
2090         {
2091                 Datum           arraydatum = ((Const *) arrayexpr)->constvalue;
2092                 bool            arrayisnull = ((Const *) arrayexpr)->constisnull;
2093                 ArrayType  *arrayval;
2094
2095                 if (arrayisnull)
2096                         return 0;
2097                 arrayval = DatumGetArrayTypeP(arraydatum);
2098                 return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2099         }
2100         else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2101                          !((ArrayExpr *) arrayexpr)->multidims)
2102         {
2103                 return list_length(((ArrayExpr *) arrayexpr)->elements);
2104         }
2105         else
2106         {
2107                 /* default guess --- see also scalararraysel */
2108                 return 10;
2109         }
2110 }
2111
2112 /*
2113  *              rowcomparesel           - Selectivity of RowCompareExpr Node.
2114  *
2115  * We estimate RowCompare selectivity by considering just the first (high
2116  * order) columns, which makes it equivalent to an ordinary OpExpr.  While
2117  * this estimate could be refined by considering additional columns, it
2118  * seems unlikely that we could do a lot better without multi-column
2119  * statistics.
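 *
 * For example, "(a, b) < (1, 2)" is estimated the same way as "a < 1".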
2120  */
2121 Selectivity
2122 rowcomparesel(PlannerInfo *root,
2123                           RowCompareExpr *clause,
2124                           int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
2125 {
2126         Selectivity s1;
2127         Oid                     opno = linitial_oid(clause->opnos);
2128         Oid                     inputcollid = linitial_oid(clause->inputcollids);
2129         List       *opargs;
2130         bool            is_join_clause;
2131
2132         /* Build equivalent arg list for single operator */
2133         opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2134
2135         /*
2136          * Decide if it's a join clause.  This should match clausesel.c's
2137          * treat_as_join_clause(), except that we intentionally consider only the
2138          * leading columns and not the rest of the clause.
2139          */
2140         if (varRelid != 0)
2141         {
2142                 /*
2143                  * Caller is forcing restriction mode (eg, because we are examining an
2144                  * inner indexscan qual).
2145                  */
2146                 is_join_clause = false;
2147         }
2148         else if (sjinfo == NULL)
2149         {
2150                 /*
2151                  * It must be a restriction clause, since it's being evaluated at a
2152                  * scan node.
2153                  */
2154                 is_join_clause = false;
2155         }
2156         else
2157         {
2158                 /*
2159                  * Otherwise, it's a join if there's more than one relation used.
2160                  */
2161                 is_join_clause = (NumRelids((Node *) opargs) > 1);
2162         }
2163
2164         if (is_join_clause)
2165         {
2166                 /* Estimate selectivity for a join clause. */
2167                 s1 = join_selectivity(root, opno,
2168                                                           opargs,
2169                                                           inputcollid,
2170                                                           jointype,
2171                                                           sjinfo);
2172         }
2173         else
2174         {
2175                 /* Estimate selectivity for a restriction clause. */
2176                 s1 = restriction_selectivity(root, opno,
2177                                                                          opargs,
2178                                                                          inputcollid,
2179                                                                          varRelid);
2180         }
2181
2182         return s1;
2183 }
2184
2185 /*
2186  *              eqjoinsel               - Join selectivity of "="
2187  */
2188 Datum
2189 eqjoinsel(PG_FUNCTION_ARGS)
2190 {
2191         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2192         Oid                     operator = PG_GETARG_OID(1);
2193         List       *args = (List *) PG_GETARG_POINTER(2);
2194
2195 #ifdef NOT_USED
2196         JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
2197 #endif
2198         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2199         double          selec;
2200         VariableStatData vardata1;
2201         VariableStatData vardata2;
2202         bool            join_is_reversed;
2203         RelOptInfo *inner_rel;
2204
2205         get_join_variables(root, args, sjinfo,
2206                                            &vardata1, &vardata2, &join_is_reversed);
2207
2208         switch (sjinfo->jointype)
2209         {
2210                 case JOIN_INNER:
2211                 case JOIN_LEFT:
2212                 case JOIN_FULL:
2213                         selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2214                         break;
2215                 case JOIN_SEMI:
2216                 case JOIN_ANTI:
2217
2218                         /*
2219                          * Look up the join's inner relation.  min_righthand is sufficient
2220                          * information because neither SEMI nor ANTI joins permit any
2221                          * reassociation into or out of their RHS, so the righthand will
2222                          * always be exactly that set of rels.
2223                          */
2224                         inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2225
2226                         if (!join_is_reversed)
2227                                 selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2228                                                                            inner_rel);
2229                         else
2230                                 selec = eqjoinsel_semi(get_commutator(operator),
2231                                                                            &vardata2, &vardata1,
2232                                                                            inner_rel);
2233                         break;
2234                 default:
2235                         /* other values not expected here */
2236                         elog(ERROR, "unrecognized join type: %d",
2237                                  (int) sjinfo->jointype);
2238                         selec = 0;                      /* keep compiler quiet */
2239                         break;
2240         }
2241
2242         ReleaseVariableStats(vardata1);
2243         ReleaseVariableStats(vardata2);
2244
2245         CLAMP_PROBABILITY(selec);
2246
2247         PG_RETURN_FLOAT8((float8) selec);
2248 }
2249
2250 /*
2251  * eqjoinsel_inner --- eqjoinsel for normal inner join
2252  *
2253  * We also use this for LEFT/FULL outer joins; it's not presently clear
2254  * that it's worth trying to distinguish them here.
2255  */
2256 static double
2257 eqjoinsel_inner(Oid operator,
2258                                 VariableStatData *vardata1, VariableStatData *vardata2)
2259 {
2260         double          selec;
2261         double          nd1;
2262         double          nd2;
2263         bool            isdefault1;
2264         bool            isdefault2;
2265         Form_pg_statistic stats1 = NULL;
2266         Form_pg_statistic stats2 = NULL;
2267         bool            have_mcvs1 = false;
2268         Datum      *values1 = NULL;
2269         int                     nvalues1 = 0;
2270         float4     *numbers1 = NULL;
2271         int                     nnumbers1 = 0;
2272         bool            have_mcvs2 = false;
2273         Datum      *values2 = NULL;
2274         int                     nvalues2 = 0;
2275         float4     *numbers2 = NULL;
2276         int                     nnumbers2 = 0;
2277
2278         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2279         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2280
2281         if (HeapTupleIsValid(vardata1->statsTuple))
2282         {
2283                 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2284                 have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
2285                                                                           vardata1->atttype,
2286                                                                           vardata1->atttypmod,
2287                                                                           STATISTIC_KIND_MCV,
2288                                                                           InvalidOid,
2289                                                                           NULL,
2290                                                                           &values1, &nvalues1,
2291                                                                           &numbers1, &nnumbers1);
2292         }
2293
2294         if (HeapTupleIsValid(vardata2->statsTuple))
2295         {
2296                 stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
2297                 have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
2298                                                                           vardata2->atttype,
2299                                                                           vardata2->atttypmod,
2300                                                                           STATISTIC_KIND_MCV,
2301                                                                           InvalidOid,
2302                                                                           NULL,
2303                                                                           &values2, &nvalues2,
2304                                                                           &numbers2, &nnumbers2);
2305         }
2306
2307         if (have_mcvs1 && have_mcvs2)
2308         {
2309                 /*
2310                  * We have most-common-value lists for both relations.  Run through
2311                  * the lists to see which MCVs actually join to each other with the
2312                  * given operator.  This allows us to determine the exact join
2313                  * selectivity for the portion of the relations represented by the MCV
2314                  * lists.  We still have to estimate for the remaining population, but
2315                  * in a skewed distribution this gives us a big leg up in accuracy.
2316                  * For motivation see the analysis in Y. Ioannidis and S.
2317                  * Christodoulakis, "On the propagation of errors in the size of join
2318                  * results", Technical Report 1018, Computer Science Dept., University
2319                  * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2320                  */
2321                 FmgrInfo        eqproc;
2322                 bool       *hasmatch1;
2323                 bool       *hasmatch2;
2324                 double          nullfrac1 = stats1->stanullfrac;
2325                 double          nullfrac2 = stats2->stanullfrac;
2326                 double          matchprodfreq,
2327                                         matchfreq1,
2328                                         matchfreq2,
2329                                         unmatchfreq1,
2330                                         unmatchfreq2,
2331                                         otherfreq1,
2332                                         otherfreq2,
2333                                         totalsel1,
2334                                         totalsel2;
2335                 int                     i,
2336                                         nmatches;
2337
2338                 fmgr_info(get_opcode(operator), &eqproc);
2339                 hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
2340                 hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
2341
2342                 /*
2343                  * Note we assume that each MCV will match at most one member of the
2344                  * other MCV list.  If the operator isn't really equality, there could
2345                  * be multiple matches --- but we don't look for them, both for speed
2346                  * and because the math wouldn't add up...
2347                  */
2348                 matchprodfreq = 0.0;
2349                 nmatches = 0;
2350                 for (i = 0; i < nvalues1; i++)
2351                 {
2352                         int                     j;
2353
2354                         for (j = 0; j < nvalues2; j++)
2355                         {
2356                                 if (hasmatch2[j])
2357                                         continue;
2358                                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
2359                                                                                                    DEFAULT_COLLATION_OID,
2360                                                                                                    values1[i],
2361                                                                                                    values2[j])))
2362                                 {
2363                                         hasmatch1[i] = hasmatch2[j] = true;
2364                                         matchprodfreq += numbers1[i] * numbers2[j];
2365                                         nmatches++;
2366                                         break;
2367                                 }
2368                         }
2369                 }
2370                 CLAMP_PROBABILITY(matchprodfreq);
2371                 /* Sum up frequencies of matched and unmatched MCVs */
2372                 matchfreq1 = unmatchfreq1 = 0.0;
2373                 for (i = 0; i < nvalues1; i++)
2374                 {
2375                         if (hasmatch1[i])
2376                                 matchfreq1 += numbers1[i];
2377                         else
2378                                 unmatchfreq1 += numbers1[i];
2379                 }
2380                 CLAMP_PROBABILITY(matchfreq1);
2381                 CLAMP_PROBABILITY(unmatchfreq1);
2382                 matchfreq2 = unmatchfreq2 = 0.0;
2383                 for (i = 0; i < nvalues2; i++)
2384                 {
2385                         if (hasmatch2[i])
2386                                 matchfreq2 += numbers2[i];
2387                         else
2388                                 unmatchfreq2 += numbers2[i];
2389                 }
2390                 CLAMP_PROBABILITY(matchfreq2);
2391                 CLAMP_PROBABILITY(unmatchfreq2);
2392                 pfree(hasmatch1);
2393                 pfree(hasmatch2);
2394
2395                 /*
2396                  * Compute total frequency of non-null values that are not in the MCV
2397                  * lists.
2398                  */
2399                 otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2400                 otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2401                 CLAMP_PROBABILITY(otherfreq1);
2402                 CLAMP_PROBABILITY(otherfreq2);
2403
2404                 /*
2405                  * We can estimate the total selectivity from the point of view of
2406                  * relation 1 as: the known selectivity for matched MCVs, plus
2407                  * unmatched MCVs that are assumed to match against random members of
2408                  * relation 2's non-MCV population, plus non-MCV values that are
2409                  * assumed to match against random members of relation 2's unmatched
2410                  * MCVs plus non-MCV values.
2411                  */
2412                 totalsel1 = matchprodfreq;
2413                 if (nd2 > nvalues2)
2414                         totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
2415                 if (nd2 > nmatches)
2416                         totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2417                                 (nd2 - nmatches);
2418                 /* Same estimate from the point of view of relation 2. */
2419                 totalsel2 = matchprodfreq;
2420                 if (nd1 > nvalues1)
2421                         totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
2422                 if (nd1 > nmatches)
2423                         totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2424                                 (nd1 - nmatches);
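                     /*
                      * Illustrative example with hypothetical numbers (not part of the
                      * original source): suppose the matched MCVs contribute
                      * matchprodfreq = 0.10, with unmatchfreq1 = 0.10, otherfreq1 = 0.50,
                      * unmatchfreq2 = 0.20, otherfreq2 = 0.50, nd2 = 1000, nvalues2 = 100
                      * and nmatches = 40.  Then totalsel1 = 0.10 + 0.10*0.50/900 +
                      * 0.50*(0.50+0.20)/960 ~= 0.1004: when the non-MCV population is
                      * spread over many distinct values, the MCV-vs-MCV term dominates.
                      */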
2425
2426                 /*
2427                  * Use the smaller of the two estimates.  This can be justified in
2428                  * essentially the same terms as given below for the no-stats case: to
2429                  * a first approximation, we are estimating from the point of view of
2430                  * the relation with smaller nd.
2431                  */
2432                 selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2433         }
2434         else
2435         {
2436                 /*
2437                  * We do not have MCV lists for both sides.  Estimate the join
2438                  * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2439                  * is plausible if we assume that the join operator is strict and the
2440                  * non-null values are about equally distributed: a given non-null
2441                  * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2442                  * of rel2, so total join rows are at most
2443                  * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2444                  * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2445                  * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2446                  * with MIN() is an upper bound.  Using the MIN() means we estimate
2447                  * from the point of view of the relation with smaller nd (since the
2448                  * larger nd is determining the MIN).  It is reasonable to assume that
2449                  * most tuples in this rel will have join partners, so the bound is
2450                  * probably reasonably tight and should be taken as-is.
2451                  *
2452                  * XXX Can we be smarter if we have an MCV list for just one side? It
2453                  * seems that if we assume equal distribution for the other side, we
2454                  * end up with the same answer anyway.
2455                  */
2456                 double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2457                 double          nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2458
2459                 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2460                 if (nd1 > nd2)
2461                         selec /= nd1;
2462                 else
2463                         selec /= nd2;
2464         }
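             /*
              * For illustration, with hypothetical numbers: nd1 = 10, nd2 = 1000 and
              * no nulls give MIN(1/10, 1/1000) = 0.001.  With N1 = 10,000 and
              * N2 = 50,000 rows that predicts about 10,000 * 50,000 * 0.001 = 500,000
              * join rows, i.e. each rel1 row finding roughly N2/nd2 = 50 partners,
              * which is exactly the equal-distribution assumption described above.
              */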
2465
2466         if (have_mcvs1)
2467                 free_attstatsslot(vardata1->atttype, values1, nvalues1,
2468                                                   numbers1, nnumbers1);
2469         if (have_mcvs2)
2470                 free_attstatsslot(vardata2->atttype, values2, nvalues2,
2471                                                   numbers2, nnumbers2);
2472
2473         return selec;
2474 }
2475
2476 /*
2477  * eqjoinsel_semi --- eqjoinsel for semi join
2478  *
2479  * (Also used for anti join, which we are supposed to estimate the same way.)
2480  * Caller has ensured that vardata1 is the LHS variable.
2481  */
2482 static double
2483 eqjoinsel_semi(Oid operator,
2484                            VariableStatData *vardata1, VariableStatData *vardata2,
2485                            RelOptInfo *inner_rel)
2486 {
2487         double          selec;
2488         double          nd1;
2489         double          nd2;
2490         bool            isdefault1;
2491         bool            isdefault2;
2492         Form_pg_statistic stats1 = NULL;
2493         bool            have_mcvs1 = false;
2494         Datum      *values1 = NULL;
2495         int                     nvalues1 = 0;
2496         float4     *numbers1 = NULL;
2497         int                     nnumbers1 = 0;
2498         bool            have_mcvs2 = false;
2499         Datum      *values2 = NULL;
2500         int                     nvalues2 = 0;
2501         float4     *numbers2 = NULL;
2502         int                     nnumbers2 = 0;
2503
2504         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2505         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2506
2507         /*
2508          * We clamp nd2 to be not more than what we estimate the inner relation's
2509          * size to be.  This is intuitively somewhat reasonable since obviously
2510          * there can't be more than that many distinct values coming from the
2511          * inner rel.  The reason for the asymmetry (ie, that we don't clamp nd1
2512          * likewise) is that this is the only pathway by which restriction clauses
2513          * applied to the inner rel will affect the join result size estimate,
2514          * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2515          * only the outer rel's size.  If we clamped nd1 we'd be double-counting
2516          * the selectivity of outer-rel restrictions.
2517          *
2518          * We can apply this clamping both with respect to the base relation from
2519          * which the join variable comes (if there is just one), and to the
2520          * immediate inner input relation of the current join.
2521          *
2522          * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2523          * great, maybe, but it didn't come out of nowhere either.  This is most
2524          * helpful when the inner relation is empty and consequently has no stats.
2525          */
2526         if (vardata2->rel)
2527         {
2528                 if (nd2 >= vardata2->rel->rows)
2529                 {
2530                         nd2 = vardata2->rel->rows;
2531                         isdefault2 = false;
2532                 }
2533         }
2534         if (nd2 >= inner_rel->rows)
2535         {
2536                 nd2 = inner_rel->rows;
2537                 isdefault2 = false;
2538         }
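             /*
              * Example of the effect (hypothetical numbers): if pg_statistic says the
              * inner column has 10,000 distinct values but restriction clauses leave
              * the inner rel with an estimated 50 rows, nd2 is clamped to 50.  That is
              * how inner-side restrictions end up influencing the semi/anti join
              * selectivity computed below.
              */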
2539
2540         if (HeapTupleIsValid(vardata1->statsTuple))
2541         {
2542                 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2543                 have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
2544                                                                           vardata1->atttype,
2545                                                                           vardata1->atttypmod,
2546                                                                           STATISTIC_KIND_MCV,
2547                                                                           InvalidOid,
2548                                                                           NULL,
2549                                                                           &values1, &nvalues1,
2550                                                                           &numbers1, &nnumbers1);
2551         }
2552
2553         if (HeapTupleIsValid(vardata2->statsTuple))
2554         {
2555                 have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
2556                                                                           vardata2->atttype,
2557                                                                           vardata2->atttypmod,
2558                                                                           STATISTIC_KIND_MCV,
2559                                                                           InvalidOid,
2560                                                                           NULL,
2561                                                                           &values2, &nvalues2,
2562                                                                           &numbers2, &nnumbers2);
2563         }
2564
2565         if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
2566         {
2567                 /*
2568                  * We have most-common-value lists for both relations.  Run through
2569                  * the lists to see which MCVs actually join to each other with the
2570                  * given operator.  This allows us to determine the exact join
2571                  * selectivity for the portion of the relations represented by the MCV
2572                  * lists.  We still have to estimate for the remaining population, but
2573                  * in a skewed distribution this gives us a big leg up in accuracy.
2574                  */
2575                 FmgrInfo        eqproc;
2576                 bool       *hasmatch1;
2577                 bool       *hasmatch2;
2578                 double          nullfrac1 = stats1->stanullfrac;
2579                 double          matchfreq1,
2580                                         uncertainfrac,
2581                                         uncertain;
2582                 int                     i,
2583                                         nmatches,
2584                                         clamped_nvalues2;
2585
2586                 /*
2587                  * The clamping above could have resulted in nd2 being less than
2588                  * nvalues2; in which case, we assume that precisely the nd2 most
2589                  * common values in the relation will appear in the join input, and so
2590                  * compare to only the first nd2 members of the MCV list.  Of course
2591                  * this is frequently wrong, but it's the best bet we can make.
2592                  */
2593                 clamped_nvalues2 = Min(nvalues2, nd2);
2594
2595                 fmgr_info(get_opcode(operator), &eqproc);
2596                 hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
2597                 hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2598
2599                 /*
2600                  * Note we assume that each MCV will match at most one member of the
2601                  * other MCV list.  If the operator isn't really equality, there could
2602                  * be multiple matches --- but we don't look for them, both for speed
2603                  * and because the math wouldn't add up...
2604                  */
2605                 nmatches = 0;
2606                 for (i = 0; i < nvalues1; i++)
2607                 {
2608                         int                     j;
2609
2610                         for (j = 0; j < clamped_nvalues2; j++)
2611                         {
2612                                 if (hasmatch2[j])
2613                                         continue;
2614                                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
2615                                                                                                    DEFAULT_COLLATION_OID,
2616                                                                                                    values1[i],
2617                                                                                                    values2[j])))
2618                                 {
2619                                         hasmatch1[i] = hasmatch2[j] = true;
2620                                         nmatches++;
2621                                         break;
2622                                 }
2623                         }
2624                 }
2625                 /* Sum up frequencies of matched MCVs */
2626                 matchfreq1 = 0.0;
2627                 for (i = 0; i < nvalues1; i++)
2628                 {
2629                         if (hasmatch1[i])
2630                                 matchfreq1 += numbers1[i];
2631                 }
2632                 CLAMP_PROBABILITY(matchfreq1);
2633                 pfree(hasmatch1);
2634                 pfree(hasmatch2);
2635
2636                 /*
2637                  * Now we need to estimate the fraction of relation 1 that has at
2638                  * least one join partner.  We know for certain that the matched MCVs
2639                  * do, so that gives us a lower bound, but we're really in the dark
2640                  * about everything else.  Our crude approach is: if nd1 <= nd2 then
2641                  * assume all non-null rel1 rows have join partners, else assume for
2642                  * the uncertain rows that a fraction nd2/nd1 have join partners. We
2643                  * can discount the known-matched MCVs from the distinct-values counts
2644                  * before doing the division.
2645                  *
2646                  * Crude as the above is, it's completely useless if we don't have
2647                  * reliable ndistinct values for both sides.  Hence, if either nd1 or
2648                  * nd2 is default, punt and assume half of the uncertain rows have
2649                  * join partners.
2650                  */
2651                 if (!isdefault1 && !isdefault2)
2652                 {
2653                         nd1 -= nmatches;
2654                         nd2 -= nmatches;
2655                         if (nd1 <= nd2 || nd2 < 0)
2656                                 uncertainfrac = 1.0;
2657                         else
2658                                 uncertainfrac = nd2 / nd1;
2659                 }
2660                 else
2661                         uncertainfrac = 0.5;
2662                 uncertain = 1.0 - matchfreq1 - nullfrac1;
2663                 CLAMP_PROBABILITY(uncertain);
2664                 selec = matchfreq1 + uncertainfrac * uncertain;
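                     /*
                      * Worked example with hypothetical numbers: matchfreq1 = 0.30,
                      * nullfrac1 = 0, nmatches = 50, nd1 = 1000, nd2 = 200.  After
                      * discounting the matches, nd1 = 950 and nd2 = 150, so
                      * uncertainfrac = 150/950 ~= 0.158 and uncertain = 0.70, giving
                      * selec ~= 0.30 + 0.158 * 0.70 ~= 0.41.
                      */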
2665         }
2666         else
2667         {
2668                 /*
2669                  * Without MCV lists for both sides, we can only use the heuristic
2670                  * about nd1 vs nd2.
2671                  */
2672                 double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2673
2674                 if (!isdefault1 && !isdefault2)
2675                 {
2676                         if (nd1 <= nd2 || nd2 < 0)
2677                                 selec = 1.0 - nullfrac1;
2678                         else
2679                                 selec = (nd2 / nd1) * (1.0 - nullfrac1);
2680                 }
2681                 else
2682                         selec = 0.5 * (1.0 - nullfrac1);
2683         }
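             /*
              * E.g. (hypothetical numbers): nd1 = 1000 distinct values on the outer
              * side, nd2 = 100 on the inner side and no outer-side nulls gives
              * selec = 100/1000 = 0.10, i.e. about a tenth of the outer rows are
              * expected to find at least one inner partner.
              */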
2684
2685         if (have_mcvs1)
2686                 free_attstatsslot(vardata1->atttype, values1, nvalues1,
2687                                                   numbers1, nnumbers1);
2688         if (have_mcvs2)
2689                 free_attstatsslot(vardata2->atttype, values2, nvalues2,
2690                                                   numbers2, nnumbers2);
2691
2692         return selec;
2693 }
2694
2695 /*
2696  *              neqjoinsel              - Join selectivity of "!="
2697  */
2698 Datum
2699 neqjoinsel(PG_FUNCTION_ARGS)
2700 {
2701         PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2702         Oid                     operator = PG_GETARG_OID(1);
2703         List       *args = (List *) PG_GETARG_POINTER(2);
2704         JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
2705         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2706         Oid                     eqop;
2707         float8          result;
2708
2709         /*
2710          * We want 1 - eqjoinsel() where the equality operator is the one
2711          * associated with this != operator, that is, its negator.
2712          */
2713         eqop = get_negator(operator);
2714         if (eqop)
2715         {
2716                 result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
2717                                                                                                         PointerGetDatum(root),
2718                                                                                                         ObjectIdGetDatum(eqop),
2719                                                                                                         PointerGetDatum(args),
2720                                                                                                         Int16GetDatum(jointype),
2721                                                                                                         PointerGetDatum(sjinfo)));
2722         }
2723         else
2724         {
2725                 /* Use default selectivity (should we raise an error instead?) */
2726                 result = DEFAULT_EQ_SEL;
2727         }
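             /*
              * Illustrative note: equality join selectivities are usually tiny, so
              * the complement below is usually close to 1; e.g. an eqjoinsel estimate
              * of 0.001 yields a "!=" selectivity of 0.999.
              */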
2728         result = 1.0 - result;
2729         PG_RETURN_FLOAT8(result);
2730 }
2731
2732 /*
2733  *              scalarltjoinsel - Join selectivity of "<" and "<=" for scalars
2734  */
2735 Datum
2736 scalarltjoinsel(PG_FUNCTION_ARGS)
2737 {
2738         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2739 }
2740
2741 /*
2742  *              scalargtjoinsel - Join selectivity of ">" and ">=" for scalars
2743  */
2744 Datum
2745 scalargtjoinsel(PG_FUNCTION_ARGS)
2746 {
2747         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2748 }
2749
2750 /*
2751  * patternjoinsel               - Generic code for pattern-match join selectivity.
2752  */
2753 static double
2754 patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
2755 {
2756         /* For the moment we just punt. */
2757         return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
2758 }
2759
2760 /*
2761  *              regexeqjoinsel  - Join selectivity of regular-expression pattern match.
2762  */
2763 Datum
2764 regexeqjoinsel(PG_FUNCTION_ARGS)
2765 {
2766         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
2767 }
2768
2769 /*
2770  *              icregexeqjoinsel        - Join selectivity of case-insensitive regex match.
2771  */
2772 Datum
2773 icregexeqjoinsel(PG_FUNCTION_ARGS)
2774 {
2775         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
2776 }
2777
2778 /*
2779  *              likejoinsel                     - Join selectivity of LIKE pattern match.
2780  */
2781 Datum
2782 likejoinsel(PG_FUNCTION_ARGS)
2783 {
2784         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
2785 }
2786
2787 /*
2788  *              iclikejoinsel                   - Join selectivity of ILIKE pattern match.
2789  */
2790 Datum
2791 iclikejoinsel(PG_FUNCTION_ARGS)
2792 {
2793         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
2794 }
2795
2796 /*
2797  *              regexnejoinsel  - Join selectivity of regex non-match.
2798  */
2799 Datum
2800 regexnejoinsel(PG_FUNCTION_ARGS)
2801 {
2802         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
2803 }
2804
2805 /*
2806  *              icregexnejoinsel        - Join selectivity of case-insensitive regex non-match.
2807  */
2808 Datum
2809 icregexnejoinsel(PG_FUNCTION_ARGS)
2810 {
2811         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
2812 }
2813
2814 /*
2815  *              nlikejoinsel            - Join selectivity of LIKE pattern non-match.
2816  */
2817 Datum
2818 nlikejoinsel(PG_FUNCTION_ARGS)
2819 {
2820         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
2821 }
2822
2823 /*
2824  *              icnlikejoinsel          - Join selectivity of ILIKE pattern non-match.
2825  */
2826 Datum
2827 icnlikejoinsel(PG_FUNCTION_ARGS)
2828 {
2829         PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
2830 }
2831
2832 /*
2833  * mergejoinscansel                     - Scan selectivity of merge join.
2834  *
2835  * A merge join will stop as soon as it exhausts either input stream.
2836  * Therefore, if we can estimate the ranges of both input variables,
2837  * we can estimate how much of the input will actually be read.  This
2838  * can have a considerable impact on the cost when using indexscans.
2839  *
2840  * Also, we can estimate how much of each input has to be read before the
2841  * first join pair is found, which will affect the join's startup time.
2842  *
2843  * clause should be a clause already known to be mergejoinable.  opfamily,
2844  * strategy, and nulls_first specify the sort ordering being used.
2845  *
2846  * The outputs are:
2847  *              *leftstart is set to the fraction of the left-hand variable expected
2848  *               to be scanned before the first join pair is found (0 to 1).
2849  *              *leftend is set to the fraction of the left-hand variable expected
2850  *               to be scanned before the join terminates (0 to 1).
2851  *              *rightstart, *rightend similarly for the right-hand variable.
2852  */
2853 void
2854 mergejoinscansel(PlannerInfo *root, Node *clause,
2855                                  Oid opfamily, int strategy, bool nulls_first,
2856                                  Selectivity *leftstart, Selectivity *leftend,
2857                                  Selectivity *rightstart, Selectivity *rightend)
2858 {
2859         Node       *left,
2860                            *right;
2861         VariableStatData leftvar,
2862                                 rightvar;
2863         int                     op_strategy;
2864         Oid                     op_lefttype;
2865         Oid                     op_righttype;
2866         Oid                     opno,
2867                                 lsortop,
2868                                 rsortop,
2869                                 lstatop,
2870                                 rstatop,
2871                                 ltop,
2872                                 leop,
2873                                 revltop,
2874                                 revleop;
2875         bool            isgt;
2876         Datum           leftmin,
2877                                 leftmax,
2878                                 rightmin,
2879                                 rightmax;
2880         double          selec;
2881
2882         /* Set default results if we can't figure anything out. */
2883         /* XXX should default "start" fraction be a bit more than 0? */
2884         *leftstart = *rightstart = 0.0;
2885         *leftend = *rightend = 1.0;
2886
2887         /* Deconstruct the merge clause */
2888         if (!is_opclause(clause))
2889                 return;                                 /* shouldn't happen */
2890         opno = ((OpExpr *) clause)->opno;
2891         left = get_leftop((Expr *) clause);
2892         right = get_rightop((Expr *) clause);
2893         if (!right)
2894                 return;                                 /* shouldn't happen */
2895
2896         /* Look for stats for the inputs */
2897         examine_variable(root, left, 0, &leftvar);
2898         examine_variable(root, right, 0, &rightvar);
2899
2900         /* Extract the operator's declared left/right datatypes */
2901         get_op_opfamily_properties(opno, opfamily, false,
2902                                                            &op_strategy,
2903                                                            &op_lefttype,
2904                                                            &op_righttype);
2905         Assert(op_strategy == BTEqualStrategyNumber);
2906
2907         /*
2908          * Look up the various operators we need.  If we don't find them all, it
2909          * probably means the opfamily is broken, but we just fail silently.
2910          *
2911          * Note: we expect that pg_statistic histograms will be sorted by the '<'
2912          * operator, regardless of which sort direction we are considering.
2913          */
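             /*
              * Descriptive note on the variables filled in below: ltop/leop are the
              * cross-type strict and non-strict comparison operators (in the requested
              * sort direction) used to compare the left variable against the right
              * side's extreme values; revltop/revleop are the reversed-argument
              * counterparts used for the right variable; lstatop/rstatop are the
              * single-type "<" operators used to extract each side's range from its
              * histogram; lsortop/rsortop are the single-type operators matching the
              * sort direction, looked up here mainly as a sanity check on the opfamily.
              */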
2914         switch (strategy)
2915         {
2916                 case BTLessStrategyNumber:
2917                         isgt = false;
2918                         if (op_lefttype == op_righttype)
2919                         {
2920                                 /* easy case */
2921                                 ltop = get_opfamily_member(opfamily,
2922                                                                                    op_lefttype, op_righttype,
2923                                                                                    BTLessStrategyNumber);
2924                                 leop = get_opfamily_member(opfamily,
2925                                                                                    op_lefttype, op_righttype,
2926                                                                                    BTLessEqualStrategyNumber);
2927                                 lsortop = ltop;
2928                                 rsortop = ltop;
2929                                 lstatop = lsortop;
2930                                 rstatop = rsortop;
2931                                 revltop = ltop;
2932                                 revleop = leop;
2933                         }
2934                         else
2935                         {
2936                                 ltop = get_opfamily_member(opfamily,
2937                                                                                    op_lefttype, op_righttype,
2938                                                                                    BTLessStrategyNumber);
2939                                 leop = get_opfamily_member(opfamily,
2940                                                                                    op_lefttype, op_righttype,
2941                                                                                    BTLessEqualStrategyNumber);
2942                                 lsortop = get_opfamily_member(opfamily,
2943                                                                                           op_lefttype, op_lefttype,
2944                                                                                           BTLessStrategyNumber);
2945                                 rsortop = get_opfamily_member(opfamily,
2946                                                                                           op_righttype, op_righttype,
2947                                                                                           BTLessStrategyNumber);
2948                                 lstatop = lsortop;
2949                                 rstatop = rsortop;
2950                                 revltop = get_opfamily_member(opfamily,
2951                                                                                           op_righttype, op_lefttype,
2952                                                                                           BTLessStrategyNumber);
2953                                 revleop = get_opfamily_member(opfamily,
2954                                                                                           op_righttype, op_lefttype,
2955                                                                                           BTLessEqualStrategyNumber);
2956                         }
2957                         break;
2958                 case BTGreaterStrategyNumber:
2959                         /* descending-order case */
2960                         isgt = true;
2961                         if (op_lefttype == op_righttype)
2962                         {
2963                                 /* easy case */
2964                                 ltop = get_opfamily_member(opfamily,
2965                                                                                    op_lefttype, op_righttype,
2966                                                                                    BTGreaterStrategyNumber);
2967                                 leop = get_opfamily_member(opfamily,
2968                                                                                    op_lefttype, op_righttype,
2969                                                                                    BTGreaterEqualStrategyNumber);
2970                                 lsortop = ltop;
2971                                 rsortop = ltop;
2972                                 lstatop = get_opfamily_member(opfamily,
2973                                                                                           op_lefttype, op_lefttype,
2974                                                                                           BTLessStrategyNumber);
2975                                 rstatop = lstatop;
2976                                 revltop = ltop;
2977                                 revleop = leop;
2978                         }
2979                         else
2980                         {
2981                                 ltop = get_opfamily_member(opfamily,
2982                                                                                    op_lefttype, op_righttype,
2983                                                                                    BTGreaterStrategyNumber);
2984                                 leop = get_opfamily_member(opfamily,
2985                                                                                    op_lefttype, op_righttype,
2986                                                                                    BTGreaterEqualStrategyNumber);
2987                                 lsortop = get_opfamily_member(opfamily,
2988                                                                                           op_lefttype, op_lefttype,
2989                                                                                           BTGreaterStrategyNumber);
2990                                 rsortop = get_opfamily_member(opfamily,
2991                                                                                           op_righttype, op_righttype,
2992                                                                                           BTGreaterStrategyNumber);
2993                                 lstatop = get_opfamily_member(opfamily,
2994                                                                                           op_lefttype, op_lefttype,
2995                                                                                           BTLessStrategyNumber);
2996                                 rstatop = get_opfamily_member(opfamily,
2997                                                                                           op_righttype, op_righttype,
2998                                                                                           BTLessStrategyNumber);
2999                                 revltop = get_opfamily_member(opfamily,
3000                                                                                           op_righttype, op_lefttype,
3001                                                                                           BTGreaterStrategyNumber);
3002                                 revleop = get_opfamily_member(opfamily,
3003                                                                                           op_righttype, op_lefttype,
3004                                                                                           BTGreaterEqualStrategyNumber);
3005                         }
3006                         break;
3007                 default:
3008                         goto fail;                      /* shouldn't get here */
3009         }
3010
3011         if (!OidIsValid(lsortop) ||
3012                 !OidIsValid(rsortop) ||
3013                 !OidIsValid(lstatop) ||
3014                 !OidIsValid(rstatop) ||
3015                 !OidIsValid(ltop) ||
3016                 !OidIsValid(leop) ||
3017                 !OidIsValid(revltop) ||
3018                 !OidIsValid(revleop))
3019                 goto fail;                              /* insufficient info in catalogs */
3020
3021         /* Try to get ranges of both inputs */
3022         if (!isgt)
3023         {
3024                 if (!get_variable_range(root, &leftvar, lstatop,
3025                                                                 &leftmin, &leftmax))
3026                         goto fail;                      /* no range available from stats */
3027                 if (!get_variable_range(root, &rightvar, rstatop,
3028                                                                 &rightmin, &rightmax))
3029                         goto fail;                      /* no range available from stats */
3030         }
3031         else
3032         {
3033                 /* need to swap the max and min */
3034                 if (!get_variable_range(root, &leftvar, lstatop,
3035                                                                 &leftmax, &leftmin))
3036                         goto fail;                      /* no range available from stats */
3037                 if (!get_variable_range(root, &rightvar, rstatop,
3038                                                                 &rightmax, &rightmin))
3039                         goto fail;                      /* no range available from stats */
3040         }
3041
3042         /*
3043          * Now, the fraction of the left variable that will be scanned is the
3044          * fraction that's <= the right-side maximum value.  But only believe
3045          * non-default estimates, else stick with our 1.0.
3046          */
3047         selec = scalarineqsel(root, leop, isgt, &leftvar,
3048                                                   rightmax, op_righttype);
3049         if (selec != DEFAULT_INEQ_SEL)
3050                 *leftend = selec;
3051
3052         /* And similarly for the right variable. */
3053         selec = scalarineqsel(root, revleop, isgt, &rightvar,
3054                                                   leftmax, op_lefttype);
3055         if (selec != DEFAULT_INEQ_SEL)
3056                 *rightend = selec;
3057
3058         /*
3059          * Only one of the two "end" fractions can really be less than 1.0;
3060          * believe the smaller estimate and reset the other one to exactly 1.0. If
3061          * we get exactly equal estimates (as can easily happen with self-joins),
3062          * believe neither.
3063          */
3064         if (*leftend > *rightend)
3065                 *leftend = 1.0;
3066         else if (*leftend < *rightend)
3067                 *rightend = 1.0;
3068         else
3069                 *leftend = *rightend = 1.0;
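             /*
              * Illustrative example (hypothetical numbers): if the left input ranges
              * over 1..1000 and the right input over 1..500, the merge should stop
              * after reading roughly the first half of the left input, so *leftend
              * comes out near 0.5 while *rightend is reset to 1.0 here.
              */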
3070
3071         /*
3072          * Also, the fraction of the left variable that will be scanned before the
3073          * first join pair is found is the fraction that's < the right-side
3074          * minimum value.  But only believe non-default estimates, else stick with
3075          * our own default.
3076          */
3077         selec = scalarineqsel(root, ltop, isgt, &leftvar,
3078                                                   rightmin, op_righttype);
3079         if (selec != DEFAULT_INEQ_SEL)
3080                 *leftstart = selec;
3081
3082         /* And similarly for the right variable. */
3083         selec = scalarineqsel(root, revltop, isgt, &rightvar,
3084                                                   leftmin, op_lefttype);
3085         if (selec != DEFAULT_INEQ_SEL)
3086                 *rightstart = selec;
3087
3088         /*
3089          * Only one of the two "start" fractions can really be more than zero;
3090          * believe the larger estimate and reset the other one to exactly 0.0. If
3091          * we get exactly equal estimates (as can easily happen with self-joins),
3092          * believe neither.
3093          */
3094         if (*leftstart < *rightstart)
3095                 *leftstart = 0.0;
3096         else if (*leftstart > *rightstart)
3097                 *rightstart = 0.0;
3098         else
3099                 *leftstart = *rightstart = 0.0;
3100
3101         /*
3102          * If the sort order is nulls-first, we're going to have to skip over any
3103          * nulls too.  These would not have been counted by scalarineqsel, and we
3104          * can safely add in this fraction regardless of whether we believe
3105          * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
3106          */
3107         if (nulls_first)
3108         {
3109                 Form_pg_statistic stats;
3110
3111                 if (HeapTupleIsValid(leftvar.statsTuple))
3112                 {
3113                         stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
3114                         *leftstart += stats->stanullfrac;
3115                         CLAMP_PROBABILITY(*leftstart);
3116                         *leftend += stats->stanullfrac;
3117                         CLAMP_PROBABILITY(*leftend);
3118                 }
3119                 if (HeapTupleIsValid(rightvar.statsTuple))
3120                 {
3121                         stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
3122                         *rightstart += stats->stanullfrac;
3123                         CLAMP_PROBABILITY(*rightstart);
3124                         *rightend += stats->stanullfrac;
3125                         CLAMP_PROBABILITY(*rightend);
3126                 }
3127         }
3128
3129         /* Disbelieve start >= end, just in case that can happen */
3130         if (*leftstart >= *leftend)
3131         {
3132                 *leftstart = 0.0;
3133                 *leftend = 1.0;
3134         }
3135         if (*rightstart >= *rightend)
3136         {
3137                 *rightstart = 0.0;
3138                 *rightend = 1.0;
3139         }
3140
3141 fail:
3142         ReleaseVariableStats(leftvar);
3143         ReleaseVariableStats(rightvar);
3144 }
3145
3146
3147 /*
3148  * Helper routine for estimate_num_groups: add an item to a list of
3149  * GroupVarInfos, but only if it's not known equal to any of the existing
3150  * entries.
3151  */
3152 typedef struct
3153 {
3154         Node       *var;                        /* might be an expression, not just a Var */
3155         RelOptInfo *rel;                        /* relation it belongs to */
3156         double          ndistinct;              /* # distinct values */
3157 } GroupVarInfo;
3158
3159 static List *
3160 add_unique_group_var(PlannerInfo *root, List *varinfos,
3161                                          Node *var, VariableStatData *vardata)
3162 {
3163         GroupVarInfo *varinfo;
3164         double          ndistinct;
3165         bool            isdefault;
3166         ListCell   *lc;
3167
3168         ndistinct = get_variable_numdistinct(vardata, &isdefault);
3169
3170         /* cannot use foreach here because of possible list_delete */
3171         lc = list_head(varinfos);
3172         while (lc)
3173         {
3174                 varinfo = (GroupVarInfo *) lfirst(lc);
3175
3176                 /* must advance lc before list_delete possibly pfree's it */
3177                 lc = lnext(lc);
3178
3179                 /* Drop exact duplicates */
3180                 if (equal(var, varinfo->var))
3181                         return varinfos;
3182
3183                 /*
3184                  * Drop known-equal vars, but only if they belong to different
3185                  * relations (see comments for estimate_num_groups)
3186                  */
3187                 if (vardata->rel != varinfo->rel &&
3188                         exprs_known_equal(root, var, varinfo->var))
3189                 {
3190                         if (varinfo->ndistinct <= ndistinct)
3191                         {
3192                                 /* Keep older item, forget new one */
3193                                 return varinfos;
3194                         }
3195                         else
3196                         {
3197                                 /* Delete the older item */
3198                                 varinfos = list_delete_ptr(varinfos, varinfo);
3199                         }
3200                 }
3201         }
3202
3203         varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3204
3205         varinfo->var = var;
3206         varinfo->rel = vardata->rel;
3207         varinfo->ndistinct = ndistinct;
3208         varinfos = lappend(varinfos, varinfo);
3209         return varinfos;
3210 }
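     /*
      * For example (hypothetical tables): given GROUP BY t1.a, t2.b where the join
      * makes t1.a = t2.b known equal via an equivalence class, add_unique_group_var
      * keeps only the var with the smaller ndistinct estimate, per step 3 of the
      * estimate_num_groups strategy described below.
      */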
3211
3212 /*
3213  * estimate_num_groups          - Estimate number of groups in a grouped query
3214  *
3215  * Given a query having a GROUP BY clause, estimate how many groups there
3216  * will be --- ie, the number of distinct combinations of the GROUP BY
3217  * expressions.
3218  *
3219  * This routine is also used to estimate the number of rows emitted by
3220  * a DISTINCT filtering step; that is an isomorphic problem.  (Note:
3221  * actually, we only use it for DISTINCT when there's no grouping or
3222  * aggregation ahead of the DISTINCT.)
3223  *
3224  * Inputs:
3225  *      root - the query
3226  *      groupExprs - list of expressions being grouped by
3227  *      input_rows - number of rows estimated to arrive at the group/unique
3228  *              filter step
3229  *      pgset - NULL, or a List** pointing to a grouping set to filter the
3230  *              groupExprs against
3231  *
3232  * Given the lack of any cross-correlation statistics in the system, it's
3233  * impossible to do anything really trustworthy with GROUP BY conditions
3234  * involving multiple Vars.  We should however avoid assuming the worst
3235  * case (all possible cross-product terms actually appear as groups) since
3236  * very often the grouped-by Vars are highly correlated.  Our current approach
3237  * is as follows:
3238  *      1.  Expressions yielding boolean are assumed to contribute two groups,
3239  *              independently of their content, and are ignored in the subsequent
3240  *              steps.  This is mainly because tests like "col IS NULL" break the
3241  *              heuristic used in step 2 especially badly.
3242  *      2.  Reduce the given expressions to a list of unique Vars used.  For
3243  *              example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3244  *              It is clearly correct not to count the same Var more than once.
3245  *              It is also reasonable to treat f(x) the same as x: f() cannot
3246  *              increase the number of distinct values (unless it is volatile,
3247  *              which we consider unlikely for grouping), but it probably won't
3248  *              reduce the number of distinct values much either.
3249  *              As a special case, if a GROUP BY expression can be matched to an
3250  *              expressional index for which we have statistics, then we treat the
3251  *              whole expression as though it were just a Var.
3252  *      3.  If the list contains Vars of different relations that are known equal
3253  *              due to equivalence classes, then drop all but one of the Vars from each
3254  *              known-equal set, keeping the one with smallest estimated # of values
3255  *              (since the extra values of the others can't appear in joined rows).
3256  *              Note the reason we only consider Vars of different relations is that
3257  *              if we considered ones of the same rel, we'd be double-counting the
3258  *              restriction selectivity of the equality in the next step.
3259  *      4.  For Vars within a single source rel, we multiply together the numbers
3260  *              of values, clamp to the number of rows in the rel (divided by 10 if
3261  *              more than one Var), and then multiply by a factor based on the
3262  *              selectivity of the restriction clauses for that rel.  When there's
3263  *              more than one Var, the initial product is probably too high (it's the
3264  *              worst case) but clamping to a fraction of the rel's rows seems to be a
3265  *              helpful heuristic for not letting the estimate get out of hand.  (The
3266  *              factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
3267  *              we multiply by to adjust for the restriction selectivity assumes that
3268  *              the restriction clauses are independent of the grouping, which may not
3269  *              be a valid assumption, but it's hard to do better.
3270  *      5.  If there are Vars from multiple rels, we repeat step 4 for each such
3271  *              rel, and multiply the results together.
3272  * Note that rels not containing grouped Vars are ignored completely, as are
3273  * join clauses.  Such rels cannot increase the number of groups, and we
3274  * assume such clauses do not reduce the number either (somewhat bogus,
3275  * but we don't have the info to do better).
3276  */
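     /*
      * Worked example of steps 2-4 above (hypothetical numbers): GROUP BY a, a + b
      * over a single rel reduces to the Vars {a, b}.  If a has ~100 distinct values,
      * b has ~50, and the rel has 10,000 rows, the worst-case product 100 * 50 =
      * 5,000 is clamped to 10,000 / 10 = 1,000 (two Vars, so the divide-by-10 rule
      * applies), and that figure is then adjusted for the rel's restriction clauses
      * before being combined across rels in step 5.
      */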
3277 double
3278 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3279                                         List **pgset)
3280 {
3281         List       *varinfos = NIL;
3282         double          numdistinct;
3283         ListCell   *l;
3284         int                     i;
3285
3286         /*
3287          * We don't ever want to return an estimate of zero groups, as that tends
3288          * to lead to division-by-zero and other unpleasantness.  The input_rows
3289          * estimate is usually already at least 1, but clamp it just in case it
3290          * isn't.
3291          */
3292         input_rows = clamp_row_est(input_rows);
3293
3294         /*
3295          * If no grouping columns, there's exactly one group.  (This can't happen
3296          * for normal cases with GROUP BY or DISTINCT, but it is possible for
3297          * corner cases with set operations.)
3298          */
3299         if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3300                 return 1.0;
3301
3302         /*
3303          * Count groups derived from boolean grouping expressions.  For other
3304          * expressions, find the unique Vars used, treating an expression as a Var
3305          * if we can find stats for it.  For each one, record the statistical
3306          * estimate of number of distinct values (total in its table, without
3307          * regard for filtering).
3308          */
3309         numdistinct = 1.0;
3310
3311         i = 0;
3312         foreach(l, groupExprs)
3313         {
3314                 Node       *groupexpr = (Node *) lfirst(l);
3315                 VariableStatData vardata;
3316                 List       *varshere;
3317                 ListCell   *l2;
3318
3319                 /* is expression in this grouping set? */
3320                 if (pgset && !list_member_int(*pgset, i++))
3321                         continue;
3322
3323                 /* Short-circuit for expressions returning boolean */
3324                 if (exprType(groupexpr) == BOOLOID)
3325                 {
3326                         numdistinct *= 2.0;
3327                         continue;
3328                 }
3329
3330                 /*
3331                  * If examine_variable is able to deduce anything about the GROUP BY
3332                  * expression, treat it as a single variable even if it's really more
3333                  * complicated.
3334                  */
3335                 examine_variable(root, groupexpr, 0, &vardata);
3336                 if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3337                 {
3338                         varinfos = add_unique_group_var(root, varinfos,
3339                                                                                         groupexpr, &vardata);
3340                         ReleaseVariableStats(vardata);
3341                         continue;
3342                 }
3343                 ReleaseVariableStats(vardata);
3344
3345                 /*
3346                  * Else pull out the component Vars.  Handle PlaceHolderVars by
3347                  * recursing into their arguments (effectively assuming that the
3348                  * PlaceHolderVar doesn't change the number of groups, which boils
3349                  * down to ignoring the possible addition of nulls to the result set).
3350                  */
3351                 varshere = pull_var_clause(groupexpr,
3352                                                                    PVC_RECURSE_AGGREGATES |
3353                                                                    PVC_RECURSE_WINDOWFUNCS |
3354                                                                    PVC_RECURSE_PLACEHOLDERS);
3355
3356                 /*
3357                  * If we find any variable-free GROUP BY item, then either it is a
3358                  * constant (and we can ignore it) or it contains a volatile function;
3359                  * in the latter case we punt and assume that each input row will
3360                  * yield a distinct group.
3361                  */
3362                 if (varshere == NIL)
3363                 {
3364                         if (contain_volatile_functions(groupexpr))
3365                                 return input_rows;
3366                         continue;
3367                 }
3368
3369                 /*
3370                  * Else add variables to varinfos list
3371                  */
3372                 foreach(l2, varshere)
3373                 {
3374                         Node       *var = (Node *) lfirst(l2);
3375
3376                         examine_variable(root, var, 0, &vardata);
3377                         varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3378                         ReleaseVariableStats(vardata);
3379                 }
3380         }
3381
3382         /*
3383          * If there are now no Vars, we must have an all-constant or all-boolean
3384          * GROUP BY list.
3385          */
3386         if (varinfos == NIL)
3387         {
3388                 /* Guard against out-of-range answers */
3389                 if (numdistinct > input_rows)
3390                         numdistinct = input_rows;
3391                 return numdistinct;
3392         }
3393
3394         /*
3395          * Group Vars by relation and estimate total numdistinct.
3396          *
3397          * For each iteration of the outer loop, we process the frontmost Var in
3398          * varinfos, plus all other Vars in the same relation.  These Vars are
3399          * excluded from the newvarinfos list used for the next iteration.  This
3400          * is the easiest way to group Vars of the same rel together.
3401          */
3402         do
3403         {
3404                 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3405                 RelOptInfo *rel = varinfo1->rel;
3406                 double          reldistinct = 1;
3407                 double          relmaxndistinct = reldistinct;
3408                 int                     relvarcount = 0;
3409                 List       *newvarinfos = NIL;
3410                 List       *relvarinfos = NIL;
3411
3412                 /*
3413                  * Split the list of varinfos in two - one for the current rel,
3414                  * one for remaining Vars on other rels.
3415                  */
3416                 relvarinfos = lcons(varinfo1, relvarinfos);
3417                 for_each_cell(l, lnext(list_head(varinfos)))
3418                 {
3419                         GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3420
3421                         if (varinfo2->rel == varinfo1->rel)
3422                         {
3423                                 /* varinfos on current rel */
3424                                 relvarinfos = lcons(varinfo2, relvarinfos);
3425                         }
3426                         else
3427                         {
3428                                 /* not time to process varinfo2 yet */
3429                                 newvarinfos = lcons(varinfo2, newvarinfos);
3430                         }
3431                 }
3432
3433                 /*
3434                  * Get the numdistinct estimate for the Vars of this rel.  We
3435                  * iteratively search for multivariate n-distinct statistics covering
3436                  * the maximum number of vars; assuming each matched var group is
3437                  * independent of the others, we multiply their estimates together.
3438                  * Any relvarinfos remaining once no more multivariate matches can be
3439                  * found are also assumed independent, so their estimates are multiplied in as well.
3440                  *
3441                  * While iterating, count how many separate numdistinct values we
3442                  * apply.  We apply a fudge factor below, but only if we multiplied
3443                  * more than one such value.
3444                  */
3445                 while (relvarinfos)
3446                 {
3447                         double          mvndistinct;
3448
3449                         if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3450                                                                                                 &mvndistinct))
3451                         {
3452                                 reldistinct *= mvndistinct;
3453                                 if (relmaxndistinct < mvndistinct)
3454                                         relmaxndistinct = mvndistinct;
3455                                 relvarcount++;
3456                         }
3457                         else
3458                         {
3459                                 foreach (l, relvarinfos)
3460                                 {
3461                                         GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3462
3463                                         reldistinct *= varinfo2->ndistinct;
3464                                         if (relmaxndistinct < varinfo2->ndistinct)
3465                                                 relmaxndistinct = varinfo2->ndistinct;
3466                                         relvarcount++;
3467                                 }
3468
3469                                 /* we're done with this relation */
3470                                 relvarinfos = NIL;
3471                         }
3472                 }
3473
3474                 /*
3475                  * Sanity check --- don't divide by zero if empty relation.
3476                  */
3477                 Assert(IS_SIMPLE_REL(rel));
3478                 if (rel->tuples > 0)
3479                 {
3480                         /*
3481                          * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3482                          * fudge factor is because the Vars are probably correlated but we
3483                          * don't know by how much.  We should never clamp to less than the
3484                          * largest ndistinct value for any of the Vars, though, since
3485                          * there will surely be at least that many groups.
3486                          */
3487                         double          clamp = rel->tuples;
3488
3489                         if (relvarcount > 1)
3490                         {
3491                                 clamp *= 0.1;
3492                                 if (clamp < relmaxndistinct)
3493                                 {
3494                                         clamp = relmaxndistinct;
3495                                         /* for sanity in case some ndistinct is too large: */
3496                                         if (clamp > rel->tuples)
3497                                                 clamp = rel->tuples;
3498                                 }
3499                         }
3500                         if (reldistinct > clamp)
3501                                 reldistinct = clamp;
3502
3503                         /*
3504                          * Update the estimate based on the restriction selectivity,
3505                          * guarding against division by zero when reldistinct is zero.
3506                          * Also skip this if we know that we are returning all rows.
3507                          */
3508                         if (reldistinct > 0 && rel->rows < rel->tuples)
3509                         {
3510                                 /*
3511                                  * Given a table containing N rows with n distinct values in a
3512                                  * uniform distribution, if we select p rows at random then
3513                                  * the expected number of distinct values selected is
3514                                  *
3515                                  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3516                                  *
3517                                  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3518                                  *
3519                                  * See "Approximating block accesses in database
3520                                  * organizations", S. B. Yao, Communications of the ACM,
3521                                  * Volume 20 Issue 4, April 1977 Pages 260-261.
3522                                  *
3523                                  * Alternatively, re-arranging the terms from the factorials,
3524                                  * this may be written as
3525                                  *
3526                                  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3527                                  *
3528                                  * This form of the formula is more efficient to compute in
3529                                  * the common case where p is larger than N/n.  Additionally,
3530                                  * as pointed out by Dell'Era, if i << N for all terms in the
3531                                  * product, it can be approximated by
3532                                  *
3533                                  * n * (1 - ((N-p)/N)^(N/n))
3534                                  *
3535                                  * See "Expected distinct values when selecting from a bag
3536                                  * without replacement", Alberto Dell'Era,
3537                                  * http://www.adellera.it/investigations/distinct_balls/.
3538                                  *
3539                                  * The condition i << N is equivalent to n >> 1, so this is a
3540                                  * good approximation when the number of distinct values in
3541                                  * the table is large.  It turns out that this formula also
3542                                  * works well even when n is small.
3543                                  */
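                                 /*
                                  * As a rough illustration with made-up numbers: for a rel with
                                  * N = rel->tuples = 1,000,000, n = reldistinct = 1,000 and
                                  * p = rel->rows = 500, the approximation gives
                                  *
                                  *   1000 * (1 - (999500/1000000)^(1000000/1000))
                                  *     = 1000 * (1 - 0.9995^1000)
                                  *     ~= 1000 * (1 - 0.6065) ~= 393
                                  *
                                  * i.e. a random sample of 500 rows is expected to contain only
                                  * about 393 of the 1,000 distinct values.
                                  */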
3544                                 reldistinct *=
3545                                         (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3546                                                          rel->tuples / reldistinct));
3547                         }
3548                         reldistinct = clamp_row_est(reldistinct);
3549
3550                         /*
3551                          * Update estimate of total distinct groups.
3552                          */
3553                         numdistinct *= reldistinct;
3554                 }
3555
3556                 varinfos = newvarinfos;
3557         } while (varinfos != NIL);
3558
3559         numdistinct = ceil(numdistinct);
3560
3561         /* Guard against out-of-range answers */
3562         if (numdistinct > input_rows)
3563                 numdistinct = input_rows;
3564         if (numdistinct < 1.0)
3565                 numdistinct = 1.0;
3566
3567         return numdistinct;
3568 }
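
/*
 * As a rough worked example with hypothetical numbers: for GROUP BY a, b on
 * a single rel with 10,000 tuples, per-column estimates ndistinct(a) = 50
 * and ndistinct(b) = 20, and no applicable extended statistics, the loop
 * above computes reldistinct = 50 * 20 = 1000 with relvarcount = 2.  The
 * clamp is then 10,000 * 0.1 = 1000 (never below relmaxndistinct = 50), so
 * reldistinct stays at 1000.  With no restriction clauses
 * (rel->rows == rel->tuples) the sampling adjustment is skipped, and the
 * final estimate is about 1000 groups.
 */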
3569
3570 /*
3571  * Estimate hash bucketsize fraction (ie, number of entries in a bucket
3572  * divided by total tuples in relation) if the specified expression is used
3573  * as a hash key.
3574  *
3575  * XXX This is really pretty bogus since we're effectively assuming that the
3576  * distribution of hash keys will be the same after applying restriction
3577  * clauses as it was in the underlying relation.  However, we are not nearly
3578  * smart enough to figure out how the restrict clauses might change the
3579  * smart enough to figure out how the restriction clauses might change the
3580  *
3581  * We are passed the number of buckets the executor will use for the given
3582  * input relation.  If the data were perfectly distributed, with the same
3583  * number of tuples going into each available bucket, then the bucketsize
3584  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
3585  * only if (a) there are at least nbuckets distinct data values, and (b)
3586  * we have a not-too-skewed data distribution.  Otherwise the buckets will
3587  * be nonuniformly occupied.  If the other relation in the join has a key
3588  * distribution similar to this one's, then the most-loaded buckets are
3589  * exactly those that will be probed most often.  Therefore, the "average"
3590  * bucket size for costing purposes should really be taken as something close
3591  * to the "worst case" bucket size.  We try to estimate this by adjusting the
3592  * fraction if there are too few distinct data values, and then scaling up
3593  * by the ratio of the most common value's frequency to the average frequency.
3594  *
3595  * If no statistics are available, use a default estimate of 0.1.  This will
3596  * discourage use of a hash rather strongly if the inner relation is large,
3597  * which is what we want.  We do not want to hash unless we know that the
3598  * inner rel is well-dispersed (or the alternatives seem much worse).
3599  */
3600 Selectivity
3601 estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
3602 {
3603         VariableStatData vardata;
3604         double          estfract,
3605                                 ndistinct,
3606                                 stanullfrac,
3607                                 mcvfreq,
3608                                 avgfreq;
3609         bool            isdefault;
3610         float4     *numbers;
3611         int                     nnumbers;
3612
3613         examine_variable(root, hashkey, 0, &vardata);
3614
3615         /* Get number of distinct values */
3616         ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3617
3618         /* If ndistinct isn't real, punt and return 0.1, per comments above */
3619         if (isdefault)
3620         {
3621                 ReleaseVariableStats(vardata);
3622                 return (Selectivity) 0.1;
3623         }
3624
3625         /* Get fraction that are null */
3626         if (HeapTupleIsValid(vardata.statsTuple))
3627         {
3628                 Form_pg_statistic stats;
3629
3630                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3631                 stanullfrac = stats->stanullfrac;
3632         }
3633         else
3634                 stanullfrac = 0.0;
3635
3636         /* Compute avg freq of all distinct data values in raw relation */
3637         avgfreq = (1.0 - stanullfrac) / ndistinct;
3638
3639         /*
3640          * Adjust ndistinct to account for restriction clauses.  Observe we are
3641          * assuming that the data distribution is affected uniformly by the
3642          * restriction clauses!
3643          *
3644          * XXX Possibly better way, but much more expensive: multiply by
3645          * selectivity of rel's restriction clauses that mention the target Var.
3646          */
3647         if (vardata.rel && vardata.rel->tuples > 0)
3648         {
3649                 ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3650                 ndistinct = clamp_row_est(ndistinct);
3651         }
3652
3653         /*
3654          * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3655          * number of buckets is less than the expected number of distinct values;
3656          * otherwise it is 1/ndistinct.
3657          */
3658         if (ndistinct > nbuckets)
3659                 estfract = 1.0 / nbuckets;
3660         else
3661                 estfract = 1.0 / ndistinct;
3662
3663         /*
3664          * Look up the frequency of the most common value, if available.
3665          */
3666         mcvfreq = 0.0;
3667
3668         if (HeapTupleIsValid(vardata.statsTuple))
3669         {
3670                 if (get_attstatsslot(vardata.statsTuple,
3671                                                          vardata.atttype, vardata.atttypmod,
3672                                                          STATISTIC_KIND_MCV, InvalidOid,
3673                                                          NULL,
3674                                                          NULL, NULL,
3675                                                          &numbers, &nnumbers))
3676                 {
3677                         /*
3678                          * The first MCV stat is for the most common value.
3679                          */
3680                         if (nnumbers > 0)
3681                                 mcvfreq = numbers[0];
3682                         free_attstatsslot(vardata.atttype, NULL, 0,
3683                                                           numbers, nnumbers);
3684                 }
3685         }
3686
3687         /*
3688          * Adjust estimated bucketsize upward to account for skewed distribution.
3689          */
3690         if (avgfreq > 0.0 && mcvfreq > avgfreq)
3691                 estfract *= mcvfreq / avgfreq;
3692
3693         /*
3694          * Clamp bucketsize to sane range (the above adjustment could easily
3695          * produce an out-of-range result).  We set the lower bound a little above
3696          * zero, since zero isn't a very sane result.
3697          */
3698         if (estfract < 1.0e-6)
3699                 estfract = 1.0e-6;
3700         else if (estfract > 1.0)
3701                 estfract = 1.0;
3702
3703         ReleaseVariableStats(vardata);
3704
3705         return (Selectivity) estfract;
3706 }
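
/*
 * A rough worked example with hypothetical statistics: for a hash key with
 * ndistinct = 500, nbuckets = 1024, stanullfrac = 0 and a most common value
 * of frequency 0.10, we get avgfreq = (1 - 0) / 500 = 0.002 and, since
 * ndistinct <= nbuckets, an initial estfract = 1/500 = 0.002.  The skew
 * adjustment then multiplies by mcvfreq / avgfreq = 0.10 / 0.002 = 50,
 * giving a bucketsize fraction of 0.1, so the heavily loaded bucket
 * dominates the cost estimate as intended.
 */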
3707
3708
3709 /*-------------------------------------------------------------------------
3710  *
3711  * Support routines
3712  *
3713  *-------------------------------------------------------------------------
3714  */
3715
3716 /*
3717  * Find applicable ndistinct statistics for the given list of VarInfos (which
3718  * must all belong to the given rel), and update *ndistinct to the estimate of
3719  * the MVNDistinctItem that best matches.  If a match is found, *varinfos is
3720  * updated to remove the matched varinfos.
3721  *
3722  * Varinfos that aren't for simple Vars are ignored.
3723  *
3724  * Return TRUE if we're able to find a match, FALSE otherwise.
3725  */
3726 static bool
3727 estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
3728                                                                 List **varinfos, double *ndistinct)
3729 {
3730         ListCell   *lc;
3731         Bitmapset  *attnums = NULL;
3732         int                     nmatches;
3733         Oid                     statOid = InvalidOid;
3734         MVNDistinct *stats;
3735         Bitmapset  *matched = NULL;
3736
3737         /* bail out immediately if the table has no extended statistics */
3738         if (!rel->statlist)
3739                 return false;
3740
3741         /* Determine the attnums we're looking for */
3742         foreach(lc, *varinfos)
3743         {
3744                 GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3745
3746                 Assert(varinfo->rel == rel);
3747
3748                 if (IsA(varinfo->var, Var))
3749                 {
3750                         attnums = bms_add_member(attnums,
3751                                                                          ((Var *) varinfo->var)->varattno);
3752                 }
3753         }
3754
3755         /* look for the ndistinct statistics matching the most vars */
3756         nmatches = 1; /* we require at least two matches */
3757         foreach(lc, rel->statlist)
3758         {
3759                 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
3760                 Bitmapset  *shared;
3761
3762                 /* skip statistics of other kinds */
3763                 if (info->kind != STATS_EXT_NDISTINCT)
3764                         continue;
3765
3766                 /* compute attnums shared by the vars and the statistic */
3767                 shared = bms_intersect(info->keys, attnums);
3768
3769                 /*
3770                  * Does this statistic match more columns than the currently best
3771                  * statistic?  If so, use this one instead.
3772                  *
3773                  * XXX This should break ties using the name of the statistic, or
3774                  * something like that, to make the outcome stable.
3775                  */
3776                 if (bms_num_members(shared) > nmatches)
3777                 {
3778                         statOid = info->statOid;
3779                         nmatches = bms_num_members(shared);
3780                         matched = shared;
3781                 }
3782         }
3783
3784         /* No match? */
3785         if (statOid == InvalidOid)
3786                 return false;
3787         Assert(nmatches > 1 && matched != NULL);
3788
3789         stats = statext_ndistinct_load(statOid);
3790
3791         /*
3792          * If we have a match, search it for the specific item that matches (there
3793          * must be one), and construct the output values.
3794          */
3795         if (stats)
3796         {
3797                 int             i;
3798                 List   *newlist = NIL;
3799                 MVNDistinctItem *item = NULL;
3800
3801                 /* Find the specific item that exactly matches the combination */
3802                 for (i = 0; i < stats->nitems; i++)
3803                 {
3804                         MVNDistinctItem *tmpitem = &stats->items[i];
3805
3806                         if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
3807                         {
3808                                 item = tmpitem;
3809                                 break;
3810                         }
3811                 }
3812
3813                 /* make sure we found an item */
3814                 if (!item)
3815                         elog(ERROR, "corrupt MVNDistinct entry");
3816
3817                 /* Form the output varinfo list, keeping only unmatched ones */
3818                 foreach(lc, *varinfos)
3819                 {
3820                         GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3821                         AttrNumber      attnum;
3822
3823                         if (!IsA(varinfo->var, Var))
3824                         {
3825                                 newlist = lappend(newlist, varinfo);
3826                                 continue;
3827                         }
3828
3829                         attnum = ((Var *) varinfo->var)->varattno;
3830                         if (!bms_is_member(attnum, matched))
3831                                 newlist = lappend(newlist, varinfo);
3832                 }
3833
3834                 *varinfos = newlist;
3835                 *ndistinct = item->ndistinct;
3836                 return true;
3837         }
3838
3839         return false;
3840 }
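
/*
 * A rough sketch with made-up numbers: suppose the query groups by columns
 * a, b and c of a table that has an ndistinct extended statistic on (a, b),
 * e.g. created with something like
 *
 *              CREATE STATISTICS s1 (ndistinct) ON a, b FROM tbl;
 *
 * The loop above matches that statistic (two shared attnums), the
 * MVNDistinctItem for exactly {a, b} supplies, say, *ndistinct = 2500, and
 * *varinfos is reduced to just the entry for c, whose single-column
 * estimate the caller multiplies in separately.
 */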
3841
3842 /*
3843  * convert_to_scalar
3844  *        Convert non-NULL values of the indicated types to the comparison
3845  *        scale needed by scalarineqsel().
3846  *        Returns "true" if successful.
3847  *
3848  * XXX this routine is a hack: ideally we should look up the conversion
3849  * subroutines in pg_type.
3850  *
3851  * All numeric datatypes are simply converted to their equivalent
3852  * "double" values.  (NUMERIC values that are outside the range of "double"
3853  * are clamped to +/- HUGE_VAL.)
3854  *
3855  * String datatypes are converted by convert_string_to_scalar(),
3856  * which is explained below.  The reason why this routine deals with
3857  * three values at a time, not just one, is that we need it for strings.
3858  *
3859  * The bytea datatype is just different enough from strings that it has
3860  * to be treated separately.
3861  *
3862  * The several datatypes representing absolute times are all converted
3863  * to Timestamp, which is actually a double, and then we just use that
3864  * double value.  Note this will give correct results even for the "special"
3865  * values of Timestamp, since those are chosen to compare correctly;
3866  * see timestamp_cmp.
3867  *
3868  * The several datatypes representing relative times (intervals) are all
3869  * converted to measurements expressed in seconds.
3870  */
3871 static bool
3872 convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
3873                                   Datum lobound, Datum hibound, Oid boundstypid,
3874                                   double *scaledlobound, double *scaledhibound)
3875 {
3876         /*
3877          * Both the valuetypid and the boundstypid should exactly match the
3878          * declared input type(s) of the operator we are invoked for, so we just
3879          * error out if either is not recognized.
3880          *
3881          * XXX The histogram we are interpolating between points of could belong
3882          * to a column that's only binary-compatible with the declared type. In
3883          * essence we are assuming that the semantics of binary-compatible types
3884          * are enough alike that we can use a histogram generated with one type's
3885          * operators to estimate selectivity for the other's.  This is outright
3886          * wrong in some cases --- in particular signed versus unsigned
3887          * interpretation could trip us up.  But it's useful enough in the
3888          * majority of cases that we do it anyway.  Should think about more
3889          * rigorous ways to do it.
3890          */
3891         switch (valuetypid)
3892         {
3893                         /*
3894                          * Built-in numeric types
3895                          */
3896                 case BOOLOID:
3897                 case INT2OID:
3898                 case INT4OID:
3899                 case INT8OID:
3900                 case FLOAT4OID:
3901                 case FLOAT8OID:
3902                 case NUMERICOID:
3903                 case OIDOID:
3904                 case REGPROCOID:
3905                 case REGPROCEDUREOID:
3906                 case REGOPEROID:
3907                 case REGOPERATOROID:
3908                 case REGCLASSOID:
3909                 case REGTYPEOID:
3910                 case REGCONFIGOID:
3911                 case REGDICTIONARYOID:
3912                 case REGROLEOID:
3913                 case REGNAMESPACEOID:
3914                         *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
3915                         *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
3916                         *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
3917                         return true;
3918
3919                         /*
3920                          * Built-in string types
3921                          */
3922                 case CHAROID:
3923                 case BPCHAROID:
3924                 case VARCHAROID:
3925                 case TEXTOID:
3926                 case NAMEOID:
3927                         {
3928                                 char       *valstr = convert_string_datum(value, valuetypid);
3929                                 char       *lostr = convert_string_datum(lobound, boundstypid);
3930                                 char       *histr = convert_string_datum(hibound, boundstypid);
3931
3932                                 convert_string_to_scalar(valstr, scaledvalue,
3933                                                                                  lostr, scaledlobound,
3934                                                                                  histr, scaledhibound);
3935                                 pfree(valstr);
3936                                 pfree(lostr);
3937                                 pfree(histr);
3938                                 return true;
3939                         }
3940
3941                         /*
3942                          * Built-in bytea type
3943                          */
3944                 case BYTEAOID:
3945                         {
3946                                 convert_bytea_to_scalar(value, scaledvalue,
3947                                                                                 lobound, scaledlobound,
3948                                                                                 hibound, scaledhibound);
3949                                 return true;
3950                         }
3951
3952                         /*
3953                          * Built-in time types
3954                          */
3955                 case TIMESTAMPOID:
3956                 case TIMESTAMPTZOID:
3957                 case ABSTIMEOID:
3958                 case DATEOID:
3959                 case INTERVALOID:
3960                 case RELTIMEOID:
3961                 case TINTERVALOID:
3962                 case TIMEOID:
3963                 case TIMETZOID:
3964                         *scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
3965                         *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
3966                         *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
3967                         return true;
3968
3969                         /*
3970                          * Built-in network types
3971                          */
3972                 case INETOID:
3973                 case CIDROID:
3974                 case MACADDROID:
3975                 case MACADDR8OID:
3976                         *scaledvalue = convert_network_to_scalar(value, valuetypid);
3977                         *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
3978                         *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
3979                         return true;
3980         }
3981         /* Don't know how to convert */
3982         *scaledvalue = *scaledlobound = *scaledhibound = 0;
3983         return false;
3984 }
3985
3986 /*
3987  * Do convert_to_scalar()'s work for any numeric data type.
3988  */
3989 static double
3990 convert_numeric_to_scalar(Datum value, Oid typid)
3991 {
3992         switch (typid)
3993         {
3994                 case BOOLOID:
3995                         return (double) DatumGetBool(value);
3996                 case INT2OID:
3997                         return (double) DatumGetInt16(value);
3998                 case INT4OID:
3999                         return (double) DatumGetInt32(value);
4000                 case INT8OID:
4001                         return (double) DatumGetInt64(value);
4002                 case FLOAT4OID:
4003                         return (double) DatumGetFloat4(value);
4004                 case FLOAT8OID:
4005                         return (double) DatumGetFloat8(value);
4006                 case NUMERICOID:
4007                         /* Note: out-of-range values will be clamped to +-HUGE_VAL */
4008                         return (double)
4009                                 DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
4010                                                                                                    value));
4011                 case OIDOID:
4012                 case REGPROCOID:
4013                 case REGPROCEDUREOID:
4014                 case REGOPEROID:
4015                 case REGOPERATOROID:
4016                 case REGCLASSOID:
4017                 case REGTYPEOID:
4018                 case REGCONFIGOID:
4019                 case REGDICTIONARYOID:
4020                 case REGROLEOID:
4021                 case REGNAMESPACEOID:
4022                         /* we can treat OIDs as integers... */
4023                         return (double) DatumGetObjectId(value);
4024         }
4025
4026         /*
4027          * Can't get here unless someone tries to use scalarltsel/scalargtsel on
4028          * an operator with one numeric and one non-numeric operand.
4029          */
4030         elog(ERROR, "unsupported type: %u", typid);
4031         return 0;
4032 }
4033
4034 /*
4035  * Do convert_to_scalar()'s work for any character-string data type.
4036  *
4037  * String datatypes are converted to a scale that ranges from 0 to 1,
4038  * where we visualize the bytes of the string as fractional digits.
4039  *
4040  * We do not want the base to be 256, however, since that tends to
4041  * generate inflated selectivity estimates; few databases will have
4042  * occurrences of all 256 possible byte values at each position.
4043  * Instead, use the smallest and largest byte values seen in the bounds
4044  * as the estimated range for each byte, after some fudging to deal with
4045  * the fact that we probably aren't going to see the full range that way.
4046  *
4047  * An additional refinement is that we discard any common prefix of the
4048  * three strings before computing the scaled values.  This allows us to
4049  * "zoom in" when we encounter a narrow data range.  An example is a phone
4050  * number database where all the values begin with the same area code.
4051  * (Actually, the bounds will be adjacent histogram-bin-boundary values,
4052  * so this is more likely to happen than you might think.)
4053  */
4054 static void
4055 convert_string_to_scalar(char *value,
4056                                                  double *scaledvalue,
4057                                                  char *lobound,
4058                                                  double *scaledlobound,
4059                                                  char *hibound,
4060                                                  double *scaledhibound)
4061 {
4062         int                     rangelo,
4063                                 rangehi;
4064         char       *sptr;
4065
4066         rangelo = rangehi = (unsigned char) hibound[0];
4067         for (sptr = lobound; *sptr; sptr++)
4068         {
4069                 if (rangelo > (unsigned char) *sptr)
4070                         rangelo = (unsigned char) *sptr;
4071                 if (rangehi < (unsigned char) *sptr)
4072                         rangehi = (unsigned char) *sptr;
4073         }
4074         for (sptr = hibound; *sptr; sptr++)
4075         {
4076                 if (rangelo > (unsigned char) *sptr)
4077                         rangelo = (unsigned char) *sptr;
4078                 if (rangehi < (unsigned char) *sptr)
4079                         rangehi = (unsigned char) *sptr;
4080         }
4081         /* If range includes any upper-case ASCII chars, make it include all */
4082         if (rangelo <= 'Z' && rangehi >= 'A')
4083         {
4084                 if (rangelo > 'A')
4085                         rangelo = 'A';
4086                 if (rangehi < 'Z')
4087                         rangehi = 'Z';
4088         }
4089         /* Ditto lower-case */
4090         if (rangelo <= 'z' && rangehi >= 'a')
4091         {
4092                 if (rangelo > 'a')
4093                         rangelo = 'a';
4094                 if (rangehi < 'z')
4095                         rangehi = 'z';
4096         }
4097         /* Ditto digits */
4098         if (rangelo <= '9' && rangehi >= '0')
4099         {
4100                 if (rangelo > '0')
4101                         rangelo = '0';
4102                 if (rangehi < '9')
4103                         rangehi = '9';
4104         }
4105
4106         /*
4107          * If the range includes fewer than 10 chars, assume we do not have
4108          * enough data, and make it include the regular ASCII set.
4109          */
4110         if (rangehi - rangelo < 9)
4111         {
4112                 rangelo = ' ';
4113                 rangehi = 127;
4114         }
4115
4116         /*
4117          * Now strip any common prefix of the three strings.
4118          */
4119         while (*lobound)
4120         {
4121                 if (*lobound != *hibound || *lobound != *value)
4122                         break;
4123                 lobound++, hibound++, value++;
4124         }
4125
4126         /*
4127          * Now we can do the conversions.
4128          */
4129         *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
4130         *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
4131         *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
4132 }
4133
4134 static double
4135 convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
4136 {
4137         int                     slen = strlen(value);
4138         double          num,
4139                                 denom,
4140                                 base;
4141
4142         if (slen <= 0)
4143                 return 0.0;                             /* empty string has scalar value 0 */
4144
4145         /*
4146          * There seems little point in considering more than a dozen bytes from
4147          * the string.  Since base is at least 10, that will give us nominal
4148          * resolution of at least 12 decimal digits, which is surely far more
4149          * precision than this estimation technique has got anyway (especially in
4150          * non-C locales).  Also, even with the maximum possible base of 256, this
4151          * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
4152          * overflow on any known machine.
4153          */
4154         if (slen > 12)
4155                 slen = 12;
4156
4157         /* Convert initial characters to fraction */
4158         base = rangehi - rangelo + 1;
4159         num = 0.0;
4160         denom = base;
4161         while (slen-- > 0)
4162         {
4163                 int                     ch = (unsigned char) *value++;
4164
4165                 if (ch < rangelo)
4166                         ch = rangelo - 1;
4167                 else if (ch > rangehi)
4168                         ch = rangehi + 1;
4169                 num += ((double) (ch - rangelo)) / denom;
4170                 denom *= base;
4171         }
4172
4173         return num;
4174 }
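
/*
 * For illustration: with rangelo = '0' and rangehi = '9' (base 10), the
 * string "25" maps to 2/10 + 5/100 = 0.25; with rangelo = 'a' and
 * rangehi = 'z' (base 26), "bd" maps to 1/26 + 3/676 ~= 0.043.  Bytes
 * outside the assumed range are clamped to rangelo - 1 or rangehi + 1
 * before conversion.
 */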
4175
4176 /*
4177  * Convert a string-type Datum into a palloc'd, null-terminated string.
4178  *
4179  * When using a non-C locale, we must pass the string through strxfrm()
4180  * before continuing, so as to generate correct locale-specific results.
4181  */
4182 static char *
4183 convert_string_datum(Datum value, Oid typid)
4184 {
4185         char       *val;
4186
4187         switch (typid)
4188         {
4189                 case CHAROID:
4190                         val = (char *) palloc(2);
4191                         val[0] = DatumGetChar(value);
4192                         val[1] = '\0';
4193                         break;
4194                 case BPCHAROID:
4195                 case VARCHAROID:
4196                 case TEXTOID:
4197                         val = TextDatumGetCString(value);
4198                         break;
4199                 case NAMEOID:
4200                         {
4201                                 NameData   *nm = (NameData *) DatumGetPointer(value);
4202
4203                                 val = pstrdup(NameStr(*nm));
4204                                 break;
4205                         }
4206                 default:
4207
4208                         /*
4209                          * Can't get here unless someone tries to use scalarltsel on an
4210                          * operator with one string and one non-string operand.
4211                          */
4212                         elog(ERROR, "unsupported type: %u", typid);
4213                         return NULL;
4214         }
4215
4216         if (!lc_collate_is_c(DEFAULT_COLLATION_OID))
4217         {
4218                 char       *xfrmstr;
4219                 size_t          xfrmlen;
4220                 size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4221
4222                 /*
4223                  * XXX: We could guess at a suitable output buffer size and only call
4224                  * strxfrm twice if our guess is too small.
4225                  *
4226                  * XXX: strxfrm doesn't support UTF-8 encoding on Win32; it can return
4227                  * bogus data or set an error.  This is not really a problem unless it
4228                  * crashes, since it will only cause an estimation error and nothing
4229                  * fatal.
4230                  */
4231 #if _MSC_VER == 1400                    /* VS.Net 2005 */
4232
4233                 /*
4234                  * See
4235                  * http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99694
4236                  */
4237                 {
4238                         char            x[1];
4239
4240                         xfrmlen = strxfrm(x, val, 0);
4241                 }
4242 #else
4243                 xfrmlen = strxfrm(NULL, val, 0);
4244 #endif
4245 #ifdef WIN32
4246
4247                 /*
4248                  * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4249                  * of trying to allocate this much memory (and fail), just return the
4250                  * original string unmodified as if we were in the C locale.
4251                  */
4252                 if (xfrmlen == INT_MAX)
4253                         return val;
4254 #endif
4255                 xfrmstr = (char *) palloc(xfrmlen + 1);
4256                 xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4257
4258                 /*
4259                  * Some systems (e.g., glibc) can return a smaller value from the
4260                  * second call than the first; thus the Assert must be <= not ==.
4261                  */
4262                 Assert(xfrmlen2 <= xfrmlen);
4263                 pfree(val);
4264                 val = xfrmstr;
4265         }
4266
4267         return val;
4268 }
4269
4270 /*
4271  * Do convert_to_scalar()'s work for any bytea data type.
4272  *
4273  * Very similar to convert_string_to_scalar except we can't assume
4274  * null-termination and therefore pass explicit lengths around.
4275  *
4276  * Also, assumptions about likely "normal" ranges of characters have been
4277  * removed - a data range of 0..255 is always used, for now.  (Perhaps
4278  * someday we will add information about actual byte data range to
4279  * pg_statistic.)
4280  */
4281 static void
4282 convert_bytea_to_scalar(Datum value,
4283                                                 double *scaledvalue,
4284                                                 Datum lobound,
4285                                                 double *scaledlobound,
4286                                                 Datum hibound,
4287                                                 double *scaledhibound)
4288 {
4289         int                     rangelo,
4290                                 rangehi,
4291                                 valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
4292                                 loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
4293                                 hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
4294                                 i,
4295                                 minlen;
4296         unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
4297                            *lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
4298                            *histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
4299
4300         /*
4301          * Assume bytea data is uniformly distributed across all byte values.
4302          */
4303         rangelo = 0;
4304         rangehi = 255;
4305
4306         /*
4307          * Now strip any common prefix of the three strings.
4308          */
4309         minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4310         for (i = 0; i < minlen; i++)
4311         {
4312                 if (*lostr != *histr || *lostr != *valstr)
4313                         break;
4314                 lostr++, histr++, valstr++;
4315                 loboundlen--, hiboundlen--, valuelen--;
4316         }
4317
4318         /*
4319          * Now we can do the conversions.
4320          */
4321         *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4322         *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4323         *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4324 }
4325
4326 static double
4327 convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
4328                                                         int rangelo, int rangehi)
4329 {
4330         double          num,
4331                                 denom,
4332                                 base;
4333
4334         if (valuelen <= 0)
4335                 return 0.0;                             /* empty string has scalar value 0 */
4336
4337         /*
4338          * Since base is 256, we need not consider more than about 10 chars (even
4339          * this many seems like overkill).
4340          */
4341         if (valuelen > 10)
4342                 valuelen = 10;
4343
4344         /* Convert initial characters to fraction */
4345         base = rangehi - rangelo + 1;
4346         num = 0.0;
4347         denom = base;
4348         while (valuelen-- > 0)
4349         {
4350                 int                     ch = *value++;
4351
4352                 if (ch < rangelo)
4353                         ch = rangelo - 1;
4354                 else if (ch > rangehi)
4355                         ch = rangehi + 1;
4356                 num += ((double) (ch - rangelo)) / denom;
4357                 denom *= base;
4358         }
4359
4360         return num;
4361 }
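
/*
 * For illustration: with the fixed 0..255 range (base 256), the two-byte
 * value {0x80, 0x40} maps to 128/256 + 64/65536 ~= 0.501.
 */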
4362
4363 /*
4364  * Do convert_to_scalar()'s work for any timevalue data type.
4365  */
4366 static double
4367 convert_timevalue_to_scalar(Datum value, Oid typid)
4368 {
4369         switch (typid)
4370         {
4371                 case TIMESTAMPOID:
4372                         return DatumGetTimestamp(value);
4373                 case TIMESTAMPTZOID:
4374                         return DatumGetTimestampTz(value);
4375                 case ABSTIMEOID:
4376                         return DatumGetTimestamp(DirectFunctionCall1(abstime_timestamp,
4377                                                                                                                  value));
4378                 case DATEOID:
4379                         return date2timestamp_no_overflow(DatumGetDateADT(value));
4380                 case INTERVALOID:
4381                         {
4382                                 Interval   *interval = DatumGetIntervalP(value);
4383
4384                                 /*
4385                                  * Convert the month part of Interval to days using assumed
4386                                  * average month length of 365.25/12.0 days.  Not too
4387                                  * accurate, but plenty good enough for our purposes.
4388                                  */
4389                                 return interval->time + interval->day * (double) USECS_PER_DAY +
4390                                         interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4391                         }
4392                 case RELTIMEOID:
4393                         return (DatumGetRelativeTime(value) * 1000000.0);
4394                 case TINTERVALOID:
4395                         {
4396                                 TimeInterval tinterval = DatumGetTimeInterval(value);
4397
4398                                 if (tinterval->status != 0)
4399                                         return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
4400                                 return 0;               /* for lack of a better idea */
4401                         }
4402                 case TIMEOID:
4403                         return DatumGetTimeADT(value);
4404                 case TIMETZOID:
4405                         {
4406                                 TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
4407
4408                                 /* use GMT-equivalent time */
4409                                 return (double) (timetz->time + (timetz->zone * 1000000.0));
4410                         }
4411         }
4412
4413         /*
4414          * Can't get here unless someone tries to use scalarltsel/scalargtsel on
4415          * an operator with one timevalue and one non-timevalue operand.
4416          */
4417         elog(ERROR, "unsupported type: %u", typid);
4418         return 0;
4419 }
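
/*
 * For illustration, an interval of '1 month 2 days 3 hours' converts to
 * roughly
 *
 *       3 * USECS_PER_HOUR            ~= 1.08e10 usec  (time part)
 *     + 2 * USECS_PER_DAY             ~= 1.73e11 usec  (day part)
 *     + 1 * 30.4375 * USECS_PER_DAY   ~= 2.63e12 usec  (month part)
 *
 * for a total of about 2.81e12, putting all interval parts on a common
 * microseconds scale suitable for histogram interpolation.
 */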
4420
4421
4422 /*
4423  * get_restriction_variable
4424  *              Examine the args of a restriction clause to see if it's of the
4425  *              form (variable op pseudoconstant) or (pseudoconstant op variable),
4426  *              where "variable" could be either a Var or an expression in vars of a
4427  *              single relation.  If so, extract information about the variable,
4428  *              and also indicate which side it was on and the other argument.
4429  *
4430  * Inputs:
4431  *      root: the planner info
4432  *      args: clause argument list
4433  *      varRelid: see specs for restriction selectivity functions
4434  *
4435  * Outputs: (these are valid only if TRUE is returned)
4436  *      *vardata: gets information about variable (see examine_variable)
4437  *      *other: gets other clause argument, aggressively reduced to a constant
4438  *      *varonleft: set TRUE if variable is on the left, FALSE if on the right
4439  *
4440  * Returns TRUE if a variable is identified, otherwise FALSE.
4441  *
4442  * Note: if there are Vars on both sides of the clause, we must fail, because
4443  * callers are expecting that the other side will act like a pseudoconstant.
4444  */
4445 bool
4446 get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
4447                                                  VariableStatData *vardata, Node **other,
4448                                                  bool *varonleft)
4449 {
4450         Node       *left,
4451                            *right;
4452         VariableStatData rdata;
4453
4454         /* Fail if not a binary opclause (probably shouldn't happen) */
4455         if (list_length(args) != 2)
4456                 return false;
4457
4458         left = (Node *) linitial(args);
4459         right = (Node *) lsecond(args);
4460
4461         /*
4462          * Examine both sides.  Note that when varRelid is nonzero, Vars of other
4463          * relations will be treated as pseudoconstants.
4464          */
4465         examine_variable(root, left, varRelid, vardata);
4466         examine_variable(root, right, varRelid, &rdata);
4467
4468         /*
4469          * If one side is a variable and the other not, we win.
4470          */
4471         if (vardata->rel && rdata.rel == NULL)
4472         {
4473                 *varonleft = true;
4474                 *other = estimate_expression_value(root, rdata.var);
4475                 /* Assume we need no ReleaseVariableStats(rdata) here */
4476                 return true;
4477         }
4478
4479         if (vardata->rel == NULL && rdata.rel)
4480         {
4481                 *varonleft = false;
4482                 *other = estimate_expression_value(root, vardata->var);
4483                 /* Assume we need no ReleaseVariableStats(*vardata) here */
4484                 *vardata = rdata;
4485                 return true;
4486         }
4487
4488         /* Oops, clause has wrong structure (probably var op var) */
4489         ReleaseVariableStats(*vardata);
4490         ReleaseVariableStats(rdata);
4491
4492         return false;
4493 }
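
/*
 * For example, given the clause "t.x < 42" this function fills *vardata
 * from t.x, folds the other argument to the Const 42 in *other, and sets
 * *varonleft = true; for "42 > t.x" it does the same but with
 * *varonleft = false.  For "t1.x < t2.y" (with varRelid = 0) both sides
 * have relations, so it returns false and the caller must handle the
 * clause some other way.
 */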
4494
4495 /*
4496  * get_join_variables
4497  *              Apply examine_variable() to each side of a join clause.
4498  *              Also, attempt to identify whether the join clause has the same
4499  *              or reversed sense compared to the SpecialJoinInfo.
4500  *
4501  * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
4502  * or "reversed" if it is "rhs_var OP lhs_var".  In complicated cases
4503  * where we can't tell for sure, we default to assuming it's normal.
4504  */
4505 void
4506 get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
4507                                    VariableStatData *vardata1, VariableStatData *vardata2,
4508                                    bool *join_is_reversed)
4509 {
4510         Node       *left,
4511                            *right;
4512
4513         if (list_length(args) != 2)
4514                 elog(ERROR, "join operator should take two arguments");
4515
4516         left = (Node *) linitial(args);
4517         right = (Node *) lsecond(args);
4518
4519         examine_variable(root, left, 0, vardata1);
4520         examine_variable(root, right, 0, vardata2);
4521
4522         if (vardata1->rel &&
4523                 bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4524                 *join_is_reversed = true;               /* var1 is on RHS */
4525         else if (vardata2->rel &&
4526                          bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4527                 *join_is_reversed = true;               /* var2 is on LHS */
4528         else
4529                 *join_is_reversed = false;
4530 }
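
/*
 * For example, if sjinfo describes "a LEFT JOIN b" and the clause is
 * written "b.y = a.x", then vardata1 (b.y) lies within syn_righthand and
 * *join_is_reversed is set to true; for "a.x = b.y" it remains false.
 */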
4531
4532 /*
4533  * examine_variable
4534  *              Try to look up statistical data about an expression.
4535  *              Fill in a VariableStatData struct to describe the expression.
4536  *
4537  * Inputs:
4538  *      root: the planner info
4539  *      node: the expression tree to examine
4540  *      varRelid: see specs for restriction selectivity functions
4541  *
4542  * Outputs: *vardata is filled as follows:
4543  *      var: the input expression (with any binary relabeling stripped, if
4544  *              it is or contains a variable; but otherwise the type is preserved)
4545  *      rel: RelOptInfo for relation containing variable; NULL if expression
4546  *              contains no Vars (NOTE this could point to a RelOptInfo of a
4547  *              subquery, not one in the current query).
4548  *      statsTuple: the pg_statistic entry for the variable, if one exists;
4549  *              otherwise NULL.
4550  *      freefunc: pointer to a function to release statsTuple with.
4551  *      vartype: exposed type of the expression; this should always match
4552  *              the declared input type of the operator we are estimating for.
4553  *      atttype, atttypmod: type data to pass to get_attstatsslot().  This is
4554  *              commonly the same as the exposed type of the variable argument,
4555  *              but can be different in binary-compatible-type cases.
4556  *      isunique: TRUE if we were able to match the var to a unique index or a
4557  *              single-column DISTINCT clause, implying its values are unique for
4558  *              this query.  (Caution: this should be trusted for statistical
4559  *              purposes only, since we do not check indimmediate nor verify that
4560  *              the exact same definition of equality applies.)
4561  *
4562  * Caller is responsible for doing ReleaseVariableStats() before exiting.
4563  */
4564 void
4565 examine_variable(PlannerInfo *root, Node *node, int varRelid,
4566                                  VariableStatData *vardata)
4567 {
4568         Node       *basenode;
4569         Relids          varnos;
4570         RelOptInfo *onerel;
4571
4572         /* Make sure we don't return dangling pointers in vardata */
4573         MemSet(vardata, 0, sizeof(VariableStatData));
4574
4575         /* Save the exposed type of the expression */
4576         vardata->vartype = exprType(node);
4577
4578         /* Look inside any binary-compatible relabeling */
4579
4580         if (IsA(node, RelabelType))
4581                 basenode = (Node *) ((RelabelType *) node)->arg;
4582         else
4583                 basenode = node;
4584
4585         /* Fast path for a simple Var */
4586
4587         if (IsA(basenode, Var) &&
4588                 (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4589         {
4590                 Var                *var = (Var *) basenode;
4591
4592                 /* Set up result fields other than the stats tuple */
4593                 vardata->var = basenode;        /* return Var without relabeling */
4594                 vardata->rel = find_base_rel(root, var->varno);
4595                 vardata->atttype = var->vartype;
4596                 vardata->atttypmod = var->vartypmod;
4597                 vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4598
4599                 /* Try to locate some stats */
4600                 examine_simple_variable(root, var, vardata);
4601
4602                 return;
4603         }
4604
4605         /*
4606          * Okay, it's a more complicated expression.  Determine variable
4607          * membership.  Note that when varRelid isn't zero, only vars of that
4608          * relation are considered "real" vars.
4609          */
4610         varnos = pull_varnos(basenode);
4611
4612         onerel = NULL;
4613
4614         switch (bms_membership(varnos))
4615         {
4616                 case BMS_EMPTY_SET:
4617                         /* No Vars at all ... must be pseudo-constant clause */
4618                         break;
4619                 case BMS_SINGLETON:
4620                         if (varRelid == 0 || bms_is_member(varRelid, varnos))
4621                         {
4622                                 onerel = find_base_rel(root,
4623                                            (varRelid ? varRelid : bms_singleton_member(varnos)));
4624                                 vardata->rel = onerel;
4625                                 node = basenode;        /* strip any relabeling */
4626                         }
4627                         /* else treat it as a constant */
4628                         break;
4629                 case BMS_MULTIPLE:
4630                         if (varRelid == 0)
4631                         {
4632                                 /* treat it as a variable of a join relation */
4633                                 vardata->rel = find_join_rel(root, varnos);
4634                                 node = basenode;        /* strip any relabeling */
4635                         }
4636                         else if (bms_is_member(varRelid, varnos))
4637                         {
4638                                 /* ignore the vars belonging to other relations */
4639                                 vardata->rel = find_base_rel(root, varRelid);
4640                                 node = basenode;        /* strip any relabeling */
4641                                 /* note: no point in expressional-index search here */
4642                         }
4643                         /* else treat it as a constant */
4644                         break;
4645         }
4646
4647         bms_free(varnos);
4648
4649         vardata->var = node;
4650         vardata->atttype = exprType(node);
4651         vardata->atttypmod = exprTypmod(node);
4652
4653         if (onerel)
4654         {
4655                 /*
4656                  * We have an expression in vars of a single relation.  Try to match
4657                  * it to expressional index columns, in hopes of finding some
4658                  * statistics.
4659                  *
4660                  * XXX it's conceivable that there are multiple matches with different
4661                  * index opfamilies; if so, we need to pick one that matches the
4662                  * operator we are estimating for.  FIXME later.
4663                  */
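                /*
                 * For example (illustration only): given a clause like
                 * lower(name) = 'foo' and an index on lower(name), the
                 * expression matches that index column, so the per-column
                 * statistics ANALYZE gathered for the index can stand in for
                 * statistics on the expression itself.
                 */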
4664                 ListCell   *ilist;
4665
4666                 foreach(ilist, onerel->indexlist)
4667                 {
4668                         IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4669                         ListCell   *indexpr_item;
4670                         int                     pos;
4671
4672                         indexpr_item = list_head(index->indexprs);
4673                         if (indexpr_item == NULL)
4674                                 continue;               /* no expressions here... */
4675
4676                         for (pos = 0; pos < index->ncolumns; pos++)
4677                         {
4678                                 if (index->indexkeys[pos] == 0)
4679                                 {
4680                                         Node       *indexkey;
4681
4682                                         if (indexpr_item == NULL)
4683                                                 elog(ERROR, "too few entries in indexprs list");
4684                                         indexkey = (Node *) lfirst(indexpr_item);
4685                                         if (indexkey && IsA(indexkey, RelabelType))
4686                                                 indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4687                                         if (equal(node, indexkey))
4688                                         {
4689                                                 /*
4690                                                  * Found a match ... is it a unique index? Tests here
4691                                                  * should match has_unique_index().
4692                                                  */
4693                                                 if (index->unique &&
4694                                                         index->ncolumns == 1 &&
4695                                                         (index->indpred == NIL || index->predOK))
4696                                                         vardata->isunique = true;
4697
4698                                                 /*
4699                                                  * Has it got stats?  We only consider stats for
4700                                                  * non-partial indexes, since partial indexes probably
4701                                                  * don't reflect whole-relation statistics; the above
4702                                                  * check for uniqueness is the only info we take from
4703                                                  * a partial index.
4704                                                  *
4705                                                  * An index stats hook, however, must make its own
4706                                                  * decisions about what to do with partial indexes.
4707                                                  */
4708                                                 if (get_index_stats_hook &&
4709                                                         (*get_index_stats_hook) (root, index->indexoid,
4710                                                                                                          pos + 1, vardata))
4711                                                 {
4712                                                         /*
4713                                                          * The hook took control of acquiring a stats
4714                                                          * tuple.  If it did supply a tuple, it'd better
4715                                                          * have supplied a freefunc.
4716                                                          */
4717                                                         if (HeapTupleIsValid(vardata->statsTuple) &&
4718                                                                 !vardata->freefunc)
4719                                                                 elog(ERROR, "no function provided to release variable stats with");
4720                                                 }
4721                                                 else if (index->indpred == NIL)
4722                                                 {
4723                                                         vardata->statsTuple =
4724                                                                 SearchSysCache3(STATRELATTINH,
4725                                                                                    ObjectIdGetDatum(index->indexoid),
4726                                                                                                 Int16GetDatum(pos + 1),
4727                                                                                                 BoolGetDatum(false));
4728                                                         vardata->freefunc = ReleaseSysCache;
4729                                                 }
4730                                                 if (vardata->statsTuple)
4731                                                         break;
4732                                         }
4733                                         indexpr_item = lnext(indexpr_item);
4734                                 }
4735                         }
4736                         if (vardata->statsTuple)
4737                                 break;
4738                 }
4739         }
4740 }
4741
4742 /*
4743  * examine_simple_variable
4744  *              Handle a simple Var for examine_variable
4745  *
4746  * This is split out as a subroutine so that we can recurse to deal with
4747  * Vars referencing subqueries.
4748  *
4749  * We already filled in all the fields of *vardata except for the stats tuple.
4750  */
4751 static void
4752 examine_simple_variable(PlannerInfo *root, Var *var,
4753                                                 VariableStatData *vardata)
4754 {
4755         RangeTblEntry *rte = root->simple_rte_array[var->varno];
4756
4757         Assert(IsA(rte, RangeTblEntry));
4758
4759         if (get_relation_stats_hook &&
4760                 (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
4761         {
4762                 /*
4763                  * The hook took control of acquiring a stats tuple.  If it did supply
4764                  * a tuple, it'd better have supplied a freefunc.
4765                  */
4766                 if (HeapTupleIsValid(vardata->statsTuple) &&
4767                         !vardata->freefunc)
4768                         elog(ERROR, "no function provided to release variable stats with");
4769         }
4770         else if (rte->rtekind == RTE_RELATION)
4771         {
4772                 /*
4773                  * Plain table or parent of an inheritance appendrel, so look up the
4774                  * column in pg_statistic
4775                  */
4776                 vardata->statsTuple = SearchSysCache3(STATRELATTINH,
4777                                                                                           ObjectIdGetDatum(rte->relid),
4778                                                                                           Int16GetDatum(var->varattno),
4779                                                                                           BoolGetDatum(rte->inh));
4780                 vardata->freefunc = ReleaseSysCache;
4781         }
4782         else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
4783         {
4784                 /*
4785                  * Plain subquery (not one that was converted to an appendrel).
4786                  */
4787                 Query      *subquery = rte->subquery;
4788                 RelOptInfo *rel;
4789                 TargetEntry *ste;
4790
4791                 /*
4792                  * Punt if it's a whole-row var rather than a plain column reference.
4793                  */
4794                 if (var->varattno == InvalidAttrNumber)
4795                         return;
4796
4797                 /*
4798                  * Punt if subquery uses set operations or GROUP BY, as these will
4799                  * mash underlying columns' stats beyond recognition.  (Set ops are
4800                  * particularly nasty; if we forged ahead, we would return stats
4801                  * relevant to only the leftmost subselect...)  DISTINCT is also
4802                  * problematic, but we check that later because there is a possibility
4803                  * of learning something even with it.
4804                  */
4805                 if (subquery->setOperations ||
4806                         subquery->groupClause)
4807                         return;
4808
4809                 /*
4810                  * OK, fetch RelOptInfo for subquery.  Note that we don't change the
4811                  * rel returned in vardata, since caller expects it to be a rel of the
4812                  * caller's query level.  Because we might already be recursing, we
4813                  * can't use that rel pointer either, but have to look up the Var's
4814                  * rel afresh.
4815                  */
4816                 rel = find_base_rel(root, var->varno);
4817
4818                 /* If the subquery hasn't been planned yet, we have to punt */
4819                 if (rel->subroot == NULL)
4820                         return;
4821                 Assert(IsA(rel->subroot, PlannerInfo));
4822
4823                 /*
4824                  * Switch our attention to the subquery as mangled by the planner. It
4825                  * was okay to look at the pre-planning version for the tests above,
4826                  * but now we need a Var that will refer to the subroot's live
4827                  * RelOptInfos.  For instance, if any subquery pullup happened during
4828                  * planning, Vars in the targetlist might have gotten replaced, and we
4829                  * need to see the replacement expressions.
4830                  */
4831                 subquery = rel->subroot->parse;
4832                 Assert(IsA(subquery, Query));
4833
4834                 /* Get the subquery output expression referenced by the upper Var */
4835                 ste = get_tle_by_resno(subquery->targetList, var->varattno);
4836                 if (ste == NULL || ste->resjunk)
4837                         elog(ERROR, "subquery %s does not have attribute %d",
4838                                  rte->eref->aliasname, var->varattno);
4839                 var = (Var *) ste->expr;
4840
4841                 /*
4842                  * If subquery uses DISTINCT, we can't make use of any stats for the
4843                  * variable ... but, if it's the only DISTINCT column, we are entitled
4844                  * to consider it unique.  We do the test this way so that it works
4845                  * for cases involving DISTINCT ON.
4846                  */
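                /*
                 * Example (illustration only): for a subquery such as
                 * "SELECT DISTINCT x FROM t", x may be treated as unique at
                 * the outer query level, even though we discard its other
                 * statistics.
                 */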
4847                 if (subquery->distinctClause)
4848                 {
4849                         if (list_length(subquery->distinctClause) == 1 &&
4850                                 targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
4851                                 vardata->isunique = true;
4852                         /* cannot go further */
4853                         return;
4854                 }
4855
4856                 /*
4857                  * If the sub-query originated from a view with the security_barrier
4858                  * attribute, we must not look at the variable's statistics, though it
4859                  * seems all right to notice the existence of a DISTINCT clause. So
4860                  * stop here.
4861                  *
4862                  * This is probably a harsher restriction than necessary; it's
4863                  * certainly OK for the selectivity estimator (which is a C function,
4864                  * and therefore omnipotent anyway) to look at the statistics.  But
4865                  * many selectivity estimators will happily *invoke the operator
4866                  * function* to try to work out a good estimate - and that's not OK.
4867                  * So for now, don't dig down for stats.
4868                  */
4869                 if (rte->security_barrier)
4870                         return;
4871
4872                 /* Can only handle a simple Var of subquery's query level */
4873                 if (var && IsA(var, Var) &&
4874                         var->varlevelsup == 0)
4875                 {
4876                         /*
4877                          * OK, recurse into the subquery.  Note that the original setting
4878                          * of vardata->isunique (which will surely be false) is left
4879                          * unchanged in this situation.  That's what we want, since even
4880                          * if the underlying column is unique, the subquery may have
4881                          * joined to other tables in a way that creates duplicates.
4882                          */
4883                         examine_simple_variable(rel->subroot, var, vardata);
4884                 }
4885         }
4886         else
4887         {
4888                 /*
4889                  * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE.  (We
4890                  * won't see RTE_JOIN here because join alias Vars have already been
4891                  * flattened.)  There's not much we can do with function outputs, but
4892                  * maybe someday try to be smarter about VALUES and/or CTEs.
4893                  */
4894         }
4895 }
4896
4897 /*
4898  * get_variable_numdistinct
4899  *        Estimate the number of distinct values of a variable.
4900  *
4901  * vardata: results of examine_variable
4902  * *isdefault: set to TRUE if the result is a default rather than based on
4903  * anything meaningful.
4904  *
4905  * NB: be careful to produce a positive integral result, since callers may
4906  * compare the result to exact integer counts, or might divide by it.
4907  */
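/*
 * Worked example (illustrative numbers, not from the original comment):
 * with stadistinct = -0.5 and rel->tuples = 1000, the relative-estimate path
 * below returns clamp_row_est(0.5 * 1000) = 500; with no statistics at all,
 * a 150-row table returns 150, since that is below DEFAULT_NUM_DISTINCT.
 */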
4908 double
4909 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
4910 {
4911         double          stadistinct;
4912         double          stanullfrac = 0.0;
4913         double          ntuples;
4914
4915         *isdefault = false;
4916
4917         /*
4918          * Determine the stadistinct value to use.  There are cases where we can
4919          * get an estimate even without a pg_statistic entry, or can get a better
4920          * value than is in pg_statistic.  Grab stanullfrac too if we can find it
4921          * (otherwise, assume no nulls, for lack of any better idea).
4922          */
4923         if (HeapTupleIsValid(vardata->statsTuple))
4924         {
4925                 /* Use the pg_statistic entry */
4926                 Form_pg_statistic stats;
4927
4928                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
4929                 stadistinct = stats->stadistinct;
4930                 stanullfrac = stats->stanullfrac;
4931         }
4932         else if (vardata->vartype == BOOLOID)
4933         {
4934                 /*
4935                  * Special-case boolean columns: presumably, two distinct values.
4936                  *
4937                  * Are there any other datatypes we should wire in special estimates
4938                  * for?
4939                  */
4940                 stadistinct = 2.0;
4941         }
4942         else
4943         {
4944                 /*
4945                  * We don't keep statistics for system columns, but in some cases we
4946                  * can infer distinctness anyway.
4947                  */
4948                 if (vardata->var && IsA(vardata->var, Var))
4949                 {
4950                         switch (((Var *) vardata->var)->varattno)
4951                         {
4952                                 case ObjectIdAttributeNumber:
4953                                 case SelfItemPointerAttributeNumber:
4954                                         stadistinct = -1.0; /* unique (and all non null) */
4955                                         break;
4956                                 case TableOidAttributeNumber:
4957                                         stadistinct = 1.0;      /* only 1 value */
4958                                         break;
4959                                 default:
4960                                         stadistinct = 0.0;      /* means "unknown" */
4961                                         break;
4962                         }
4963                 }
4964                 else
4965                         stadistinct = 0.0;      /* means "unknown" */
4966
4967                 /*
4968                  * XXX consider using estimate_num_groups on expressions?
4969                  */
4970         }
4971
4972         /*
4973          * If there is a unique index or DISTINCT clause for the variable, assume
4974          * it is unique no matter what pg_statistic says; the statistics could be
4975          * out of date, or we might have found a partial unique index that proves
4976          * the var is unique for this query.  However, we'd better still believe
4977          * the null-fraction statistic.
4978          */
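        /*
         * For instance (illustration only), stanullfrac = 0.1 gives
         * stadistinct = -0.9, i.e. 90% of the rows are expected to hold
         * distinct non-null values.
         */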
4979         if (vardata->isunique)
4980                 stadistinct = -1.0 * (1.0 - stanullfrac);
4981
4982         /*
4983          * If we had an absolute estimate, use that.
4984          */
4985         if (stadistinct > 0.0)
4986                 return clamp_row_est(stadistinct);
4987
4988         /*
4989          * Otherwise we need to get the relation size; punt if not available.
4990          */
4991         if (vardata->rel == NULL)
4992         {
4993                 *isdefault = true;
4994                 return DEFAULT_NUM_DISTINCT;
4995         }
4996         ntuples = vardata->rel->tuples;
4997         if (ntuples <= 0.0)
4998         {
4999                 *isdefault = true;
5000                 return DEFAULT_NUM_DISTINCT;
5001         }
5002
5003         /*
5004          * If we had a relative estimate, use that.
5005          */
5006         if (stadistinct < 0.0)
5007                 return clamp_row_est(-stadistinct * ntuples);
5008
5009         /*
5010          * With no data, estimate ndistinct = ntuples if the table is small, else
5011          * use default.  We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5012          * that the behavior isn't discontinuous.
5013          */
5014         if (ntuples < DEFAULT_NUM_DISTINCT)
5015                 return clamp_row_est(ntuples);
5016
5017         *isdefault = true;
5018         return DEFAULT_NUM_DISTINCT;
5019 }
5020
5021 /*
5022  * get_variable_range
5023  *              Estimate the minimum and maximum value of the specified variable.
5024  *              If successful, store values in *min and *max, and return TRUE.
5025  *              If no data available, return FALSE.
5026  *
5027  * sortop is the "<" comparison operator to use.  This should generally
5028  * be "<" not ">", as only the former is likely to be found in pg_statistic.
5029  */
5030 static bool
5031 get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
5032                                    Datum *min, Datum *max)
5033 {
5034         Datum           tmin = 0;
5035         Datum           tmax = 0;
5036         bool            have_data = false;
5037         int16           typLen;
5038         bool            typByVal;
5039         Datum      *values;
5040         int                     nvalues;
5041         int                     i;
5042
5043         /*
5044          * XXX It's very tempting to try to use the actual column min and max, if
5045          * we can get them relatively cheaply with an index probe.  However, since
5046          * this function is called many times during join planning, that could
5047          * have unpleasant effects on planning speed.  Need more investigation
5048          * before enabling this.
5049          */
5050 #ifdef NOT_USED
5051         if (get_actual_variable_range(root, vardata, sortop, min, max))
5052                 return true;
5053 #endif
5054
5055         if (!HeapTupleIsValid(vardata->statsTuple))
5056         {
5057                 /* no stats available, so default result */
5058                 return false;
5059         }
5060
5061         get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5062
5063         /*
5064          * If there is a histogram, grab the first and last values.
5065          *
5066          * If there is a histogram that is sorted with some other operator than
5067          * the one we want, fail --- this suggests that there is data we can't
5068          * use.
5069          */
5070         if (get_attstatsslot(vardata->statsTuple,
5071                                                  vardata->atttype, vardata->atttypmod,
5072                                                  STATISTIC_KIND_HISTOGRAM, sortop,
5073                                                  NULL,
5074                                                  &values, &nvalues,
5075                                                  NULL, NULL))
5076         {
5077                 if (nvalues > 0)
5078                 {
5079                         tmin = datumCopy(values[0], typByVal, typLen);
5080                         tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
5081                         have_data = true;
5082                 }
5083                 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
5084         }
5085         else if (get_attstatsslot(vardata->statsTuple,
5086                                                           vardata->atttype, vardata->atttypmod,
5087                                                           STATISTIC_KIND_HISTOGRAM, InvalidOid,
5088                                                           NULL,
5089                                                           &values, &nvalues,
5090                                                           NULL, NULL))
5091         {
5092                 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
5093                 return false;
5094         }
5095
5096         /*
5097          * If we have most-common-values info, look for extreme MCVs.  This is
5098          * needed even if we also have a histogram, since the histogram excludes
5099          * the MCVs.  However, usually the MCVs will not be the extreme values, so
5100          * avoid unnecessary data copying.
5101          */
5102         if (get_attstatsslot(vardata->statsTuple,
5103                                                  vardata->atttype, vardata->atttypmod,
5104                                                  STATISTIC_KIND_MCV, InvalidOid,
5105                                                  NULL,
5106                                                  &values, &nvalues,
5107                                                  NULL, NULL))
5108         {
5109                 bool            tmin_is_mcv = false;
5110                 bool            tmax_is_mcv = false;
5111                 FmgrInfo        opproc;
5112
5113                 fmgr_info(get_opcode(sortop), &opproc);
5114
5115                 for (i = 0; i < nvalues; i++)
5116                 {
5117                         if (!have_data)
5118                         {
5119                                 tmin = tmax = values[i];
5120                                 tmin_is_mcv = tmax_is_mcv = have_data = true;
5121                                 continue;
5122                         }
5123                         if (DatumGetBool(FunctionCall2Coll(&opproc,
5124                                                                                            DEFAULT_COLLATION_OID,
5125                                                                                            values[i], tmin)))
5126                         {
5127                                 tmin = values[i];
5128                                 tmin_is_mcv = true;
5129                         }
5130                         if (DatumGetBool(FunctionCall2Coll(&opproc,
5131                                                                                            DEFAULT_COLLATION_OID,
5132                                                                                            tmax, values[i])))
5133                         {
5134                                 tmax = values[i];
5135                                 tmax_is_mcv = true;
5136                         }
5137                 }
5138                 if (tmin_is_mcv)
5139                         tmin = datumCopy(tmin, typByVal, typLen);
5140                 if (tmax_is_mcv)
5141                         tmax = datumCopy(tmax, typByVal, typLen);
5142                 free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
5143         }
5144
5145         *min = tmin;
5146         *max = tmax;
5147         return have_data;
5148 }
5149
5150
5151 /*
5152  * get_actual_variable_range
5153  *              Attempt to identify the current *actual* minimum and/or maximum
5154  *              of the specified variable, by looking for a suitable btree index
5155  *              and fetching its low and/or high values.
5156  *              If successful, store values in *min and *max, and return TRUE.
5157  *              (Either pointer can be NULL if that endpoint isn't needed.)
5158  *              If no data available, return FALSE.
5159  *
5160  * sortop is the "<" comparison operator to use.
5161  */
5162 static bool
5163 get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5164                                                   Oid sortop,
5165                                                   Datum *min, Datum *max)
5166 {
5167         bool            have_data = false;
5168         RelOptInfo *rel = vardata->rel;
5169         RangeTblEntry *rte;
5170         ListCell   *lc;
5171
5172         /* No hope if no relation or it doesn't have indexes */
5173         if (rel == NULL || rel->indexlist == NIL)
5174                 return false;
5175         /* If it has indexes it must be a plain relation */
5176         rte = root->simple_rte_array[rel->relid];
5177         Assert(rte->rtekind == RTE_RELATION);
5178
5179         /* Search through the indexes to see if any match our problem */
5180         foreach(lc, rel->indexlist)
5181         {
5182                 IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
5183                 ScanDirection indexscandir;
5184
5185                 /* Ignore non-btree indexes */
5186                 if (index->relam != BTREE_AM_OID)
5187                         continue;
5188
5189                 /*
5190                  * Ignore partial indexes --- we only want stats that cover the entire
5191                  * relation.
5192                  */
5193                 if (index->indpred != NIL)
5194                         continue;
5195
5196                 /*
5197                  * The index list might include hypothetical indexes inserted by a
5198                  * get_relation_info hook --- don't try to access them.
5199                  */
5200                 if (index->hypothetical)
5201                         continue;
5202
5203                 /*
5204                  * The first index column must match the desired variable and sort
5205                  * operator --- but we can use a descending-order index.
5206                  */
5207                 if (!match_index_to_operand(vardata->var, 0, index))
5208                         continue;
5209                 switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
5210                 {
5211                         case BTLessStrategyNumber:
5212                                 if (index->reverse_sort[0])
5213                                         indexscandir = BackwardScanDirection;
5214                                 else
5215                                         indexscandir = ForwardScanDirection;
5216                                 break;
5217                         case BTGreaterStrategyNumber:
5218                                 if (index->reverse_sort[0])
5219                                         indexscandir = ForwardScanDirection;
5220                                 else
5221                                         indexscandir = BackwardScanDirection;
5222                                 break;
5223                         default:
5224                                 /* index doesn't match the sortop */
5225                                 continue;
5226                 }
5227
5228                 /*
5229                  * Found a suitable index to extract data from.  We'll need an EState
5230                  * and a bunch of other infrastructure.
5231                  */
5232                 {
5233                         EState     *estate;
5234                         ExprContext *econtext;
5235                         MemoryContext tmpcontext;
5236                         MemoryContext oldcontext;
5237                         Relation        heapRel;
5238                         Relation        indexRel;
5239                         IndexInfo  *indexInfo;
5240                         TupleTableSlot *slot;
5241                         int16           typLen;
5242                         bool            typByVal;
5243                         ScanKeyData scankeys[1];
5244                         IndexScanDesc index_scan;
5245                         HeapTuple       tup;
5246                         Datum           values[INDEX_MAX_KEYS];
5247                         bool            isnull[INDEX_MAX_KEYS];
5248                         SnapshotData SnapshotDirty;
5249
5250                         estate = CreateExecutorState();
5251                         econtext = GetPerTupleExprContext(estate);
5252                         /* Make sure any cruft is generated in the econtext's memory */
5253                         tmpcontext = econtext->ecxt_per_tuple_memory;
5254                         oldcontext = MemoryContextSwitchTo(tmpcontext);
5255
5256                         /*
5257                          * Open the table and index so we can read from them.  We should
5258                          * already have at least AccessShareLock on the table, but not
5259                          * necessarily on the index.
5260                          */
5261                         heapRel = heap_open(rte->relid, NoLock);
5262                         indexRel = index_open(index->indexoid, AccessShareLock);
5263
5264                         /* extract index key information from the index's pg_index info */
5265                         indexInfo = BuildIndexInfo(indexRel);
5266
5267                         /* set up a tuple slot, datum type info, and a dirty snapshot */
5268                         slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
5269                         econtext->ecxt_scantuple = slot;
5270                         get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5271                         InitDirtySnapshot(SnapshotDirty);
5272
5273                         /* set up an IS NOT NULL scan key so that we ignore nulls */
5274                         ScanKeyEntryInitialize(&scankeys[0],
5275                                                                    SK_ISNULL | SK_SEARCHNOTNULL,
5276                                                                    1,   /* index col to scan */
5277                                                                    InvalidStrategy,             /* no strategy */
5278                                                                    InvalidOid,  /* no strategy subtype */
5279                                                                    InvalidOid,  /* no collation */
5280                                                                    InvalidOid,  /* no reg proc for this */
5281                                                                    (Datum) 0);  /* constant */
5282
5283                         have_data = true;
5284
5285                         /* If min is requested ... */
5286                         if (min)
5287                         {
5288                                 /*
5289                                  * In principle, we should scan the index with our current
5290                                  * active snapshot, which is the best approximation we've got
5291                                  * to what the query will see when executed.  But that won't
5292                                  * be exact if a new snap is taken before running the query,
5293                                  * and it can be very expensive if a lot of uncommitted rows
5294                                  * exist at the end of the index (because we'll laboriously
5295                                  * fetch each one and reject it).  What seems like a good
5296                                  * compromise is to use SnapshotDirty.  That will accept
5297                                  * uncommitted rows, and thus avoid fetching multiple heap
5298                                  * tuples in this scenario.  On the other hand, it will reject
5299                                  * known-dead rows, and thus not give a bogus answer when the
5300                                  * extreme value has been deleted; that case motivates not
5301                                  * using SnapshotAny here.
5302                                  */
5303                                 index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
5304                                                                                          1, 0);
5305                                 index_rescan(index_scan, scankeys, 1, NULL, 0);
5306
5307                                 /* Fetch first tuple in sortop's direction */
5308                                 if ((tup = index_getnext(index_scan,
5309                                                                                  indexscandir)) != NULL)
5310                                 {
5311                                         /* Extract the index column values from the heap tuple */
5312                                         ExecStoreTuple(tup, slot, InvalidBuffer, false);
5313                                         FormIndexDatum(indexInfo, slot, estate,
5314                                                                    values, isnull);
5315
5316                                         /* Shouldn't have got a null, but be careful */
5317                                         if (isnull[0])
5318                                                 elog(ERROR, "found unexpected null value in index \"%s\"",
5319                                                          RelationGetRelationName(indexRel));
5320
5321                                         /* Copy the index column value out to caller's context */
5322                                         MemoryContextSwitchTo(oldcontext);
5323                                         *min = datumCopy(values[0], typByVal, typLen);
5324                                         MemoryContextSwitchTo(tmpcontext);
5325                                 }
5326                                 else
5327                                         have_data = false;
5328
5329                                 index_endscan(index_scan);
5330                         }
5331
5332                         /* If max is requested, and we didn't already find the index to be empty */
5333                         if (max && have_data)
5334                         {
5335                                 index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
5336                                                                                          1, 0);
5337                                 index_rescan(index_scan, scankeys, 1, NULL, 0);
5338
5339                                 /* Fetch first tuple in reverse direction */
5340                                 if ((tup = index_getnext(index_scan,
5341                                                                                  -indexscandir)) != NULL)
5342                                 {
5343                                         /* Extract the index column values from the heap tuple */
5344                                         ExecStoreTuple(tup, slot, InvalidBuffer, false);
5345                                         FormIndexDatum(indexInfo, slot, estate,
5346                                                                    values, isnull);
5347
5348                                         /* Shouldn't have got a null, but be careful */
5349                                         if (isnull[0])
5350                                                 elog(ERROR, "found unexpected null value in index \"%s\"",
5351                                                          RelationGetRelationName(indexRel));
5352
5353                                         /* Copy the index column value out to caller's context */
5354                                         MemoryContextSwitchTo(oldcontext);
5355                                         *max = datumCopy(values[0], typByVal, typLen);
5356                                         MemoryContextSwitchTo(tmpcontext);
5357                                 }
5358                                 else
5359                                         have_data = false;
5360
5361                                 index_endscan(index_scan);
5362                         }
5363
5364                         /* Clean everything up */
5365                         ExecDropSingleTupleTableSlot(slot);
5366
5367                         index_close(indexRel, AccessShareLock);
5368                         heap_close(heapRel, NoLock);
5369
5370                         MemoryContextSwitchTo(oldcontext);
5371                         FreeExecutorState(estate);
5372
5373                         /* And we're done */
5374                         break;
5375                 }
5376         }
5377
5378         return have_data;
5379 }
5380
5381 /*
5382  * find_join_input_rel
5383  *              Look up the input relation for a join.
5384  *
5385  * We assume that the input relation's RelOptInfo must have been constructed
5386  * already.
5387  */
5388 static RelOptInfo *
5389 find_join_input_rel(PlannerInfo *root, Relids relids)
5390 {
5391         RelOptInfo *rel = NULL;
5392
5393         switch (bms_membership(relids))
5394         {
5395                 case BMS_EMPTY_SET:
5396                         /* should not happen */
5397                         break;
5398                 case BMS_SINGLETON:
5399                         rel = find_base_rel(root, bms_singleton_member(relids));
5400                         break;
5401                 case BMS_MULTIPLE:
5402                         rel = find_join_rel(root, relids);
5403                         break;
5404         }
5405
5406         if (rel == NULL)
5407                 elog(ERROR, "could not find RelOptInfo for given relids");
5408
5409         return rel;
5410 }
5411
5412
5413 /*-------------------------------------------------------------------------
5414  *
5415  * Pattern analysis functions
5416  *
5417  * These routines support analysis of LIKE and regular-expression patterns
5418  * by the planner/optimizer.  It's important that they agree with the
5419  * regular-expression code in backend/regex/ and the LIKE code in
5420  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
5421  * must be conservative: if we report a string longer than the true fixed
5422  * prefix, the query may produce actually wrong answers, rather than just
5423  * getting a bad selectivity estimate!
5424  *
5425  * Note that the prefix-analysis functions are called from
5426  * backend/optimizer/path/indxpath.c as well as from routines in this file.
5427  *
5428  *-------------------------------------------------------------------------
5429  */
5430
5431 /*
5432  * Check whether char is a letter (and, hence, subject to case-folding)
5433  *
5434  * In multibyte character sets or with ICU, we can't use isalpha, and it does
5435  * not seem worth trying to convert to wchar_t to use iswalpha.  Instead, just
5436  * assume any multibyte char is potentially case-varying.
5437  */
5438 static int
5439 pattern_char_isalpha(char c, bool is_multibyte,
5440                                          pg_locale_t locale, bool locale_is_c)
5441 {
5442         if (locale_is_c)
5443                 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
5444         else if (is_multibyte && IS_HIGHBIT_SET(c))
5445                 return true;
5446         else if (locale && locale->provider == COLLPROVIDER_ICU)
5447                 return IS_HIGHBIT_SET(c) ? true : false;
5448 #ifdef HAVE_LOCALE_T
5449         else if (locale && locale->provider == COLLPROVIDER_LIBC)
5450                 return isalpha_l((unsigned char) c, locale->info.lt);
5451 #endif
5452         else
5453                 return isalpha((unsigned char) c);
5454 }
5455
5456 /*
5457  * Extract the fixed prefix, if any, for a pattern.
5458  *
5459  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
5460  *      or to NULL if no fixed prefix exists for the pattern.
5461  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
5462  *      selectivity of the remainder of the pattern (without any fixed prefix).
5463  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
5464  *
5465  * The return value distinguishes no fixed prefix, a partial prefix,
5466  * or an exact-match-only pattern.
5467  */
5468
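/*
 * For example (illustration only): the LIKE pattern 'abc%def' has fixed
 * prefix 'abc' (Pattern_Prefix_Partial), with *rest_selec estimated from the
 * remainder '%def', while the pattern 'abc' contains no wildcards and yields
 * Pattern_Prefix_Exact.
 */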
5469 static Pattern_Prefix_Status
5470 like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
5471                                   Const **prefix_const, Selectivity *rest_selec)
5472 {
5473         char       *match;
5474         char       *patt;
5475         int                     pattlen;
5476         Oid                     typeid = patt_const->consttype;
5477         int                     pos,
5478                                 match_pos;
5479         bool            is_multibyte = (pg_database_encoding_max_length() > 1);
5480         pg_locale_t locale = 0;
5481         bool            locale_is_c = false;
5482
5483         /* the right-hand const is type text or bytea */
5484         Assert(typeid == BYTEAOID || typeid == TEXTOID);
5485
5486         if (case_insensitive)
5487         {
5488                 if (typeid == BYTEAOID)
5489                         ereport(ERROR,
5490                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5491                         errmsg("case insensitive matching not supported on type bytea")));
5492
5493                 /* If case-insensitive, we need locale info */
5494                 if (lc_ctype_is_c(collation))
5495                         locale_is_c = true;
5496                 else if (collation != DEFAULT_COLLATION_OID)
5497                 {
5498                         if (!OidIsValid(collation))
5499                         {
5500                                 /*
5501                                  * This typically means that the parser could not resolve a
5502                                  * conflict of implicit collations, so report it that way.
5503                                  */
5504                                 ereport(ERROR,
5505                                                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
5506                                                  errmsg("could not determine which collation to use for ILIKE"),
5507                                                  errhint("Use the COLLATE clause to set the collation explicitly.")));
5508                         }
5509                         locale = pg_newlocale_from_collation(collation);
5510                 }
5511         }
5512
5513         if (typeid != BYTEAOID)
5514         {
5515                 patt = TextDatumGetCString(patt_const->constvalue);
5516                 pattlen = strlen(patt);
5517         }
5518         else
5519         {
5520                 bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
5521
5522                 pattlen = VARSIZE_ANY_EXHDR(bstr);
5523                 patt = (char *) palloc(pattlen);
5524                 memcpy(patt, VARDATA_ANY(bstr), pattlen);
5525                 Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
5526         }
5527
5528         match = palloc(pattlen + 1);
5529         match_pos = 0;
5530         for (pos = 0; pos < pattlen; pos++)
5531         {
5532                 /* % and _ are wildcard characters in LIKE */
5533                 if (patt[pos] == '%' ||
5534                         patt[pos] == '_')
5535                         break;
5536
5537                 /* Backslash escapes the next character */
5538                 if (patt[pos] == '\\')
5539                 {
5540                         pos++;
5541                         if (pos >= pattlen)
5542                                 break;
5543                 }
5544
5545                 /* Stop if case-varying character (it's sort of a wildcard) */
5546                 if (case_insensitive &&
5547                   pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
5548                         break;
5549
5550                 match[match_pos++] = patt[pos];
5551         }
5552
5553         match[match_pos] = '\0';
5554
5555         if (typeid != BYTEAOID)
5556                 *prefix_const = string_to_const(match, typeid);
5557         else
5558                 *prefix_const = string_to_bytea_const(match, match_pos);
5559
5560         if (rest_selec != NULL)
5561                 *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
5562                                                                            case_insensitive);
5563
5564         pfree(patt);
5565         pfree(match);
5566
5567         /* in LIKE, an empty pattern is an exact match! */
5568         if (pos == pattlen)
5569                 return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
5570
5571         if (match_pos > 0)
5572                 return Pattern_Prefix_Partial;
5573
5574         return Pattern_Prefix_None;
5575 }
5576
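/*
 * For example (illustration only): the regex '^abc' yields the fixed prefix
 * 'abc' (Pattern_Prefix_Partial), while '^abc$' can match only the exact
 * string 'abc' and so yields Pattern_Prefix_Exact.
 */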
5577 static Pattern_Prefix_Status
5578 regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
5579                                    Const **prefix_const, Selectivity *rest_selec)
5580 {
5581         Oid                     typeid = patt_const->consttype;
5582         char       *prefix;
5583         bool            exact;
5584
5585         /*
5586          * This should be unnecessary, since there are no bytea regex operators
5587          * defined.  Note that the rest of this function has *not* been made safe
5588          * for binary (possibly NUL-containing) strings.
5589          */
5590         if (typeid == BYTEAOID)
5591                 ereport(ERROR,
5592                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5593                  errmsg("regular-expression matching not supported on type bytea")));
5594
5595         /* Use the regexp machinery to extract the prefix, if any */
5596         prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
5597                                                                  case_insensitive, collation,
5598                                                                  &exact);
5599
5600         if (prefix == NULL)
5601         {
5602                 *prefix_const = NULL;
5603
5604                 if (rest_selec != NULL)
5605                 {
5606                         char       *patt = TextDatumGetCString(patt_const->constvalue);
5607
5608                         *rest_selec = regex_selectivity(patt, strlen(patt),
5609                                                                                         case_insensitive,
5610                                                                                         0);
5611                         pfree(patt);
5612                 }
5613
5614                 return Pattern_Prefix_None;
5615         }
5616
5617         *prefix_const = string_to_const(prefix, typeid);
5618
5619         if (rest_selec != NULL)
5620         {
5621                 if (exact)
5622                 {
5623                         /* Exact match, so there's no additional selectivity */
5624                         *rest_selec = 1.0;
5625                 }
5626                 else
5627                 {
5628                         char       *patt = TextDatumGetCString(patt_const->constvalue);
5629
5630                         *rest_selec = regex_selectivity(patt, strlen(patt),
5631                                                                                         case_insensitive,
5632                                                                                         strlen(prefix));
5633                         pfree(patt);
5634                 }
5635         }
5636
5637         pfree(prefix);
5638
5639         if (exact)
5640                 return Pattern_Prefix_Exact;    /* pattern specifies exact match */
5641         else
5642                 return Pattern_Prefix_Partial;
5643 }
5644
5645 Pattern_Prefix_Status
5646 pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
5647                                          Const **prefix, Selectivity *rest_selec)
5648 {
5649         Pattern_Prefix_Status result;
5650
5651         switch (ptype)
5652         {
5653                 case Pattern_Type_Like:
5654                         result = like_fixed_prefix(patt, false, collation,
5655                                                                            prefix, rest_selec);
5656                         break;
5657                 case Pattern_Type_Like_IC:
5658                         result = like_fixed_prefix(patt, true, collation,
5659                                                                            prefix, rest_selec);
5660                         break;
5661                 case Pattern_Type_Regex:
5662                         result = regex_fixed_prefix(patt, false, collation,
5663                                                                                 prefix, rest_selec);
5664                         break;
5665                 case Pattern_Type_Regex_IC:
5666                         result = regex_fixed_prefix(patt, true, collation,
5667                                                                                 prefix, rest_selec);
5668                         break;
5669                 default:
5670                         elog(ERROR, "unrecognized ptype: %d", (int) ptype);
5671                         result = Pattern_Prefix_None;           /* keep compiler quiet */
5672                         break;
5673         }
5674         return result;
5675 }
5676
5677 /*
5678  * Estimate the selectivity of a fixed prefix for a pattern match.
5679  *
5680  * A fixed prefix "foo" is estimated as the selectivity of the expression
5681  * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
5682  *
5683  * The selectivity estimate is with respect to the portion of the column
5684  * population represented by the histogram --- the caller must fold this
5685  * together with info about MCVs and NULLs.
5686  *
5687  * We use the >= and < operators from the specified btree opfamily to do the
5688  * estimation.  The given variable and Const must be of the associated
5689  * datatype.
5690  *
5691  * XXX Note: we make use of the upper bound to estimate operator selectivity
5692  * even if the locale is such that we cannot rely on the upper-bound string.
5693  * The selectivity only needs to be approximately right anyway, so it seems
5694  * more useful to use the upper-bound code than not.
5695  */
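/*
 * Illustrative example (assumed behavior of make_greater_string for a simple
 * ASCII prefix): for prefix 'abc', the greater string would typically be
 * 'abd', so the estimate is roughly
 *		sel(var >= 'abc') + sel(var < 'abd') - 1.0
 * clamped from below by the estimated selectivity of var = 'abc'.
 */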
5696 static Selectivity
5697 prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
5698                                    Oid vartype, Oid opfamily, Const *prefixcon)
5699 {
5700         Selectivity prefixsel;
5701         Oid                     cmpopr;
5702         FmgrInfo        opproc;
5703         Const      *greaterstrcon;
5704         Selectivity eq_sel;
5705
5706         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5707                                                                  BTGreaterEqualStrategyNumber);
5708         if (cmpopr == InvalidOid)
5709                 elog(ERROR, "no >= operator for opfamily %u", opfamily);
5710         fmgr_info(get_opcode(cmpopr), &opproc);
5711
5712         prefixsel = ineq_histogram_selectivity(root, vardata, &opproc, true,
5713                                                                                    prefixcon->constvalue,
5714                                                                                    prefixcon->consttype);
5715
5716         if (prefixsel < 0.0)
5717         {
5718                 /* No histogram is present ... return a suitable default estimate */
5719                 return DEFAULT_MATCH_SEL;
5720         }
5721
5722         /*-------
5723          * If we can create a string larger than the prefix, say
5724          *      "x < greaterstr".
5725          *-------
5726          */
5727         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5728                                                                  BTLessStrategyNumber);
5729         if (cmpopr == InvalidOid)
5730                 elog(ERROR, "no < operator for opfamily %u", opfamily);
5731         fmgr_info(get_opcode(cmpopr), &opproc);
5732         greaterstrcon = make_greater_string(prefixcon, &opproc,
5733                                                                                 DEFAULT_COLLATION_OID);
5734         if (greaterstrcon)
5735         {
5736                 Selectivity topsel;
5737
5738                 topsel = ineq_histogram_selectivity(root, vardata, &opproc, false,
5739                                                                                         greaterstrcon->constvalue,
5740                                                                                         greaterstrcon->consttype);
5741
5742                 /* ineq_histogram_selectivity worked before, it shouldn't fail now */
5743                 Assert(topsel >= 0.0);
5744
5745                 /*
5746                  * Merge the two selectivities in the same way as for a range query
5747                  * (see clauselist_selectivity()).  Note that we don't need to worry
5748                  * about double-exclusion of nulls, since ineq_histogram_selectivity
5749                  * doesn't count those anyway.
5750                  */
5751                 prefixsel = topsel + prefixsel - 1.0;
5752         }
5753
5754         /*
5755          * If the prefix is long then the two bounding values might be too close
5756          * together for the histogram to distinguish them usefully, resulting in a
5757          * zero estimate (plus or minus roundoff error). To avoid returning a
5758          * ridiculously small estimate, compute the estimated selectivity for
5759          * "variable = 'foo'", and clamp to that. (Obviously, the resultant
5760          * estimate should be at least that.)
5761          *
5762          * We apply this even if we couldn't make a greater string.  That case
5763          * suggests that the prefix is near the maximum possible, and therefore
5764          * probably off the end of the histogram, so the >= condition likely gave
5765          * us a very small estimate; we still need to clamp.
5766          */
5767         cmpopr = get_opfamily_member(opfamily, vartype, vartype,
5768                                                                  BTEqualStrategyNumber);
5769         if (cmpopr == InvalidOid)
5770                 elog(ERROR, "no = operator for opfamily %u", opfamily);
5771         eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
5772                                                   false, true);
5773
5774         prefixsel = Max(prefixsel, eq_sel);
5775
5776         return prefixsel;
5777 }
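
/*
 * Worked example for prefix_selectivity() above (editorial illustration only;
 * the numbers are hypothetical, not taken from any real statistics).  Suppose
 * the histogram gives
 *              P(var >= 'foo') = 0.42          (the >= bound on the prefix)
 *              P(var <  'fop') = 0.61          (make_greater_string() result)
 * Combining them range-style gives 0.61 + 0.42 - 1.0 = 0.03.  If the two
 * bounds were so close together that this came out as roughly 0.0, the final
 * clamp against var_eq_const()'s estimate for "var = 'foo'" (say 0.001) would
 * make us return 0.001 instead of a near-zero value.
 */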
5778
5779
5780 /*
5781  * Estimate the selectivity of a pattern of the specified type.
5782  * Note that any fixed prefix of the pattern will have been removed already,
5783  * so actually we may be looking at just a fragment of the pattern.
5784  *
5785  * For now, we use a very simplistic approach: fixed characters reduce the
5786  * selectivity a good deal, character ranges reduce it a little,
5787  * wildcards (such as % for LIKE or .* for regex) increase it.
5788  */
5789
5790 #define FIXED_CHAR_SEL  0.20    /* about 1/5 */
5791 #define CHAR_RANGE_SEL  0.25
5792 #define ANY_CHAR_SEL    0.9             /* not 1, since it won't match end-of-string */
5793 #define FULL_WILDCARD_SEL 5.0
5794 #define PARTIAL_WILDCARD_SEL 2.0
5795
5796 static Selectivity
5797 like_selectivity(const char *patt, int pattlen, bool case_insensitive)
5798 {
5799         Selectivity sel = 1.0;
5800         int                     pos;
5801
5802         /* Skip any leading wildcard; it's already factored into initial sel */
5803         for (pos = 0; pos < pattlen; pos++)
5804         {
5805                 if (patt[pos] != '%' && patt[pos] != '_')
5806                         break;
5807         }
5808
5809         for (; pos < pattlen; pos++)
5810         {
5811                 /* % and _ are wildcard characters in LIKE */
5812                 if (patt[pos] == '%')
5813                         sel *= FULL_WILDCARD_SEL;
5814                 else if (patt[pos] == '_')
5815                         sel *= ANY_CHAR_SEL;
5816                 else if (patt[pos] == '\\')
5817                 {
5818                         /* Backslash quotes the next character */
5819                         pos++;
5820                         if (pos >= pattlen)
5821                                 break;
5822                         sel *= FIXED_CHAR_SEL;
5823                 }
5824                 else
5825                         sel *= FIXED_CHAR_SEL;
5826         }
5827         /* Could get sel > 1 if multiple wildcards */
5828         if (sel > 1.0)
5829                 sel = 1.0;
5830         return sel;
5831 }
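
/*
 * Worked example for like_selectivity() above (editorial illustration; the
 * pattern is hypothetical).  For the fragment "%abc_d%" (the fixed prefix
 * having already been stripped by the caller), the leading '%' is skipped and
 * the loop computes
 *              sel = FIXED_CHAR_SEL^3 * ANY_CHAR_SEL * FIXED_CHAR_SEL * FULL_WILDCARD_SEL
 *                  = 0.2^3 * 0.9 * 0.2 * 5.0
 *                  = 0.0072
 * whereas a fragment such as "a%%" would compute 0.2 * 5.0 * 5.0 = 5.0 and be
 * clamped back to 1.0 by the final check.
 */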
5832
5833 static Selectivity
5834 regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
5835 {
5836         Selectivity sel = 1.0;
5837         int                     paren_depth = 0;
5838         int                     paren_pos = 0;  /* dummy init to keep compiler quiet */
5839         int                     pos;
5840
5841         for (pos = 0; pos < pattlen; pos++)
5842         {
5843                 if (patt[pos] == '(')
5844                 {
5845                         if (paren_depth == 0)
5846                                 paren_pos = pos;        /* remember start of parenthesized item */
5847                         paren_depth++;
5848                 }
5849                 else if (patt[pos] == ')' && paren_depth > 0)
5850                 {
5851                         paren_depth--;
5852                         if (paren_depth == 0)
5853                                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
5854                                                                                          pos - (paren_pos + 1),
5855                                                                                          case_insensitive);
5856                 }
5857                 else if (patt[pos] == '|' && paren_depth == 0)
5858                 {
5859                         /*
5860                          * If unquoted | is present at paren level 0 in pattern, we have
5861                          * multiple alternatives; sum their probabilities.
5862                          */
5863                         sel += regex_selectivity_sub(patt + (pos + 1),
5864                                                                                  pattlen - (pos + 1),
5865                                                                                  case_insensitive);
5866                         break;                          /* rest of pattern is now processed */
5867                 }
5868                 else if (patt[pos] == '[')
5869                 {
5870                         bool            negclass = false;
5871
5872                         if (patt[++pos] == '^')
5873                         {
5874                                 negclass = true;
5875                                 pos++;
5876                         }
5877                         if (patt[pos] == ']')           /* ']' at start of class is not
5878                                                                                  * special */
5879                                 pos++;
5880                         while (pos < pattlen && patt[pos] != ']')
5881                                 pos++;
5882                         if (paren_depth == 0)
5883                                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
5884                 }
5885                 else if (patt[pos] == '.')
5886                 {
5887                         if (paren_depth == 0)
5888                                 sel *= ANY_CHAR_SEL;
5889                 }
5890                 else if (patt[pos] == '*' ||
5891                                  patt[pos] == '?' ||
5892                                  patt[pos] == '+')
5893                 {
5894                         /* Ought to be smarter about quantifiers... */
5895                         if (paren_depth == 0)
5896                                 sel *= PARTIAL_WILDCARD_SEL;
5897                 }
5898                 else if (patt[pos] == '{')
5899                 {
5900                         while (pos < pattlen && patt[pos] != '}')
5901                                 pos++;
5902                         if (paren_depth == 0)
5903                                 sel *= PARTIAL_WILDCARD_SEL;
5904                 }
5905                 else if (patt[pos] == '\\')
5906                 {
5907                         /* backslash quotes the next character */
5908                         pos++;
5909                         if (pos >= pattlen)
5910                                 break;
5911                         if (paren_depth == 0)
5912                                 sel *= FIXED_CHAR_SEL;
5913                 }
5914                 else
5915                 {
5916                         if (paren_depth == 0)
5917                                 sel *= FIXED_CHAR_SEL;
5918                 }
5919         }
5920         /* Could get sel > 1 if multiple wildcards */
5921         if (sel > 1.0)
5922                 sel = 1.0;
5923         return sel;
5924 }
5925
5926 static Selectivity
5927 regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
5928                                   int fixed_prefix_len)
5929 {
5930         Selectivity sel;
5931
5932         /* If patt doesn't end with $, consider it to have a trailing wildcard */
5933         if (pattlen > 0 && patt[pattlen - 1] == '$' &&
5934                 (pattlen == 1 || patt[pattlen - 2] != '\\'))
5935         {
5936                 /* has trailing $ */
5937                 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
5938         }
5939         else
5940         {
5941                 /* no trailing $ */
5942                 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
5943                 sel *= FULL_WILDCARD_SEL;
5944         }
5945
5946         /* If there's a fixed prefix, discount its selectivity */
5947         if (fixed_prefix_len > 0)
5948                 sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
5949
5950         /* Make sure result stays in range */
5951         CLAMP_PROBABILITY(sel);
5952         return sel;
5953 }
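
/*
 * Worked examples for regex_selectivity() above (editorial illustration; the
 * pattern fragments are hypothetical).  With fixed_prefix_len = 0:
 *              "x[0-9]+$"  ->  0.2 * 0.25 * 2.0          = 0.1
 *                              (fixed char, char class, quantifier; the
 *                               trailing $ means no FULL_WILDCARD_SEL factor)
 *              "ab|cd"     ->  (0.2*0.2 + 0.2*0.2) * 5.0 = 0.4
 *                              (alternatives summed at paren level 0, then the
 *                               implicit trailing wildcard applied)
 * A nonzero fixed_prefix_len then divides the result by FIXED_CHAR_SEL once
 * per prefix character (the discount step above).
 */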
5954
5955
5956 /*
5957  * For bytea, the increment function need only increment the current byte
5958  * (there are no multibyte characters to worry about).
5959  */
5960 static bool
5961 byte_increment(unsigned char *ptr, int len)
5962 {
5963         if (*ptr >= 255)
5964                 return false;
5965         (*ptr)++;
5966         return true;
5967 }
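
/*
 * For instance (editorial note), byte_increment() turns 0x41 into 0x42 and
 * returns true, while for 0xFF it returns false, telling make_greater_string()
 * below to drop that byte and try incrementing the one to its left.
 */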
5968
5969 /*
5970  * Try to generate a string greater than the given string or any
5971  * string it is a prefix of.  If successful, return a palloc'd string
5972  * in the form of a Const node; else return NULL.
5973  *
5974  * The caller must provide the appropriate "less than" comparison function
5975  * for testing the strings, along with the collation to use.
5976  *
5977  * The key requirement here is that given a prefix string, say "foo",
5978  * we must be able to generate another string "fop" that is greater than
5979  * all strings "foobar" starting with "foo".  We can test that the string we
5980  * generated is greater than the prefix string, but in non-C collations that
5981  * is not a bulletproof guarantee that no extension of the prefix sorts after
5982  * it; for example, "foo " is less than "foo!", yet it is not clear that a
5983  * "dictionary" sort ordering will consider "foo!" less than "foo bar".
5984  * CAUTION: for this reason, this function should be used only for estimation
5985  * purposes when working in a non-C collation.
5986  *
5987  * To try to catch most cases where an extended string might otherwise sort
5988  * before the result value, we determine which of the strings "Z", "z", "y",
5989  * and "9" is seen as largest by the collation, and append that to the given
5990  * prefix before trying to find a string that compares as larger.
5991  *
5992  * To search for a greater string, we repeatedly "increment" the rightmost
5993  * character, using an encoding-specific character incrementer function.
5994  * When it's no longer possible to increment the last character, we truncate
5995  * off that character and start incrementing the next-to-rightmost.
5996  * For example, if "z" were the last character in the sort order, then we
5997  * could produce "foo" as a string greater than "fonz".
5998  *
5999  * This could be rather slow in the worst case, but in most cases we
6000  * won't have to try more than one or two strings before succeeding.
6001  *
6002  * Note that it's important for the character incrementer not to be too thorough
6003  * about producing every possible character code, since in some cases the only
6004  * way to get a larger string is to increment a previous character position.
6005  * So we don't want to spend too much time trying every possible character
6006  * code at the last position.  A good rule of thumb is to be sure that we
6007  * don't try more than 256*K values for a K-byte character (and definitely
6008  * not 256^K, which is what an exhaustive search would approach).
6009  */
6010 Const *
6011 make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
6012 {
6013         Oid                     datatype = str_const->consttype;
6014         char       *workstr;
6015         int                     len;
6016         Datum           cmpstr;
6017         text       *cmptxt = NULL;
6018         mbcharacter_incrementer charinc;
6019
6020         /*
6021          * Get a modifiable copy of the prefix string in C-string format, and set
6022          * up the string we will compare to as a Datum.  In C locale this can just
6023          * be the given prefix string; otherwise we need to add a suffix.  Types
6024          * NAME and BYTEA sort bytewise so they don't need a suffix either.
6025          */
6026         if (datatype == NAMEOID)
6027         {
6028                 workstr = DatumGetCString(DirectFunctionCall1(nameout,
6029                                                                                                           str_const->constvalue));
6030                 len = strlen(workstr);
6031                 cmpstr = str_const->constvalue;
6032         }
6033         else if (datatype == BYTEAOID)
6034         {
6035                 bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
6036
6037                 len = VARSIZE_ANY_EXHDR(bstr);
6038                 workstr = (char *) palloc(len);
6039                 memcpy(workstr, VARDATA_ANY(bstr), len);
6040                 Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
6041                 cmpstr = str_const->constvalue;
6042         }
6043         else
6044         {
6045                 workstr = TextDatumGetCString(str_const->constvalue);
6046                 len = strlen(workstr);
6047                 if (lc_collate_is_c(collation) || len == 0)
6048                         cmpstr = str_const->constvalue;
6049                 else
6050                 {
6051                         /* If first time through, determine the suffix to use */
6052                         static char suffixchar = 0;
6053                         static Oid      suffixcollation = 0;
6054
6055                         if (!suffixchar || suffixcollation != collation)
6056                         {
6057                                 char       *best;
6058
6059                                 best = "Z";
6060                                 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
6061                                         best = "z";
6062                                 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
6063                                         best = "y";
6064                                 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
6065                                         best = "9";
6066                                 suffixchar = *best;
6067                                 suffixcollation = collation;
6068                         }
6069
6070                         /* And build the string to compare to */
6071                         cmptxt = (text *) palloc(VARHDRSZ + len + 1);
6072                         SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
6073                         memcpy(VARDATA(cmptxt), workstr, len);
6074                         *(VARDATA(cmptxt) + len) = suffixchar;
6075                         cmpstr = PointerGetDatum(cmptxt);
6076                 }
6077         }
6078
6079         /* Select appropriate character-incrementer function */
6080         if (datatype == BYTEAOID)
6081                 charinc = byte_increment;
6082         else
6083                 charinc = pg_database_encoding_character_incrementer();
6084
6085         /* And search ... */
6086         while (len > 0)
6087         {
6088                 int                     charlen;
6089                 unsigned char *lastchar;
6090
6091                 /* Identify the last character --- for bytea, just the last byte */
6092                 if (datatype == BYTEAOID)
6093                         charlen = 1;
6094                 else
6095                         charlen = len - pg_mbcliplen(workstr, len, len - 1);
6096                 lastchar = (unsigned char *) (workstr + len - charlen);
6097
6098                 /*
6099                  * Try to generate a larger string by incrementing the last character
6100                  * (for BYTEA, we treat each byte as a character).
6101                  *
6102                  * Note: the incrementer function is expected to return true if it's
6103                  * generated a valid-per-the-encoding new character, otherwise false.
6104                  * The contents of the character on false return are unspecified.
6105                  */
6106                 while (charinc(lastchar, charlen))
6107                 {
6108                         Const      *workstr_const;
6109
6110                         if (datatype == BYTEAOID)
6111                                 workstr_const = string_to_bytea_const(workstr, len);
6112                         else
6113                                 workstr_const = string_to_const(workstr, datatype);
6114
6115                         if (DatumGetBool(FunctionCall2Coll(ltproc,
6116                                                                                            collation,
6117                                                                                            cmpstr,
6118                                                                                            workstr_const->constvalue)))
6119                         {
6120                                 /* Successfully made a string larger than cmpstr */
6121                                 if (cmptxt)
6122                                         pfree(cmptxt);
6123                                 pfree(workstr);
6124                                 return workstr_const;
6125                         }
6126
6127                         /* No good, release unusable value and try again */
6128                         pfree(DatumGetPointer(workstr_const->constvalue));
6129                         pfree(workstr_const);
6130                 }
6131
6132                 /*
6133                  * No luck here, so truncate off the last character and try to
6134                  * increment the next one.
6135                  */
6136                 len -= charlen;
6137                 workstr[len] = '\0';
6138         }
6139
6140         /* Failed... */
6141         if (cmptxt)
6142                 pfree(cmptxt);
6143         pfree(workstr);
6144
6145         return NULL;
6146 }
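
/*
 * Worked example for make_greater_string() above (editorial illustration; the
 * strings are hypothetical).  Given a text Const 'foo' in C locale, cmpstr is
 * simply 'foo'; incrementing the last character yields 'fop', which the
 * caller-supplied "<" proc confirms is greater than 'foo', so a Const 'fop'
 * is returned.  In a non-C collation, candidates are instead compared against
 * 'foo' plus the chosen suffix character (whichever of "Z", "z", "y", "9"
 * sorts highest), e.g. 'fooz', to guard against extensions of the prefix
 * sorting above the candidate.  If no increment of the last character
 * produces a larger string, characters are truncated from the right until
 * either a candidate works or the string is exhausted and NULL is returned.
 */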
6147
6148 /*
6149  * Generate a Datum of the appropriate type from a C string.
6150  * Note that all of the supported types are pass-by-ref, so the
6151  * returned value should be pfree'd if no longer needed.
6152  */
6153 static Datum
6154 string_to_datum(const char *str, Oid datatype)
6155 {
6156         Assert(str != NULL);
6157
6158         /*
6159          * We cheat a little by assuming that CStringGetTextDatum() will do for
6160          * bpchar and varchar constants too...
6161          */
6162         if (datatype == NAMEOID)
6163                 return DirectFunctionCall1(namein, CStringGetDatum(str));
6164         else if (datatype == BYTEAOID)
6165                 return DirectFunctionCall1(byteain, CStringGetDatum(str));
6166         else
6167                 return CStringGetTextDatum(str);
6168 }
6169
6170 /*
6171  * Generate a Const node of the appropriate type from a C string.
6172  */
6173 static Const *
6174 string_to_const(const char *str, Oid datatype)
6175 {
6176         Datum           conval = string_to_datum(str, datatype);
6177         Oid                     collation;
6178         int                     constlen;
6179
6180         /*
6181          * We only need to support a few datatypes here, so hard-wire properties
6182          * instead of incurring the expense of catalog lookups.
6183          */
6184         switch (datatype)
6185         {
6186                 case TEXTOID:
6187                 case VARCHAROID:
6188                 case BPCHAROID:
6189                         collation = DEFAULT_COLLATION_OID;
6190                         constlen = -1;
6191                         break;
6192
6193                 case NAMEOID:
6194                         collation = InvalidOid;
6195                         constlen = NAMEDATALEN;
6196                         break;
6197
6198                 case BYTEAOID:
6199                         collation = InvalidOid;
6200                         constlen = -1;
6201                         break;
6202
6203                 default:
6204                         elog(ERROR, "unexpected datatype in string_to_const: %u",
6205                                  datatype);
6206                         return NULL;
6207         }
6208
6209         return makeConst(datatype, -1, collation, constlen,
6210                                          conval, false, false);
6211 }
6212
6213 /*
6214  * Generate a Const node of bytea type from a binary C string and a length.
6215  */
6216 static Const *
6217 string_to_bytea_const(const char *str, size_t str_len)
6218 {
6219         bytea      *bstr = palloc(VARHDRSZ + str_len);
6220         Datum           conval;
6221
6222         memcpy(VARDATA(bstr), str, str_len);
6223         SET_VARSIZE(bstr, VARHDRSZ + str_len);
6224         conval = PointerGetDatum(bstr);
6225
6226         return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
6227 }
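
/*
 * Usage note (editorial): string_to_const("fop", TEXTOID) wraps the text
 * datum in a Const with typmod -1, DEFAULT_COLLATION_OID and constlen -1,
 * while string_to_bytea_const("fop", 3) builds a 3-byte bytea Const with
 * InvalidOid collation; make_greater_string() uses whichever matches the
 * datatype of the prefix it is extending.
 */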
6228
6229 /*-------------------------------------------------------------------------
6230  *
6231  * Index cost estimation functions
6232  *
6233  *-------------------------------------------------------------------------
6234  */
6235
6236 List *
6237 deconstruct_indexquals(IndexPath *path)
6238 {
6239         List       *result = NIL;
6240         IndexOptInfo *index = path->indexinfo;
6241         ListCell   *lcc,
6242                            *lci;
6243
6244         forboth(lcc, path->indexquals, lci, path->indexqualcols)
6245         {
6246                 RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(lcc));
6247                 int                     indexcol = lfirst_int(lci);
6248                 Expr       *clause;
6249                 Node       *leftop,
6250                                    *rightop;
6251                 IndexQualInfo *qinfo;
6252
6253                 clause = rinfo->clause;
6254
6255                 qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
6256                 qinfo->rinfo = rinfo;
6257                 qinfo->indexcol = indexcol;
6258
6259                 if (IsA(clause, OpExpr))
6260                 {
6261                         qinfo->clause_op = ((OpExpr *) clause)->opno;
6262                         leftop = get_leftop(clause);
6263                         rightop = get_rightop(clause);
6264                         if (match_index_to_operand(leftop, indexcol, index))
6265                         {
6266                                 qinfo->varonleft = true;
6267                                 qinfo->other_operand = rightop;
6268                         }
6269                         else
6270                         {
6271                                 Assert(match_index_to_operand(rightop, indexcol, index));
6272                                 qinfo->varonleft = false;
6273                                 qinfo->other_operand = leftop;
6274                         }
6275                 }
6276                 else if (IsA(clause, RowCompareExpr))
6277                 {
6278                         RowCompareExpr *rc = (RowCompareExpr *) clause;
6279
6280                         qinfo->clause_op = linitial_oid(rc->opnos);
6281                         /* Examine only first columns to determine left/right sides */
6282                         if (match_index_to_operand((Node *) linitial(rc->largs),
6283                                                                            indexcol, index))
6284                         {
6285                                 qinfo->varonleft = true;
6286                                 qinfo->other_operand = (Node *) rc->rargs;
6287                         }
6288                         else
6289                         {
6290                                 Assert(match_index_to_operand((Node *) linitial(rc->rargs),
6291                                                                                           indexcol, index));
6292                                 qinfo->varonleft = false;
6293                                 qinfo->other_operand = (Node *) rc->largs;
6294                         }
6295                 }
6296                 else if (IsA(clause, ScalarArrayOpExpr))
6297                 {
6298                         ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6299
6300                         qinfo->clause_op = saop->opno;
6301                         /* index column is always on the left in this case */
6302                         Assert(match_index_to_operand((Node *) linitial(saop->args),
6303                                                                                   indexcol, index));
6304                         qinfo->varonleft = true;
6305                         qinfo->other_operand = (Node *) lsecond(saop->args);
6306                 }
6307                 else if (IsA(clause, NullTest))
6308                 {
6309                         qinfo->clause_op = InvalidOid;
6310                         Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
6311                                                                                   indexcol, index));
6312                         qinfo->varonleft = true;
6313                         qinfo->other_operand = NULL;
6314                 }
6315                 else
6316                 {
6317                         elog(ERROR, "unsupported indexqual type: %d",
6318                                  (int) nodeTag(clause));
6319                 }
6320
6321                 result = lappend(result, qinfo);
6322         }
6323         return result;
6324 }
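
/*
 * Illustration (editorial) of what deconstruct_indexquals() produces: for an
 * indexqual "42 < x" on index column 0, where x is the indexed variable, the
 * resulting IndexQualInfo has indexcol = 0, clause_op = the OID of the "<"
 * operator, varonleft = false (the variable is the right-hand operand), and
 * other_operand pointing at the Const 42.  Callers that care about operator
 * semantics can commute clause_op when varonleft is false.
 */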
6325
6326 /*
6327  * Simple function to compute the total eval cost of the "other operands"
6328  * in an IndexQualInfo list.  Since we know these will be evaluated just
6329  * once per scan, there's no need to distinguish startup from per-row cost.
6330  */
6331 static Cost
6332 other_operands_eval_cost(PlannerInfo *root, List *qinfos)
6333 {
6334         Cost            qual_arg_cost = 0;
6335         ListCell   *lc;
6336
6337         foreach(lc, qinfos)
6338         {
6339                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
6340                 QualCost        index_qual_cost;
6341
6342                 cost_qual_eval_node(&index_qual_cost, qinfo->other_operand, root);
6343                 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6344         }
6345         return qual_arg_cost;
6346 }
6347
6348 /*
6349  * Get other-operand eval cost for an index orderby list.
6350  *
6351  * Index orderby expressions aren't represented as RestrictInfos (since they
6352  * aren't boolean, usually).  So we can't apply deconstruct_indexquals to
6353  * them.  However, they are much simpler to deal with since they are always
6354  * OpExprs and the index column is always on the left.
6355  */
6356 static Cost
6357 orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
6358 {
6359         Cost            qual_arg_cost = 0;
6360         ListCell   *lc;
6361
6362         foreach(lc, path->indexorderbys)
6363         {
6364                 Expr       *clause = (Expr *) lfirst(lc);
6365                 Node       *other_operand;
6366                 QualCost        index_qual_cost;
6367
6368                 if (IsA(clause, OpExpr))
6369                 {
6370                         other_operand = get_rightop(clause);
6371                 }
6372                 else
6373                 {
6374                         elog(ERROR, "unsupported indexorderby type: %d",
6375                                  (int) nodeTag(clause));
6376                         other_operand = NULL;           /* keep compiler quiet */
6377                 }
6378
6379                 cost_qual_eval_node(&index_qual_cost, other_operand, root);
6380                 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6381         }
6382         return qual_arg_cost;
6383 }
6384
6385 void
6386 genericcostestimate(PlannerInfo *root,
6387                                         IndexPath *path,
6388                                         double loop_count,
6389                                         List *qinfos,
6390                                         GenericCosts *costs)
6391 {
6392         IndexOptInfo *index = path->indexinfo;
6393         List       *indexQuals = path->indexquals;
6394         List       *indexOrderBys = path->indexorderbys;
6395         Cost            indexStartupCost;
6396         Cost            indexTotalCost;
6397         Selectivity indexSelectivity;
6398         double          indexCorrelation;
6399         double          numIndexPages;
6400         double          numIndexTuples;
6401         double          spc_random_page_cost;
6402         double          num_sa_scans;
6403         double          num_outer_scans;
6404         double          num_scans;
6405         double          qual_op_cost;
6406         double          qual_arg_cost;
6407         List       *selectivityQuals;
6408         ListCell   *l;
6409
6410         /*
6411          * If the index is partial, AND the index predicate with the explicitly
6412          * given indexquals to produce a more accurate idea of the index
6413          * selectivity.
6414          */
6415         selectivityQuals = add_predicate_to_quals(index, indexQuals);
6416
6417         /*
6418          * Check for ScalarArrayOpExpr index quals, and estimate the number of
6419          * index scans that will be performed.
6420          */
6421         num_sa_scans = 1;
6422         foreach(l, indexQuals)
6423         {
6424                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6425
6426                 if (IsA(rinfo->clause, ScalarArrayOpExpr))
6427                 {
6428                         ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6429                         int                     alength = estimate_array_length(lsecond(saop->args));
6430
6431                         if (alength > 1)
6432                                 num_sa_scans *= alength;
6433                 }
6434         }
6435
6436         /* Estimate the fraction of main-table tuples that will be visited */
6437         indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6438                                                                                           index->rel->relid,
6439                                                                                           JOIN_INNER,
6440                                                                                           NULL);
6441
6442         /*
6443          * If caller didn't give us an estimate, estimate the number of index
6444          * tuples that will be visited.  We do it in this rather peculiar-looking
6445          * way in order to get the right answer for partial indexes.
6446          */
6447         numIndexTuples = costs->numIndexTuples;
6448         if (numIndexTuples <= 0.0)
6449         {
6450                 numIndexTuples = indexSelectivity * index->rel->tuples;
6451
6452                 /*
6453                  * The above calculation counts all the tuples visited across all
6454                  * scans induced by ScalarArrayOpExpr nodes.  We want to consider the
6455                  * average per-indexscan number, so adjust.  This is a handy place to
6456                  * round to integer, too.  (If the caller supplied a tuple estimate,
6457                  * it's responsible for handling these considerations.)
6458                  */
6459                 numIndexTuples = rint(numIndexTuples / num_sa_scans);
6460         }
6461
6462         /*
6463          * We can bound the number of tuples by the index size in any case. Also,
6464          * always estimate at least one tuple is touched, even when
6465          * indexSelectivity estimate is tiny.
6466          */
6467         if (numIndexTuples > index->tuples)
6468                 numIndexTuples = index->tuples;
6469         if (numIndexTuples < 1.0)
6470                 numIndexTuples = 1.0;
6471
6472         /*
6473          * Estimate the number of index pages that will be retrieved.
6474          *
6475          * We use the simplistic method of taking a pro-rata fraction of the total
6476          * number of index pages.  In effect, this counts only leaf pages and not
6477          * any overhead such as index metapage or upper tree levels.
6478          *
6479          * In practice access to upper index levels is often nearly free because
6480          * those tend to stay in cache under load; moreover, the cost involved is
6481          * highly dependent on index type.  We therefore ignore such costs here
6482          * and leave it to the caller to add a suitable charge if needed.
6483          */
6484         if (index->pages > 1 && index->tuples > 1)
6485                 numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6486         else
6487                 numIndexPages = 1.0;
6488
6489         /* fetch estimated page cost for tablespace containing index */
6490         get_tablespace_page_costs(index->reltablespace,
6491                                                           &spc_random_page_cost,
6492                                                           NULL);
6493
6494         /*
6495          * Now compute the disk access costs.
6496          *
6497          * The above calculations are all per-index-scan.  However, if we are in a
6498          * nestloop inner scan, we can expect the scan to be repeated (with
6499          * different search keys) for each row of the outer relation.  Likewise,
6500          * ScalarArrayOpExpr quals result in multiple index scans.  This creates
6501          * the potential for cache effects to reduce the number of disk page
6502          * fetches needed.  We want to estimate the average per-scan I/O cost in
6503          * the presence of caching.
6504          *
6505          * We use the Mackert-Lohman formula (see costsize.c for details) to
6506          * estimate the total number of page fetches that occur.  While this
6507          * wasn't what it was designed for, it seems a reasonable model anyway.
6508          * Note that we are counting pages not tuples anymore, so we take N = T =
6509          * index size, as if there were one "tuple" per page.
6510          */
6511         num_outer_scans = loop_count;
6512         num_scans = num_sa_scans * num_outer_scans;
6513
6514         if (num_scans > 1)
6515         {
6516                 double          pages_fetched;
6517
6518                 /* total page fetches ignoring cache effects */
6519                 pages_fetched = numIndexPages * num_scans;
6520
6521                 /* use Mackert and Lohman formula to adjust for cache effects */
6522                 pages_fetched = index_pages_fetched(pages_fetched,
6523                                                                                         index->pages,
6524                                                                                         (double) index->pages,
6525                                                                                         root);
6526
6527                 /*
6528                  * Now compute the total disk access cost, and then report a pro-rated
6529                  * share for each outer scan.  (Don't pro-rate for ScalarArrayOpExpr,
6530                  * since that's internal to the indexscan.)
6531                  */
6532                 indexTotalCost = (pages_fetched * spc_random_page_cost)
6533                         / num_outer_scans;
6534         }
6535         else
6536         {
6537                 /*
6538                  * For a single index scan, we just charge spc_random_page_cost per
6539                  * page touched.
6540                  */
6541                 indexTotalCost = numIndexPages * spc_random_page_cost;
6542         }
6543
6544         /*
6545          * CPU cost: any complex expressions in the indexquals will need to be
6546          * evaluated once at the start of the scan to reduce them to runtime keys
6547          * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
6548          * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6549          * indexqual operator.  Because we have numIndexTuples as a per-scan
6550          * number, we have to multiply by num_sa_scans to get the correct result
6551          * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
6552          * ORDER BY expressions.
6553          *
6554          * Note: this neglects the possible costs of rechecking lossy operators.
6555          * Detecting that that might be needed seems more expensive than it's
6556          * worth, though, considering all the other inaccuracies here ...
6557          */
6558         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
6559                 orderby_operands_eval_cost(root, path);
6560         qual_op_cost = cpu_operator_cost *
6561                 (list_length(indexQuals) + list_length(indexOrderBys));
6562
6563         indexStartupCost = qual_arg_cost;
6564         indexTotalCost += qual_arg_cost;
6565         indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6566
6567         /*
6568          * Generic assumption about index correlation: there isn't any.
6569          */
6570         indexCorrelation = 0.0;
6571
6572         /*
6573          * Return everything to caller.
6574          */
6575         costs->indexStartupCost = indexStartupCost;
6576         costs->indexTotalCost = indexTotalCost;
6577         costs->indexSelectivity = indexSelectivity;
6578         costs->indexCorrelation = indexCorrelation;
6579         costs->numIndexPages = numIndexPages;
6580         costs->numIndexTuples = numIndexTuples;
6581         costs->spc_random_page_cost = spc_random_page_cost;
6582         costs->num_sa_scans = num_sa_scans;
6583 }
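
/*
 * Worked example for genericcostestimate() above (editorial illustration; all
 * numbers are hypothetical, with cpu_operator_cost = 0.0025 and
 * cpu_index_tuple_cost = 0.005 at their documented defaults and
 * spc_random_page_cost at the random_page_cost default of 4.0).  Suppose
 * index->pages = 1000, index->tuples = 100000, rel->tuples = 100000, one
 * simple indexqual, no ScalarArrayOpExpr (num_sa_scans = 1), loop_count = 1,
 * and clauselist_selectivity() returns 0.01.  Then:
 *              numIndexTuples = 0.01 * 100000 = 1000
 *              numIndexPages  = ceil(1000 * 1000 / 100000) = 10
 *              disk cost      = 10 * 4.0 = 40.0
 *              CPU cost       = 1000 * (0.005 + 0.0025) = 7.5
 * With an indexqual like "x = ANY('{1,2,3}')", estimate_array_length() gives
 * 3, so num_sa_scans = 3; and with loop_count = 10 (num_sa_scans back at 1),
 * the 10 pages * 10 scans = 100 raw page fetches would first be discounted
 * for caching by index_pages_fetched() and then pro-rated across the 10 outer
 * scans.
 */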
6584
6585 /*
6586  * If the index is partial, add its predicate to the given qual list.
6587  *
6588  * ANDing the index predicate with the explicitly given indexquals produces
6589  * a more accurate idea of the index's selectivity.  However, we need to be
6590  * careful not to insert redundant clauses, because clauselist_selectivity()
6591  * is easily fooled into computing a too-low selectivity estimate.  Our
6592  * approach is to add only the predicate clause(s) that cannot be proven to
6593  * be implied by the given indexquals.  This successfully handles cases such
6594  * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
6595  * There are many other cases where we won't detect redundancy, leading to a
6596  * too-low selectivity estimate, which will bias the system in favor of using
6597  * partial indexes where possible.  That is not necessarily bad though.
6598  *
6599  * Note that indexQuals contains RestrictInfo nodes while the indpred
6600  * does not, so the output list will be mixed.  This is OK for both
6601  * predicate_implied_by() and clauselist_selectivity(), but might be
6602  * problematic if the result were passed to other things.
6603  */
6604 static List *
6605 add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
6606 {
6607         List       *predExtraQuals = NIL;
6608         ListCell   *lc;
6609
6610         if (index->indpred == NIL)
6611                 return indexQuals;
6612
6613         foreach(lc, index->indpred)
6614         {
6615                 Node       *predQual = (Node *) lfirst(lc);
6616                 List       *oneQual = list_make1(predQual);
6617
6618                 if (!predicate_implied_by(oneQual, indexQuals))
6619                         predExtraQuals = list_concat(predExtraQuals, oneQual);
6620         }
6621         /* list_concat avoids modifying the passed-in indexQuals list */
6622         return list_concat(predExtraQuals, indexQuals);
6623 }
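
/*
 * Illustration (editorial) of add_predicate_to_quals(): for a partial index
 * "WHERE deleted = false" and an indexqual list containing just "x = 42", the
 * predicate clause is not implied by the qual, so the result is the list
 * (deleted = false, x = 42) -- a bare predicate clause followed by the
 * original RestrictInfo.  For the example in the comment above, a qual
 * "x = 42" with predicate "x >= 40 AND x < 50", both predicate clauses are
 * proven implied and the original indexQuals list is effectively returned
 * unchanged.
 */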
6624
6625
6626 void
6627 btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6628                            Cost *indexStartupCost, Cost *indexTotalCost,
6629                            Selectivity *indexSelectivity, double *indexCorrelation,
6630                            double *indexPages)
6631 {
6632         IndexOptInfo *index = path->indexinfo;
6633         List       *qinfos;
6634         GenericCosts costs;
6635         Oid                     relid;
6636         AttrNumber      colnum;
6637         VariableStatData vardata;
6638         double          numIndexTuples;
6639         Cost            descentCost;
6640         List       *indexBoundQuals;
6641         int                     indexcol;
6642         bool            eqQualHere;
6643         bool            found_saop;
6644         bool            found_is_null_op;
6645         double          num_sa_scans;
6646         ListCell   *lc;
6647
6648         /* Do preliminary analysis of indexquals */
6649         qinfos = deconstruct_indexquals(path);
6650
6651         /*
6652          * For a btree scan, only leading '=' quals plus inequality quals for the
6653          * immediately next attribute contribute to index selectivity (these are
6654          * the "boundary quals" that determine the starting and stopping points of
6655          * the index scan).  Additional quals can suppress visits to the heap, so
6656          * it's OK to count them in indexSelectivity, but they should not count
6657          * for estimating numIndexTuples.  So we must examine the given indexquals
6658          * to find out which ones count as boundary quals.  We rely on the
6659          * knowledge that they are given in index column order.
6660          *
6661          * For a RowCompareExpr, we consider only the first column, just as
6662          * rowcomparesel() does.
6663          *
6664          * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
6665          * index scans, not one, but the ScalarArrayOpExpr's operator can be
6666          * considered to act the same as it normally does.
6667          */
6668         indexBoundQuals = NIL;
6669         indexcol = 0;
6670         eqQualHere = false;
6671         found_saop = false;
6672         found_is_null_op = false;
6673         num_sa_scans = 1;
6674         foreach(lc, qinfos)
6675         {
6676                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
6677                 RestrictInfo *rinfo = qinfo->rinfo;
6678                 Expr       *clause = rinfo->clause;
6679                 Oid                     clause_op;
6680                 int                     op_strategy;
6681
6682                 if (indexcol != qinfo->indexcol)
6683                 {
6684                         /* Beginning of a new column's quals */
6685                         if (!eqQualHere)
6686                                 break;                  /* done if no '=' qual for indexcol */
6687                         eqQualHere = false;
6688                         indexcol++;
6689                         if (indexcol != qinfo->indexcol)
6690                                 break;                  /* no quals at all for indexcol */
6691                 }
6692
6693                 if (IsA(clause, ScalarArrayOpExpr))
6694                 {
6695                         int                     alength = estimate_array_length(qinfo->other_operand);
6696
6697                         found_saop = true;
6698                         /* count up number of SA scans induced by indexBoundQuals only */
6699                         if (alength > 1)
6700                                 num_sa_scans *= alength;
6701                 }
6702                 else if (IsA(clause, NullTest))
6703                 {
6704                         NullTest   *nt = (NullTest *) clause;
6705
6706                         if (nt->nulltesttype == IS_NULL)
6707                         {
6708                                 found_is_null_op = true;
6709                                 /* IS NULL is like = for selectivity determination purposes */
6710                                 eqQualHere = true;
6711                         }
6712                 }
6713
6714                 /*
6715                  * We would need to commute the clause_op if not varonleft, except
6716                  * that we only care if it's equality or not, so that refinement is
6717                  * unnecessary.
6718                  */
6719                 clause_op = qinfo->clause_op;
6720
6721                 /* check for equality operator */
6722                 if (OidIsValid(clause_op))
6723                 {
6724                         op_strategy = get_op_opfamily_strategy(clause_op,
6725                                                                                                    index->opfamily[indexcol]);
6726                         Assert(op_strategy != 0);       /* not a member of opfamily?? */
6727                         if (op_strategy == BTEqualStrategyNumber)
6728                                 eqQualHere = true;
6729                 }
6730
6731                 indexBoundQuals = lappend(indexBoundQuals, rinfo);
6732         }
6733
6734         /*
6735          * If index is unique and we found an '=' clause for each column, we can
6736          * just assume numIndexTuples = 1 and skip the expensive
6737          * clauselist_selectivity calculations.  However, a ScalarArrayOp or
6738          * NullTest invalidates that theory, even though it sets eqQualHere.
6739          */
6740         if (index->unique &&
6741                 indexcol == index->ncolumns - 1 &&
6742                 eqQualHere &&
6743                 !found_saop &&
6744                 !found_is_null_op)
6745                 numIndexTuples = 1.0;
6746         else
6747         {
6748                 List       *selectivityQuals;
6749                 Selectivity btreeSelectivity;
6750
6751                 /*
6752                  * If the index is partial, AND the index predicate with the
6753                  * index-bound quals to produce a more accurate idea of the number of
6754                  * rows covered by the bound conditions.
6755                  */
6756                 selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
6757
6758                 btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
6759                                                                                                   index->rel->relid,
6760                                                                                                   JOIN_INNER,
6761                                                                                                   NULL);
6762                 numIndexTuples = btreeSelectivity * index->rel->tuples;
6763
6764                 /*
6765                  * As in genericcostestimate(), we have to adjust for any
6766                  * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
6767                  * to integer.
6768                  */
6769                 numIndexTuples = rint(numIndexTuples / num_sa_scans);
6770         }
6771
6772         /*
6773          * Now do generic index cost estimation.
6774          */
6775         MemSet(&costs, 0, sizeof(costs));
6776         costs.numIndexTuples = numIndexTuples;
6777
6778         genericcostestimate(root, path, loop_count, qinfos, &costs);
6779
6780         /*
6781          * Add a CPU-cost component to represent the costs of initial btree
6782          * descent.  We don't charge any I/O cost for touching upper btree levels,
6783          * since they tend to stay in cache, but we still have to do about log2(N)
6784          * comparisons to descend a btree of N leaf tuples.  We charge one
6785          * cpu_operator_cost per comparison.
6786          *
6787          * If there are ScalarArrayOpExprs, charge this once per SA scan.  The
6788          * ones after the first one are not startup cost so far as the overall
6789          * plan is concerned, so add them only to "total" cost.
6790          */
6791         if (index->tuples > 1)          /* avoid computing log(0) */
6792         {
6793                 descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
6794                 costs.indexStartupCost += descentCost;
6795                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
6796         }
6797
6798         /*
6799          * Even though we're not charging I/O cost for touching upper btree pages,
6800          * it's still reasonable to charge some CPU cost per page descended
6801          * through.  Moreover, if we had no such charge at all, bloated indexes
6802          * would appear to have the same search cost as unbloated ones, at least
6803          * in cases where only a single leaf page is expected to be visited.  This
6804          * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
6805          * touched.  The number of such pages is btree tree height plus one (ie,
6806          * we charge for the leaf page too).  As above, charge once per SA scan.
6807          */
6808         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
6809         costs.indexStartupCost += descentCost;
6810         costs.indexTotalCost += costs.num_sa_scans * descentCost;
6811
6812         /*
6813          * If we can get an estimate of the first column's ordering correlation C
6814          * from pg_statistic, estimate the index correlation as C for a
6815          * single-column index, or C * 0.75 for multiple columns. (The idea here
6816          * is that multiple columns dilute the importance of the first column's
6817          * ordering, but don't negate it entirely.  Before 8.0 we divided the
6818          * correlation by the number of columns, but that seems too strong.)
6819          */
6820         MemSet(&vardata, 0, sizeof(vardata));
6821
6822         if (index->indexkeys[0] != 0)
6823         {
6824                 /* Simple variable --- look to stats for the underlying table */
6825                 RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
6826
6827                 Assert(rte->rtekind == RTE_RELATION);
6828                 relid = rte->relid;
6829                 Assert(relid != InvalidOid);
6830                 colnum = index->indexkeys[0];
6831
6832                 if (get_relation_stats_hook &&
6833                         (*get_relation_stats_hook) (root, rte, colnum, &vardata))
6834                 {
6835                         /*
6836                          * The hook took control of acquiring a stats tuple.  If it did
6837                          * supply a tuple, it'd better have supplied a freefunc.
6838                          */
6839                         if (HeapTupleIsValid(vardata.statsTuple) &&
6840                                 !vardata.freefunc)
6841                                 elog(ERROR, "no function provided to release variable stats with");
6842                 }
6843                 else
6844                 {
6845                         vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6846                                                                                                  ObjectIdGetDatum(relid),
6847                                                                                                  Int16GetDatum(colnum),
6848                                                                                                  BoolGetDatum(rte->inh));
6849                         vardata.freefunc = ReleaseSysCache;
6850                 }
6851         }
6852         else
6853         {
6854                 /* Expression --- maybe there are stats for the index itself */
6855                 relid = index->indexoid;
6856                 colnum = 1;
6857
6858                 if (get_index_stats_hook &&
6859                         (*get_index_stats_hook) (root, relid, colnum, &vardata))
6860                 {
6861                         /*
6862                          * The hook took control of acquiring a stats tuple.  If it did
6863                          * supply a tuple, it'd better have supplied a freefunc.
6864                          */
6865                         if (HeapTupleIsValid(vardata.statsTuple) &&
6866                                 !vardata.freefunc)
6867                                 elog(ERROR, "no function provided to release variable stats with");
6868                 }
6869                 else
6870                 {
6871                         vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6872                                                                                                  ObjectIdGetDatum(relid),
6873                                                                                                  Int16GetDatum(colnum),
6874                                                                                                  BoolGetDatum(false));
6875                         vardata.freefunc = ReleaseSysCache;
6876                 }
6877         }
6878
6879         if (HeapTupleIsValid(vardata.statsTuple))
6880         {
6881                 Oid                     sortop;
6882                 float4     *numbers;
6883                 int                     nnumbers;
6884
6885                 sortop = get_opfamily_member(index->opfamily[0],
6886                                                                          index->opcintype[0],
6887                                                                          index->opcintype[0],
6888                                                                          BTLessStrategyNumber);
6889                 if (OidIsValid(sortop) &&
6890                         get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
6891                                                          STATISTIC_KIND_CORRELATION,
6892                                                          sortop,
6893                                                          NULL,
6894                                                          NULL, NULL,
6895                                                          &numbers, &nnumbers))
6896                 {
6897                         double          varCorrelation;
6898
6899                         Assert(nnumbers == 1);
6900                         varCorrelation = numbers[0];
6901
6902                         if (index->reverse_sort[0])
6903                                 varCorrelation = -varCorrelation;
6904
6905                         if (index->ncolumns > 1)
6906                                 costs.indexCorrelation = varCorrelation * 0.75;
6907                         else
6908                                 costs.indexCorrelation = varCorrelation;
6909
6910                         free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
6911                 }
6912         }
6913
6914         ReleaseVariableStats(vardata);
6915
6916         *indexStartupCost = costs.indexStartupCost;
6917         *indexTotalCost = costs.indexTotalCost;
6918         *indexSelectivity = costs.indexSelectivity;
6919         *indexCorrelation = costs.indexCorrelation;
6920         *indexPages = costs.numIndexPages;
6921 }
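
/*
 * Worked example for the btree-specific charges in btcostestimate() above
 * (editorial illustration; numbers hypothetical, cpu_operator_cost at its
 * default 0.0025).  For index->tuples = 1,000,000 the descent comparisons
 * cost ceil(log2(1e6)) * 0.0025 = 20 * 0.0025 = 0.05, and with tree_height =
 * 2 the per-page CPU charge is (2 + 1) * 50 * 0.0025 = 0.375; both are added
 * to startup cost and, multiplied by num_sa_scans, to total cost.  For the
 * correlation estimate, a first column with stats correlation 0.9 yields
 * indexCorrelation = 0.9 for a single-column index or 0.9 * 0.75 = 0.675 for
 * a multicolumn one, with the sign flipped first if the column is indexed in
 * reverse sort order.
 */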
6922
6923 void
6924 hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6925                                  Cost *indexStartupCost, Cost *indexTotalCost,
6926                                  Selectivity *indexSelectivity, double *indexCorrelation,
6927                                  double *indexPages)
6928 {
6929         List       *qinfos;
6930         GenericCosts costs;
6931
6932         /* Do preliminary analysis of indexquals */
6933         qinfos = deconstruct_indexquals(path);
6934
6935         MemSet(&costs, 0, sizeof(costs));
6936
6937         genericcostestimate(root, path, loop_count, qinfos, &costs);
6938
6939         /*
6940          * A hash index has no descent costs as such, since the index AM can go
6941          * directly to the target bucket after computing the hash value.  There
6942          * are a couple of other hash-specific costs that we could conceivably add
6943          * here, though:
6944          *
6945          * Ideally we'd charge spc_random_page_cost for each page in the target
6946          * bucket, not just the numIndexPages pages that genericcostestimate
6947          * thought we'd visit.  However in most cases we don't know which bucket
6948          * that will be.  There's no point in considering the average bucket size
6949          * because the hash AM makes sure that's always one page.
6950          *
6951          * Likewise, we could consider charging some CPU for each index tuple in
6952          * the bucket, if we knew how many there were.  But the per-tuple cost is
6953          * just a hash value comparison, not a general datatype-dependent
6954          * comparison, so any such charge ought to be quite a bit less than
6955          * cpu_operator_cost; which makes it probably not worth worrying about.
6956          * cpu_operator_cost, which makes it probably not worth worrying about.
6957          * A bigger issue is that chance hash-value collisions will result in
6958          * wasted probes into the heap.  We don't currently attempt to model this
6959          * cost on the grounds that it's rare, but maybe it's not rare enough.
6960          * (Any fix for this ought to consider the generic lossy-operator problem,
6961          * though; it's not entirely hash-specific.)
6962          */
6963
6964         *indexStartupCost = costs.indexStartupCost;
6965         *indexTotalCost = costs.indexTotalCost;
6966         *indexSelectivity = costs.indexSelectivity;
6967         *indexCorrelation = costs.indexCorrelation;
6968         *indexPages = costs.numIndexPages;
6969 }
6970
6971 void
6972 gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6973                                  Cost *indexStartupCost, Cost *indexTotalCost,
6974                                  Selectivity *indexSelectivity, double *indexCorrelation,
6975                                  double *indexPages)
6976 {
6977         IndexOptInfo *index = path->indexinfo;
6978         List       *qinfos;
6979         GenericCosts costs;
6980         Cost            descentCost;
6981
6982         /* Do preliminary analysis of indexquals */
6983         qinfos = deconstruct_indexquals(path);
6984
6985         MemSet(&costs, 0, sizeof(costs));
6986
6987         genericcostestimate(root, path, loop_count, qinfos, &costs);
6988
6989         /*
6990          * We model index descent costs similarly to those for btree, but to do
6991          * that we first need an idea of the tree height.  We somewhat arbitrarily
6992          * assume that the fanout is 100, meaning the tree height is at most
6993          * log100(index->pages).
6994          *
6995          * Although this computation isn't really expensive enough to require
6996          * caching, we might as well use index->tree_height to cache it.
6997          */
6998         if (index->tree_height < 0) /* unknown? */
6999         {
7000                 if (index->pages > 1)   /* avoid computing log(0) */
7001                         index->tree_height = (int) (log(index->pages) / log(100.0));
7002                 else
7003                         index->tree_height = 0;
7004         }
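        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): for an index of 50,000 pages,
         * tree_height = (int) (log(50000) / log(100)), which is about
         * (int) 2.35 = 2.
         */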
7005
7006         /*
7007          * Add a CPU-cost component to represent the costs of initial descent. We
7008          * just use log(N) here not log2(N) since the branching factor isn't
7009          * necessarily two anyway.  As for btree, charge once per SA scan.
7010          */
7011         if (index->tuples > 1)          /* avoid computing log(0) */
7012         {
7013                 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7014                 costs.indexStartupCost += descentCost;
7015                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7016         }
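        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): with 1,000,000 index tuples and the default
         * cpu_operator_cost of 0.0025, descentCost = ceil(log(1e6)) * 0.0025
         * = 14 * 0.0025 = 0.035 (natural log), charged once at startup and
         * once per SA scan in the total cost.
         */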
7017
7018         /*
7019          * Likewise add a per-page charge, calculated the same as for btrees.
7020          */
7021         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
7022         costs.indexStartupCost += descentCost;
7023         costs.indexTotalCost += costs.num_sa_scans * descentCost;
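        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): with tree_height = 2 and the default cpu_operator_cost of
         * 0.0025, this per-page charge is (2 + 1) * 50.0 * 0.0025 = 0.375.
         */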
7024
7025         *indexStartupCost = costs.indexStartupCost;
7026         *indexTotalCost = costs.indexTotalCost;
7027         *indexSelectivity = costs.indexSelectivity;
7028         *indexCorrelation = costs.indexCorrelation;
7029         *indexPages = costs.numIndexPages;
7030 }
7031
7032 void
7033 spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7034                                 Cost *indexStartupCost, Cost *indexTotalCost,
7035                                 Selectivity *indexSelectivity, double *indexCorrelation,
7036                                 double *indexPages)
7037 {
7038         IndexOptInfo *index = path->indexinfo;
7039         List       *qinfos;
7040         GenericCosts costs;
7041         Cost            descentCost;
7042
7043         /* Do preliminary analysis of indexquals */
7044         qinfos = deconstruct_indexquals(path);
7045
7046         MemSet(&costs, 0, sizeof(costs));
7047
7048         genericcostestimate(root, path, loop_count, qinfos, &costs);
7049
7050         /*
7051          * We model index descent costs similarly to those for btree, but to do
7052          * that we first need an idea of the tree height.  We somewhat arbitrarily
7053          * assume that the fanout is 100, meaning the tree height is at most
7054          * log100(index->pages).
7055          *
7056          * Although this computation isn't really expensive enough to require
7057          * caching, we might as well use index->tree_height to cache it.
7058          */
7059         if (index->tree_height < 0) /* unknown? */
7060         {
7061                 if (index->pages > 1)   /* avoid computing log(0) */
7062                         index->tree_height = (int) (log(index->pages) / log(100.0));
7063                 else
7064                         index->tree_height = 0;
7065         }
7066
7067         /*
7068          * Add a CPU-cost component to represent the costs of initial descent. We
7069          * just use log(N) here not log2(N) since the branching factor isn't
7070          * necessarily two anyway.  As for btree, charge once per SA scan.
7071          */
7072         if (index->tuples > 1)          /* avoid computing log(0) */
7073         {
7074                 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7075                 costs.indexStartupCost += descentCost;
7076                 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7077         }
7078
7079         /*
7080          * Likewise add a per-page charge, calculated the same as for btrees.
7081          */
7082         descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
7083         costs.indexStartupCost += descentCost;
7084         costs.indexTotalCost += costs.num_sa_scans * descentCost;
7085
7086         *indexStartupCost = costs.indexStartupCost;
7087         *indexTotalCost = costs.indexTotalCost;
7088         *indexSelectivity = costs.indexSelectivity;
7089         *indexCorrelation = costs.indexCorrelation;
7090         *indexPages = costs.numIndexPages;
7091 }
7092
7093
7094 /*
7095  * Support routines for gincostestimate
7096  */
7097
7098 typedef struct
7099 {
7100         bool            haveFullScan;
7101         double          partialEntries;
7102         double          exactEntries;
7103         double          searchEntries;
7104         double          arrayScans;
7105 } GinQualCounts;
7106
7107 /*
7108  * Estimate the number of index terms that need to be searched for while
7109  * testing the given GIN query, and increment the counts in *counts
7110  * appropriately.  If the query is unsatisfiable, return false.
7111  */
7112 static bool
7113 gincost_pattern(IndexOptInfo *index, int indexcol,
7114                                 Oid clause_op, Datum query,
7115                                 GinQualCounts *counts)
7116 {
7117         Oid                     extractProcOid;
7118         Oid                     collation;
7119         int                     strategy_op;
7120         Oid                     lefttype,
7121                                 righttype;
7122         int32           nentries = 0;
7123         bool       *partial_matches = NULL;
7124         Pointer    *extra_data = NULL;
7125         bool       *nullFlags = NULL;
7126         int32           searchMode = GIN_SEARCH_MODE_DEFAULT;
7127         int32           i;
7128
7129         /*
7130          * Get the operator's strategy number and declared input data types within
7131          * the index opfamily.  (We don't need the latter, but we use
7132          * get_op_opfamily_properties because it will throw an error if it fails to
7133          * find a matching pg_amop entry.)
7134          */
7135         get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
7136                                                            &strategy_op, &lefttype, &righttype);
7137
7138         /*
7139          * GIN always uses the "default" support functions, which are those with
7140          * lefttype == righttype == the opclass' opcintype (see
7141          * IndexSupportInitialize in relcache.c).
7142          */
7143         extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
7144                                                                            index->opcintype[indexcol],
7145                                                                            index->opcintype[indexcol],
7146                                                                            GIN_EXTRACTQUERY_PROC);
7147
7148         if (!OidIsValid(extractProcOid))
7149         {
7150                 /* should not happen; throw same error as index_getprocinfo */
7151                 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
7152                          GIN_EXTRACTQUERY_PROC, indexcol + 1,
7153                          get_rel_name(index->indexoid));
7154         }
7155
7156         /*
7157          * Choose collation to pass to extractProc (should match initGinState).
7158          */
7159         if (OidIsValid(index->indexcollations[indexcol]))
7160                 collation = index->indexcollations[indexcol];
7161         else
7162                 collation = DEFAULT_COLLATION_OID;
7163
7164         OidFunctionCall7Coll(extractProcOid,
7165                                                  collation,
7166                                                  query,
7167                                                  PointerGetDatum(&nentries),
7168                                                  UInt16GetDatum(strategy_op),
7169                                                  PointerGetDatum(&partial_matches),
7170                                                  PointerGetDatum(&extra_data),
7171                                                  PointerGetDatum(&nullFlags),
7172                                                  PointerGetDatum(&searchMode));
7173
7174         if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
7175         {
7176                 /* No match is possible */
7177                 return false;
7178         }
7179
7180         for (i = 0; i < nentries; i++)
7181         {
7182                 /*
7183                  * For a partial match we have no information with which to estimate the
7184                  * number of matched index entries, so we just estimate it as 100
7185                  */
7186                 if (partial_matches && partial_matches[i])
7187                         counts->partialEntries += 100;
7188                 else
7189                         counts->exactEntries++;
7190
7191                 counts->searchEntries++;
7192         }
7193
7194         if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
7195         {
7196                 /* Treat "include empty" like an exact-match item */
7197                 counts->exactEntries++;
7198                 counts->searchEntries++;
7199         }
7200         else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
7201         {
7202                 /* It's GIN_SEARCH_MODE_ALL */
7203                 counts->haveFullScan = true;
7204         }
7205
7206         return true;
7207 }
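/*
 * Illustrative example (editor's note, not part of the upstream source): for
 * a GIN index on a tsvector column, a qual like col @@ 'foo & bar'::tsquery
 * would typically extract two ordinary entries, adding 2 to exactEntries and
 * 2 to searchEntries, while a prefix query such as 'foo:*' typically yields a
 * partial match, adding 100 to partialEntries.
 */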
7208
7209 /*
7210  * Estimate the number of index terms that need to be searched for while
7211  * testing the given GIN index clause, and increment the counts in *counts
7212  * appropriately.  If the query is unsatisfiable, return false.
7213  */
7214 static bool
7215 gincost_opexpr(PlannerInfo *root,
7216                            IndexOptInfo *index,
7217                            IndexQualInfo *qinfo,
7218                            GinQualCounts *counts)
7219 {
7220         int                     indexcol = qinfo->indexcol;
7221         Oid                     clause_op = qinfo->clause_op;
7222         Node       *operand = qinfo->other_operand;
7223
7224         if (!qinfo->varonleft)
7225         {
7226                 /* must commute the operator */
7227                 clause_op = get_commutator(clause_op);
7228         }
7229
7230         /* aggressively reduce to a constant, and look through relabeling */
7231         operand = estimate_expression_value(root, operand);
7232
7233         if (IsA(operand, RelabelType))
7234                 operand = (Node *) ((RelabelType *) operand)->arg;
7235
7236         /*
7237          * We can't call the extractQuery method for an unknown operand, so
7238          * unless the operand is a Const we can't do much; just assume there will
7239          * be one ordinary search entry from the operand at runtime.
7240          */
7241         if (!IsA(operand, Const))
7242         {
7243                 counts->exactEntries++;
7244                 counts->searchEntries++;
7245                 return true;
7246         }
7247
7248         /* If Const is null, there can be no matches */
7249         if (((Const *) operand)->constisnull)
7250                 return false;
7251
7252         /* Otherwise, apply extractQuery and get the actual term counts */
7253         return gincost_pattern(index, indexcol, clause_op,
7254                                                    ((Const *) operand)->constvalue,
7255                                                    counts);
7256 }
7257
7258 /*
7259  * Estimate the number of index terms that need to be searched for while
7260  * testing the given GIN index clause, and increment the counts in *counts
7261  * appropriately.  If the query is unsatisfiable, return false.
7262  *
7263  * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
7264  * each of which involves one value from the RHS array, plus all the
7265  * non-array quals (if any).  To model this, we average the counts across
7266  * the RHS elements, and add the averages to the counts in *counts (which
7267  * correspond to per-indexscan costs).  We also multiply counts->arrayScans
7268  * by N, causing gincostestimate to scale up its estimates accordingly.
7269  */
7270 static bool
7271 gincost_scalararrayopexpr(PlannerInfo *root,
7272                                                   IndexOptInfo *index,
7273                                                   IndexQualInfo *qinfo,
7274                                                   double numIndexEntries,
7275                                                   GinQualCounts *counts)
7276 {
7277         int                     indexcol = qinfo->indexcol;
7278         Oid                     clause_op = qinfo->clause_op;
7279         Node       *rightop = qinfo->other_operand;
7280         ArrayType  *arrayval;
7281         int16           elmlen;
7282         bool            elmbyval;
7283         char            elmalign;
7284         int                     numElems;
7285         Datum      *elemValues;
7286         bool       *elemNulls;
7287         GinQualCounts arraycounts;
7288         int                     numPossible = 0;
7289         int                     i;
7290
7291         Assert(((ScalarArrayOpExpr *) qinfo->rinfo->clause)->useOr);
7292
7293         /* aggressively reduce to a constant, and look through relabeling */
7294         rightop = estimate_expression_value(root, rightop);
7295
7296         if (IsA(rightop, RelabelType))
7297                 rightop = (Node *) ((RelabelType *) rightop)->arg;
7298
7299         /*
7300          * We can't call the extractQuery method for an unknown operand, so
7301          * unless the operand is a Const we can't do much; just assume there will
7302          * be one ordinary search entry from each array entry at runtime, and
7303          * fall back on a probably-bad estimate of the number of array entries.
7304          */
7305         if (!IsA(rightop, Const))
7306         {
7307                 counts->exactEntries++;
7308                 counts->searchEntries++;
7309                 counts->arrayScans *= estimate_array_length(rightop);
7310                 return true;
7311         }
7312
7313         /* If Const is null, there can be no matches */
7314         if (((Const *) rightop)->constisnull)
7315                 return false;
7316
7317         /* Otherwise, extract the array elements and iterate over them */
7318         arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
7319         get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
7320                                                  &elmlen, &elmbyval, &elmalign);
7321         deconstruct_array(arrayval,
7322                                           ARR_ELEMTYPE(arrayval),
7323                                           elmlen, elmbyval, elmalign,
7324                                           &elemValues, &elemNulls, &numElems);
7325
7326         memset(&arraycounts, 0, sizeof(arraycounts));
7327
7328         for (i = 0; i < numElems; i++)
7329         {
7330                 GinQualCounts elemcounts;
7331
7332                 /* NULL can't match anything, so ignore it, as the executor will */
7333                 if (elemNulls[i])
7334                         continue;
7335
7336                 /* Otherwise, apply extractQuery and get the actual term counts */
7337                 memset(&elemcounts, 0, sizeof(elemcounts));
7338
7339                 if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
7340                                                         &elemcounts))
7341                 {
7342                         /* We ignore array elements that are unsatisfiable patterns */
7343                         numPossible++;
7344
7345                         if (elemcounts.haveFullScan)
7346                         {
7347                                 /*
7348                                  * Full index scan will be required.  We treat this as if
7349                                  * every key in the index had been listed in the query; is
7350                                  * that reasonable?
7351                                  */
7352                                 elemcounts.partialEntries = 0;
7353                                 elemcounts.exactEntries = numIndexEntries;
7354                                 elemcounts.searchEntries = numIndexEntries;
7355                         }
7356                         arraycounts.partialEntries += elemcounts.partialEntries;
7357                         arraycounts.exactEntries += elemcounts.exactEntries;
7358                         arraycounts.searchEntries += elemcounts.searchEntries;
7359                 }
7360         }
7361
7362         if (numPossible == 0)
7363         {
7364                 /* No satisfiable patterns in the array */
7365                 return false;
7366         }
7367
7368         /*
7369          * Now add the averages to the global counts.  This will give us an
7370          * estimate of the average number of terms searched for in each indexscan,
7371          * including contributions from both array and non-array quals.
7372          */
7373         counts->partialEntries += arraycounts.partialEntries / numPossible;
7374         counts->exactEntries += arraycounts.exactEntries / numPossible;
7375         counts->searchEntries += arraycounts.searchEntries / numPossible;
7376
7377         counts->arrayScans *= numPossible;
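        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): for a three-element array with one NULL element and two
         * satisfiable elements whose exactEntries come out as 2 and 4,
         * numPossible is 2, so (2 + 4) / 2 = 3 is added to
         * counts->exactEntries and counts->arrayScans is multiplied by 2.
         */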
7378
7379         return true;
7380 }
7381
7382 /*
7383  * GIN has search behavior completely different from other index types
7384  */
7385 void
7386 gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7387                                 Cost *indexStartupCost, Cost *indexTotalCost,
7388                                 Selectivity *indexSelectivity, double *indexCorrelation,
7389                                 double *indexPages)
7390 {
7391         IndexOptInfo *index = path->indexinfo;
7392         List       *indexQuals = path->indexquals;
7393         List       *indexOrderBys = path->indexorderbys;
7394         List       *qinfos;
7395         ListCell   *l;
7396         List       *selectivityQuals;
7397         double          numPages = index->pages,
7398                                 numTuples = index->tuples;
7399         double          numEntryPages,
7400                                 numDataPages,
7401                                 numPendingPages,
7402                                 numEntries;
7403         GinQualCounts counts;
7404         bool            matchPossible;
7405         double          partialScale;
7406         double          entryPagesFetched,
7407                                 dataPagesFetched,
7408                                 dataPagesFetchedBySel;
7409         double          qual_op_cost,
7410                                 qual_arg_cost,
7411                                 spc_random_page_cost,
7412                                 outer_scans;
7413         Relation        indexRel;
7414         GinStatsData ginStats;
7415
7416         /* Do preliminary analysis of indexquals */
7417         qinfos = deconstruct_indexquals(path);
7418
7419         /*
7420          * Obtain statistical information from the meta page, if possible.  Else
7421          * set ginStats to zeroes, and we'll cope below.
7422          */
7423         if (!index->hypothetical)
7424         {
7425                 indexRel = index_open(index->indexoid, AccessShareLock);
7426                 ginGetStats(indexRel, &ginStats);
7427                 index_close(indexRel, AccessShareLock);
7428         }
7429         else
7430         {
7431                 memset(&ginStats, 0, sizeof(ginStats));
7432         }
7433
7434         /*
7435          * Assuming we got valid (nonzero) stats at all, nPendingPages can be
7436          * trusted, but the other fields are data as of the last VACUUM.  We can
7437          * scale them up to account for growth since then, but that method only
7438          * goes so far; in the worst case, the stats might be for a completely
7439          * empty index, and scaling them will produce pretty bogus numbers.
7440          * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
7441          * it's grown more than that, fall back to estimating things only from the
7442          * assumed-accurate index size.  But we'll trust nPendingPages in any case
7443          * so long as it's not clearly insane, ie, more than the index size.
7444          */
7445         if (ginStats.nPendingPages < numPages)
7446                 numPendingPages = ginStats.nPendingPages;
7447         else
7448                 numPendingPages = 0;
7449
7450         if (numPages > 0 && ginStats.nTotalPages <= numPages &&
7451                 ginStats.nTotalPages > numPages / 4 &&
7452                 ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
7453         {
7454                 /*
7455                  * OK, the stats seem close enough to sane to be trusted.  But we
7456                  * still need to scale them by the ratio numPages / nTotalPages to
7457                  * account for growth since the last VACUUM.
7458                  */
7459                 double          scale = numPages / ginStats.nTotalPages;
7460
7461                 numEntryPages = ceil(ginStats.nEntryPages * scale);
7462                 numDataPages = ceil(ginStats.nDataPages * scale);
7463                 numEntries = ceil(ginStats.nEntries * scale);
7464                 /* ensure we didn't round up too much */
7465                 numEntryPages = Min(numEntryPages, numPages - numPendingPages);
7466                 numDataPages = Min(numDataPages,
7467                                                    numPages - numPendingPages - numEntryPages);
7468         }
7469         else
7470         {
7471                 /*
7472                  * We might get here because it's a hypothetical index, or an index
7473                  * created pre-9.1 and never vacuumed since upgrading (in which case
7474                  * its stats would read as zeroes), or just because it's grown too
7475                  * much since the last VACUUM for us to put our faith in scaling.
7476                  *
7477                  * Invent some plausible internal statistics based on the index page
7478                  * count (and clamp that to at least 10 pages, just in case).  We
7479                  * estimate that 90% of the index is entry pages, and the rest is data
7480                  * pages.  Estimate 100 entries per entry page; this is rather bogus
7481                  * since it'll depend on the size of the keys, but it's more robust
7482                  * than trying to predict the number of entries per heap tuple.
7483                  */
7484                 numPages = Max(numPages, 10);
7485                 numEntryPages = floor((numPages - numPendingPages) * 0.90);
7486                 numDataPages = numPages - numPendingPages - numEntryPages;
7487                 numEntries = floor(numEntryPages * 100);
7488         }
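        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): for a 100-page index with no usable stats and no pending
         * pages, this branch yields numEntryPages = floor(100 * 0.90) = 90,
         * numDataPages = 10, and numEntries = 9000.
         */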
7489
7490         /* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
7491         if (numEntries < 1)
7492                 numEntries = 1;
7493
7494         /*
7495          * Include predicate in selectivityQuals (should match
7496          * genericcostestimate)
7497          */
7498         if (index->indpred != NIL)
7499         {
7500                 List       *predExtraQuals = NIL;
7501
7502                 foreach(l, index->indpred)
7503                 {
7504                         Node       *predQual = (Node *) lfirst(l);
7505                         List       *oneQual = list_make1(predQual);
7506
7507                         if (!predicate_implied_by(oneQual, indexQuals))
7508                                 predExtraQuals = list_concat(predExtraQuals, oneQual);
7509                 }
7510                 /* list_concat avoids modifying the passed-in indexQuals list */
7511                 selectivityQuals = list_concat(predExtraQuals, indexQuals);
7512         }
7513         else
7514                 selectivityQuals = indexQuals;
7515
7516         /* Estimate the fraction of main-table tuples that will be visited */
7517         *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
7518                                                                                            index->rel->relid,
7519                                                                                            JOIN_INNER,
7520                                                                                            NULL);
7521
7522         /* fetch estimated page cost for tablespace containing index */
7523         get_tablespace_page_costs(index->reltablespace,
7524                                                           &spc_random_page_cost,
7525                                                           NULL);
7526
7527         /*
7528          * Generic assumption about index correlation: there isn't any.
7529          */
7530         *indexCorrelation = 0.0;
7531
7532         /*
7533          * Examine quals to estimate number of search entries & partial matches
7534          */
7535         memset(&counts, 0, sizeof(counts));
7536         counts.arrayScans = 1;
7537         matchPossible = true;
7538
7539         foreach(l, qinfos)
7540         {
7541                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
7542                 Expr       *clause = qinfo->rinfo->clause;
7543
7544                 if (IsA(clause, OpExpr))
7545                 {
7546                         matchPossible = gincost_opexpr(root,
7547                                                                                    index,
7548                                                                                    qinfo,
7549                                                                                    &counts);
7550                         if (!matchPossible)
7551                                 break;
7552                 }
7553                 else if (IsA(clause, ScalarArrayOpExpr))
7554                 {
7555                         matchPossible = gincost_scalararrayopexpr(root,
7556                                                                                                           index,
7557                                                                                                           qinfo,
7558                                                                                                           numEntries,
7559                                                                                                           &counts);
7560                         if (!matchPossible)
7561                                 break;
7562                 }
7563                 else
7564                 {
7565                         /* shouldn't be anything else for a GIN index */
7566                         elog(ERROR, "unsupported GIN indexqual type: %d",
7567                                  (int) nodeTag(clause));
7568                 }
7569         }
7570
7571         /* Fall out if there were any provably-unsatisfiable quals */
7572         if (!matchPossible)
7573         {
7574                 *indexStartupCost = 0;
7575                 *indexTotalCost = 0;
7576                 *indexSelectivity = 0;
7577                 return;
7578         }
7579
7580         if (counts.haveFullScan || indexQuals == NIL)
7581         {
7582                 /*
7583                  * Full index scan will be required.  We treat this as if every key in
7584                  * the index had been listed in the query; is that reasonable?
7585                  */
7586                 counts.partialEntries = 0;
7587                 counts.exactEntries = numEntries;
7588                 counts.searchEntries = numEntries;
7589         }
7590
7591         /* Will we have more than one iteration of a nestloop scan? */
7592         outer_scans = loop_count;
7593
7594         /*
7595          * Compute the cost to begin the scan.  First of all, pay attention to
7596          * the pending list.
7597          */
7598         entryPagesFetched = numPendingPages;
7599
7600         /*
7601          * Estimate the number of entry pages read.  We need to do
7602          * counts.searchEntries searches.  Use a power function, as a tree
7603          * descent would suggest, though the number of tuples on leaf pages is
7604          * usually much greater.  Here we include all searches in the entry tree,
7605          * including the search for the first entry in the partial match algorithm.
7606          */
7607         entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
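        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): with numEntryPages = 90, pow(90, 0.15) is about 1.96,
         * which rint() rounds to 2; two search entries then add
         * ceil(2 * 2) = 4 entry pages here.
         */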
7608
7609         /*
7610          * Add an estimate of the entry pages read by the partial match
7611          * algorithm.  It's a scan over leaf pages in the entry tree.  We don't
7612          * have any useful stats here, so estimate it as a proportion.  Because
7613          * counts.partialEntries is really pretty bogus (see the code above), it
7614          * could exceed numEntries; clamp the proportion to ensure sanity.
7615          */
7616         partialScale = counts.partialEntries / numEntries;
7617         partialScale = Min(partialScale, 1.0);
7618
7619         entryPagesFetched += ceil(numEntryPages * partialScale);
7620
7621         /*
7622          * The partial match algorithm reads all data pages before doing the
7623          * actual scan, so it's a startup cost.  Again, we don't have any useful
7624          * stats here, so estimate it as a proportion.
7625          */
7626         dataPagesFetched = ceil(numDataPages * partialScale);
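        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): continuing with numEntries = 9000, numEntryPages = 90 and
         * numDataPages = 10, a single partial-match entry
         * (counts.partialEntries = 100) gives partialScale = 100 / 9000 =
         * 0.0111, so ceil(90 * 0.0111) = 1 extra entry page and
         * ceil(10 * 0.0111) = 1 data page are charged at startup.
         */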
7627
7628         /*
7629          * Calculate cache effects if more than one scan due to nestloops or array
7630          * quals.  The result is pro-rated per nestloop scan, but the array qual
7631          * factor shouldn't be pro-rated (compare genericcostestimate).
7632          */
7633         if (outer_scans > 1 || counts.arrayScans > 1)
7634         {
7635                 entryPagesFetched *= outer_scans * counts.arrayScans;
7636                 entryPagesFetched = index_pages_fetched(entryPagesFetched,
7637                                                                                                 (BlockNumber) numEntryPages,
7638                                                                                                 numEntryPages, root);
7639                 entryPagesFetched /= outer_scans;
7640                 dataPagesFetched *= outer_scans * counts.arrayScans;
7641                 dataPagesFetched = index_pages_fetched(dataPagesFetched,
7642                                                                                            (BlockNumber) numDataPages,
7643                                                                                            numDataPages, root);
7644                 dataPagesFetched /= outer_scans;
7645         }
7646
7647         /*
7648          * Here we use random page cost because logically-close pages could be far
7649          * apart on disk.
7650          */
7651         *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
7652
7653         /*
7654          * Now compute the number of data pages fetched during the scan.
7655          *
7656          * We assume every entry to have the same number of items, and that there
7657          * is no overlap between them. (XXX: tsvector and array opclasses collect
7658          * statistics on the frequency of individual keys; it would be nice to use
7659          * those here.)
7660          */
7661         dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
7662
7663         /*
7664          * If there is a lot of overlap among the entries, in particular if one of
7665          * the entries is very frequent, the above calculation can grossly
7666          * under-estimate.  As a simple cross-check, calculate a lower bound based
7667          * on the overall selectivity of the quals.  At a minimum, we must read
7668          * one item pointer for each matching entry.
7669          *
7670          * The width of each item pointer varies, based on the level of
7671          * compression.  We don't have statistics on that, but an average of
7672          * around 3 bytes per item is fairly typical.
7673          */
7674         dataPagesFetchedBySel = ceil(*indexSelectivity *
7675                                                                  (numTuples / (BLCKSZ / 3)));
7676         if (dataPagesFetchedBySel > dataPagesFetched)
7677                 dataPagesFetched = dataPagesFetchedBySel;
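        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): assuming the default 8 kB block size, BLCKSZ / 3 = 2730;
         * with numTuples = 1,000,000 and an index selectivity of 0.01 the
         * lower bound is ceil(0.01 * (1000000 / 2730)) = ceil(3.66) = 4
         * data pages.
         */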
7678
7679         /* Account for cache effects, the same as above */
7680         if (outer_scans > 1 || counts.arrayScans > 1)
7681         {
7682                 dataPagesFetched *= outer_scans * counts.arrayScans;
7683                 dataPagesFetched = index_pages_fetched(dataPagesFetched,
7684                                                                                            (BlockNumber) numDataPages,
7685                                                                                            numDataPages, root);
7686                 dataPagesFetched /= outer_scans;
7687         }
7688
7689         /* And apply random_page_cost as the cost per page */
7690         *indexTotalCost = *indexStartupCost +
7691                 dataPagesFetched * spc_random_page_cost;
7692
7693         /*
7694          * Add on index qual eval costs, much as in genericcostestimate
7695          */
7696         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
7697                 orderby_operands_eval_cost(root, path);
7698         qual_op_cost = cpu_operator_cost *
7699                 (list_length(indexQuals) + list_length(indexOrderBys));
7700
7701         *indexStartupCost += qual_arg_cost;
7702         *indexTotalCost += qual_arg_cost;
7703         *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
7704         *indexPages = dataPagesFetched;
7705 }
7706
7707 /*
7708  * BRIN has search behavior completely different from other index types
7709  */
7710 void
7711 brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7712                                  Cost *indexStartupCost, Cost *indexTotalCost,
7713                                  Selectivity *indexSelectivity, double *indexCorrelation,
7714                                  double *indexPages)
7715 {
7716         IndexOptInfo *index = path->indexinfo;
7717         List       *indexQuals = path->indexquals;
7718         double          numPages = index->pages;
7719         RelOptInfo *baserel = index->rel;
7720         RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
7721         List       *qinfos;
7722         Cost            spc_seq_page_cost;
7723         Cost            spc_random_page_cost;
7724         double          qual_arg_cost;
7725         double          qualSelectivity;
7726         BrinStatsData statsData;
7727         double          indexRanges;
7728         double          minimalRanges;
7729         double          estimatedRanges;
7730         double          selec;
7731         Relation        indexRel;
7732         ListCell   *l;
7733         VariableStatData vardata;
7734
7735         Assert(rte->rtekind == RTE_RELATION);
7736
7737         /* fetch estimated page cost for the tablespace containing the index */
7738         get_tablespace_page_costs(index->reltablespace,
7739                                                           &spc_random_page_cost,
7740                                                           &spc_seq_page_cost);
7741
7742         /*
7743          * Obtain some data from the index itself.
7744          */
7745         indexRel = index_open(index->indexoid, AccessShareLock);
7746         brinGetStats(indexRel, &statsData);
7747         index_close(indexRel, AccessShareLock);
7748
7749         /*
7750          * Compute index correlation
7751          *
7752          * Because we can use all index quals equally when scanning, we can use
7753          * the largest correlation (in absolute value) among columns used by the
7754          * query.  Start at zero, the worst possible case.  If we cannot find
7755          * any correlation statistics, we will keep it as 0.
7756          */
7757         *indexCorrelation = 0;
7758
7759         qinfos = deconstruct_indexquals(path);
7760         foreach(l, qinfos)
7761         {
7762                 IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
7763                 AttrNumber      attnum = index->indexkeys[qinfo->indexcol];
7764
7765                 /* attempt to lookup stats in relation for this index column */
7766                 if (attnum != 0)
7767                 {
7768                         /* Simple variable -- look to stats for the underlying table */
7769                         if (get_relation_stats_hook &&
7770                                 (*get_relation_stats_hook) (root, rte, attnum, &vardata))
7771                         {
7772                                 /*
7773                                  * The hook took control of acquiring a stats tuple.  If it
7774                                  * did supply a tuple, it'd better have supplied a freefunc.
7775                                  */
7776                                 if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
7777                                         elog(ERROR,
7778                                                  "no function provided to release variable stats with");
7779                         }
7780                         else
7781                         {
7782                                 vardata.statsTuple =
7783                                         SearchSysCache3(STATRELATTINH,
7784                                                                         ObjectIdGetDatum(rte->relid),
7785                                                                         Int16GetDatum(attnum),
7786                                                                         BoolGetDatum(false));
7787                                 vardata.freefunc = ReleaseSysCache;
7788                         }
7789                 }
7790                 else
7791                 {
7792                         /*
7793                          * Looks like we've found an expression column in the index. Let's
7794                          * see if there are any stats for it.
7795                          */
7796
7797                         /* get the 1-based attnum from the 0-based index column */
7798                         attnum = qinfo->indexcol + 1;
7799
7800                         if (get_index_stats_hook &&
7801                                 (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
7802                         {
7803                                 /*
7804                                  * The hook took control of acquiring a stats tuple.  If it did
7805                                  * supply a tuple, it'd better have supplied a freefunc.
7806                                  */
7807                                 if (HeapTupleIsValid(vardata.statsTuple) &&
7808                                         !vardata.freefunc)
7809                                         elog(ERROR, "no function provided to release variable stats with");
7810                         }
7811                         else
7812                         {
7813                                 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
7814                                                                                                          ObjectIdGetDatum(index->indexoid),
7815                                                                                                          Int16GetDatum(attnum),
7816                                                                                                          BoolGetDatum(false));
7817                                 vardata.freefunc = ReleaseSysCache;
7818                         }
7819                 }
7820
7821                 if (HeapTupleIsValid(vardata.statsTuple))
7822                 {
7823                         float4     *numbers;
7824                         int                     nnumbers;
7825
7826                         if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
7827                                                                  STATISTIC_KIND_CORRELATION,
7828                                                                  InvalidOid,
7829                                                                  NULL,
7830                                                                  NULL, NULL,
7831                                                                  &numbers, &nnumbers))
7832                         {
7833                                 double          varCorrelation = 0.0;
7834
7835                                 if (nnumbers > 0)
7836                                         varCorrelation = Abs(numbers[0]);
7837
7838                                 if (varCorrelation > *indexCorrelation)
7839                                         *indexCorrelation = varCorrelation;
7840
7841                                 free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
7842                         }
7843                 }
7844
7845                 ReleaseVariableStats(vardata);
7846         }
7847
7848         qualSelectivity = clauselist_selectivity(root, indexQuals,
7849                                                                                          baserel->relid,
7850                                                                                          JOIN_INNER, NULL);
7851
7852         /* work out the actual number of ranges in the index */
7853         indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
7854                                           1.0);
7855
7856         /*
7857          * Now calculate the minimum possible number of ranges we could match if
7858          * all of the rows were in perfect order in the table's heap.
7859          */
7860         minimalRanges = ceil(indexRanges * qualSelectivity);
7861
7862         /*
7863          * Now estimate the number of ranges that we'll touch by using the
7864          * indexCorrelation from the stats. Careful not to divide by zero
7865          * (note we're using the absolute value of the correlation).
7866          */
7867         if (*indexCorrelation < 1.0e-10)
7868                 estimatedRanges = indexRanges;
7869         else
7870                 estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
7871
7872         /* we expect to visit this portion of the table */
7873         selec = estimatedRanges / indexRanges;
7874
7875         CLAMP_PROBABILITY(selec);
7876
7877         *indexSelectivity = selec;
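        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): for a 10,000-page heap and the default pages_per_range of
         * 128, indexRanges = ceil(10000 / 128) = 79; with qualSelectivity =
         * 0.01, minimalRanges = ceil(0.79) = 1, and with an absolute
         * correlation of 0.9, estimatedRanges = Min(1 / 0.9, 79), about 1.11,
         * so selec is about 1.11 / 79 = 0.014.
         */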
7878
7879         /*
7880          * Compute the index qual costs, much as in genericcostestimate, to add
7881          * to the index costs.
7882          */
7883         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
7884                 orderby_operands_eval_cost(root, path);
7885
7886         /*
7887          * Compute the startup cost as the cost to read the whole revmap
7888          * sequentially, including the cost to execute the index quals.
7889          */
7890         *indexStartupCost =
7891                 spc_seq_page_cost * statsData.revmapNumPages * loop_count;
7892         *indexStartupCost += qual_arg_cost;
7893
7894         /*
7895          * To read a BRIN index there might be a bit of back and forth over
7896          * regular pages, as revmap might point to them out of sequential order;
7897          * calculate the total cost as reading the whole index in random order.
7898          */
7899         *indexTotalCost = *indexStartupCost +
7900                 spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
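        /*
         * Illustrative example (editor's note, not part of the upstream
         * source): with the default tablespace costs (seq_page_cost = 1.0,
         * random_page_cost = 4.0), a 10-page index with a single revmap page
         * and loop_count = 1 gives, ignoring qual_arg_cost, a startup cost of
         * about 1.0 and a total of 1.0 + 4.0 * 9 = 37.0 before the per-range
         * CPU charge below.
         */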
7901
7902         /*
7903          * Charge a small amount per range tuple that we expect to match.  This
7904          * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
7905          * will set a bit for each page in the range when we find a matching
7906          * range, so we must multiply the charge by the number of pages in the
7907          * range.
7908          */
7909         *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
7910                 statsData.pagesPerRange;
7911
7912         *indexPages = index->pages;
7913 }