granicus.if.org Git - postgresql/blob - src/backend/utils/adt/selfuncs.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * selfuncs.c
   4  *        Selectivity functions and index cost estimation functions for
   5  *        standard operators and index access methods.
   6  *
   7  *        Selectivity routines are registered in the pg_operator catalog
   8  *        in the "oprrest" and "oprjoin" attributes.
   9  *
  10  *        Index cost functions are registered in the pg_am catalog
  11  *        in the "amcostestimate" attribute.
  12  *
  13  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  14  * Portions Copyright (c) 1994, Regents of the University of California
  15  *
  16  *
  17  * IDENTIFICATION
  18  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.107 2002/04/03 05:39:31 petere Exp $
  19  *
  20  *-------------------------------------------------------------------------
  21  */
  22
  23 /*----------
  24  * Operator selectivity estimation functions are called to estimate the
  25  * selectivity of WHERE clauses whose top-level operator is their operator.
  26  * We divide the problem into two cases:
  27  *              Restriction clause estimation: the clause involves vars of just
  28  *                      one relation.
  29  *              Join clause estimation: the clause involves vars of multiple rels.
  30  * Join selectivity estimation is far more difficult and usually less accurate
  31  * than restriction estimation.
  32  *
  33  * When dealing with the inner scan of a nestloop join, we consider the
  34  * join's joinclauses as restriction clauses for the inner relation, and
  35  * treat vars of the outer relation as parameters (a/k/a constants of unknown
  36  * values).  So, restriction estimators need to be able to accept an argument
  37  * telling which relation is to be treated as the variable.
  38  *
  39  * The call convention for a restriction estimator (oprrest function) is
  40  *
  41  *              Selectivity oprrest (Query *root,
  42  *                                                       Oid operator,
  43  *                                                       List *args,
  44  *                                                       int varRelid);
  45  *
  46  * root: general information about the query (rtable and RelOptInfo lists
  47  * are particularly important for the estimator).
  48  * operator: OID of the specific operator in question.
  49  * args: argument list from the operator clause.
  50  * varRelid: if not zero, the relid (rtable index) of the relation to
  51  * be treated as the variable relation.  May be zero if the args list
  52  * is known to contain vars of only one relation.
  53  *
  54  * This is represented at the SQL level (in pg_proc) as
  55  *
  56  *              float8 oprrest (opaque, oid, opaque, int4);
  57  *
  58  * The call convention for a join estimator (oprjoin function) is similar
  59  * except that varRelid is not needed:
  60  *
  61  *              Selectivity oprjoin (Query *root,
  62  *                                                       Oid operator,
  63  *                                                       List *args);
  64  *
  65  *              float8 oprjoin (opaque, oid, opaque);
  66  *----------
  67  */
  68
  69 #include "postgres.h"
  70
  71 #include <ctype.h>
  72 #include <math.h>
  73 #include <locale.h>
  74
  75 #include "access/heapam.h"
  76 #include "catalog/catname.h"
  77 #include "catalog/pg_operator.h"
  78 #include "catalog/pg_proc.h"
  79 #include "catalog/pg_statistic.h"
  80 #include "catalog/pg_type.h"
  81 #include "mb/pg_wchar.h"
  82 #include "nodes/makefuncs.h"
  83 #include "optimizer/clauses.h"
  84 #include "optimizer/cost.h"
  85 #include "optimizer/pathnode.h"
  86 #include "optimizer/plancat.h"
  87 #include "optimizer/prep.h"
  88 #include "parser/parse_func.h"
  89 #include "parser/parse_oper.h"
  90 #include "parser/parsetree.h"
  91 #include "utils/builtins.h"
  92 #include "utils/date.h"
  93 #include "utils/datum.h"
  94 #include "utils/int8.h"
  95 #include "utils/lsyscache.h"
  96 #include "utils/pg_locale.h"
  97 #include "utils/selfuncs.h"
  98 #include "utils/syscache.h"
  99
 100
 101 /*
 102  * Note: the default selectivity estimates are not chosen entirely at random.
 103  * We want them to be small enough to ensure that indexscans will be used if
 104  * available, for typical table densities of ~100 tuples/page.  Thus, for
 105  * example, 0.01 is not quite small enough, since that makes it appear that
 106  * nearly all pages will be hit anyway.  Also, since we sometimes estimate
 107  * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
 108  * 1/DEFAULT_EQ_SEL.
 109  */
 110
 111 /* default selectivity estimate for equalities such as "A = b" */
 112 #define DEFAULT_EQ_SEL  0.005
 113
 114 /* default selectivity estimate for inequalities such as "A < b" */
 115 #define DEFAULT_INEQ_SEL  (1.0 / 3.0)
 116
 117 /* default selectivity estimate for pattern-match operators such as LIKE */
 118 #define DEFAULT_MATCH_SEL       0.005
 119
 120 /* default number of distinct values in a table */
 121 #define DEFAULT_NUM_DISTINCT  200
 122
 123 /* default selectivity estimate for boolean and null test nodes */
 124 #define DEFAULT_UNK_SEL                 0.005
 125 #define DEFAULT_NOT_UNK_SEL             (1.0 - DEFAULT_UNK_SEL)
 126 #define DEFAULT_BOOL_SEL                0.5
 127
 128 /*
 129  * Clamp a computed probability estimate (which may suffer from roundoff or
 130  * estimation errors) to valid range.  Argument must be a float variable.
 131  */
 132 #define CLAMP_PROBABILITY(p) \
 133         do { \
 134                 if (p < 0.0) \
 135                         p = 0.0; \
 136                 else if (p > 1.0) \
 137                         p = 1.0; \
 138         } while (0)
 139
 140
 141 static bool get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max);
 142 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
 143                                   Datum lobound, Datum hibound, Oid boundstypid,
 144                                   double *scaledlobound, double *scaledhibound);
 145 static double convert_numeric_to_scalar(Datum value, Oid typid);
 146 static void convert_string_to_scalar(unsigned char *value,
 147                                                  double *scaledvalue,
 148                                                  unsigned char *lobound,
 149                                                  double *scaledlobound,
 150                                                  unsigned char *hibound,
 151                                                  double *scaledhibound);
 152 static void convert_bytea_to_scalar(Datum value,
 153                                                 double *scaledvalue,
 154                                                 Datum lobound,
 155                                                 double *scaledlobound,
 156                                                 Datum hibound,
 157                                                 double *scaledhibound);
 158 static double convert_one_string_to_scalar(unsigned char *value,
 159                                                          int rangelo, int rangehi);
 160 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
 161                                                         int rangelo, int rangehi);
 162 static unsigned char *convert_string_datum(Datum value, Oid typid);
 163 static double convert_timevalue_to_scalar(Datum value, Oid typid);
 164 static double get_att_numdistinct(Query *root, Var *var,
 165                                         Form_pg_statistic stats);
 166 static bool get_restriction_var(List *args, int varRelid,
 167                                         Var **var, Node **other,
 168                                         bool *varonleft);
 169 static void get_join_vars(List *args, Var **var1, Var **var2);
 170 static Selectivity prefix_selectivity(Query *root, Var *var, char *prefix);
 171 static Selectivity pattern_selectivity(char *patt, Pattern_Type ptype);
 172 static bool string_lessthan(const char *str1, const char *str2,
 173                                 Oid datatype);
 174 static Oid      find_operator(const char *opname, Oid datatype);
 175 static Datum string_to_datum(const char *str, Oid datatype);
 176 static Const *string_to_const(const char *str, Oid datatype);
 177
 178
 179 /*
 180  *              eqsel                   - Selectivity of "=" for any data types.
 181  *
 182  * Note: this routine is also used to estimate selectivity for some
 183  * operators that are not "=" but have comparable selectivity behavior,
 184  * such as "~=" (geometric approximate-match).  Even for "=", we must
 185  * keep in mind that the left and right datatypes may differ.
 186  */
 187 Datum
 188 eqsel(PG_FUNCTION_ARGS)
 189 {
 190         Query      *root = (Query *) PG_GETARG_POINTER(0);
 191         Oid                     operator = PG_GETARG_OID(1);
 192         List       *args = (List *) PG_GETARG_POINTER(2);
 193         int                     varRelid = PG_GETARG_INT32(3);
 194         Var                *var;
 195         Node       *other;
 196         bool            varonleft;
 197         Oid                     relid;
 198         HeapTuple       statsTuple;
 199         Datum      *values;
 200         int                     nvalues;
 201         float4     *numbers;
 202         int                     nnumbers;
 203         double          selec;
 204
 205         /*
 206          * If expression is not var = something or something = var for a
 207          * simple var of a real relation (no subqueries, for now), then punt
 208          * and return a default estimate.
 209          */
 210         if (!get_restriction_var(args, varRelid,
 211                                                          &var, &other, &varonleft))
 212                 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
 213         relid = getrelid(var->varno, root->rtable);
 214         if (relid == InvalidOid)
 215                 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
 216
 217         /*
 218          * If the something is a NULL constant, assume operator is strict and
 219          * return zero, ie, operator will never return TRUE.
 220          */
 221         if (IsA(other, Const) &&((Const *) other)->constisnull)
 222                 PG_RETURN_FLOAT8(0.0);
 223
 224         /* get stats for the attribute, if available */
 225         statsTuple = SearchSysCache(STATRELATT,
 226                                                                 ObjectIdGetDatum(relid),
 227                                                                 Int16GetDatum(var->varattno),
 228                                                                 0, 0);
 229         if (HeapTupleIsValid(statsTuple))
 230         {
 231                 Form_pg_statistic stats;
 232
 233                 stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 234
 235                 if (IsA(other, Const))
 236                 {
 237                         /* Var is being compared to a known non-null constant */
 238                         Datum           constval = ((Const *) other)->constvalue;
 239                         bool            match = false;
 240                         int                     i;
 241
 242                         /*
 243                          * Is the constant "=" to any of the column's most common
 244                          * values?      (Although the given operator may not really be
 245                          * "=", we will assume that seeing whether it returns TRUE is
 246                          * an appropriate test.  If you don't like this, maybe you
 247                          * shouldn't be using eqsel for your operator...)
 248                          */
 249                         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
 250                                                                  STATISTIC_KIND_MCV, InvalidOid,
 251                                                                  &values, &nvalues,
 252                                                                  &numbers, &nnumbers))
 253                         {
 254                                 FmgrInfo        eqproc;
 255
 256                                 fmgr_info(get_opcode(operator), &eqproc);
 257
 258                                 for (i = 0; i < nvalues; i++)
 259                                 {
 260                                         /* be careful to apply operator right way 'round */
 261                                         if (varonleft)
 262                                                 match = DatumGetBool(FunctionCall2(&eqproc,
 263                                                                                                                    values[i],
 264                                                                                                                    constval));
 265                                         else
 266                                                 match = DatumGetBool(FunctionCall2(&eqproc,
 267                                                                                                                    constval,
 268                                                                                                                    values[i]));
 269                                         if (match)
 270                                                 break;
 271                                 }
 272                         }
 273                         else
 274                         {
 275                                 /* no most-common-value info available */
 276                                 values = NULL;
 277                                 numbers = NULL;
 278                                 i = nvalues = nnumbers = 0;
 279                         }
 280
 281                         if (match)
 282                         {
 283                                 /*
 284                                  * Constant is "=" to this common value.  We know
 285                                  * selectivity exactly (or as exactly as VACUUM could
 286                                  * calculate it, anyway).
 287                                  */
 288                                 selec = numbers[i];
 289                         }
 290                         else
 291                         {
 292                                 /*
 293                                  * Comparison is against a constant that is neither NULL
 294                                  * nor any of the common values.  Its selectivity cannot
 295                                  * be more than this:
 296                                  */
 297                                 double          sumcommon = 0.0;
 298                                 double          otherdistinct;
 299
 300                                 for (i = 0; i < nnumbers; i++)
 301                                         sumcommon += numbers[i];
 302                                 selec = 1.0 - sumcommon - stats->stanullfrac;
 303                                 CLAMP_PROBABILITY(selec);
 304
 305                                 /*
 306                                  * and in fact it's probably a good deal less. We
 307                                  * approximate that all the not-common values share this
 308                                  * remaining fraction equally, so we divide by the number
 309                                  * of other distinct values.
 310                                  */
 311                                 otherdistinct = get_att_numdistinct(root, var, stats)
 312                                         - nnumbers;
 313                                 if (otherdistinct > 1)
 314                                         selec /= otherdistinct;
 315
 316                                 /*
 317                                  * Another cross-check: selectivity shouldn't be estimated
 318                                  * as more than the least common "most common value".
 319                                  */
 320                                 if (nnumbers > 0 && selec > numbers[nnumbers - 1])
 321                                         selec = numbers[nnumbers - 1];
 322                         }
 323
 324                         free_attstatsslot(var->vartype, values, nvalues,
 325                                                           numbers, nnumbers);
 326                 }
 327                 else
 328                 {
 329                         double          ndistinct;
 330
 331                         /*
 332                          * Search is for a value that we do not know a priori, but we
 333                          * will assume it is not NULL.  Estimate the selectivity as
 334                          * non-null fraction divided by number of distinct values, so
 335                          * that we get a result averaged over all possible values
 336                          * whether common or uncommon.  (Essentially, we are assuming
 337                          * that the not-yet-known comparison value is equally likely
 338                          * to be any of the possible values, regardless of their
 339                          * frequency in the table.      Is that a good idea?)
 340                          */
 341                         selec = 1.0 - stats->stanullfrac;
 342                         ndistinct = get_att_numdistinct(root, var, stats);
 343                         if (ndistinct > 1)
 344                                 selec /= ndistinct;
 345
 346                         /*
 347                          * Cross-check: selectivity should never be estimated as more
 348                          * than the most common value's.
 349                          */
 350                         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
 351                                                                  STATISTIC_KIND_MCV, InvalidOid,
 352                                                                  NULL, NULL,
 353                                                                  &numbers, &nnumbers))
 354                         {
 355                                 if (nnumbers > 0 && selec > numbers[0])
 356                                         selec = numbers[0];
 357                                 free_attstatsslot(var->vartype, NULL, 0, numbers, nnumbers);
 358                         }
 359                 }
 360
 361                 ReleaseSysCache(statsTuple);
 362         }
 363         else
 364         {
 365                 /*
 366                  * No VACUUM ANALYZE stats available, so make a guess using
 367                  * estimated number of distinct values and assuming they are
 368                  * equally common.      (The guess is unlikely to be very good, but we
 369                  * do know a few special cases.)
 370                  */
 371                 selec = 1.0 / get_att_numdistinct(root, var, NULL);
 372         }
 373
 374         /* result should be in range, but make sure... */
 375         CLAMP_PROBABILITY(selec);
 376
 377         PG_RETURN_FLOAT8((float8) selec);
 378 }
 379
 380 /*
 381  *              neqsel                  - Selectivity of "!=" for any data types.
 382  *
 383  * This routine is also used for some operators that are not "!="
 384  * but have comparable selectivity behavior.  See above comments
 385  * for eqsel().
 386  */
 387 Datum
 388 neqsel(PG_FUNCTION_ARGS)
 389 {
 390         Query      *root = (Query *) PG_GETARG_POINTER(0);
 391         Oid                     operator = PG_GETARG_OID(1);
 392         List       *args = (List *) PG_GETARG_POINTER(2);
 393         int                     varRelid = PG_GETARG_INT32(3);
 394         Oid                     eqop;
 395         float8          result;
 396
 397         /*
 398          * We want 1 - eqsel() where the equality operator is the one
 399          * associated with this != operator, that is, its negator.
 400          */
 401         eqop = get_negator(operator);
 402         if (eqop)
 403         {
 404                 result = DatumGetFloat8(DirectFunctionCall4(eqsel,
 405                                                                                                         PointerGetDatum(root),
 406                                                                                                   ObjectIdGetDatum(eqop),
 407                                                                                                         PointerGetDatum(args),
 408                                                                                            Int32GetDatum(varRelid)));
 409         }
 410         else
 411         {
 412                 /* Use default selectivity (should we raise an error instead?) */
 413                 result = DEFAULT_EQ_SEL;
 414         }
 415         result = 1.0 - result;
 416         PG_RETURN_FLOAT8(result);
 417 }
 418
 419 /*
 420  *      scalarineqsel           - Selectivity of "<", "<=", ">", ">=" for scalars.
 421  *
 422  * This is the guts of both scalarltsel and scalargtsel.  The caller has
 423  * commuted the clause, if necessary, so that we can treat the Var as
 424  * being on the left.  The caller must also make sure that the other side
 425  * of the clause is a non-null Const, and dissect same into a value and
 426  * datatype.
 427  *
 428  * This routine works for any datatype (or pair of datatypes) known to
 429  * convert_to_scalar().  If it is applied to some other datatype,
 430  * it will return a default estimate.
 431  */
 432 static double
 433 scalarineqsel(Query *root, Oid operator, bool isgt,
 434                           Var *var, Datum constval, Oid consttype)
 435 {
 436         Oid                     relid;
 437         HeapTuple       statsTuple;
 438         Form_pg_statistic stats;
 439         FmgrInfo        opproc;
 440         Datum      *values;
 441         int                     nvalues;
 442         float4     *numbers;
 443         int                     nnumbers;
 444         double          mcv_selec,
 445                                 hist_selec,
 446                                 sumcommon;
 447         double          selec;
 448         int                     i;
 449
 450         /*
 451          * If expression is not var op something or something op var for a
 452          * simple var of a real relation (no subqueries, for now), then punt
 453          * and return a default estimate.
 454          */
 455         relid = getrelid(var->varno, root->rtable);
 456         if (relid == InvalidOid)
 457                 return DEFAULT_INEQ_SEL;
 458
 459         /* get stats for the attribute */
 460         statsTuple = SearchSysCache(STATRELATT,
 461                                                                 ObjectIdGetDatum(relid),
 462                                                                 Int16GetDatum(var->varattno),
 463                                                                 0, 0);
 464         if (!HeapTupleIsValid(statsTuple))
 465         {
 466                 /* no stats available, so default result */
 467                 return DEFAULT_INEQ_SEL;
 468         }
 469         stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 470
 471         fmgr_info(get_opcode(operator), &opproc);
 472
 473         /*
 474          * If we have most-common-values info, add up the fractions of the MCV
 475          * entries that satisfy MCV OP CONST.  These fractions contribute
 476          * directly to the result selectivity.  Also add up the total fraction
 477          * represented by MCV entries.
 478          */
 479         mcv_selec = 0.0;
 480         sumcommon = 0.0;
 481
 482         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
 483                                                  STATISTIC_KIND_MCV, InvalidOid,
 484                                                  &values, &nvalues,
 485                                                  &numbers, &nnumbers))
 486         {
 487                 for (i = 0; i < nvalues; i++)
 488                 {
 489                         if (DatumGetBool(FunctionCall2(&opproc,
 490                                                                                    values[i],
 491                                                                                    constval)))
 492                                 mcv_selec += numbers[i];
 493                         sumcommon += numbers[i];
 494                 }
 495                 free_attstatsslot(var->vartype, values, nvalues, numbers, nnumbers);
 496         }
 497
 498         /*
 499          * If there is a histogram, determine which bin the constant falls in,
 500          * and compute the resulting contribution to selectivity.
 501          *
 502          * Someday, VACUUM might store more than one histogram per rel/att,
 503          * corresponding to more than one possible sort ordering defined for
 504          * the column type.  However, to make that work we will need to figure
 505          * out which staop to search for --- it's not necessarily the one we
 506          * have at hand!  (For example, we might have a '<=' operator rather
 507          * than the '<' operator that will appear in staop.)  For now, assume
 508          * that whatever appears in pg_statistic is sorted the same way our
 509          * operator sorts, or the reverse way if isgt is TRUE.
 510          */
 511         hist_selec = 0.0;
 512
 513         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
 514                                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
 515                                                  &values, &nvalues,
 516                                                  NULL, NULL))
 517         {
 518                 if (nvalues > 1)
 519                 {
 520                         double          histfrac;
 521                         bool            ltcmp;
 522
 523                         ltcmp = DatumGetBool(FunctionCall2(&opproc,
 524                                                                                            values[0],
 525                                                                                            constval));
 526                         if (isgt)
 527                                 ltcmp = !ltcmp;
 528                         if (!ltcmp)
 529                         {
 530                                 /* Constant is below lower histogram boundary. */
 531                                 histfrac = 0.0;
 532                         }
 533                         else
 534                         {
 535                                 /*
 536                                  * Scan to find proper location.  This could be made
 537                                  * faster by using a binary-search method, but it's
 538                                  * probably not worth the trouble for typical histogram
 539                                  * sizes.
 540                                  */
 541                                 for (i = 1; i < nvalues; i++)
 542                                 {
 543                                         ltcmp = DatumGetBool(FunctionCall2(&opproc,
 544                                                                                                            values[i],
 545                                                                                                            constval));
 546                                         if (isgt)
 547                                                 ltcmp = !ltcmp;
 548                                         if (!ltcmp)
 549                                                 break;
 550                                 }
 551                                 if (i >= nvalues)
 552                                 {
 553                                         /* Constant is above upper histogram boundary. */
 554                                         histfrac = 1.0;
 555                                 }
 556                                 else
 557                                 {
 558                                         double          val,
 559                                                                 high,
 560                                                                 low;
 561                                         double          binfrac;
 562
 563                                         /*
 564                                          * We have values[i-1] < constant < values[i].
 565                                          *
 566                                          * Convert the constant and the two nearest bin boundary
 567                                          * values to a uniform comparison scale, and do a
 568                                          * linear interpolation within this bin.
 569                                          */
 570                                         if (convert_to_scalar(constval, consttype, &val,
 571                                                                                   values[i - 1], values[i],
 572                                                                                   var->vartype,
 573                                                                                   &low, &high))
 574                                         {
 575                                                 if (high <= low)
 576                                                 {
 577                                                         /* cope if bin boundaries appear identical */
 578                                                         binfrac = 0.5;
 579                                                 }
 580                                                 else if (val <= low)
 581                                                         binfrac = 0.0;
 582                                                 else if (val >= high)
 583                                                         binfrac = 1.0;
 584                                                 else
 585                                                 {
 586                                                         binfrac = (val - low) / (high - low);
 587
 588                                                         /*
 589                                                          * Watch out for the possibility that we got a
 590                                                          * NaN or Infinity from the division.  This
 591                                                          * can happen despite the previous checks, if
 592                                                          * for example "low" is -Infinity.
 593                                                          */
 594                                                         if (isnan(binfrac) ||
 595                                                                 binfrac < 0.0 || binfrac > 1.0)
 596                                                                 binfrac = 0.5;
 597                                                 }
 598                                         }
 599                                         else
 600                                         {
 601                                                 /*
 602                                                  * Ideally we'd produce an error here, on the
 603                                                  * grounds that the given operator shouldn't have
 604                                                  * scalarXXsel registered as its selectivity func
 605                                                  * unless we can deal with its operand types.  But
 606                                                  * currently, all manner of stuff is invoking
 607                                                  * scalarXXsel, so give a default estimate until
 608                                                  * that can be fixed.
 609                                                  */
 610                                                 binfrac = 0.5;
 611                                         }
 612
 613                                         /*
 614                                          * Now, compute the overall selectivity across the
 615                                          * values represented by the histogram.  We have i-1
 616                                          * full bins and binfrac partial bin below the
 617                                          * constant.
 618                                          */
 619                                         histfrac = (double) (i - 1) + binfrac;
 620                                         histfrac /= (double) (nvalues - 1);
 621                                 }
 622                         }
 623
 624                         /*
 625                          * Now histfrac = fraction of histogram entries below the
 626                          * constant.
 627                          *
 628                          * Account for "<" vs ">"
 629                          */
 630                         hist_selec = isgt ? (1.0 - histfrac) : histfrac;
 631
 632                         /*
 633                          * The histogram boundaries are only approximate to begin
 634                          * with, and may well be out of date anyway.  Therefore, don't
 635                          * believe extremely small or large selectivity estimates.
 636                          */
 637                         if (hist_selec < 0.0001)
 638                                 hist_selec = 0.0001;
 639                         else if (hist_selec > 0.9999)
 640                                 hist_selec = 0.9999;
 641                 }
 642
 643                 free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
 644         }
 645
 646         /*
 647          * Now merge the results from the MCV and histogram calculations,
 648          * realizing that the histogram covers only the non-null values that
 649          * are not listed in MCV.
 650          */
 651         selec = 1.0 - stats->stanullfrac - sumcommon;
 652
 653         if (hist_selec > 0.0)
 654                 selec *= hist_selec;
 655         else
 656         {
 657                 /*
 658                  * If no histogram but there are values not accounted for by MCV,
 659                  * arbitrarily assume half of them will match.
 660                  */
 661                 selec *= 0.5;
 662         }
 663
 664         selec += mcv_selec;
 665
 666         ReleaseSysCache(statsTuple);
 667
 668         /* result should be in range, but make sure... */
 669         CLAMP_PROBABILITY(selec);
 670
 671         return selec;
 672 }
 673
 674 /*
 675  *              scalarltsel             - Selectivity of "<" (also "<=") for scalars.
 676  */
 677 Datum
 678 scalarltsel(PG_FUNCTION_ARGS)
 679 {
 680         Query      *root = (Query *) PG_GETARG_POINTER(0);
 681         Oid                     operator = PG_GETARG_OID(1);
 682         List       *args = (List *) PG_GETARG_POINTER(2);
 683         int                     varRelid = PG_GETARG_INT32(3);
 684         Var                *var;
 685         Node       *other;
 686         Datum           constval;
 687         Oid                     consttype;
 688         bool            varonleft;
 689         bool            isgt;
 690         double          selec;
 691
 692         /*
 693          * If expression is not var op something or something op var for a
 694          * simple var of a real relation (no subqueries, for now), then punt
 695          * and return a default estimate.
 696          */
 697         if (!get_restriction_var(args, varRelid,
 698                                                          &var, &other, &varonleft))
 699                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 700
 701         /*
 702          * Can't do anything useful if the something is not a constant,
 703          * either.
 704          */
 705         if (!IsA(other, Const))
 706                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 707
 708         /*
 709          * If the constant is NULL, assume operator is strict and return zero,
 710          * ie, operator will never return TRUE.
 711          */
 712         if (((Const *) other)->constisnull)
 713                 PG_RETURN_FLOAT8(0.0);
 714         constval = ((Const *) other)->constvalue;
 715         consttype = ((Const *) other)->consttype;
 716
 717         /*
 718          * Force the var to be on the left to simplify logic in scalarineqsel.
 719          */
 720         if (varonleft)
 721         {
 722                 /* we have var < other */
 723                 isgt = false;
 724         }
 725         else
 726         {
 727                 /* we have other < var, commute to make var > other */
 728                 operator = get_commutator(operator);
 729                 if (!operator)
 730                 {
 731                         /* Use default selectivity (should we raise an error instead?) */
 732                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 733                 }
 734                 isgt = true;
 735         }
 736
 737         selec = scalarineqsel(root, operator, isgt, var, constval, consttype);
 738
 739         PG_RETURN_FLOAT8((float8) selec);
 740 }
 741
 742 /*
 743  *              scalargtsel             - Selectivity of ">" (also ">=") for integers.
 744  */
 745 Datum
 746 scalargtsel(PG_FUNCTION_ARGS)
 747 {
 748         Query      *root = (Query *) PG_GETARG_POINTER(0);
 749         Oid                     operator = PG_GETARG_OID(1);
 750         List       *args = (List *) PG_GETARG_POINTER(2);
 751         int                     varRelid = PG_GETARG_INT32(3);
 752         Var                *var;
 753         Node       *other;
 754         Datum           constval;
 755         Oid                     consttype;
 756         bool            varonleft;
 757         bool            isgt;
 758         double          selec;
 759
 760         /*
 761          * If expression is not var op something or something op var for a
 762          * simple var of a real relation (no subqueries, for now), then punt
 763          * and return a default estimate.
 764          */
 765         if (!get_restriction_var(args, varRelid,
 766                                                          &var, &other, &varonleft))
 767                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 768
 769         /*
 770          * Can't do anything useful if the something is not a constant,
 771          * either.
 772          */
 773         if (!IsA(other, Const))
 774                 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 775
 776         /*
 777          * If the constant is NULL, assume operator is strict and return zero,
 778          * ie, operator will never return TRUE.
 779          */
 780         if (((Const *) other)->constisnull)
 781                 PG_RETURN_FLOAT8(0.0);
 782         constval = ((Const *) other)->constvalue;
 783         consttype = ((Const *) other)->consttype;
 784
 785         /*
 786          * Force the var to be on the left to simplify logic in scalarineqsel.
 787          */
 788         if (varonleft)
 789         {
 790                 /* we have var > other */
 791                 isgt = true;
 792         }
 793         else
 794         {
 795                 /* we have other > var, commute to make var < other */
 796                 operator = get_commutator(operator);
 797                 if (!operator)
 798                 {
 799                         /* Use default selectivity (should we raise an error instead?) */
 800                         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 801                 }
 802                 isgt = false;
 803         }
 804
 805         selec = scalarineqsel(root, operator, isgt, var, constval, consttype);
 806
 807         PG_RETURN_FLOAT8((float8) selec);
 808 }
 809
 810 /*
 811  * patternsel                   - Generic code for pattern-match selectivity.
 812  */
 813 static double
 814 patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 815 {
 816         Query      *root = (Query *) PG_GETARG_POINTER(0);
 817
 818 #ifdef NOT_USED
 819         Oid                     operator = PG_GETARG_OID(1);
 820 #endif
 821         List       *args = (List *) PG_GETARG_POINTER(2);
 822         int                     varRelid = PG_GETARG_INT32(3);
 823         Var                *var;
 824         Node       *other;
 825         bool            varonleft;
 826         Oid                     relid;
 827         Datum           constval;
 828         char       *patt;
 829         Pattern_Prefix_Status pstatus;
 830         char       *prefix;
 831         char       *rest;
 832         double          result;
 833
 834         /*
 835          * If expression is not var op constant for a simple var of a real
 836          * relation (no subqueries, for now), then punt and return a default
 837          * estimate.
 838          */
 839         if (!get_restriction_var(args, varRelid,
 840                                                          &var, &other, &varonleft))
 841                 return DEFAULT_MATCH_SEL;
 842         if (!varonleft || !IsA(other, Const))
 843                 return DEFAULT_MATCH_SEL;
 844         relid = getrelid(var->varno, root->rtable);
 845         if (relid == InvalidOid)
 846                 return DEFAULT_MATCH_SEL;
 847
 848         /*
 849          * If the constant is NULL, assume operator is strict and return zero,
 850          * ie, operator will never return TRUE.
 851          */
 852         if (((Const *) other)->constisnull)
 853                 return 0.0;
 854         constval = ((Const *) other)->constvalue;
 855         /* the right-hand const is type text for all supported operators */
 856         Assert(((Const *) other)->consttype == TEXTOID);
 857         patt = DatumGetCString(DirectFunctionCall1(textout, constval));
 858
 859         /* divide pattern into fixed prefix and remainder */
 860         pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);
 861
 862         if (pstatus == Pattern_Prefix_Exact)
 863         {
 864                 /*
 865                  * Pattern specifies an exact match, so pretend operator is '='
 866                  */
 867                 Oid                     eqopr = find_operator("=", var->vartype);
 868                 Const      *eqcon;
 869                 List       *eqargs;
 870
 871                 if (eqopr == InvalidOid)
 872                         elog(ERROR, "patternsel: no = operator for type %u",
 873                                  var->vartype);
 874                 eqcon = string_to_const(prefix, var->vartype);
 875                 eqargs = makeList2(var, eqcon);
 876                 result = DatumGetFloat8(DirectFunctionCall4(eqsel,
 877                                                                                                         PointerGetDatum(root),
 878                                                                                                  ObjectIdGetDatum(eqopr),
 879                                                                                                  PointerGetDatum(eqargs),
 880                                                                                            Int32GetDatum(varRelid)));
 881         }
 882         else
 883         {
 884                 /*
 885                  * Not exact-match pattern.  We estimate selectivity of the fixed
 886                  * prefix and remainder of pattern separately, then combine the
 887                  * two.
 888                  */
 889                 Selectivity prefixsel;
 890                 Selectivity restsel;
 891                 Selectivity selec;
 892
 893                 if (pstatus == Pattern_Prefix_Partial)
 894                         prefixsel = prefix_selectivity(root, var, prefix);
 895                 else
 896                         prefixsel = 1.0;
 897                 restsel = pattern_selectivity(rest, ptype);
 898                 selec = prefixsel * restsel;
 899                 /* result should be in range, but make sure... */
 900                 CLAMP_PROBABILITY(selec);
 901                 result = selec;
 902         }
 903
 904         if (prefix)
 905                 pfree(prefix);
 906         pfree(patt);
 907
 908         return result;
 909 }
 910
 911 /*
 912  *              regexeqsel              - Selectivity of regular-expression pattern match.
 913  */
 914 Datum
 915 regexeqsel(PG_FUNCTION_ARGS)
 916 {
 917         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex));
 918 }
 919
 920 /*
 921  *              icregexeqsel    - Selectivity of case-insensitive regex match.
 922  */
 923 Datum
 924 icregexeqsel(PG_FUNCTION_ARGS)
 925 {
 926         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC));
 927 }
 928
 929 /*
 930  *              likesel                 - Selectivity of LIKE pattern match.
 931  */
 932 Datum
 933 likesel(PG_FUNCTION_ARGS)
 934 {
 935         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like));
 936 }
 937
 938 /*
 939  *              iclikesel                       - Selectivity of ILIKE pattern match.
 940  */
 941 Datum
 942 iclikesel(PG_FUNCTION_ARGS)
 943 {
 944         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC));
 945 }
 946
 947 /*
 948  *              regexnesel              - Selectivity of regular-expression pattern non-match.
 949  */
 950 Datum
 951 regexnesel(PG_FUNCTION_ARGS)
 952 {
 953         double          result;
 954
 955         result = patternsel(fcinfo, Pattern_Type_Regex);
 956         result = 1.0 - result;
 957         PG_RETURN_FLOAT8(result);
 958 }
 959
 960 /*
 961  *              icregexnesel    - Selectivity of case-insensitive regex non-match.
 962  */
 963 Datum
 964 icregexnesel(PG_FUNCTION_ARGS)
 965 {
 966         double          result;
 967
 968         result = patternsel(fcinfo, Pattern_Type_Regex_IC);
 969         result = 1.0 - result;
 970         PG_RETURN_FLOAT8(result);
 971 }
 972
 973 /*
 974  *              nlikesel                - Selectivity of LIKE pattern non-match.
 975  */
 976 Datum
 977 nlikesel(PG_FUNCTION_ARGS)
 978 {
 979         double          result;
 980
 981         result = patternsel(fcinfo, Pattern_Type_Like);
 982         result = 1.0 - result;
 983         PG_RETURN_FLOAT8(result);
 984 }
 985
 986 /*
 987  *              icnlikesel              - Selectivity of ILIKE pattern non-match.
 988  */
 989 Datum
 990 icnlikesel(PG_FUNCTION_ARGS)
 991 {
 992         double          result;
 993
 994         result = patternsel(fcinfo, Pattern_Type_Like_IC);
 995         result = 1.0 - result;
 996         PG_RETURN_FLOAT8(result);
 997 }
 998
 999 /*
1000  *              booltestsel             - Selectivity of BooleanTest Node.
1001  */
1002 Selectivity
1003 booltestsel(Query *root, BooleanTest *clause, int varRelid)
1004 {
1005         Var                *var;
1006         Node       *arg;
1007         Oid                     relid;
1008         HeapTuple       statsTuple;
1009         Datum      *values;
1010         int                     nvalues;
1011         float4     *numbers;
1012         int                     nnumbers;
1013         double          selec;
1014
1015         Assert(clause && IsA(clause, BooleanTest));
1016
1017         arg = (Node *) clause->arg;
1018
1019         /*
1020          * Ignore any binary-compatible relabeling (probably unnecessary, but
1021          * can't hurt)
1022          */
1023         if (IsA(arg, RelabelType))
1024                 arg = ((RelabelType *) arg)->arg;
1025
1026         if (IsA(arg, Var) &&(varRelid == 0 || varRelid == ((Var *) arg)->varno))
1027                 var = (Var *) arg;
1028         else
1029         {
1030                 /*
1031                  * If argument is not a Var, we can't get statistics for it, but
1032                  * perhaps clause_selectivity can do something with it.  We ignore
1033                  * the possibility of a NULL value when using clause_selectivity,
1034                  * and just assume the value is either TRUE or FALSE.
1035                  */
1036                 switch (clause->booltesttype)
1037                 {
1038                         case IS_UNKNOWN:
1039                                 selec = DEFAULT_UNK_SEL;
1040                                 break;
1041                         case IS_NOT_UNKNOWN:
1042                                 selec = DEFAULT_NOT_UNK_SEL;
1043                                 break;
1044                         case IS_TRUE:
1045                         case IS_NOT_FALSE:
1046                                 selec = (double) clause_selectivity(root, arg, varRelid);
1047                                 break;
1048                         case IS_FALSE:
1049                         case IS_NOT_TRUE:
1050                                 selec = 1.0 - (double) clause_selectivity(root, arg, varRelid);
1051                                 break;
1052                         default:
1053                                 elog(ERROR, "booltestsel: unexpected booltesttype %d",
1054                                          (int) clause->booltesttype);
1055                                 selec = 0.0;    /* Keep compiler quiet */
1056                                 break;
1057                 }
1058                 return (Selectivity) selec;
1059         }
1060
1061         /* get stats for the attribute, if available */
1062         relid = getrelid(var->varno, root->rtable);
1063         if (relid == InvalidOid)
1064                 statsTuple = NULL;
1065         else
1066                 statsTuple = SearchSysCache(STATRELATT,
1067                                                                         ObjectIdGetDatum(relid),
1068                                                                         Int16GetDatum(var->varattno),
1069                                                                         0, 0);
1070
1071         if (HeapTupleIsValid(statsTuple))
1072         {
1073                 Form_pg_statistic stats;
1074                 double          freq_null;
1075
1076                 stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
1077
1078                 freq_null = stats->stanullfrac;
1079
1080                 if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1081                                                          STATISTIC_KIND_MCV, InvalidOid,
1082                                                          &values, &nvalues,
1083                                                          &numbers, &nnumbers)
1084                         && nnumbers > 0)
1085                 {
1086                         double          freq_true;
1087                         double          freq_false;
1088
1089                         /*
1090                          * Get first MCV frequency and derive frequency for true.
1091                          */
1092                         if (DatumGetBool(values[0]))
1093                                 freq_true = numbers[0];
1094                         else
1095                                 freq_true = 1.0 - numbers[0] - freq_null;
1096
1097                         /*
1098                          * Next derive freqency for false. Then use these as
1099                          * appropriate to derive frequency for each case.
1100                          */
1101                         freq_false = 1.0 - freq_true - freq_null;
1102
1103                         switch (clause->booltesttype)
1104                         {
1105                                 case IS_UNKNOWN:
1106                                         /* select only NULL values */
1107                                         selec = freq_null;
1108                                         break;
1109                                 case IS_NOT_UNKNOWN:
1110                                         /* select non-NULL values */
1111                                         selec = 1.0 - freq_null;
1112                                         break;
1113                                 case IS_TRUE:
1114                                         /* select only TRUE values */
1115                                         selec = freq_true;
1116                                         break;
1117                                 case IS_NOT_TRUE:
1118                                         /* select non-TRUE values */
1119                                         selec = 1.0 - freq_true;
1120                                         break;
1121                                 case IS_FALSE:
1122                                         /* select only FALSE values */
1123                                         selec = freq_false;
1124                                         break;
1125                                 case IS_NOT_FALSE:
1126                                         /* select non-FALSE values */
1127                                         selec = 1.0 - freq_false;
1128                                         break;
1129                                 default:
1130                                         elog(ERROR, "booltestsel: unexpected booltesttype %d",
1131                                                  (int) clause->booltesttype);
1132                                         selec = 0.0;    /* Keep compiler quiet */
1133                                         break;
1134                         }
1135
1136                         free_attstatsslot(var->vartype, values, nvalues,
1137                                                           numbers, nnumbers);
1138                 }
1139                 else
1140                 {
1141                         /*
1142                          * No most-common-value info available. Still have null
1143                          * fraction information, so use it for IS [NOT] UNKNOWN.
1144                          * Otherwise adjust for null fraction and assume an even split
1145                          * for boolean tests.
1146                          */
1147                         switch (clause->booltesttype)
1148                         {
1149                                 case IS_UNKNOWN:
1150
1151                                         /*
1152                                          * Use freq_null directly.
1153                                          */
1154                                         selec = freq_null;
1155                                         break;
1156                                 case IS_NOT_UNKNOWN:
1157
1158                                         /*
1159                                          * Select not unknown (not null) values. Calculate
1160                                          * from freq_null.
1161                                          */
1162                                         selec = 1.0 - freq_null;
1163                                         break;
1164                                 case IS_TRUE:
1165                                 case IS_NOT_TRUE:
1166                                 case IS_FALSE:
1167                                 case IS_NOT_FALSE:
1168                                         selec = (1.0 - freq_null) / 2.0;
1169                                         break;
1170                                 default:
1171                                         elog(ERROR, "booltestsel: unexpected booltesttype %d",
1172                                                  (int) clause->booltesttype);
1173                                         selec = 0.0;    /* Keep compiler quiet */
1174                                         break;
1175                         }
1176                 }
1177
1178                 ReleaseSysCache(statsTuple);
1179         }
1180         else
1181         {
1182                 /*
1183                  * No VACUUM ANALYZE stats available, so use a default value.
1184                  * (Note: not much point in recursing to clause_selectivity here.)
1185                  */
1186                 switch (clause->booltesttype)
1187                 {
1188                         case IS_UNKNOWN:
1189                                 selec = DEFAULT_UNK_SEL;
1190                                 break;
1191                         case IS_NOT_UNKNOWN:
1192                                 selec = DEFAULT_NOT_UNK_SEL;
1193                                 break;
1194                         case IS_TRUE:
1195                         case IS_NOT_TRUE:
1196                         case IS_FALSE:
1197                         case IS_NOT_FALSE:
1198                                 selec = DEFAULT_BOOL_SEL;
1199                                 break;
1200                         default:
1201                                 elog(ERROR, "booltestsel: unexpected booltesttype %d",
1202                                          (int) clause->booltesttype);
1203                                 selec = 0.0;    /* Keep compiler quiet */
1204                                 break;
1205                 }
1206         }
1207
1208         /* result should be in range, but make sure... */
1209         CLAMP_PROBABILITY(selec);
1210
1211         return (Selectivity) selec;
1212 }
1213
1214 /*
1215  *              nulltestsel             - Selectivity of NullTest Node.
1216  */
1217 Selectivity
1218 nulltestsel(Query *root, NullTest *clause, int varRelid)
1219 {
1220         Var                *var;
1221         Node       *arg;
1222         Oid                     relid;
1223         HeapTuple       statsTuple;
1224         double          selec;
1225         double          defselec;
1226         double          freq_null;
1227
1228         Assert(clause && IsA(clause, NullTest));
1229
1230         switch (clause->nulltesttype)
1231         {
1232                 case IS_NULL:
1233                         defselec = DEFAULT_UNK_SEL;
1234                         break;
1235                 case IS_NOT_NULL:
1236                         defselec = DEFAULT_NOT_UNK_SEL;
1237                         break;
1238                 default:
1239                         elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
1240                                  (int) clause->nulltesttype);
1241                         return (Selectivity) 0;         /* keep compiler quiet */
1242         }
1243
1244         arg = (Node *) clause->arg;
1245
1246         /*
1247          * Ignore any binary-compatible relabeling
1248          */
1249         if (IsA(arg, RelabelType))
1250                 arg = ((RelabelType *) arg)->arg;
1251
1252         if (IsA(arg, Var) &&(varRelid == 0 || varRelid == ((Var *) arg)->varno))
1253                 var = (Var *) arg;
1254         else
1255         {
1256                 /*
1257                  * punt if non-Var argument
1258                  */
1259                 return (Selectivity) defselec;
1260         }
1261
1262         relid = getrelid(var->varno, root->rtable);
1263         if (relid == InvalidOid)
1264                 return (Selectivity) defselec;
1265
1266         /* get stats for the attribute, if available */
1267         statsTuple = SearchSysCache(STATRELATT,
1268                                                                 ObjectIdGetDatum(relid),
1269                                                                 Int16GetDatum(var->varattno),
1270                                                                 0, 0);
1271         if (HeapTupleIsValid(statsTuple))
1272         {
1273                 Form_pg_statistic stats;
1274
1275                 stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
1276                 freq_null = stats->stanullfrac;
1277
1278                 switch (clause->nulltesttype)
1279                 {
1280                         case IS_NULL:
1281
1282                                 /*
1283                                  * Use freq_null directly.
1284                                  */
1285                                 selec = freq_null;
1286                                 break;
1287                         case IS_NOT_NULL:
1288
1289                                 /*
1290                                  * Select not unknown (not null) values. Calculate from
1291                                  * freq_null.
1292                                  */
1293                                 selec = 1.0 - freq_null;
1294                                 break;
1295                         default:
1296                                 elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
1297                                          (int) clause->nulltesttype);
1298                                 return (Selectivity) 0; /* keep compiler quiet */
1299                 }
1300
1301                 ReleaseSysCache(statsTuple);
1302         }
1303         else
1304         {
1305                 /*
1306                  * No VACUUM ANALYZE stats available, so make a guess
1307                  */
1308                 selec = defselec;
1309         }
1310
1311         /* result should be in range, but make sure... */
1312         CLAMP_PROBABILITY(selec);
1313
1314         return (Selectivity) selec;
1315 }
1316
1317 /*
1318  *              eqjoinsel               - Join selectivity of "="
1319  */
1320 Datum
1321 eqjoinsel(PG_FUNCTION_ARGS)
1322 {
1323         Query      *root = (Query *) PG_GETARG_POINTER(0);
1324         Oid                     operator = PG_GETARG_OID(1);
1325         List       *args = (List *) PG_GETARG_POINTER(2);
1326         Var                *var1;
1327         Var                *var2;
1328         double          selec;
1329
1330         get_join_vars(args, &var1, &var2);
1331
1332         if (var1 == NULL && var2 == NULL)
1333                 selec = DEFAULT_EQ_SEL;
1334         else
1335         {
1336                 HeapTuple       statsTuple1 = NULL;
1337                 HeapTuple       statsTuple2 = NULL;
1338                 Form_pg_statistic stats1 = NULL;
1339                 Form_pg_statistic stats2 = NULL;
1340                 double          nd1 = DEFAULT_NUM_DISTINCT;
1341                 double          nd2 = DEFAULT_NUM_DISTINCT;
1342                 bool            have_mcvs1 = false;
1343                 Datum      *values1 = NULL;
1344                 int                     nvalues1 = 0;
1345                 float4     *numbers1 = NULL;
1346                 int                     nnumbers1 = 0;
1347                 bool            have_mcvs2 = false;
1348                 Datum      *values2 = NULL;
1349                 int                     nvalues2 = 0;
1350                 float4     *numbers2 = NULL;
1351                 int                     nnumbers2 = 0;
1352
1353                 if (var1 != NULL)
1354                 {
1355                         /* get stats for the attribute, if available */
1356                         Oid                     relid1 = getrelid(var1->varno, root->rtable);
1357
1358                         if (relid1 != InvalidOid)
1359                         {
1360                                 statsTuple1 = SearchSysCache(STATRELATT,
1361                                                                                          ObjectIdGetDatum(relid1),
1362                                                                                    Int16GetDatum(var1->varattno),
1363                                                                                          0, 0);
1364                                 if (HeapTupleIsValid(statsTuple1))
1365                                 {
1366                                         stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
1367                                         have_mcvs1 = get_attstatsslot(statsTuple1,
1368                                                                                                   var1->vartype,
1369                                                                                                   var1->vartypmod,
1370                                                                                                   STATISTIC_KIND_MCV,
1371                                                                                                   InvalidOid,
1372                                                                                                   &values1, &nvalues1,
1373                                                                                                   &numbers1, &nnumbers1);
1374                                 }
1375
1376                                 nd1 = get_att_numdistinct(root, var1, stats1);
1377                         }
1378                 }
1379
1380                 if (var2 != NULL)
1381                 {
1382                         /* get stats for the attribute, if available */
1383                         Oid                     relid2 = getrelid(var2->varno, root->rtable);
1384
1385                         if (relid2 != InvalidOid)
1386                         {
1387                                 statsTuple2 = SearchSysCache(STATRELATT,
1388                                                                                          ObjectIdGetDatum(relid2),
1389                                                                                    Int16GetDatum(var2->varattno),
1390                                                                                          0, 0);
1391                                 if (HeapTupleIsValid(statsTuple2))
1392                                 {
1393                                         stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
1394                                         have_mcvs2 = get_attstatsslot(statsTuple2,
1395                                                                                                   var2->vartype,
1396                                                                                                   var2->vartypmod,
1397                                                                                                   STATISTIC_KIND_MCV,
1398                                                                                                   InvalidOid,
1399                                                                                                   &values2, &nvalues2,
1400                                                                                                   &numbers2, &nnumbers2);
1401                                 }
1402
1403                                 nd2 = get_att_numdistinct(root, var2, stats2);
1404                         }
1405                 }
1406
1407                 if (have_mcvs1 && have_mcvs2)
1408                 {
1409                         /*
1410                          * We have most-common-value lists for both relations.  Run
1411                          * through the lists to see which MCVs actually join to each
1412                          * other with the given operator.  This allows us to determine
1413                          * the exact join selectivity for the portion of the relations
1414                          * represented by the MCV lists.  We still have to estimate
1415                          * for the remaining population, but in a skewed distribution
1416                          * this gives us a big leg up in accuracy.      For motivation see
1417                          * the analysis in Y. Ioannidis and S. Christodoulakis, "On
1418                          * the propagation of errors in the size of join results",
1419                          * Technical Report 1018, Computer Science Dept., University
1420                          * of Wisconsin, Madison, March 1991 (available from
1421                          * ftp.cs.wisc.edu).
1422                          */
1423                         FmgrInfo        eqproc;
1424                         bool       *hasmatch1;
1425                         bool       *hasmatch2;
1426                         double          matchprodfreq,
1427                                                 matchfreq1,
1428                                                 matchfreq2,
1429                                                 unmatchfreq1,
1430                                                 unmatchfreq2,
1431                                                 otherfreq1,
1432                                                 otherfreq2,
1433                                                 totalsel1,
1434                                                 totalsel2;
1435                         int                     i,
1436                                                 nmatches;
1437
1438                         fmgr_info(get_opcode(operator), &eqproc);
1439                         hasmatch1 = (bool *) palloc(nvalues1 * sizeof(bool));
1440                         memset(hasmatch1, 0, nvalues1 * sizeof(bool));
1441                         hasmatch2 = (bool *) palloc(nvalues2 * sizeof(bool));
1442                         memset(hasmatch2, 0, nvalues2 * sizeof(bool));
1443
1444                         /*
1445                          * Note we assume that each MCV will match at most one member
1446                          * of the other MCV list.  If the operator isn't really
1447                          * equality, there could be multiple matches --- but we don't
1448                          * look for them, both for speed and because the math wouldn't
1449                          * add up...
1450                          */
1451                         matchprodfreq = 0.0;
1452                         nmatches = 0;
1453                         for (i = 0; i < nvalues1; i++)
1454                         {
1455                                 int                     j;
1456
1457                                 for (j = 0; j < nvalues2; j++)
1458                                 {
1459                                         if (hasmatch2[j])
1460                                                 continue;
1461                                         if (DatumGetBool(FunctionCall2(&eqproc,
1462                                                                                                    values1[i],
1463                                                                                                    values2[j])))
1464                                         {
1465                                                 hasmatch1[i] = hasmatch2[j] = true;
1466                                                 matchprodfreq += numbers1[i] * numbers2[j];
1467                                                 nmatches++;
1468                                                 break;
1469                                         }
1470                                 }
1471                         }
1472                         CLAMP_PROBABILITY(matchprodfreq);
1473                         /* Sum up frequencies of matched and unmatched MCVs */
1474                         matchfreq1 = unmatchfreq1 = 0.0;
1475                         for (i = 0; i < nvalues1; i++)
1476                         {
1477                                 if (hasmatch1[i])
1478                                         matchfreq1 += numbers1[i];
1479                                 else
1480                                         unmatchfreq1 += numbers1[i];
1481                         }
1482                         CLAMP_PROBABILITY(matchfreq1);
1483                         CLAMP_PROBABILITY(unmatchfreq1);
1484                         matchfreq2 = unmatchfreq2 = 0.0;
1485                         for (i = 0; i < nvalues2; i++)
1486                         {
1487                                 if (hasmatch2[i])
1488                                         matchfreq2 += numbers2[i];
1489                                 else
1490                                         unmatchfreq2 += numbers2[i];
1491                         }
1492                         CLAMP_PROBABILITY(matchfreq2);
1493                         CLAMP_PROBABILITY(unmatchfreq2);
1494                         pfree(hasmatch1);
1495                         pfree(hasmatch2);
1496
1497                         /*
1498                          * Compute total frequency of non-null values that are not in
1499                          * the MCV lists.
1500                          */
1501                         otherfreq1 = 1.0 - stats1->stanullfrac - matchfreq1 - unmatchfreq1;
1502                         otherfreq2 = 1.0 - stats2->stanullfrac - matchfreq2 - unmatchfreq2;
1503                         CLAMP_PROBABILITY(otherfreq1);
1504                         CLAMP_PROBABILITY(otherfreq2);
1505
1506                         /*
1507                          * We can estimate the total selectivity from the point of
1508                          * view of relation 1 as: the known selectivity for matched
1509                          * MCVs, plus unmatched MCVs that are assumed to match against
1510                          * random members of relation 2's non-MCV population, plus
1511                          * non-MCV values that are assumed to match against random
1512                          * members of relation 2's unmatched MCVs plus non-MCV values.
1513                          */
1514                         totalsel1 = matchprodfreq;
1515                         if (nd2 > nvalues2)
1516                                 totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
1517                         if (nd2 > nmatches)
1518                                 totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
1519                                         (nd2 - nmatches);
1520                         /* Same estimate from the point of view of relation 2. */
1521                         totalsel2 = matchprodfreq;
1522                         if (nd1 > nvalues1)
1523                                 totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
1524                         if (nd1 > nmatches)
1525                                 totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
1526                                         (nd1 - nmatches);
1527
1528                         /*
1529                          * Use the smaller of the two estimates.  This can be
1530                          * justified in essentially the same terms as given below for
1531                          * the no-stats case: to a first approximation, we are
1532                          * estimating from the point of view of the relation with
1533                          * smaller nd.
1534                          */
1535                         selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
1536                 }
1537                 else
1538                 {
1539                         /*
1540                          * We do not have MCV lists for both sides.  Estimate the join
1541                          * selectivity as MIN(1/nd1, 1/nd2).  This is plausible if we
1542                          * assume that the values are about equally distributed: a
1543                          * given tuple of rel1 will join to either 0 or N2/nd2 rows of
1544                          * rel2, so total join rows are at most N1*N2/nd2 giving a
1545                          * join selectivity of not more than 1/nd2.  By the same logic
1546                          * it is not more than 1/nd1, so MIN(1/nd1, 1/nd2) is an upper
1547                          * bound.  Using the MIN() means we estimate from the point of
1548                          * view of the relation with smaller nd (since the larger nd
1549                          * is determining the MIN).  It is reasonable to assume that
1550                          * most tuples in this rel will have join partners, so the
1551                          * bound is probably reasonably tight and should be taken
1552                          * as-is.
1553                          *
1554                          * XXX Can we be smarter if we have an MCV list for just one
1555                          * side? It seems that if we assume equal distribution for the
1556                          * other side, we end up with the same answer anyway.
1557                          */
1558                         if (nd1 > nd2)
1559                                 selec = 1.0 / nd1;
1560                         else
1561                                 selec = 1.0 / nd2;
1562                 }
1563
1564                 if (have_mcvs1)
1565                         free_attstatsslot(var1->vartype, values1, nvalues1,
1566                                                           numbers1, nnumbers1);
1567                 if (have_mcvs2)
1568                         free_attstatsslot(var2->vartype, values2, nvalues2,
1569                                                           numbers2, nnumbers2);
1570                 if (HeapTupleIsValid(statsTuple1))
1571                         ReleaseSysCache(statsTuple1);
1572                 if (HeapTupleIsValid(statsTuple2))
1573                         ReleaseSysCache(statsTuple2);
1574         }
1575
1576         CLAMP_PROBABILITY(selec);
1577
1578         PG_RETURN_FLOAT8((float8) selec);
1579 }
1580
1581 /*
1582  *              neqjoinsel              - Join selectivity of "!="
1583  */
1584 Datum
1585 neqjoinsel(PG_FUNCTION_ARGS)
1586 {
1587         Query      *root = (Query *) PG_GETARG_POINTER(0);
1588         Oid                     operator = PG_GETARG_OID(1);
1589         List       *args = (List *) PG_GETARG_POINTER(2);
1590         Oid                     eqop;
1591         float8          result;
1592
1593         /*
1594          * We want 1 - eqjoinsel() where the equality operator is the one
1595          * associated with this != operator, that is, its negator.
1596          */
1597         eqop = get_negator(operator);
1598         if (eqop)
1599         {
1600                 result = DatumGetFloat8(DirectFunctionCall3(eqjoinsel,
1601                                                                                                         PointerGetDatum(root),
1602                                                                                                   ObjectIdGetDatum(eqop),
1603                                                                                                  PointerGetDatum(args)));
1604
1605         }
1606         else
1607         {
1608                 /* Use default selectivity (should we raise an error instead?) */
1609                 result = DEFAULT_EQ_SEL;
1610         }
1611         result = 1.0 - result;
1612         PG_RETURN_FLOAT8(result);
1613 }
1614
1615 /*
1616  *              scalarltjoinsel - Join selectivity of "<" and "<=" for scalars
1617  */
1618 Datum
1619 scalarltjoinsel(PG_FUNCTION_ARGS)
1620 {
1621         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1622 }
1623
1624 /*
1625  *              scalargtjoinsel - Join selectivity of ">" and ">=" for scalars
1626  */
1627 Datum
1628 scalargtjoinsel(PG_FUNCTION_ARGS)
1629 {
1630         PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1631 }
1632
1633 /*
1634  *              regexeqjoinsel  - Join selectivity of regular-expression pattern match.
1635  */
1636 Datum
1637 regexeqjoinsel(PG_FUNCTION_ARGS)
1638 {
1639         PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
1640 }
1641
1642 /*
1643  *              icregexeqjoinsel        - Join selectivity of case-insensitive regex match.
1644  */
1645 Datum
1646 icregexeqjoinsel(PG_FUNCTION_ARGS)
1647 {
1648         PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
1649 }
1650
1651 /*
1652  *              likejoinsel                     - Join selectivity of LIKE pattern match.
1653  */
1654 Datum
1655 likejoinsel(PG_FUNCTION_ARGS)
1656 {
1657         PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
1658 }
1659
1660 /*
1661  *              iclikejoinsel                   - Join selectivity of ILIKE pattern match.
1662  */
1663 Datum
1664 iclikejoinsel(PG_FUNCTION_ARGS)
1665 {
1666         PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);
1667 }
1668
1669 /*
1670  *              regexnejoinsel  - Join selectivity of regex non-match.
1671  */
1672 Datum
1673 regexnejoinsel(PG_FUNCTION_ARGS)
1674 {
1675         float8          result;
1676
1677         result = DatumGetFloat8(regexeqjoinsel(fcinfo));
1678         result = 1.0 - result;
1679         PG_RETURN_FLOAT8(result);
1680 }
1681
1682 /*
1683  *              icregexnejoinsel        - Join selectivity of case-insensitive regex non-match.
1684  */
1685 Datum
1686 icregexnejoinsel(PG_FUNCTION_ARGS)
1687 {
1688         float8          result;
1689
1690         result = DatumGetFloat8(icregexeqjoinsel(fcinfo));
1691         result = 1.0 - result;
1692         PG_RETURN_FLOAT8(result);
1693 }
1694
1695 /*
1696  *              nlikejoinsel            - Join selectivity of LIKE pattern non-match.
1697  */
1698 Datum
1699 nlikejoinsel(PG_FUNCTION_ARGS)
1700 {
1701         float8          result;
1702
1703         result = DatumGetFloat8(likejoinsel(fcinfo));
1704         result = 1.0 - result;
1705         PG_RETURN_FLOAT8(result);
1706 }
1707
1708 /*
1709  *              icnlikejoinsel          - Join selectivity of ILIKE pattern non-match.
1710  */
1711 Datum
1712 icnlikejoinsel(PG_FUNCTION_ARGS)
1713 {
1714         float8          result;
1715
1716         result = DatumGetFloat8(iclikejoinsel(fcinfo));
1717         result = 1.0 - result;
1718         PG_RETURN_FLOAT8(result);
1719 }
1720
1721 /*
1722  * mergejoinscansel                     - Scan selectivity of merge join.
1723  *
1724  * A merge join will stop as soon as it exhausts either input stream.
1725  * Therefore, if we can estimate the ranges of both input variables,
1726  * we can estimate how much of the input will actually be read.  This
1727  * can have a considerable impact on the cost when using indexscans.
1728  *
1729  * clause should be a clause already known to be mergejoinable.
1730  *
1731  * *leftscan is set to the fraction of the left-hand variable expected
1732  * to be scanned (0 to 1), and similarly *rightscan for the right-hand
1733  * variable.
1734  */
1735 void
1736 mergejoinscansel(Query *root, Node *clause,
1737                                  Selectivity *leftscan,
1738                                  Selectivity *rightscan)
1739 {
1740         Var                *left,
1741                            *right;
1742         Oid                     opno,
1743                                 lsortop,
1744                                 rsortop,
1745                                 ltop,
1746                                 gtop,
1747                                 revltop;
1748         Datum           leftmax,
1749                                 rightmax;
1750         double          selec;
1751
1752         /* Set default results if we can't figure anything out. */
1753         *leftscan = *rightscan = 1.0;
1754
1755         /* Deconstruct the merge clause */
1756         if (!is_opclause(clause))
1757                 return;                                 /* shouldn't happen */
1758         opno = ((Oper *) ((Expr *) clause)->oper)->opno;
1759         left = get_leftop((Expr *) clause);
1760         right = get_rightop((Expr *) clause);
1761         if (!right)
1762                 return;                                 /* shouldn't happen */
1763
1764         /* Can't do anything if inputs are not Vars */
1765         if (!IsA(left, Var) ||!IsA(right, Var))
1766                 return;
1767
1768         /* Verify mergejoinability and get left and right "<" operators */
1769         if (!op_mergejoinable(opno,
1770                                                   left->vartype,
1771                                                   right->vartype,
1772                                                   &lsortop,
1773                                                   &rsortop))
1774                 return;                                 /* shouldn't happen */
1775
1776         /* Try to get maximum values of both vars */
1777         if (!get_var_maximum(root, left, lsortop, &leftmax))
1778                 return;                                 /* no max available from stats */
1779
1780         if (!get_var_maximum(root, right, rsortop, &rightmax))
1781                 return;                                 /* no max available from stats */
1782
1783         /* Look up the "left < right" and "left > right" operators */
1784         op_mergejoin_crossops(opno, &ltop, &gtop, NULL, NULL);
1785
1786         /* Look up the "right < left" operator */
1787         revltop = get_commutator(gtop);
1788         if (!OidIsValid(revltop))
1789                 return;                                 /* shouldn't happen */
1790
1791         /*
1792          * Now, the fraction of the left variable that will be scanned is the
1793          * fraction that's <= the right-side maximum value.  But only believe
1794          * non-default estimates, else stick with our 1.0.
1795          */
1796         selec = scalarineqsel(root, ltop, false, left,
1797                                                   rightmax, right->vartype);
1798         if (selec != DEFAULT_INEQ_SEL)
1799                 *leftscan = selec;
1800
1801         /* And similarly for the right variable. */
1802         selec = scalarineqsel(root, revltop, false, right,
1803                                                   leftmax, left->vartype);
1804         if (selec != DEFAULT_INEQ_SEL)
1805                 *rightscan = selec;
1806
1807         /*
1808          * Only one of the two fractions can really be less than 1.0; believe
1809          * the smaller estimate and reset the other one to exactly 1.0.
1810          */
1811         if (*leftscan > *rightscan)
1812                 *leftscan = 1.0;
1813         else
1814                 *rightscan = 1.0;
1815 }
1816
1817 /*
1818  * get_var_maximum
1819  *              Estimate the maximum value of the specified variable.
1820  *              If successful, store value in *max and return TRUE.
1821  *              If no data available, return FALSE.
1822  *
1823  * sortop is the "<" comparison operator to use.  (To extract the
1824  * minimum instead of the maximum, just pass the ">" operator instead.)
1825  */
1826 static bool
1827 get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max)
1828 {
1829         Datum           tmax = 0;
1830         bool            have_max = false;
1831         Oid                     relid;
1832         HeapTuple       statsTuple;
1833         Form_pg_statistic stats;
1834         int16           typLen;
1835         bool            typByVal;
1836         Datum      *values;
1837         int                     nvalues;
1838         int                     i;
1839
1840         relid = getrelid(var->varno, root->rtable);
1841         if (relid == InvalidOid)
1842                 return false;
1843
1844         /* get stats for the attribute */
1845         statsTuple = SearchSysCache(STATRELATT,
1846                                                                 ObjectIdGetDatum(relid),
1847                                                                 Int16GetDatum(var->varattno),
1848                                                                 0, 0);
1849         if (!HeapTupleIsValid(statsTuple))
1850         {
1851                 /* no stats available, so default result */
1852                 return false;
1853         }
1854         stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
1855
1856         get_typlenbyval(var->vartype, &typLen, &typByVal);
1857
1858         /*
1859          * If there is a histogram, grab the last or first value as appropriate.
1860          *
1861          * If there is a histogram that is sorted with some other operator
1862          * than the one we want, fail --- this suggests that there is data
1863          * we can't use.
1864          */
1865         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1866                                                  STATISTIC_KIND_HISTOGRAM, sortop,
1867                                                  &values, &nvalues,
1868                                                  NULL, NULL))
1869         {
1870                 if (nvalues > 0)
1871                 {
1872                         tmax = datumCopy(values[nvalues-1], typByVal, typLen);
1873                         have_max = true;
1874                 }
1875                 free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
1876         }
1877         else
1878         {
1879                 Oid             rsortop = get_commutator(sortop);
1880
1881                 if (OidIsValid(rsortop) &&
1882                         get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1883                                                          STATISTIC_KIND_HISTOGRAM, rsortop,
1884                                                          &values, &nvalues,
1885                                                          NULL, NULL))
1886                 {
1887                         if (nvalues > 0)
1888                         {
1889                                 tmax = datumCopy(values[0], typByVal, typLen);
1890                                 have_max = true;
1891                         }
1892                         free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
1893                 }
1894                 else if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1895                                                                   STATISTIC_KIND_HISTOGRAM, InvalidOid,
1896                                                                   &values, &nvalues,
1897                                                                   NULL, NULL))
1898                 {
1899                         free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
1900                         ReleaseSysCache(statsTuple);
1901                         return false;
1902                 }
1903         }
1904
1905         /*
1906          * If we have most-common-values info, look for a large MCV.  This
1907          * is needed even if we also have a histogram, since the histogram
1908          * excludes the MCVs.  However, usually the MCVs will not be the
1909          * extreme values, so avoid unnecessary data copying.
1910          */
1911         if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
1912                                                  STATISTIC_KIND_MCV, InvalidOid,
1913                                                  &values, &nvalues,
1914                                                  NULL, NULL))
1915         {
1916                 bool    large_mcv = false;
1917                 FmgrInfo        opproc;
1918
1919                 fmgr_info(get_opcode(sortop), &opproc);
1920
1921                 for (i = 0; i < nvalues; i++)
1922                 {
1923                         if (!have_max)
1924                         {
1925                                 tmax = values[i];
1926                                 large_mcv = have_max = true;
1927                         }
1928                         else if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i])))
1929                         {
1930                                 tmax = values[i];
1931                                 large_mcv = true;
1932                         }
1933                 }
1934                 if (large_mcv)
1935                         tmax = datumCopy(tmax, typByVal, typLen);
1936                 free_attstatsslot(var->vartype, values, nvalues, NULL, 0);
1937         }
1938
1939         ReleaseSysCache(statsTuple);
1940
1941         *max = tmax;
1942         return have_max;
1943 }
1944
1945 /*
1946  * convert_to_scalar
1947  *        Convert non-NULL values of the indicated types to the comparison
1948  *        scale needed by scalarltsel()/scalargtsel().
1949  *        Returns "true" if successful.
1950  *
1951  * XXX this routine is a hack: ideally we should look up the conversion
1952  * subroutines in pg_type.
1953  *
1954  * All numeric datatypes are simply converted to their equivalent
1955  * "double" values.  (NUMERIC values that are outside the range of "double"
1956  * are clamped to +/- HUGE_VAL.)
1957  *
1958  * String datatypes are converted by convert_string_to_scalar(),
1959  * which is explained below.  The reason why this routine deals with
1960  * three values at a time, not just one, is that we need it for strings.
1961  *
1962  * The bytea datatype is just enough different from strings that it has
1963  * to be treated separately.
1964  *
1965  * The several datatypes representing absolute times are all converted
1966  * to Timestamp, which is actually a double, and then we just use that
1967  * double value.  Note this will give correct results even for the "special"
1968  * values of Timestamp, since those are chosen to compare correctly;
1969  * see timestamp_cmp.
1970  *
1971  * The several datatypes representing relative times (intervals) are all
1972  * converted to measurements expressed in seconds.
1973  */
1974 static bool
1975 convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
1976                                   Datum lobound, Datum hibound, Oid boundstypid,
1977                                   double *scaledlobound, double *scaledhibound)
1978 {
1979         switch (valuetypid)
1980         {
1981                         /*
1982                          * Built-in numeric types
1983                          */
1984                 case BOOLOID:
1985                 case INT2OID:
1986                 case INT4OID:
1987                 case INT8OID:
1988                 case FLOAT4OID:
1989                 case FLOAT8OID:
1990                 case NUMERICOID:
1991                 case OIDOID:
1992                 case REGPROCOID:
1993                         *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
1994                         *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
1995                         *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
1996                         return true;
1997
1998                         /*
1999                          * Built-in string types
2000                          */
2001                 case CHAROID:
2002                 case BPCHAROID:
2003                 case VARCHAROID:
2004                 case TEXTOID:
2005                 case NAMEOID:
2006                         {
2007                                 unsigned char *valstr = convert_string_datum(value, valuetypid);
2008                                 unsigned char *lostr = convert_string_datum(lobound, boundstypid);
2009                                 unsigned char *histr = convert_string_datum(hibound, boundstypid);
2010
2011                                 convert_string_to_scalar(valstr, scaledvalue,
2012                                                                                  lostr, scaledlobound,
2013                                                                                  histr, scaledhibound);
2014                                 pfree(valstr);
2015                                 pfree(lostr);
2016                                 pfree(histr);
2017                                 return true;
2018                         }
2019
2020                         /*
2021                          * Built-in bytea type
2022                          */
2023                 case BYTEAOID:
2024                         {
2025                                 convert_bytea_to_scalar(value, scaledvalue,
2026                                                                                 lobound, scaledlobound,
2027                                                                                 hibound, scaledhibound);
2028                                 return true;
2029                         }
2030
2031                         /*
2032                          * Built-in time types
2033                          */
2034                 case TIMESTAMPOID:
2035                 case TIMESTAMPTZOID:
2036                 case ABSTIMEOID:
2037                 case DATEOID:
2038                 case INTERVALOID:
2039                 case RELTIMEOID:
2040                 case TINTERVALOID:
2041                 case TIMEOID:
2042                 case TIMETZOID:
2043                         *scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
2044                         *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
2045                         *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
2046                         return true;
2047
2048                         /*
2049                          * Built-in network types
2050                          */
2051                 case INETOID:
2052                 case CIDROID:
2053                 case MACADDROID:
2054                         *scaledvalue = convert_network_to_scalar(value, valuetypid);
2055                         *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
2056                         *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
2057                         return true;
2058         }
2059         /* Don't know how to convert */
2060         return false;
2061 }
2062
2063 /*
2064  * Do convert_to_scalar()'s work for any numeric data type.
2065  */
2066 static double
2067 convert_numeric_to_scalar(Datum value, Oid typid)
2068 {
2069         switch (typid)
2070         {
2071                 case BOOLOID:
2072                         return (double) DatumGetBool(value);
2073                 case INT2OID:
2074                         return (double) DatumGetInt16(value);
2075                 case INT4OID:
2076                         return (double) DatumGetInt32(value);
2077                 case INT8OID:
2078                         return (double) DatumGetInt64(value);
2079                 case FLOAT4OID:
2080                         return (double) DatumGetFloat4(value);
2081                 case FLOAT8OID:
2082                         return (double) DatumGetFloat8(value);
2083                 case NUMERICOID:
2084                         /* Note: out-of-range values will be clamped to +-HUGE_VAL */
2085                         return (double)
2086                                 DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
2087                                                                                                    value));
2088                 case OIDOID:
2089                 case REGPROCOID:
2090                         /* we can treat OIDs as integers... */
2091                         return (double) DatumGetObjectId(value);
2092         }
2093
2094         /*
2095          * Can't get here unless someone tries to use scalarltsel/scalargtsel
2096          * on an operator with one numeric and one non-numeric operand.
2097          */
2098         elog(ERROR, "convert_numeric_to_scalar: unsupported type %u", typid);
2099         return 0;
2100 }
2101
2102 /*
2103  * Do convert_to_scalar()'s work for any character-string data type.
2104  *
2105  * String datatypes are converted to a scale that ranges from 0 to 1,
2106  * where we visualize the bytes of the string as fractional digits.
2107  *
2108  * We do not want the base to be 256, however, since that tends to
2109  * generate inflated selectivity estimates; few databases will have
2110  * occurrences of all 256 possible byte values at each position.
2111  * Instead, use the smallest and largest byte values seen in the bounds
2112  * as the estimated range for each byte, after some fudging to deal with
2113  * the fact that we probably aren't going to see the full range that way.
2114  *
2115  * An additional refinement is that we discard any common prefix of the
2116  * three strings before computing the scaled values.  This allows us to
2117  * "zoom in" when we encounter a narrow data range.  An example is a phone
2118  * number database where all the values begin with the same area code.
2119  * (Actually, the bounds will be adjacent histogram-bin-boundary values,
2120  * so this is more likely to happen than you might think.)
2121  */
2122 static void
2123 convert_string_to_scalar(unsigned char *value,
2124                                                  double *scaledvalue,
2125                                                  unsigned char *lobound,
2126                                                  double *scaledlobound,
2127                                                  unsigned char *hibound,
2128                                                  double *scaledhibound)
2129 {
2130         int                     rangelo,
2131                                 rangehi;
2132         unsigned char *sptr;
2133
2134         rangelo = rangehi = hibound[0];
2135         for (sptr = lobound; *sptr; sptr++)
2136         {
2137                 if (rangelo > *sptr)
2138                         rangelo = *sptr;
2139                 if (rangehi < *sptr)
2140                         rangehi = *sptr;
2141         }
2142         for (sptr = hibound; *sptr; sptr++)
2143         {
2144                 if (rangelo > *sptr)
2145                         rangelo = *sptr;
2146                 if (rangehi < *sptr)
2147                         rangehi = *sptr;
2148         }
2149         /* If range includes any upper-case ASCII chars, make it include all */
2150         if (rangelo <= 'Z' && rangehi >= 'A')
2151         {
2152                 if (rangelo > 'A')
2153                         rangelo = 'A';
2154                 if (rangehi < 'Z')
2155                         rangehi = 'Z';
2156         }
2157         /* Ditto lower-case */
2158         if (rangelo <= 'z' && rangehi >= 'a')
2159         {
2160                 if (rangelo > 'a')
2161                         rangelo = 'a';
2162                 if (rangehi < 'z')
2163                         rangehi = 'z';
2164         }
2165         /* Ditto digits */
2166         if (rangelo <= '9' && rangehi >= '0')
2167         {
2168                 if (rangelo > '0')
2169                         rangelo = '0';
2170                 if (rangehi < '9')
2171                         rangehi = '9';
2172         }
2173
2174         /*
2175          * If range includes less than 10 chars, assume we have not got enough
2176          * data, and make it include regular ASCII set.
2177          */
2178         if (rangehi - rangelo < 9)
2179         {
2180                 rangelo = ' ';
2181                 rangehi = 127;
2182         }
2183
2184         /*
2185          * Now strip any common prefix of the three strings.
2186          */
2187         while (*lobound)
2188         {
2189                 if (*lobound != *hibound || *lobound != *value)
2190                         break;
2191                 lobound++, hibound++, value++;
2192         }
2193
2194         /*
2195          * Now we can do the conversions.
2196          */
2197         *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
2198         *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
2199         *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
2200 }
2201
2202 static double
2203 convert_one_string_to_scalar(unsigned char *value, int rangelo, int rangehi)
2204 {
2205         int                     slen = strlen((char *) value);
2206         double          num,
2207                                 denom,
2208                                 base;
2209
2210         if (slen <= 0)
2211                 return 0.0;                             /* empty string has scalar value 0 */
2212
2213         /*
2214          * Since base is at least 10, need not consider more than about 20
2215          * chars
2216          */
2217         if (slen > 20)
2218                 slen = 20;
2219
2220         /* Convert initial characters to fraction */
2221         base = rangehi - rangelo + 1;
2222         num = 0.0;
2223         denom = base;
2224         while (slen-- > 0)
2225         {
2226                 int                     ch = *value++;
2227
2228                 if (ch < rangelo)
2229                         ch = rangelo - 1;
2230                 else if (ch > rangehi)
2231                         ch = rangehi + 1;
2232                 num += ((double) (ch - rangelo)) / denom;
2233                 denom *= base;
2234         }
2235
2236         return num;
2237 }
2238
2239 /*
2240  * Convert a string-type Datum into a palloc'd, null-terminated string.
2241  *
2242  * When using a non-C locale, we must pass the string through strxfrm()
2243  * before continuing, so as to generate correct locale-specific results.
2244  */
2245 static unsigned char *
2246 convert_string_datum(Datum value, Oid typid)
2247 {
2248         char       *val;
2249         char       *xfrmstr;
2250         size_t          xfrmsize;
2251         size_t          xfrmlen;
2252
2253         switch (typid)
2254         {
2255                 case CHAROID:
2256                         val = (char *) palloc(2);
2257                         val[0] = DatumGetChar(value);
2258                         val[1] = '\0';
2259                         break;
2260                 case BPCHAROID:
2261                 case VARCHAROID:
2262                 case TEXTOID:
2263                         {
2264                                 char       *str = (char *) VARDATA(DatumGetPointer(value));
2265                                 int                     strlength = VARSIZE(DatumGetPointer(value)) - VARHDRSZ;
2266
2267                                 val = (char *) palloc(strlength + 1);
2268                                 memcpy(val, str, strlength);
2269                                 val[strlength] = '\0';
2270                                 break;
2271                         }
2272                 case NAMEOID:
2273                         {
2274                                 NameData   *nm = (NameData *) DatumGetPointer(value);
2275
2276                                 val = pstrdup(NameStr(*nm));
2277                                 break;
2278                         }
2279                 default:
2280
2281                         /*
2282                          * Can't get here unless someone tries to use scalarltsel on
2283                          * an operator with one string and one non-string operand.
2284                          */
2285                         elog(ERROR, "convert_string_datum: unsupported type %u", typid);
2286                         return NULL;
2287         }
2288
2289         if (!lc_collate_is_c())
2290         {
2291                 /* Guess that transformed string is not much bigger than original */
2292                 xfrmsize = strlen(val) + 32;    /* arbitrary pad value here... */
2293                 xfrmstr = (char *) palloc(xfrmsize);
2294                 xfrmlen = strxfrm(xfrmstr, val, xfrmsize);
2295                 if (xfrmlen >= xfrmsize)
2296                 {
2297                         /* Oops, didn't make it */
2298                         pfree(xfrmstr);
2299                         xfrmstr = (char *) palloc(xfrmlen + 1);
2300                         xfrmlen = strxfrm(xfrmstr, val, xfrmlen + 1);
2301                 }
2302                 pfree(val);
2303                 val = xfrmstr;
2304         }
2305
2306         return (unsigned char *) val;
2307 }
2308
2309 /*
2310  * Do convert_to_scalar()'s work for any bytea data type.
2311  *
2312  * Very similar to convert_string_to_scalar except we can't assume
2313  * null-termination and therefore pass explicit lengths around.
2314  *
2315  * Also, assumptions about likely "normal" ranges of characters have been
2316  * removed - a data range of 0..255 is always used, for now.  (Perhaps
2317  * someday we will add information about actual byte data range to
2318  * pg_statistic.)
2319  */
2320 static void
2321 convert_bytea_to_scalar(Datum value,
2322                                                 double *scaledvalue,
2323                                                 Datum lobound,
2324                                                 double *scaledlobound,
2325                                                 Datum hibound,
2326                                                 double *scaledhibound)
2327 {
2328         int                     rangelo,
2329                                 rangehi,
2330                                 valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
2331                                 loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
2332                                 hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
2333                                 i,
2334                                 minlen;
2335         unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
2336                            *lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
2337                            *histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
2338
2339         /*
2340          * Assume bytea data is uniformly distributed across all byte values.
2341          */
2342         rangelo = 0;
2343         rangehi = 255;
2344
2345         /*
2346          * Now strip any common prefix of the three strings.
2347          */
2348         minlen = Min(Min(valuelen, loboundlen), hiboundlen);
2349         for (i = 0; i < minlen; i++)
2350         {
2351                 if (*lostr != *histr || *lostr != *valstr)
2352                         break;
2353                 lostr++, histr++, valstr++;
2354                 loboundlen--, hiboundlen--, valuelen--;
2355         }
2356
2357         /*
2358          * Now we can do the conversions.
2359          */
2360         *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
2361         *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
2362         *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
2363 }
2364
2365 static double
2366 convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
2367                                                         int rangelo, int rangehi)
2368 {
2369         double          num,
2370                                 denom,
2371                                 base;
2372
2373         if (valuelen <= 0)
2374                 return 0.0;                             /* empty string has scalar value 0 */
2375
2376         /*
2377          * Since base is 256, need not consider more than about 10 chars (even
2378          * this many seems like overkill)
2379          */
2380         if (valuelen > 10)
2381                 valuelen = 10;
2382
2383         /* Convert initial characters to fraction */
2384         base = rangehi - rangelo + 1;
2385         num = 0.0;
2386         denom = base;
2387         while (valuelen-- > 0)
2388         {
2389                 int                     ch = *value++;
2390
2391                 if (ch < rangelo)
2392                         ch = rangelo - 1;
2393                 else if (ch > rangehi)
2394                         ch = rangehi + 1;
2395                 num += ((double) (ch - rangelo)) / denom;
2396                 denom *= base;
2397         }
2398
2399         return num;
2400 }
2401
2402 /*
2403  * Do convert_to_scalar()'s work for any timevalue data type.
2404  */
2405 static double
2406 convert_timevalue_to_scalar(Datum value, Oid typid)
2407 {
2408         switch (typid)
2409         {
2410                 case TIMESTAMPOID:
2411                         return DatumGetTimestamp(value);
2412                 case TIMESTAMPTZOID:
2413                         return DatumGetTimestampTz(value);
2414                 case ABSTIMEOID:
2415                         return DatumGetTimestamp(DirectFunctionCall1(abstime_timestamp,
2416                                                                                                                  value));
2417                 case DATEOID:
2418                         return DatumGetTimestamp(DirectFunctionCall1(date_timestamp,
2419                                                                                                                  value));
2420                 case INTERVALOID:
2421                         {
2422                                 Interval   *interval = DatumGetIntervalP(value);
2423
2424                                 /*
2425                                  * Convert the month part of Interval to days using
2426                                  * assumed average month length of 365.25/12.0 days.  Not
2427                                  * too accurate, but plenty good enough for our purposes.
2428                                  */
2429                                 return interval->time +
2430                                         interval->month * (365.25 / 12.0 * 24.0 * 60.0 * 60.0);
2431                         }
2432                 case RELTIMEOID:
2433                         return DatumGetRelativeTime(value);
2434                 case TINTERVALOID:
2435                         {
2436                                 TimeInterval interval = DatumGetTimeInterval(value);
2437
2438                                 if (interval->status != 0)
2439                                         return interval->data[1] - interval->data[0];
2440                                 return 0;               /* for lack of a better idea */
2441                         }
2442                 case TIMEOID:
2443                         return DatumGetTimeADT(value);
2444                 case TIMETZOID:
2445                         {
2446                                 TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
2447
2448                                 /* use GMT-equivalent time */
2449                                 return (double) (timetz->time + timetz->zone);
2450                         }
2451         }
2452
2453         /*
2454          * Can't get here unless someone tries to use scalarltsel/scalargtsel
2455          * on an operator with one timevalue and one non-timevalue operand.
2456          */
2457         elog(ERROR, "convert_timevalue_to_scalar: unsupported type %u", typid);
2458         return 0;
2459 }
2460
2461
2462 /*
2463  * get_att_numdistinct
2464  *        Estimate the number of distinct values of an attribute.
2465  *
2466  * var: identifies the attribute to examine.
2467  * stats: pg_statistic tuple for attribute, or NULL if not available.
2468  *
2469  * NB: be careful to produce an integral result, since callers may compare
2470  * the result to exact integer counts.
2471  */
2472 static double
2473 get_att_numdistinct(Query *root, Var *var, Form_pg_statistic stats)
2474 {
2475         RelOptInfo *rel;
2476         double          ntuples;
2477
2478         /*
2479          * Special-case boolean columns: presumably, two distinct values.
2480          *
2481          * Are there any other cases we should wire in special estimates for?
2482          */
2483         if (var->vartype == BOOLOID)
2484                 return 2.0;
2485
2486         /*
2487          * Otherwise we need to get the relation size.
2488          */
2489         rel = find_base_rel(root, var->varno);
2490         ntuples = rel->tuples;
2491
2492         if (ntuples <= 0.0)
2493                 return DEFAULT_NUM_DISTINCT;    /* no data available; return a
2494                                                                                  * default */
2495
2496         /*
2497          * Look to see if there is a unique index on the attribute. If so, we
2498          * assume it's distinct, ignoring pg_statistic info which could be out
2499          * of date.
2500          */
2501         if (has_unique_index(rel, var->varattno))
2502                 return ntuples;
2503
2504         /*
2505          * If ANALYZE determined a fixed or scaled estimate, use it.
2506          */
2507         if (stats)
2508         {
2509                 if (stats->stadistinct > 0.0)
2510                         return stats->stadistinct;
2511                 if (stats->stadistinct < 0.0)
2512                         return floor((-stats->stadistinct * ntuples) + 0.5);
2513         }
2514
2515         /*
2516          * ANALYZE does not compute stats for system attributes, but some of
2517          * them can reasonably be assumed unique anyway.
2518          */
2519         switch (var->varattno)
2520         {
2521                 case ObjectIdAttributeNumber:
2522                 case SelfItemPointerAttributeNumber:
2523                         return ntuples;
2524                 case TableOidAttributeNumber:
2525                         return 1.0;
2526         }
2527
2528         /*
2529          * Estimate ndistinct = ntuples if the table is small, else use
2530          * default.
2531          */
2532         if (ntuples < DEFAULT_NUM_DISTINCT)
2533                 return ntuples;
2534
2535         return DEFAULT_NUM_DISTINCT;
2536 }
2537
2538 /*
2539  * get_restriction_var
2540  *              Examine the args of a restriction clause to see if it's of the
2541  *              form (var op something) or (something op var).  If so, extract
2542  *              and return the var and the other argument.
2543  *
2544  * Inputs:
2545  *      args: clause argument list
2546  *      varRelid: see specs for restriction selectivity functions
2547  *
2548  * Outputs: (these are set only if TRUE is returned)
2549  *      *var: gets Var node
2550  *      *other: gets other clause argument
2551  *      *varonleft: set TRUE if var is on the left, FALSE if on the right
2552  *
2553  * Returns TRUE if a Var is identified, otherwise FALSE.
2554  */
2555 static bool
2556 get_restriction_var(List *args,
2557                                         int varRelid,
2558                                         Var **var,
2559                                         Node **other,
2560                                         bool *varonleft)
2561 {
2562         Node       *left,
2563                            *right;
2564
2565         if (length(args) != 2)
2566                 return false;
2567
2568         left = (Node *) lfirst(args);
2569         right = (Node *) lsecond(args);
2570
2571         /* Ignore any binary-compatible relabeling */
2572
2573         if (IsA(left, RelabelType))
2574                 left = ((RelabelType *) left)->arg;
2575         if (IsA(right, RelabelType))
2576                 right = ((RelabelType *) right)->arg;
2577
2578         /* Look for the var */
2579
2580         if (IsA(left, Var) &&
2581                 (varRelid == 0 || varRelid == ((Var *) left)->varno))
2582         {
2583                 *var = (Var *) left;
2584                 *other = right;
2585                 *varonleft = true;
2586         }
2587         else if (IsA(right, Var) &&
2588                          (varRelid == 0 || varRelid == ((Var *) right)->varno))
2589         {
2590                 *var = (Var *) right;
2591                 *other = left;
2592                 *varonleft = false;
2593         }
2594         else
2595         {
2596                 /* Duh, it's too complicated for me... */
2597                 return false;
2598         }
2599
2600         return true;
2601 }
2602
2603 /*
2604  * get_join_vars
2605  *
2606  * Extract the two Vars from a join clause's argument list.  Returns
2607  * NULL for arguments that are not simple vars.
2608  */
2609 static void
2610 get_join_vars(List *args, Var **var1, Var **var2)
2611 {
2612         Node       *left,
2613                            *right;
2614
2615         if (length(args) != 2)
2616         {
2617                 *var1 = NULL;
2618                 *var2 = NULL;
2619                 return;
2620         }
2621
2622         left = (Node *) lfirst(args);
2623         right = (Node *) lsecond(args);
2624
2625         /* Ignore any binary-compatible relabeling */
2626         if (IsA(left, RelabelType))
2627                 left = ((RelabelType *) left)->arg;
2628         if (IsA(right, RelabelType))
2629                 right = ((RelabelType *) right)->arg;
2630
2631         if (IsA(left, Var))
2632                 *var1 = (Var *) left;
2633         else
2634                 *var1 = NULL;
2635
2636         if (IsA(right, Var))
2637                 *var2 = (Var *) right;
2638         else
2639                 *var2 = NULL;
2640 }
2641
2642 /*-------------------------------------------------------------------------
2643  *
2644  * Pattern analysis functions
2645  *
2646  * These routines support analysis of LIKE and regular-expression patterns
2647  * by the planner/optimizer.  It's important that they agree with the
2648  * regular-expression code in backend/regex/ and the LIKE code in
2649  * backend/utils/adt/like.c.
2650  *
2651  * Note that the prefix-analysis functions are called from
2652  * backend/optimizer/path/indxpath.c as well as from routines in this file.
2653  *
2654  *-------------------------------------------------------------------------
2655  */
2656
2657 /*
2658  * Extract the fixed prefix, if any, for a pattern.
2659  * *prefix is set to a palloc'd prefix string,
2660  * or to NULL if no fixed prefix exists for the pattern.
2661  * *rest is set to point to the remainder of the pattern after the
2662  * portion describing the fixed prefix.
2663  * The return value distinguishes no fixed prefix, a partial prefix,
2664  * or an exact-match-only pattern.
2665  */
2666
2667 static Pattern_Prefix_Status
2668 like_fixed_prefix(char *patt, bool case_insensitive,
2669                                   char **prefix, char **rest)
2670 {
2671         char       *match;
2672         int                     pos,
2673                                 match_pos;
2674
2675         *prefix = match = palloc(strlen(patt) + 1);
2676         match_pos = 0;
2677
2678         for (pos = 0; patt[pos]; pos++)
2679         {
2680                 /* % and _ are wildcard characters in LIKE */
2681                 if (patt[pos] == '%' ||
2682                         patt[pos] == '_')
2683                         break;
2684                 /* Backslash quotes the next character */
2685                 if (patt[pos] == '\\')
2686                 {
2687                         pos++;
2688                         if (patt[pos] == '\0')
2689                                 break;
2690                 }
2691
2692                 /*
2693                  * XXX I suspect isalpha() is not an adequately locale-sensitive
2694                  * test for characters that can vary under case folding?
2695                  */
2696                 if (case_insensitive && isalpha((unsigned char) patt[pos]))
2697                         break;
2698
2699                 /*
2700                  * NOTE: this code used to think that %% meant a literal %, but
2701                  * textlike() itself does not think that, and the SQL92 spec
2702                  * doesn't say any such thing either.
2703                  */
2704                 match[match_pos++] = patt[pos];
2705         }
2706
2707         match[match_pos] = '\0';
2708         *rest = &patt[pos];
2709
2710         /* in LIKE, an empty pattern is an exact match! */
2711         if (patt[pos] == '\0')
2712                 return Pattern_Prefix_Exact;    /* reached end of pattern, so
2713                                                                                  * exact */
2714
2715         if (match_pos > 0)
2716                 return Pattern_Prefix_Partial;
2717
2718         pfree(match);
2719         *prefix = NULL;
2720         return Pattern_Prefix_None;
2721 }
2722
2723 static Pattern_Prefix_Status
2724 regex_fixed_prefix(char *patt, bool case_insensitive,
2725                                    char **prefix, char **rest)
2726 {
2727         char       *match;
2728         int                     pos,
2729                                 match_pos,
2730                                 paren_depth;
2731
2732         /* Pattern must be anchored left */
2733         if (patt[0] != '^')
2734         {
2735                 *prefix = NULL;
2736                 *rest = patt;
2737                 return Pattern_Prefix_None;
2738         }
2739
2740         /*
2741          * If unquoted | is present at paren level 0 in pattern, then there
2742          * are multiple alternatives for the start of the string.
2743          */
2744         paren_depth = 0;
2745         for (pos = 1; patt[pos]; pos++)
2746         {
2747                 if (patt[pos] == '|' && paren_depth == 0)
2748                 {
2749                         *prefix = NULL;
2750                         *rest = patt;
2751                         return Pattern_Prefix_None;
2752                 }
2753                 else if (patt[pos] == '(')
2754                         paren_depth++;
2755                 else if (patt[pos] == ')' && paren_depth > 0)
2756                         paren_depth--;
2757                 else if (patt[pos] == '\\')
2758                 {
2759                         /* backslash quotes the next character */
2760                         pos++;
2761                         if (patt[pos] == '\0')
2762                                 break;
2763                 }
2764         }
2765
2766         /* OK, allocate space for pattern */
2767         *prefix = match = palloc(strlen(patt) + 1);
2768         match_pos = 0;
2769
2770         /* note start at pos 1 to skip leading ^ */
2771         for (pos = 1; patt[pos]; pos++)
2772         {
2773                 /*
2774                  * Check for characters that indicate multiple possible matches
2775                  * here. XXX I suspect isalpha() is not an adequately
2776                  * locale-sensitive test for characters that can vary under case
2777                  * folding?
2778                  */
2779                 if (patt[pos] == '.' ||
2780                         patt[pos] == '(' ||
2781                         patt[pos] == '[' ||
2782                         patt[pos] == '$' ||
2783                         (case_insensitive && isalpha((unsigned char) patt[pos])))
2784                         break;
2785
2786                 /*
2787                  * Check for quantifiers.  Except for +, this means the preceding
2788                  * character is optional, so we must remove it from the prefix
2789                  * too!
2790                  */
2791                 if (patt[pos] == '*' ||
2792                         patt[pos] == '?' ||
2793                         patt[pos] == '{')
2794                 {
2795                         if (match_pos > 0)
2796                                 match_pos--;
2797                         pos--;
2798                         break;
2799                 }
2800                 if (patt[pos] == '+')
2801                 {
2802                         pos--;
2803                         break;
2804                 }
2805                 if (patt[pos] == '\\')
2806                 {
2807                         /* backslash quotes the next character */
2808                         pos++;
2809                         if (patt[pos] == '\0')
2810                                 break;
2811                 }
2812                 match[match_pos++] = patt[pos];
2813         }
2814
2815         match[match_pos] = '\0';
2816         *rest = &patt[pos];
2817
2818         if (patt[pos] == '$' && patt[pos + 1] == '\0')
2819         {
2820                 *rest = &patt[pos + 1];
2821                 return Pattern_Prefix_Exact;    /* pattern specifies exact match */
2822         }
2823
2824         if (match_pos > 0)
2825                 return Pattern_Prefix_Partial;
2826
2827         pfree(match);
2828         *prefix = NULL;
2829         return Pattern_Prefix_None;
2830 }
2831
2832 Pattern_Prefix_Status
2833 pattern_fixed_prefix(char *patt, Pattern_Type ptype,
2834                                          char **prefix, char **rest)
2835 {
2836         Pattern_Prefix_Status result;
2837
2838         switch (ptype)
2839         {
2840                 case Pattern_Type_Like:
2841                         result = like_fixed_prefix(patt, false, prefix, rest);
2842                         break;
2843                 case Pattern_Type_Like_IC:
2844                         result = like_fixed_prefix(patt, true, prefix, rest);
2845                         break;
2846                 case Pattern_Type_Regex:
2847                         result = regex_fixed_prefix(patt, false, prefix, rest);
2848                         break;
2849                 case Pattern_Type_Regex_IC:
2850                         result = regex_fixed_prefix(patt, true, prefix, rest);
2851                         break;
2852                 default:
2853                         elog(ERROR, "pattern_fixed_prefix: bogus ptype");
2854                         result = Pattern_Prefix_None;           /* keep compiler quiet */
2855                         break;
2856         }
2857         return result;
2858 }
2859
2860 /*
2861  * Estimate the selectivity of a fixed prefix for a pattern match.
2862  *
2863  * A fixed prefix "foo" is estimated as the selectivity of the expression
2864  * "var >= 'foo' AND var < 'fop'" (see also indxqual.c).
2865  *
2866  * XXX Note: we make use of the upper bound to estimate operator selectivity
2867  * even if the locale is such that we cannot rely on the upper-bound string.
2868  * The selectivity only needs to be approximately right anyway, so it seems
2869  * more useful to use the upper-bound code than not.
2870  */
2871 static Selectivity
2872 prefix_selectivity(Query *root, Var *var, char *prefix)
2873 {
2874         Selectivity prefixsel;
2875         Oid                     cmpopr;
2876         Const      *prefixcon;
2877         List       *cmpargs;
2878         char       *greaterstr;
2879
2880         cmpopr = find_operator(">=", var->vartype);
2881         if (cmpopr == InvalidOid)
2882                 elog(ERROR, "prefix_selectivity: no >= operator for type %u",
2883                          var->vartype);
2884         prefixcon = string_to_const(prefix, var->vartype);
2885         cmpargs = makeList2(var, prefixcon);
2886         /* Assume scalargtsel is appropriate for all supported types */
2887         prefixsel = DatumGetFloat8(DirectFunctionCall4(scalargtsel,
2888                                                                                                    PointerGetDatum(root),
2889                                                                                                 ObjectIdGetDatum(cmpopr),
2890                                                                                                 PointerGetDatum(cmpargs),
2891                                                                                                    Int32GetDatum(0)));
2892
2893         /*-------
2894          * If we can create a string larger than the prefix, say
2895          *      "x < greaterstr".
2896          *-------
2897          */
2898         greaterstr = make_greater_string(prefix, var->vartype);
2899         if (greaterstr)
2900         {
2901                 Selectivity topsel;
2902
2903                 cmpopr = find_operator("<", var->vartype);
2904                 if (cmpopr == InvalidOid)
2905                         elog(ERROR, "prefix_selectivity: no < operator for type %u",
2906                                  var->vartype);
2907                 prefixcon = string_to_const(greaterstr, var->vartype);
2908                 cmpargs = makeList2(var, prefixcon);
2909                 /* Assume scalarltsel is appropriate for all supported types */
2910                 topsel = DatumGetFloat8(DirectFunctionCall4(scalarltsel,
2911                                                                                                         PointerGetDatum(root),
2912                                                                                                 ObjectIdGetDatum(cmpopr),
2913                                                                                                 PointerGetDatum(cmpargs),
2914                                                                                                         Int32GetDatum(0)));
2915
2916                 /*
2917                  * Merge the two selectivities in the same way as for a range
2918                  * query (see clauselist_selectivity()).
2919                  */
2920                 prefixsel = topsel + prefixsel - 1.0;
2921
2922                 /*
2923                  * A zero or slightly negative prefixsel should be converted into
2924                  * a small positive value; we probably are dealing with a very
2925                  * tight range and got a bogus result due to roundoff errors.
2926                  * However, if prefixsel is very negative, then we probably have
2927                  * default selectivity estimates on one or both sides of the
2928                  * range.  In that case, insert a not-so-wildly-optimistic default
2929                  * estimate.
2930                  */
2931                 if (prefixsel <= 0.0)
2932                 {
2933                         if (prefixsel < -0.01)
2934                         {
2935                                 /*
2936                                  * No data available --- use a default estimate that is
2937                                  * small, but not real small.
2938                                  */
2939                                 prefixsel = 0.005;
2940                         }
2941                         else
2942                         {
2943                                 /*
2944                                  * It's just roundoff error; use a small positive value
2945                                  */
2946                                 prefixsel = 1.0e-10;
2947                         }
2948                 }
2949         }
2950
2951         return prefixsel;
2952 }
2953
2954
2955 /*
2956  * Estimate the selectivity of a pattern of the specified type.
2957  * Note that any fixed prefix of the pattern will have been removed already.
2958  *
2959  * For now, we use a very simplistic approach: fixed characters reduce the
2960  * selectivity a good deal, character ranges reduce it a little,
2961  * wildcards (such as % for LIKE or .* for regex) increase it.
2962  */
2963
2964 #define FIXED_CHAR_SEL  0.20    /* about 1/5 */
2965 #define CHAR_RANGE_SEL  0.25
2966 #define ANY_CHAR_SEL    0.9             /* not 1, since it won't match
2967                                                                  * end-of-string */
2968 #define FULL_WILDCARD_SEL 5.0
2969 #define PARTIAL_WILDCARD_SEL 2.0
2970
2971 static Selectivity
2972 like_selectivity(char *patt, bool case_insensitive)
2973 {
2974         Selectivity sel = 1.0;
2975         int                     pos;
2976
2977         /* Skip any leading %; it's already factored into initial sel */
2978         pos = (*patt == '%') ? 1 : 0;
2979         for (; patt[pos]; pos++)
2980         {
2981                 /* % and _ are wildcard characters in LIKE */
2982                 if (patt[pos] == '%')
2983                         sel *= FULL_WILDCARD_SEL;
2984                 else if (patt[pos] == '_')
2985                         sel *= ANY_CHAR_SEL;
2986                 else if (patt[pos] == '\\')
2987                 {
2988                         /* Backslash quotes the next character */
2989                         pos++;
2990                         if (patt[pos] == '\0')
2991                                 break;
2992                         sel *= FIXED_CHAR_SEL;
2993                 }
2994                 else
2995                         sel *= FIXED_CHAR_SEL;
2996         }
2997         /* Could get sel > 1 if multiple wildcards */
2998         if (sel > 1.0)
2999                 sel = 1.0;
3000         return sel;
3001 }
3002
3003 static Selectivity
3004 regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
3005 {
3006         Selectivity sel = 1.0;
3007         int                     paren_depth = 0;
3008         int                     paren_pos = 0;  /* dummy init to keep compiler quiet */
3009         int                     pos;
3010
3011         for (pos = 0; pos < pattlen; pos++)
3012         {
3013                 if (patt[pos] == '(')
3014                 {
3015                         if (paren_depth == 0)
3016                                 paren_pos = pos;        /* remember start of parenthesized item */
3017                         paren_depth++;
3018                 }
3019                 else if (patt[pos] == ')' && paren_depth > 0)
3020                 {
3021                         paren_depth--;
3022                         if (paren_depth == 0)
3023                                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
3024                                                                                          pos - (paren_pos + 1),
3025                                                                                          case_insensitive);
3026                 }
3027                 else if (patt[pos] == '|' && paren_depth == 0)
3028                 {
3029                         /*
3030                          * If unquoted | is present at paren level 0 in pattern, we
3031                          * have multiple alternatives; sum their probabilities.
3032                          */
3033                         sel += regex_selectivity_sub(patt + (pos + 1),
3034                                                                                  pattlen - (pos + 1),
3035                                                                                  case_insensitive);
3036                         break;                          /* rest of pattern is now processed */
3037                 }
3038                 else if (patt[pos] == '[')
3039                 {
3040                         bool            negclass = false;
3041
3042                         if (patt[++pos] == '^')
3043                         {
3044                                 negclass = true;
3045                                 pos++;
3046                         }
3047                         if (patt[pos] == ']')           /* ']' at start of class is not
3048                                                                                  * special */
3049                                 pos++;
3050                         while (pos < pattlen && patt[pos] != ']')
3051                                 pos++;
3052                         if (paren_depth == 0)
3053                                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
3054                 }
3055                 else if (patt[pos] == '.')
3056                 {
3057                         if (paren_depth == 0)
3058                                 sel *= ANY_CHAR_SEL;
3059                 }
3060                 else if (patt[pos] == '*' ||
3061                                  patt[pos] == '?' ||
3062                                  patt[pos] == '+')
3063                 {
3064                         /* Ought to be smarter about quantifiers... */
3065                         if (paren_depth == 0)
3066                                 sel *= PARTIAL_WILDCARD_SEL;
3067                 }
3068                 else if (patt[pos] == '{')
3069                 {
3070                         while (pos < pattlen && patt[pos] != '}')
3071                                 pos++;
3072                         if (paren_depth == 0)
3073                                 sel *= PARTIAL_WILDCARD_SEL;
3074                 }
3075                 else if (patt[pos] == '\\')
3076                 {
3077                         /* backslash quotes the next character */
3078                         pos++;
3079                         if (pos >= pattlen)
3080                                 break;
3081                         if (paren_depth == 0)
3082                                 sel *= FIXED_CHAR_SEL;
3083                 }
3084                 else
3085                 {
3086                         if (paren_depth == 0)
3087                                 sel *= FIXED_CHAR_SEL;
3088                 }
3089         }
3090         /* Could get sel > 1 if multiple wildcards */
3091         if (sel > 1.0)
3092                 sel = 1.0;
3093         return sel;
3094 }
3095
3096 static Selectivity
3097 regex_selectivity(char *patt, bool case_insensitive)
3098 {
3099         Selectivity sel;
3100         int                     pattlen = strlen(patt);
3101
3102         /* If patt doesn't end with $, consider it to have a trailing wildcard */
3103         if (pattlen > 0 && patt[pattlen - 1] == '$' &&
3104                 (pattlen == 1 || patt[pattlen - 2] != '\\'))
3105         {
3106                 /* has trailing $ */
3107                 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
3108         }
3109         else
3110         {
3111                 /* no trailing $ */
3112                 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
3113                 sel *= FULL_WILDCARD_SEL;
3114                 if (sel > 1.0)
3115                         sel = 1.0;
3116         }
3117         return sel;
3118 }
3119
3120 static Selectivity
3121 pattern_selectivity(char *patt, Pattern_Type ptype)
3122 {
3123         Selectivity result;
3124
3125         switch (ptype)
3126         {
3127                 case Pattern_Type_Like:
3128                         result = like_selectivity(patt, false);
3129                         break;
3130                 case Pattern_Type_Like_IC:
3131                         result = like_selectivity(patt, true);
3132                         break;
3133                 case Pattern_Type_Regex:
3134                         result = regex_selectivity(patt, false);
3135                         break;
3136                 case Pattern_Type_Regex_IC:
3137                         result = regex_selectivity(patt, true);
3138                         break;
3139                 default:
3140                         elog(ERROR, "pattern_selectivity: bogus ptype");
3141                         result = 1.0;           /* keep compiler quiet */
3142                         break;
3143         }
3144         return result;
3145 }
3146
3147
3148 /*
3149  * We want test whether the database's LC_COLLATE setting is safe for
3150  * LIKE/regexp index optimization.
3151  *
3152  * The key requirement here is that given a prefix string, say "foo",
3153  * we must be able to generate another string "fop" that is greater
3154  * than all strings "foobar" starting with "foo".  Unfortunately, a
3155  * non-C locale may have arbitrary collation rules in which "fop" >
3156  * "foo" is not sufficient to ensure "fop" > "foobar".  Until we can
3157  * come up with a more bulletproof way of generating the upper-bound
3158  * string, the optimization is disabled in all non-C locales.
3159  *
3160  * (In theory, locales other than C may be LIKE-safe so this function
3161  * could be different from lc_collate_is_c(), but in a different
3162  * theory, non-C locales are completely unpredicable so it's unlikely
3163  * to happen.)
3164  */
3165 bool
3166 locale_is_like_safe(void)
3167 {
3168         return lc_collate_is_c();
3169 }
3170
3171 /*
3172  * Try to generate a string greater than the given string or any string it is
3173  * a prefix of.  If successful, return a palloc'd string; else return NULL.
3174  *
3175  * To work correctly in non-ASCII locales with weird collation orders,
3176  * we cannot simply increment "foo" to "fop" --- we have to check whether
3177  * we actually produced a string greater than the given one.  If not,
3178  * increment the righthand byte again and repeat.  If we max out the righthand
3179  * byte, truncate off the last character and start incrementing the next.
3180  * For example, if "z" were the last character in the sort order, then we
3181  * could produce "foo" as a string greater than "fonz".
3182  *
3183  * This could be rather slow in the worst case, but in most cases we won't
3184  * have to try more than one or two strings before succeeding.
3185  *
3186  * XXX this is actually not sufficient, since it only copes with the case
3187  * where individual characters collate in an order different from their
3188  * numeric code assignments.  It does not handle cases where there are
3189  * cross-character effects, such as specially sorted digraphs, multiple
3190  * sort passes, etc.  For now, we just shut down the whole thing in locales
3191  * that do such things :-(
3192  */
3193 char *
3194 make_greater_string(const char *str, Oid datatype)
3195 {
3196         char       *workstr;
3197         int                     len;
3198
3199         /*
3200          * Make a modifiable copy, which will be our return value if
3201          * successful
3202          */
3203         workstr = pstrdup((char *) str);
3204
3205         while ((len = strlen(workstr)) > 0)
3206         {
3207                 unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
3208
3209                 /*
3210                  * Try to generate a larger string by incrementing the last byte.
3211                  */
3212                 while (*lastchar < (unsigned char) 255)
3213                 {
3214                         (*lastchar)++;
3215                         if (string_lessthan(str, workstr, datatype))
3216                                 return workstr; /* Success! */
3217                 }
3218
3219                 /*
3220                  * Truncate off the last character, which might be more than 1
3221                  * byte in MULTIBYTE case.
3222                  */
3223 #ifdef MULTIBYTE
3224                 len = pg_mbcliplen((const unsigned char *) workstr, len, len - 1);
3225                 workstr[len] = '\0';
3226 #else
3227                 *lastchar = '\0';
3228 #endif
3229         }
3230
3231         /* Failed... */
3232         pfree(workstr);
3233         return NULL;
3234 }
3235
3236 /*
3237  * Test whether two strings are "<" according to the rules of the given
3238  * datatype.  We do this the hard way, ie, actually calling the type's
3239  * "<" operator function, to ensure we get the right result...
3240  */
3241 static bool
3242 string_lessthan(const char *str1, const char *str2, Oid datatype)
3243 {
3244         Datum           datum1 = string_to_datum(str1, datatype);
3245         Datum           datum2 = string_to_datum(str2, datatype);
3246         bool            result;
3247
3248         switch (datatype)
3249         {
3250                 case TEXTOID:
3251                         result = DatumGetBool(DirectFunctionCall2(text_lt,
3252                                                                                                           datum1, datum2));
3253                         break;
3254
3255                 case BPCHAROID:
3256                         result = DatumGetBool(DirectFunctionCall2(bpcharlt,
3257                                                                                                           datum1, datum2));
3258                         break;
3259
3260                 case VARCHAROID:
3261                         result = DatumGetBool(DirectFunctionCall2(varcharlt,
3262                                                                                                           datum1, datum2));
3263                         break;
3264
3265                 case NAMEOID:
3266                         result = DatumGetBool(DirectFunctionCall2(namelt,
3267                                                                                                           datum1, datum2));
3268                         break;
3269
3270                 case BYTEAOID:
3271                         result = DatumGetBool(DirectFunctionCall2(bytealt,
3272                                                                                                           datum1, datum2));
3273                         break;
3274
3275                 default:
3276                         elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
3277                         result = false;
3278                         break;
3279         }
3280
3281         pfree(DatumGetPointer(datum1));
3282         pfree(DatumGetPointer(datum2));
3283
3284         return result;
3285 }
3286
3287 /* See if there is a binary op of the given name for the given datatype */
3288 static Oid
3289 find_operator(const char *opname, Oid datatype)
3290 {
3291         return GetSysCacheOid(OPERNAME,
3292                                                   PointerGetDatum(opname),
3293                                                   ObjectIdGetDatum(datatype),
3294                                                   ObjectIdGetDatum(datatype),
3295                                                   CharGetDatum('b'));
3296 }
3297
3298 /*
3299  * Generate a Datum of the appropriate type from a C string.
3300  * Note that all of the supported types are pass-by-ref, so the
3301  * returned value should be pfree'd if no longer needed.
3302  */
3303 static Datum
3304 string_to_datum(const char *str, Oid datatype)
3305 {
3306         /*
3307          * We cheat a little by assuming that textin() will do for bpchar and
3308          * varchar constants too...
3309          */
3310         if (datatype == NAMEOID)
3311                 return DirectFunctionCall1(namein, CStringGetDatum(str));
3312         else
3313                 return DirectFunctionCall1(textin, CStringGetDatum(str));
3314 }
3315
3316 /*
3317  * Generate a Const node of the appropriate type from a C string.
3318  */
3319 static Const *
3320 string_to_const(const char *str, Oid datatype)
3321 {
3322         Datum           conval = string_to_datum(str, datatype);
3323
3324         return makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
3325                                          conval, false, false, false, false);
3326 }
3327
3328 /*-------------------------------------------------------------------------
3329  *
3330  * Index cost estimation functions
3331  *
3332  * genericcostestimate is a general-purpose estimator for use when we
3333  * don't have any better idea about how to estimate.  Index-type-specific
3334  * knowledge can be incorporated in the type-specific routines.
3335  *
3336  *-------------------------------------------------------------------------
3337  */
3338
3339 static void
3340 genericcostestimate(Query *root, RelOptInfo *rel,
3341                                         IndexOptInfo *index, List *indexQuals,
3342                                         Cost *indexStartupCost,
3343                                         Cost *indexTotalCost,
3344                                         Selectivity *indexSelectivity,
3345                                         double *indexCorrelation)
3346 {
3347         double          numIndexTuples;
3348         double          numIndexPages;
3349         List       *selectivityQuals = indexQuals;
3350
3351         /*
3352          * If the index is partial, AND the index predicate with the
3353          * explicitly given indexquals to produce a more accurate idea of the
3354          * index restriction.  This may produce redundant clauses, which we
3355          * hope that cnfify and clauselist_selectivity will deal with
3356          * intelligently.
3357          *
3358          * Note that index->indpred and indexQuals are both in implicit-AND form
3359          * to start with, which we have to make explicit to hand to
3360          * canonicalize_qual, and then we get back implicit-AND form again.
3361          */
3362         if (index->indpred != NIL)
3363         {
3364                 Expr       *andedQuals;
3365
3366                 andedQuals = make_ands_explicit(nconc(listCopy(index->indpred),
3367                                                                                           indexQuals));
3368                 selectivityQuals = canonicalize_qual(andedQuals, true);
3369         }
3370
3371         /* Estimate the fraction of main-table tuples that will be visited */
3372         *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
3373                                                                                            lfirsti(rel->relids));
3374
3375         /*
3376          * Estimate the number of tuples that will be visited.  We do it in
3377          * this rather peculiar-looking way in order to get the right answer
3378          * for partial indexes.  We can bound the number of tuples by the
3379          * index size, in any case.
3380          */
3381         numIndexTuples = *indexSelectivity * rel->tuples;
3382
3383         if (numIndexTuples > index->tuples)
3384                 numIndexTuples = index->tuples;
3385
3386         /*
3387          * Always estimate at least one tuple is touched, even when
3388          * indexSelectivity estimate is tiny.
3389          */
3390         if (numIndexTuples < 1.0)
3391                 numIndexTuples = 1.0;
3392
3393         /*
3394          * Estimate the number of index pages that will be retrieved.
3395          *
3396          * For all currently-supported index types, the first page of the index
3397          * is a metadata page, and we should figure on fetching that plus a
3398          * pro-rated fraction of the remaining pages.
3399          */
3400         if (index->pages > 1 && index->tuples > 0)
3401         {
3402                 numIndexPages = (numIndexTuples / index->tuples) * (index->pages - 1);
3403                 numIndexPages += 1;             /* count the metapage too */
3404                 numIndexPages = ceil(numIndexPages);
3405         }
3406         else
3407                 numIndexPages = 1.0;
3408
3409         /*
3410          * Compute the index access cost.
3411          *
3412          * Our generic assumption is that the index pages will be read
3413          * sequentially, so they have cost 1.0 each, not random_page_cost.
3414          * Also, we charge for evaluation of the indexquals at each index
3415          * tuple. All the costs are assumed to be paid incrementally during
3416          * the scan.
3417          */
3418         *indexStartupCost = 0;
3419         *indexTotalCost = numIndexPages +
3420                 (cpu_index_tuple_cost + cost_qual_eval(indexQuals)) * numIndexTuples;
3421
3422         /*
3423          * Generic assumption about index correlation: there isn't any.
3424          */
3425         *indexCorrelation = 0.0;
3426 }
3427
3428
3429 Datum
3430 btcostestimate(PG_FUNCTION_ARGS)
3431 {
3432         Query      *root = (Query *) PG_GETARG_POINTER(0);
3433         RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
3434         IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
3435         List       *indexQuals = (List *) PG_GETARG_POINTER(3);
3436         Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
3437         Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
3438         Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
3439         double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
3440
3441         genericcostestimate(root, rel, index, indexQuals,
3442                                                 indexStartupCost, indexTotalCost,
3443                                                 indexSelectivity, indexCorrelation);
3444
3445         /*
3446          * If it's a functional index, leave the default zero-correlation
3447          * estimate in place.  If not, and if we can get an estimate for the
3448          * first variable's ordering correlation C from pg_statistic, estimate
3449          * the index correlation as C / number-of-columns. (The idea here is
3450          * that multiple columns dilute the importance of the first column's
3451          * ordering, but don't negate it entirely.)
3452          */
3453         if (index->indproc == InvalidOid)
3454         {
3455                 Oid                     relid;
3456                 HeapTuple       tuple;
3457
3458                 relid = getrelid(lfirsti(rel->relids), root->rtable);
3459                 Assert(relid != InvalidOid);
3460                 tuple = SearchSysCache(STATRELATT,
3461                                                            ObjectIdGetDatum(relid),
3462                                                            Int16GetDatum(index->indexkeys[0]),
3463                                                            0, 0);
3464                 if (HeapTupleIsValid(tuple))
3465                 {
3466                         Oid                     typid;
3467                         int32           typmod;
3468                         float4     *numbers;
3469                         int                     nnumbers;
3470
3471                         get_atttypetypmod(relid, index->indexkeys[0],
3472                                                           &typid, &typmod);
3473                         if (get_attstatsslot(tuple, typid, typmod,
3474                                                                  STATISTIC_KIND_CORRELATION,
3475                                                                  index->ordering[0],
3476                                                                  NULL, NULL, &numbers, &nnumbers))
3477                         {
3478                                 double          varCorrelation;
3479                                 int                     nKeys;
3480
3481                                 Assert(nnumbers == 1);
3482                                 varCorrelation = numbers[0];
3483                                 for (nKeys = 1; index->indexkeys[nKeys] != 0; nKeys++)
3484                                          /* skip */ ;
3485
3486                                 *indexCorrelation = varCorrelation / nKeys;
3487
3488                                 free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
3489                         }
3490                         ReleaseSysCache(tuple);
3491                 }
3492         }
3493
3494         PG_RETURN_VOID();
3495 }
3496
3497 Datum
3498 rtcostestimate(PG_FUNCTION_ARGS)
3499 {
3500         Query      *root = (Query *) PG_GETARG_POINTER(0);
3501         RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
3502         IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
3503         List       *indexQuals = (List *) PG_GETARG_POINTER(3);
3504         Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
3505         Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
3506         Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
3507         double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
3508
3509         genericcostestimate(root, rel, index, indexQuals,
3510                                                 indexStartupCost, indexTotalCost,
3511                                                 indexSelectivity, indexCorrelation);
3512
3513         PG_RETURN_VOID();
3514 }
3515
3516 Datum
3517 hashcostestimate(PG_FUNCTION_ARGS)
3518 {
3519         Query      *root = (Query *) PG_GETARG_POINTER(0);
3520         RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
3521         IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
3522         List       *indexQuals = (List *) PG_GETARG_POINTER(3);
3523         Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
3524         Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
3525         Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
3526         double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
3527
3528         genericcostestimate(root, rel, index, indexQuals,
3529                                                 indexStartupCost, indexTotalCost,
3530                                                 indexSelectivity, indexCorrelation);
3531
3532         PG_RETURN_VOID();
3533 }
3534
3535 Datum
3536 gistcostestimate(PG_FUNCTION_ARGS)
3537 {
3538         Query      *root = (Query *) PG_GETARG_POINTER(0);
3539         RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
3540         IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
3541         List       *indexQuals = (List *) PG_GETARG_POINTER(3);
3542         Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
3543         Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
3544         Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
3545         double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
3546
3547         genericcostestimate(root, rel, index, indexQuals,
3548                                                 indexStartupCost, indexTotalCost,
3549                                                 indexSelectivity, indexCorrelation);
3550
3551         PG_RETURN_VOID();
3552 }