]> granicus.if.org Git - postgresql/commitdiff
Move pattern selectivity code from selfuncs.c to like_support.c.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 14 Feb 2019 15:51:59 +0000 (10:51 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 14 Feb 2019 15:51:59 +0000 (10:51 -0500)
While at it, refactor patternsel() a bit so that it can be used from
the LIKE/regex planner support functions as well.  This makes the
planner able to deal equally well with either operator or function
syntax for these operations.  I'm not excited about that as a feature
in itself, but it provides a nice model for extensions to follow if
they want such behavior for their operations.

This change localizes the use of pattern_fixed_prefix() and
make_greater_string() so that they no longer need be exported.
(We might get pushback from extensions about that, perhaps,
in which case I'd be inclined to re-export them in a new header
file like_support.h.)

This reduces the bulk of selfuncs.c a fair amount, removing ~1370
lines or about one-sixth of that file; it's still too big, but this
is progress.

Discussion: https://postgr.es/m/24537.1550093915@sss.pgh.pa.us

src/backend/utils/adt/like_support.c
src/backend/utils/adt/selfuncs.c
src/include/utils/selfuncs.h

index b001dde5fc7d73b24853296ebc27e20044fae2c7..69509811ef78a746de793d96a1525215debfba17 100644 (file)
  */
 #include "postgres.h"
 
+#include <math.h>
+
+#include "access/htup_details.h"
 #include "access/stratnum.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_opfamily.h"
+#include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
 #include "utils/builtins.h"
+#include "utils/datum.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_locale.h"
 #include "utils/selfuncs.h"
+#include "utils/varlena.h"
+
+
+typedef enum
+{
+       Pattern_Type_Like,
+       Pattern_Type_Like_IC,
+       Pattern_Type_Regex,
+       Pattern_Type_Regex_IC,
+       Pattern_Type_Prefix
+} Pattern_Type;
 
+typedef enum
+{
+       Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
+} Pattern_Prefix_Status;
 
 static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
 static List *match_pattern_prefix(Node *leftop,
@@ -53,6 +75,34 @@ static List *match_pattern_prefix(Node *leftop,
                                         Oid expr_coll,
                                         Oid opfamily,
                                         Oid indexcollation);
+static double patternsel_common(PlannerInfo *root,
+                                 Oid oprid,
+                                 Oid opfuncid,
+                                 List *args,
+                                 int varRelid,
+                                 Oid collation,
+                                 Pattern_Type ptype,
+                                 bool negate);
+static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
+                                        Pattern_Type ptype,
+                                        Oid collation,
+                                        Const **prefix,
+                                        Selectivity *rest_selec);
+static Selectivity prefix_selectivity(PlannerInfo *root,
+                                  VariableStatData *vardata,
+                                  Oid vartype, Oid opfamily, Const *prefixcon);
+static Selectivity like_selectivity(const char *patt, int pattlen,
+                                bool case_insensitive);
+static Selectivity regex_selectivity(const char *patt, int pattlen,
+                                 bool case_insensitive,
+                                 int fixed_prefix_len);
+static int pattern_char_isalpha(char c, bool is_multibyte,
+                                        pg_locale_t locale, bool locale_is_c);
+static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
+                                       Oid collation);
+static Datum string_to_datum(const char *str, Oid datatype);
+static Const *string_to_const(const char *str, Oid datatype);
+static Const *string_to_bytea_const(const char *str, size_t str_len);
 
 
 /*
@@ -96,7 +146,39 @@ like_regex_support(Node *rawreq, Pattern_Type ptype)
 {
        Node       *ret = NULL;
 
-       if (IsA(rawreq, SupportRequestIndexCondition))
+       if (IsA(rawreq, SupportRequestSelectivity))
+       {
+               /*
+                * Make a selectivity estimate for a function call, just as we'd do if
+                * the call was via the corresponding operator.
+                */
+               SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
+               Selectivity s1;
+
+               if (req->is_join)
+               {
+                       /*
+                        * For the moment we just punt.  If patternjoinsel is ever
+                        * improved to do better, this should be made to call it.
+                        */
+                       s1 = DEFAULT_MATCH_SEL;
+               }
+               else
+               {
+                       /* Share code with operator restriction selectivity functions */
+                       s1 = patternsel_common(req->root,
+                                                                  InvalidOid,
+                                                                  req->funcid,
+                                                                  req->args,
+                                                                  req->varRelid,
+                                                                  req->inputcollid,
+                                                                  ptype,
+                                                                  false);
+               }
+               req->selectivity = s1;
+               ret = (Node *) req;
+       }
+       else if (IsA(rawreq, SupportRequestIndexCondition))
        {
                /* Try to convert operator/function call to index conditions */
                SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
@@ -311,3 +393,1333 @@ match_pattern_prefix(Node *leftop,
 
        return result;
 }
+
+
+/*
+ * patternsel_common - generic code for pattern-match restriction selectivity.
+ *
+ * To support using this from either the operator or function paths, caller
+ * may pass either operator OID or underlying function OID; we look up the
+ * latter from the former if needed.  (We could just have patternsel() call
+ * get_opcode(), but the work would be wasted if we don't have a need to
+ * compare a fixed prefix to the pg_statistic data.)
+ *
+ * Note that oprid and/or opfuncid should be for the positive-match operator
+ * even when negate is true.
+ */
+static double
+patternsel_common(PlannerInfo *root,
+                                 Oid oprid,
+                                 Oid opfuncid,
+                                 List *args,
+                                 int varRelid,
+                                 Oid collation,
+                                 Pattern_Type ptype,
+                                 bool negate)
+{
+       VariableStatData vardata;
+       Node       *other;
+       bool            varonleft;
+       Datum           constval;
+       Oid                     consttype;
+       Oid                     vartype;
+       Oid                     opfamily;
+       Pattern_Prefix_Status pstatus;
+       Const      *patt;
+       Const      *prefix = NULL;
+       Selectivity rest_selec = 0;
+       double          nullfrac = 0.0;
+       double          result;
+
+       /*
+        * Initialize result to the appropriate default estimate depending on
+        * whether it's a match or not-match operator.
+        */
+       if (negate)
+               result = 1.0 - DEFAULT_MATCH_SEL;
+       else
+               result = DEFAULT_MATCH_SEL;
+
+       /*
+        * If expression is not variable op constant, then punt and return the
+        * default estimate.
+        */
+       if (!get_restriction_variable(root, args, varRelid,
+                                                                 &vardata, &other, &varonleft))
+               return result;
+       if (!varonleft || !IsA(other, Const))
+       {
+               ReleaseVariableStats(vardata);
+               return result;
+       }
+
+       /*
+        * If the constant is NULL, assume operator is strict and return zero, ie,
+        * operator will never return TRUE.  (It's zero even for a negator op.)
+        */
+       if (((Const *) other)->constisnull)
+       {
+               ReleaseVariableStats(vardata);
+               return 0.0;
+       }
+       constval = ((Const *) other)->constvalue;
+       consttype = ((Const *) other)->consttype;
+
+       /*
+        * The right-hand const is type text or bytea for all supported operators.
+        * We do not expect to see binary-compatible types here, since
+        * const-folding should have relabeled the const to exactly match the
+        * operator's declared type.
+        */
+       if (consttype != TEXTOID && consttype != BYTEAOID)
+       {
+               ReleaseVariableStats(vardata);
+               return result;
+       }
+
+       /*
+        * Similarly, the exposed type of the left-hand side should be one of
+        * those we know.  (Do not look at vardata.atttype, which might be
+        * something binary-compatible but different.)  We can use it to choose
+        * the index opfamily from which we must draw the comparison operators.
+        *
+        * NOTE: It would be more correct to use the PATTERN opfamilies than the
+        * simple ones, but at the moment ANALYZE will not generate statistics for
+        * the PATTERN operators.  But our results are so approximate anyway that
+        * it probably hardly matters.
+        */
+       vartype = vardata.vartype;
+
+       switch (vartype)
+       {
+               case TEXTOID:
+               case NAMEOID:
+                       opfamily = TEXT_BTREE_FAM_OID;
+                       break;
+               case BPCHAROID:
+                       opfamily = BPCHAR_BTREE_FAM_OID;
+                       break;
+               case BYTEAOID:
+                       opfamily = BYTEA_BTREE_FAM_OID;
+                       break;
+               default:
+                       ReleaseVariableStats(vardata);
+                       return result;
+       }
+
+       /*
+        * Grab the nullfrac for use below.
+        */
+       if (HeapTupleIsValid(vardata.statsTuple))
+       {
+               Form_pg_statistic stats;
+
+               stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+               nullfrac = stats->stanullfrac;
+       }
+
+       /*
+        * Pull out any fixed prefix implied by the pattern, and estimate the
+        * fractional selectivity of the remainder of the pattern.  Unlike many
+        * other selectivity estimators, we use the pattern operator's actual
+        * collation for this step.  This is not because we expect the collation
+        * to make a big difference in the selectivity estimate (it seldom would),
+        * but because we want to be sure we cache compiled regexps under the
+        * right cache key, so that they can be re-used at runtime.
+        */
+       patt = (Const *) other;
+       pstatus = pattern_fixed_prefix(patt, ptype, collation,
+                                                                  &prefix, &rest_selec);
+
+       /*
+        * If necessary, coerce the prefix constant to the right type.
+        */
+       if (prefix && prefix->consttype != vartype)
+       {
+               char       *prefixstr;
+
+               switch (prefix->consttype)
+               {
+                       case TEXTOID:
+                               prefixstr = TextDatumGetCString(prefix->constvalue);
+                               break;
+                       case BYTEAOID:
+                               prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
+                                                                                                                               prefix->constvalue));
+                               break;
+                       default:
+                               elog(ERROR, "unrecognized consttype: %u",
+                                        prefix->consttype);
+                               ReleaseVariableStats(vardata);
+                               return result;
+               }
+               prefix = string_to_const(prefixstr, vartype);
+               pfree(prefixstr);
+       }
+
+       if (pstatus == Pattern_Prefix_Exact)
+       {
+               /*
+                * Pattern specifies an exact match, so pretend operator is '='
+                */
+               Oid                     eqopr = get_opfamily_member(opfamily, vartype, vartype,
+                                                                                               BTEqualStrategyNumber);
+
+               if (eqopr == InvalidOid)
+                       elog(ERROR, "no = operator for opfamily %u", opfamily);
+               result = var_eq_const(&vardata, eqopr, prefix->constvalue,
+                                                         false, true, false);
+       }
+       else
+       {
+               /*
+                * Not exact-match pattern.  If we have a sufficiently large
+                * histogram, estimate selectivity for the histogram part of the
+                * population by counting matches in the histogram.  If not, estimate
+                * selectivity of the fixed prefix and remainder of pattern
+                * separately, then combine the two to get an estimate of the
+                * selectivity for the part of the column population represented by
+                * the histogram.  (For small histograms, we combine these
+                * approaches.)
+                *
+                * We then add up data for any most-common-values values; these are
+                * not in the histogram population, and we can get exact answers for
+                * them by applying the pattern operator, so there's no reason to
+                * approximate.  (If the MCVs cover a significant part of the total
+                * population, this gives us a big leg up in accuracy.)
+                */
+               Selectivity selec;
+               int                     hist_size;
+               FmgrInfo        opproc;
+               double          mcv_selec,
+                                       sumcommon;
+
+               /* Try to use the histogram entries to get selectivity */
+               if (!OidIsValid(opfuncid))
+                       opfuncid = get_opcode(oprid);
+               fmgr_info(opfuncid, &opproc);
+
+               selec = histogram_selectivity(&vardata, &opproc, constval, true,
+                                                                         10, 1, &hist_size);
+
+               /* If not at least 100 entries, use the heuristic method */
+               if (hist_size < 100)
+               {
+                       Selectivity heursel;
+                       Selectivity prefixsel;
+
+                       if (pstatus == Pattern_Prefix_Partial)
+                               prefixsel = prefix_selectivity(root, &vardata, vartype,
+                                                                                          opfamily, prefix);
+                       else
+                               prefixsel = 1.0;
+                       heursel = prefixsel * rest_selec;
+
+                       if (selec < 0)          /* fewer than 10 histogram entries? */
+                               selec = heursel;
+                       else
+                       {
+                               /*
+                                * For histogram sizes from 10 to 100, we combine the
+                                * histogram and heuristic selectivities, putting increasingly
+                                * more trust in the histogram for larger sizes.
+                                */
+                               double          hist_weight = hist_size / 100.0;
+
+                               selec = selec * hist_weight + heursel * (1.0 - hist_weight);
+                       }
+               }
+
+               /* In any case, don't believe extremely small or large estimates. */
+               if (selec < 0.0001)
+                       selec = 0.0001;
+               else if (selec > 0.9999)
+                       selec = 0.9999;
+
+               /*
+                * If we have most-common-values info, add up the fractions of the MCV
+                * entries that satisfy MCV OP PATTERN.  These fractions contribute
+                * directly to the result selectivity.  Also add up the total fraction
+                * represented by MCV entries.
+                */
+               mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
+                                                                       &sumcommon);
+
+               /*
+                * Now merge the results from the MCV and histogram calculations,
+                * realizing that the histogram covers only the non-null values that
+                * are not listed in MCV.
+                */
+               selec *= 1.0 - nullfrac - sumcommon;
+               selec += mcv_selec;
+               result = selec;
+       }
+
+       /* now adjust if we wanted not-match rather than match */
+       if (negate)
+               result = 1.0 - result - nullfrac;
+
+       /* result should be in range, but make sure... */
+       CLAMP_PROBABILITY(result);
+
+       if (prefix)
+       {
+               pfree(DatumGetPointer(prefix->constvalue));
+               pfree(prefix);
+       }
+
+       ReleaseVariableStats(vardata);
+
+       return result;
+}
+
+/*
+ * Fix impedance mismatch between SQL-callable functions and patternsel_common
+ */
+static double
+patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+       Oid                     operator = PG_GETARG_OID(1);
+       List       *args = (List *) PG_GETARG_POINTER(2);
+       int                     varRelid = PG_GETARG_INT32(3);
+       Oid                     collation = PG_GET_COLLATION();
+
+       /*
+        * If this is for a NOT LIKE or similar operator, get the corresponding
+        * positive-match operator and work with that.
+        */
+       if (negate)
+       {
+               operator = get_negator(operator);
+               if (!OidIsValid(operator))
+                       elog(ERROR, "patternsel called for operator without a negator");
+       }
+
+       return patternsel_common(root,
+                                                        operator,
+                                                        InvalidOid,
+                                                        args,
+                                                        varRelid,
+                                                        collation,
+                                                        ptype,
+                                                        negate);
+}
+
+/*
+ *             regexeqsel              - Selectivity of regular-expression pattern match.
+ */
+Datum
+regexeqsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
+}
+
+/*
+ *             icregexeqsel    - Selectivity of case-insensitive regex match.
+ */
+Datum
+icregexeqsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
+}
+
+/*
+ *             likesel                 - Selectivity of LIKE pattern match.
+ */
+Datum
+likesel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
+}
+
+/*
+ *             prefixsel                       - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
+ *             iclikesel                       - Selectivity of ILIKE pattern match.
+ */
+Datum
+iclikesel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
+}
+
+/*
+ *             regexnesel              - Selectivity of regular-expression pattern non-match.
+ */
+Datum
+regexnesel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
+}
+
+/*
+ *             icregexnesel    - Selectivity of case-insensitive regex non-match.
+ */
+Datum
+icregexnesel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
+}
+
+/*
+ *             nlikesel                - Selectivity of LIKE pattern non-match.
+ */
+Datum
+nlikesel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
+}
+
+/*
+ *             icnlikesel              - Selectivity of ILIKE pattern non-match.
+ */
+Datum
+icnlikesel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
+}
+
+/*
+ * patternjoinsel              - Generic code for pattern-match join selectivity.
+ */
+static double
+patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+       /* For the moment we just punt. */
+       return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
+}
+
+/*
+ *             regexeqjoinsel  - Join selectivity of regular-expression pattern match.
+ */
+Datum
+regexeqjoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
+}
+
+/*
+ *             icregexeqjoinsel        - Join selectivity of case-insensitive regex match.
+ */
+Datum
+icregexeqjoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
+}
+
+/*
+ *             likejoinsel                     - Join selectivity of LIKE pattern match.
+ */
+Datum
+likejoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
+}
+
+/*
+ *             prefixjoinsel                   - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *             iclikejoinsel                   - Join selectivity of ILIKE pattern match.
+ */
+Datum
+iclikejoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
+}
+
+/*
+ *             regexnejoinsel  - Join selectivity of regex non-match.
+ */
+Datum
+regexnejoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
+}
+
+/*
+ *             icregexnejoinsel        - Join selectivity of case-insensitive regex non-match.
+ */
+Datum
+icregexnejoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
+}
+
+/*
+ *             nlikejoinsel            - Join selectivity of LIKE pattern non-match.
+ */
+Datum
+nlikejoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
+}
+
+/*
+ *             icnlikejoinsel          - Join selectivity of ILIKE pattern non-match.
+ */
+Datum
+icnlikejoinsel(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
+}
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Pattern analysis functions
+ *
+ * These routines support analysis of LIKE and regular-expression patterns
+ * by the planner/optimizer.  It's important that they agree with the
+ * regular-expression code in backend/regex/ and the LIKE code in
+ * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
+ * must be conservative: if we report a string longer than the true fixed
+ * prefix, the query may produce actually wrong answers, rather than just
+ * getting a bad selectivity estimate!
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Extract the fixed prefix, if any, for a pattern.
+ *
+ * *prefix is set to a palloc'd prefix string (in the form of a Const node),
+ *     or to NULL if no fixed prefix exists for the pattern.
+ * If rest_selec is not NULL, *rest_selec is set to an estimate of the
+ *     selectivity of the remainder of the pattern (without any fixed prefix).
+ * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
+ *
+ * The return value distinguishes no fixed prefix, a partial prefix,
+ * or an exact-match-only pattern.
+ */
+
+static Pattern_Prefix_Status
+like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
+                                 Const **prefix_const, Selectivity *rest_selec)
+{
+       char       *match;
+       char       *patt;
+       int                     pattlen;
+       Oid                     typeid = patt_const->consttype;
+       int                     pos,
+                               match_pos;
+       bool            is_multibyte = (pg_database_encoding_max_length() > 1);
+       pg_locale_t locale = 0;
+       bool            locale_is_c = false;
+
+       /* the right-hand const is type text or bytea */
+       Assert(typeid == BYTEAOID || typeid == TEXTOID);
+
+       if (case_insensitive)
+       {
+               if (typeid == BYTEAOID)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("case insensitive matching not supported on type bytea")));
+
+               /* If case-insensitive, we need locale info */
+               if (lc_ctype_is_c(collation))
+                       locale_is_c = true;
+               else if (collation != DEFAULT_COLLATION_OID)
+               {
+                       if (!OidIsValid(collation))
+                       {
+                               /*
+                                * This typically means that the parser could not resolve a
+                                * conflict of implicit collations, so report it that way.
+                                */
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                                errmsg("could not determine which collation to use for ILIKE"),
+                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
+                       }
+                       locale = pg_newlocale_from_collation(collation);
+               }
+       }
+
+       if (typeid != BYTEAOID)
+       {
+               patt = TextDatumGetCString(patt_const->constvalue);
+               pattlen = strlen(patt);
+       }
+       else
+       {
+               bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
+
+               pattlen = VARSIZE_ANY_EXHDR(bstr);
+               patt = (char *) palloc(pattlen);
+               memcpy(patt, VARDATA_ANY(bstr), pattlen);
+               Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
+       }
+
+       match = palloc(pattlen + 1);
+       match_pos = 0;
+       for (pos = 0; pos < pattlen; pos++)
+       {
+               /* % and _ are wildcard characters in LIKE */
+               if (patt[pos] == '%' ||
+                       patt[pos] == '_')
+                       break;
+
+               /* Backslash escapes the next character */
+               if (patt[pos] == '\\')
+               {
+                       pos++;
+                       if (pos >= pattlen)
+                               break;
+               }
+
+               /* Stop if case-varying character (it's sort of a wildcard) */
+               if (case_insensitive &&
+                       pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
+                       break;
+
+               match[match_pos++] = patt[pos];
+       }
+
+       match[match_pos] = '\0';
+
+       if (typeid != BYTEAOID)
+               *prefix_const = string_to_const(match, typeid);
+       else
+               *prefix_const = string_to_bytea_const(match, match_pos);
+
+       if (rest_selec != NULL)
+               *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
+                                                                          case_insensitive);
+
+       pfree(patt);
+       pfree(match);
+
+       /* in LIKE, an empty pattern is an exact match! */
+       if (pos == pattlen)
+               return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
+
+       if (match_pos > 0)
+               return Pattern_Prefix_Partial;
+
+       return Pattern_Prefix_None;
+}
+
+static Pattern_Prefix_Status
+regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
+                                  Const **prefix_const, Selectivity *rest_selec)
+{
+       Oid                     typeid = patt_const->consttype;
+       char       *prefix;
+       bool            exact;
+
+       /*
+        * Should be unnecessary, there are no bytea regex operators defined. As
+        * such, it should be noted that the rest of this function has *not* been
+        * made safe for binary (possibly NULL containing) strings.
+        */
+       if (typeid == BYTEAOID)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("regular-expression matching not supported on type bytea")));
+
+       /* Use the regexp machinery to extract the prefix, if any */
+       prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
+                                                                case_insensitive, collation,
+                                                                &exact);
+
+       if (prefix == NULL)
+       {
+               *prefix_const = NULL;
+
+               if (rest_selec != NULL)
+               {
+                       char       *patt = TextDatumGetCString(patt_const->constvalue);
+
+                       *rest_selec = regex_selectivity(patt, strlen(patt),
+                                                                                       case_insensitive,
+                                                                                       0);
+                       pfree(patt);
+               }
+
+               return Pattern_Prefix_None;
+       }
+
+       *prefix_const = string_to_const(prefix, typeid);
+
+       if (rest_selec != NULL)
+       {
+               if (exact)
+               {
+                       /* Exact match, so there's no additional selectivity */
+                       *rest_selec = 1.0;
+               }
+               else
+               {
+                       char       *patt = TextDatumGetCString(patt_const->constvalue);
+
+                       *rest_selec = regex_selectivity(patt, strlen(patt),
+                                                                                       case_insensitive,
+                                                                                       strlen(prefix));
+                       pfree(patt);
+               }
+       }
+
+       pfree(prefix);
+
+       if (exact)
+               return Pattern_Prefix_Exact;    /* pattern specifies exact match */
+       else
+               return Pattern_Prefix_Partial;
+}
+
+static Pattern_Prefix_Status
+pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
+                                        Const **prefix, Selectivity *rest_selec)
+{
+       Pattern_Prefix_Status result;
+
+       switch (ptype)
+       {
+               case Pattern_Type_Like:
+                       result = like_fixed_prefix(patt, false, collation,
+                                                                          prefix, rest_selec);
+                       break;
+               case Pattern_Type_Like_IC:
+                       result = like_fixed_prefix(patt, true, collation,
+                                                                          prefix, rest_selec);
+                       break;
+               case Pattern_Type_Regex:
+                       result = regex_fixed_prefix(patt, false, collation,
+                                                                               prefix, rest_selec);
+                       break;
+               case Pattern_Type_Regex_IC:
+                       result = regex_fixed_prefix(patt, true, collation,
+                                                                               prefix, rest_selec);
+                       break;
+               case Pattern_Type_Prefix:
+                       /* Prefix type work is trivial.  */
+                       result = Pattern_Prefix_Partial;
+                       *rest_selec = 1.0;      /* all */
+                       *prefix = makeConst(patt->consttype,
+                                                               patt->consttypmod,
+                                                               patt->constcollid,
+                                                               patt->constlen,
+                                                               datumCopy(patt->constvalue,
+                                                                                 patt->constbyval,
+                                                                                 patt->constlen),
+                                                               patt->constisnull,
+                                                               patt->constbyval);
+                       break;
+               default:
+                       elog(ERROR, "unrecognized ptype: %d", (int) ptype);
+                       result = Pattern_Prefix_None;   /* keep compiler quiet */
+                       break;
+       }
+       return result;
+}
+
+/*
+ * Estimate the selectivity of a fixed prefix for a pattern match.
+ *
+ * A fixed prefix "foo" is estimated as the selectivity of the expression
+ * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
+ *
+ * The selectivity estimate is with respect to the portion of the column
+ * population represented by the histogram --- the caller must fold this
+ * together with info about MCVs and NULLs.
+ *
+ * We use the >= and < operators from the specified btree opfamily to do the
+ * estimation.  The given variable and Const must be of the associated
+ * datatype.
+ *
+ * XXX Note: we make use of the upper bound to estimate operator selectivity
+ * even if the locale is such that we cannot rely on the upper-bound string.
+ * The selectivity only needs to be approximately right anyway, so it seems
+ * more useful to use the upper-bound code than not.
+ */
+static Selectivity
+prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
+                                  Oid vartype, Oid opfamily, Const *prefixcon)
+{
+       Selectivity prefixsel;
+       Oid                     cmpopr;
+       FmgrInfo        opproc;
+       AttStatsSlot sslot;
+       Const      *greaterstrcon;
+       Selectivity eq_sel;
+
+       cmpopr = get_opfamily_member(opfamily, vartype, vartype,
+                                                                BTGreaterEqualStrategyNumber);
+       if (cmpopr == InvalidOid)
+               elog(ERROR, "no >= operator for opfamily %u", opfamily);
+       fmgr_info(get_opcode(cmpopr), &opproc);
+
+       prefixsel = ineq_histogram_selectivity(root, vardata,
+                                                                                  &opproc, true, true,
+                                                                                  prefixcon->constvalue,
+                                                                                  prefixcon->consttype);
+
+       if (prefixsel < 0.0)
+       {
+               /* No histogram is present ... return a suitable default estimate */
+               return DEFAULT_MATCH_SEL;
+       }
+
+       /*-------
+        * If we can create a string larger than the prefix, say
+        * "x < greaterstr".  We try to generate the string referencing the
+        * collation of the var's statistics, but if that's not available,
+        * use DEFAULT_COLLATION_OID.
+        *-------
+        */
+       if (HeapTupleIsValid(vardata->statsTuple) &&
+               get_attstatsslot(&sslot, vardata->statsTuple,
+                                                STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
+                /* sslot.stacoll is set up */ ;
+       else
+               sslot.stacoll = DEFAULT_COLLATION_OID;
+       cmpopr = get_opfamily_member(opfamily, vartype, vartype,
+                                                                BTLessStrategyNumber);
+       if (cmpopr == InvalidOid)
+               elog(ERROR, "no < operator for opfamily %u", opfamily);
+       fmgr_info(get_opcode(cmpopr), &opproc);
+       greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
+       if (greaterstrcon)
+       {
+               Selectivity topsel;
+
+               topsel = ineq_histogram_selectivity(root, vardata,
+                                                                                       &opproc, false, false,
+                                                                                       greaterstrcon->constvalue,
+                                                                                       greaterstrcon->consttype);
+
+               /* ineq_histogram_selectivity worked before, it shouldn't fail now */
+               Assert(topsel >= 0.0);
+
+               /*
+                * Merge the two selectivities in the same way as for a range query
+                * (see clauselist_selectivity()).  Note that we don't need to worry
+                * about double-exclusion of nulls, since ineq_histogram_selectivity
+                * doesn't count those anyway.
+                */
+               prefixsel = topsel + prefixsel - 1.0;
+       }
+
+       /*
+        * If the prefix is long then the two bounding values might be too close
+        * together for the histogram to distinguish them usefully, resulting in a
+        * zero estimate (plus or minus roundoff error). To avoid returning a
+        * ridiculously small estimate, compute the estimated selectivity for
+        * "variable = 'foo'", and clamp to that. (Obviously, the resultant
+        * estimate should be at least that.)
+        *
+        * We apply this even if we couldn't make a greater string.  That case
+        * suggests that the prefix is near the maximum possible, and thus
+        * probably off the end of the histogram, and thus we probably got a very
+        * small estimate from the >= condition; so we still need to clamp.
+        */
+       cmpopr = get_opfamily_member(opfamily, vartype, vartype,
+                                                                BTEqualStrategyNumber);
+       if (cmpopr == InvalidOid)
+               elog(ERROR, "no = operator for opfamily %u", opfamily);
+       eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
+                                                 false, true, false);
+
+       prefixsel = Max(prefixsel, eq_sel);
+
+       return prefixsel;
+}
+
+
+/*
+ * Estimate the selectivity of a pattern of the specified type.
+ * Note that any fixed prefix of the pattern will have been removed already,
+ * so actually we may be looking at just a fragment of the pattern.
+ *
+ * For now, we use a very simplistic approach: fixed characters reduce the
+ * selectivity a good deal, character ranges reduce it a little,
+ * wildcards (such as % for LIKE or .* for regex) increase it.
+ */
+
+#define FIXED_CHAR_SEL 0.20    /* about 1/5 */
+#define CHAR_RANGE_SEL 0.25
+#define ANY_CHAR_SEL   0.9             /* not 1, since it won't match end-of-string */
+#define FULL_WILDCARD_SEL 5.0
+#define PARTIAL_WILDCARD_SEL 2.0
+
+static Selectivity
+like_selectivity(const char *patt, int pattlen, bool case_insensitive)
+{
+       Selectivity sel = 1.0;
+       int                     pos;
+
+       /* Skip any leading wildcard; it's already factored into initial sel */
+       for (pos = 0; pos < pattlen; pos++)
+       {
+               if (patt[pos] != '%' && patt[pos] != '_')
+                       break;
+       }
+
+       for (; pos < pattlen; pos++)
+       {
+               /* % and _ are wildcard characters in LIKE */
+               if (patt[pos] == '%')
+                       sel *= FULL_WILDCARD_SEL;
+               else if (patt[pos] == '_')
+                       sel *= ANY_CHAR_SEL;
+               else if (patt[pos] == '\\')
+               {
+                       /* Backslash quotes the next character */
+                       pos++;
+                       if (pos >= pattlen)
+                               break;
+                       sel *= FIXED_CHAR_SEL;
+               }
+               else
+                       sel *= FIXED_CHAR_SEL;
+       }
+       /* Could get sel > 1 if multiple wildcards */
+       if (sel > 1.0)
+               sel = 1.0;
+       return sel;
+}
+
+static Selectivity
+regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
+{
+       Selectivity sel = 1.0;
+       int                     paren_depth = 0;
+       int                     paren_pos = 0;  /* dummy init to keep compiler quiet */
+       int                     pos;
+
+       for (pos = 0; pos < pattlen; pos++)
+       {
+               if (patt[pos] == '(')
+               {
+                       if (paren_depth == 0)
+                               paren_pos = pos;        /* remember start of parenthesized item */
+                       paren_depth++;
+               }
+               else if (patt[pos] == ')' && paren_depth > 0)
+               {
+                       paren_depth--;
+                       if (paren_depth == 0)
+                               sel *= regex_selectivity_sub(patt + (paren_pos + 1),
+                                                                                        pos - (paren_pos + 1),
+                                                                                        case_insensitive);
+               }
+               else if (patt[pos] == '|' && paren_depth == 0)
+               {
+                       /*
+                        * If unquoted | is present at paren level 0 in pattern, we have
+                        * multiple alternatives; sum their probabilities.
+                        */
+                       sel += regex_selectivity_sub(patt + (pos + 1),
+                                                                                pattlen - (pos + 1),
+                                                                                case_insensitive);
+                       break;                          /* rest of pattern is now processed */
+               }
+               else if (patt[pos] == '[')
+               {
+                       bool            negclass = false;
+
+                       if (patt[++pos] == '^')
+                       {
+                               negclass = true;
+                               pos++;
+                       }
+                       if (patt[pos] == ']')   /* ']' at start of class is not special */
+                               pos++;
+                       while (pos < pattlen && patt[pos] != ']')
+                               pos++;
+                       if (paren_depth == 0)
+                               sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
+               }
+               else if (patt[pos] == '.')
+               {
+                       if (paren_depth == 0)
+                               sel *= ANY_CHAR_SEL;
+               }
+               else if (patt[pos] == '*' ||
+                                patt[pos] == '?' ||
+                                patt[pos] == '+')
+               {
+                       /* Ought to be smarter about quantifiers... */
+                       if (paren_depth == 0)
+                               sel *= PARTIAL_WILDCARD_SEL;
+               }
+               else if (patt[pos] == '{')
+               {
+                       while (pos < pattlen && patt[pos] != '}')
+                               pos++;
+                       if (paren_depth == 0)
+                               sel *= PARTIAL_WILDCARD_SEL;
+               }
+               else if (patt[pos] == '\\')
+               {
+                       /* backslash quotes the next character */
+                       pos++;
+                       if (pos >= pattlen)
+                               break;
+                       if (paren_depth == 0)
+                               sel *= FIXED_CHAR_SEL;
+               }
+               else
+               {
+                       if (paren_depth == 0)
+                               sel *= FIXED_CHAR_SEL;
+               }
+       }
+       /* Could get sel > 1 if multiple wildcards */
+       if (sel > 1.0)
+               sel = 1.0;
+       return sel;
+}
+
+static Selectivity
+regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
+                                 int fixed_prefix_len)
+{
+       Selectivity sel;
+
+       /* If patt doesn't end with $, consider it to have a trailing wildcard */
+       if (pattlen > 0 && patt[pattlen - 1] == '$' &&
+               (pattlen == 1 || patt[pattlen - 2] != '\\'))
+       {
+               /* has trailing $ */
+               sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
+       }
+       else
+       {
+               /* no trailing $ */
+               sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
+               sel *= FULL_WILDCARD_SEL;
+       }
+
+       /* If there's a fixed prefix, discount its selectivity */
+       if (fixed_prefix_len > 0)
+               sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
+
+       /* Make sure result stays in range */
+       CLAMP_PROBABILITY(sel);
+       return sel;
+}
+
+/*
+ * Check whether char is a letter (and, hence, subject to case-folding)
+ *
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does
+ * not seem worth trying to convert to wchar_t to use iswalpha.  Instead, just
+ * assume any multibyte char is potentially case-varying.
+ */
+static int
+pattern_char_isalpha(char c, bool is_multibyte,
+                                        pg_locale_t locale, bool locale_is_c)
+{
+       if (locale_is_c)
+               return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+       else if (is_multibyte && IS_HIGHBIT_SET(c))
+               return true;
+       else if (locale && locale->provider == COLLPROVIDER_ICU)
+               return IS_HIGHBIT_SET(c) ? true : false;
+#ifdef HAVE_LOCALE_T
+       else if (locale && locale->provider == COLLPROVIDER_LIBC)
+               return isalpha_l((unsigned char) c, locale->info.lt);
+#endif
+       else
+               return isalpha((unsigned char) c);
+}
+
+
+/*
+ * For bytea, the increment function need only increment the current byte
+ * (there are no multibyte characters to worry about).
+ */
+static bool
+byte_increment(unsigned char *ptr, int len)
+{
+       if (*ptr >= 255)
+               return false;
+       (*ptr)++;
+       return true;
+}
+
+/*
+ * Try to generate a string greater than the given string or any
+ * string it is a prefix of.  If successful, return a palloc'd string
+ * in the form of a Const node; else return NULL.
+ *
+ * The caller must provide the appropriate "less than" comparison function
+ * for testing the strings, along with the collation to use.
+ *
+ * The key requirement here is that given a prefix string, say "foo",
+ * we must be able to generate another string "fop" that is greater than
+ * all strings "foobar" starting with "foo".  We can test that we have
+ * generated a string greater than the prefix string, but in non-C collations
+ * that is not a bulletproof guarantee that an extension of the string might
+ * not sort after it; an example is that "foo " is less than "foo!", but it
+ * is not clear that a "dictionary" sort ordering will consider "foo!" less
+ * than "foo bar".  CAUTION: Therefore, this function should be used only for
+ * estimation purposes when working in a non-C collation.
+ *
+ * To try to catch most cases where an extended string might otherwise sort
+ * before the result value, we determine which of the strings "Z", "z", "y",
+ * and "9" is seen as largest by the collation, and append that to the given
+ * prefix before trying to find a string that compares as larger.
+ *
+ * To search for a greater string, we repeatedly "increment" the rightmost
+ * character, using an encoding-specific character incrementer function.
+ * When it's no longer possible to increment the last character, we truncate
+ * off that character and start incrementing the next-to-rightmost.
+ * For example, if "z" were the last character in the sort order, then we
+ * could produce "foo" as a string greater than "fonz".
+ *
+ * This could be rather slow in the worst case, but in most cases we
+ * won't have to try more than one or two strings before succeeding.
+ *
+ * Note that it's important for the character incrementer not to be too anal
+ * about producing every possible character code, since in some cases the only
+ * way to get a larger string is to increment a previous character position.
+ * So we don't want to spend too much time trying every possible character
+ * code at the last position.  A good rule of thumb is to be sure that we
+ * don't try more than 256*K values for a K-byte character (and definitely
+ * not 256^K, which is what an exhaustive search would approach).
+ */
+static Const *
+make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
+{
+       Oid                     datatype = str_const->consttype;
+       char       *workstr;
+       int                     len;
+       Datum           cmpstr;
+       char       *cmptxt = NULL;
+       mbcharacter_incrementer charinc;
+
+       /*
+        * Get a modifiable copy of the prefix string in C-string format, and set
+        * up the string we will compare to as a Datum.  In C locale this can just
+        * be the given prefix string, otherwise we need to add a suffix.  Type
+        * BYTEA sorts bytewise so it never needs a suffix either.
+        */
+       if (datatype == BYTEAOID)
+       {
+               bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
+
+               len = VARSIZE_ANY_EXHDR(bstr);
+               workstr = (char *) palloc(len);
+               memcpy(workstr, VARDATA_ANY(bstr), len);
+               Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
+               cmpstr = str_const->constvalue;
+       }
+       else
+       {
+               if (datatype == NAMEOID)
+                       workstr = DatumGetCString(DirectFunctionCall1(nameout,
+                                                                                                                 str_const->constvalue));
+               else
+                       workstr = TextDatumGetCString(str_const->constvalue);
+               len = strlen(workstr);
+               if (lc_collate_is_c(collation) || len == 0)
+                       cmpstr = str_const->constvalue;
+               else
+               {
+                       /* If first time through, determine the suffix to use */
+                       static char suffixchar = 0;
+                       static Oid      suffixcollation = 0;
+
+                       if (!suffixchar || suffixcollation != collation)
+                       {
+                               char       *best;
+
+                               best = "Z";
+                               if (varstr_cmp(best, 1, "z", 1, collation) < 0)
+                                       best = "z";
+                               if (varstr_cmp(best, 1, "y", 1, collation) < 0)
+                                       best = "y";
+                               if (varstr_cmp(best, 1, "9", 1, collation) < 0)
+                                       best = "9";
+                               suffixchar = *best;
+                               suffixcollation = collation;
+                       }
+
+                       /* And build the string to compare to */
+                       if (datatype == NAMEOID)
+                       {
+                               cmptxt = palloc(len + 2);
+                               memcpy(cmptxt, workstr, len);
+                               cmptxt[len] = suffixchar;
+                               cmptxt[len + 1] = '\0';
+                               cmpstr = PointerGetDatum(cmptxt);
+                       }
+                       else
+                       {
+                               cmptxt = palloc(VARHDRSZ + len + 1);
+                               SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
+                               memcpy(VARDATA(cmptxt), workstr, len);
+                               *(VARDATA(cmptxt) + len) = suffixchar;
+                               cmpstr = PointerGetDatum(cmptxt);
+                       }
+               }
+       }
+
+       /* Select appropriate character-incrementer function */
+       if (datatype == BYTEAOID)
+               charinc = byte_increment;
+       else
+               charinc = pg_database_encoding_character_incrementer();
+
+       /* And search ... */
+       while (len > 0)
+       {
+               int                     charlen;
+               unsigned char *lastchar;
+
+               /* Identify the last character --- for bytea, just the last byte */
+               if (datatype == BYTEAOID)
+                       charlen = 1;
+               else
+                       charlen = len - pg_mbcliplen(workstr, len, len - 1);
+               lastchar = (unsigned char *) (workstr + len - charlen);
+
+               /*
+                * Try to generate a larger string by incrementing the last character
+                * (for BYTEA, we treat each byte as a character).
+                *
+                * Note: the incrementer function is expected to return true if it's
+                * generated a valid-per-the-encoding new character, otherwise false.
+                * The contents of the character on false return are unspecified.
+                */
+               while (charinc(lastchar, charlen))
+               {
+                       Const      *workstr_const;
+
+                       if (datatype == BYTEAOID)
+                               workstr_const = string_to_bytea_const(workstr, len);
+                       else
+                               workstr_const = string_to_const(workstr, datatype);
+
+                       if (DatumGetBool(FunctionCall2Coll(ltproc,
+                                                                                          collation,
+                                                                                          cmpstr,
+                                                                                          workstr_const->constvalue)))
+                       {
+                               /* Successfully made a string larger than cmpstr */
+                               if (cmptxt)
+                                       pfree(cmptxt);
+                               pfree(workstr);
+                               return workstr_const;
+                       }
+
+                       /* No good, release unusable value and try again */
+                       pfree(DatumGetPointer(workstr_const->constvalue));
+                       pfree(workstr_const);
+               }
+
+               /*
+                * No luck here, so truncate off the last character and try to
+                * increment the next one.
+                */
+               len -= charlen;
+               workstr[len] = '\0';
+       }
+
+       /* Failed... */
+       if (cmptxt)
+               pfree(cmptxt);
+       pfree(workstr);
+
+       return NULL;
+}
+
+/*
+ * Generate a Datum of the appropriate type from a C string.
+ * Note that all of the supported types are pass-by-ref, so the
+ * returned value should be pfree'd if no longer needed.
+ */
+static Datum
+string_to_datum(const char *str, Oid datatype)
+{
+       Assert(str != NULL);
+
+       /*
+        * We cheat a little by assuming that CStringGetTextDatum() will do for
+        * bpchar and varchar constants too...
+        */
+       if (datatype == NAMEOID)
+               return DirectFunctionCall1(namein, CStringGetDatum(str));
+       else if (datatype == BYTEAOID)
+               return DirectFunctionCall1(byteain, CStringGetDatum(str));
+       else
+               return CStringGetTextDatum(str);
+}
+
+/*
+ * Generate a Const node of the appropriate type from a C string.
+ */
+static Const *
+string_to_const(const char *str, Oid datatype)
+{
+       Datum           conval = string_to_datum(str, datatype);
+       Oid                     collation;
+       int                     constlen;
+
+       /*
+        * We only need to support a few datatypes here, so hard-wire properties
+        * instead of incurring the expense of catalog lookups.
+        */
+       switch (datatype)
+       {
+               case TEXTOID:
+               case VARCHAROID:
+               case BPCHAROID:
+                       collation = DEFAULT_COLLATION_OID;
+                       constlen = -1;
+                       break;
+
+               case NAMEOID:
+                       collation = C_COLLATION_OID;
+                       constlen = NAMEDATALEN;
+                       break;
+
+               case BYTEAOID:
+                       collation = InvalidOid;
+                       constlen = -1;
+                       break;
+
+               default:
+                       elog(ERROR, "unexpected datatype in string_to_const: %u",
+                                datatype);
+                       return NULL;
+       }
+
+       return makeConst(datatype, -1, collation, constlen,
+                                        conval, false, false);
+}
+
+/*
+ * Generate a Const node of bytea type from a binary C string and a length.
+ */
+static Const *
+string_to_bytea_const(const char *str, size_t str_len)
+{
+       bytea      *bstr = palloc(VARHDRSZ + str_len);
+       Datum           conval;
+
+       memcpy(VARDATA(bstr), str, str_len);
+       SET_VARSIZE(bstr, VARHDRSZ + str_len);
+       conval = PointerGetDatum(bstr);
+
+       return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
+}
index b9f99fa050b6f150adbae6acaba7d863ecb1bf33..c25357a00888b40cd32f91a9953d5378d6d7f7f6 100644 (file)
 #include "catalog/pg_am.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_operator.h"
-#include "catalog/pg_opfamily.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_statistic_ext.h"
-#include "catalog/pg_type.h"
 #include "executor/executor.h"
-#include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/plancat.h"
-#include "optimizer/restrictinfo.h"
 #include "parser/parse_clause.h"
-#include "parser/parse_coerce.h"
 #include "parser/parsetree.h"
 #include "statistics/statistics.h"
-#include "utils/acl.h"
 #include "utils/builtins.h"
-#include "utils/bytea.h"
 #include "utils/date.h"
 #include "utils/datum.h"
 #include "utils/fmgroids.h"
 #include "utils/syscache.h"
 #include "utils/timestamp.h"
 #include "utils/typcache.h"
-#include "utils/varlena.h"
 
 
 /* Hooks for plugins to get control when we ask for stats */
@@ -154,16 +146,6 @@ get_relation_stats_hook_type get_relation_stats_hook = NULL;
 get_index_stats_hook_type get_index_stats_hook = NULL;
 
 static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
-static double var_eq_const(VariableStatData *vardata, Oid operator,
-                        Datum constval, bool constisnull,
-                        bool varonleft, bool negate);
-static double var_eq_non_const(VariableStatData *vardata, Oid operator,
-                                Node *other,
-                                bool varonleft, bool negate);
-static double ineq_histogram_selectivity(PlannerInfo *root,
-                                                  VariableStatData *vardata,
-                                                  FmgrInfo *opproc, bool isgt, bool iseq,
-                                                  Datum constval, Oid consttype);
 static double eqjoinsel_inner(Oid opfuncoid,
                                VariableStatData *vardata1, VariableStatData *vardata2,
                                double nd1, double nd2,
@@ -215,17 +197,6 @@ static bool get_actual_variable_range(PlannerInfo *root,
                                                  Oid sortop,
                                                  Datum *min, Datum *max);
 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
-static Selectivity prefix_selectivity(PlannerInfo *root,
-                                  VariableStatData *vardata,
-                                  Oid vartype, Oid opfamily, Const *prefixcon);
-static Selectivity like_selectivity(const char *patt, int pattlen,
-                                bool case_insensitive);
-static Selectivity regex_selectivity(const char *patt, int pattlen,
-                                 bool case_insensitive,
-                                 int fixed_prefix_len);
-static Datum string_to_datum(const char *str, Oid datatype);
-static Const *string_to_const(const char *str, Oid datatype);
-static Const *string_to_bytea_const(const char *str, size_t str_len);
 static IndexQualInfo *deconstruct_indexqual(RestrictInfo *rinfo,
                                          IndexOptInfo *index, int indexcol);
 static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
@@ -304,9 +275,9 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
 /*
  * var_eq_const --- eqsel for var = const case
  *
- * This is split out so that some other estimation functions can use it.
+ * This is exported so that some other estimation functions can use it.
  */
-static double
+double
 var_eq_const(VariableStatData *vardata, Oid operator,
                         Datum constval, bool constisnull,
                         bool varonleft, bool negate)
@@ -457,8 +428,10 @@ var_eq_const(VariableStatData *vardata, Oid operator,
 
 /*
  * var_eq_non_const --- eqsel for var = something-other-than-const case
+ *
+ * This is exported so that some other estimation functions can use it.
  */
-static double
+double
 var_eq_non_const(VariableStatData *vardata, Oid operator,
                                 Node *other,
                                 bool varonleft, bool negate)
@@ -784,8 +757,10 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
  * Note that the result disregards both the most-common-values (if any) and
  * null entries.  The caller is expected to combine this result with
  * statistics for those portions of the column population.
+ *
+ * This is exported so that some other estimation functions can use it.
  */
-static double
+double
 ineq_histogram_selectivity(PlannerInfo *root,
                                                   VariableStatData *vardata,
                                                   FmgrInfo *opproc, bool isgt, bool iseq,
@@ -1198,361 +1173,6 @@ scalargesel(PG_FUNCTION_ARGS)
        return scalarineqsel_wrapper(fcinfo, true, true);
 }
 
-/*
- * patternsel                  - Generic code for pattern-match selectivity.
- */
-static double
-patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
-{
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       Oid                     operator = PG_GETARG_OID(1);
-       List       *args = (List *) PG_GETARG_POINTER(2);
-       int                     varRelid = PG_GETARG_INT32(3);
-       Oid                     collation = PG_GET_COLLATION();
-       VariableStatData vardata;
-       Node       *other;
-       bool            varonleft;
-       Datum           constval;
-       Oid                     consttype;
-       Oid                     vartype;
-       Oid                     opfamily;
-       Pattern_Prefix_Status pstatus;
-       Const      *patt;
-       Const      *prefix = NULL;
-       Selectivity rest_selec = 0;
-       double          nullfrac = 0.0;
-       double          result;
-
-       /*
-        * If this is for a NOT LIKE or similar operator, get the corresponding
-        * positive-match operator and work with that.  Set result to the correct
-        * default estimate, too.
-        */
-       if (negate)
-       {
-               operator = get_negator(operator);
-               if (!OidIsValid(operator))
-                       elog(ERROR, "patternsel called for operator without a negator");
-               result = 1.0 - DEFAULT_MATCH_SEL;
-       }
-       else
-       {
-               result = DEFAULT_MATCH_SEL;
-       }
-
-       /*
-        * If expression is not variable op constant, then punt and return a
-        * default estimate.
-        */
-       if (!get_restriction_variable(root, args, varRelid,
-                                                                 &vardata, &other, &varonleft))
-               return result;
-       if (!varonleft || !IsA(other, Const))
-       {
-               ReleaseVariableStats(vardata);
-               return result;
-       }
-
-       /*
-        * If the constant is NULL, assume operator is strict and return zero, ie,
-        * operator will never return TRUE.  (It's zero even for a negator op.)
-        */
-       if (((Const *) other)->constisnull)
-       {
-               ReleaseVariableStats(vardata);
-               return 0.0;
-       }
-       constval = ((Const *) other)->constvalue;
-       consttype = ((Const *) other)->consttype;
-
-       /*
-        * The right-hand const is type text or bytea for all supported operators.
-        * We do not expect to see binary-compatible types here, since
-        * const-folding should have relabeled the const to exactly match the
-        * operator's declared type.
-        */
-       if (consttype != TEXTOID && consttype != BYTEAOID)
-       {
-               ReleaseVariableStats(vardata);
-               return result;
-       }
-
-       /*
-        * Similarly, the exposed type of the left-hand side should be one of
-        * those we know.  (Do not look at vardata.atttype, which might be
-        * something binary-compatible but different.)  We can use it to choose
-        * the index opfamily from which we must draw the comparison operators.
-        *
-        * NOTE: It would be more correct to use the PATTERN opfamilies than the
-        * simple ones, but at the moment ANALYZE will not generate statistics for
-        * the PATTERN operators.  But our results are so approximate anyway that
-        * it probably hardly matters.
-        */
-       vartype = vardata.vartype;
-
-       switch (vartype)
-       {
-               case TEXTOID:
-               case NAMEOID:
-                       opfamily = TEXT_BTREE_FAM_OID;
-                       break;
-               case BPCHAROID:
-                       opfamily = BPCHAR_BTREE_FAM_OID;
-                       break;
-               case BYTEAOID:
-                       opfamily = BYTEA_BTREE_FAM_OID;
-                       break;
-               default:
-                       ReleaseVariableStats(vardata);
-                       return result;
-       }
-
-       /*
-        * Grab the nullfrac for use below.
-        */
-       if (HeapTupleIsValid(vardata.statsTuple))
-       {
-               Form_pg_statistic stats;
-
-               stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
-               nullfrac = stats->stanullfrac;
-       }
-
-       /*
-        * Pull out any fixed prefix implied by the pattern, and estimate the
-        * fractional selectivity of the remainder of the pattern.  Unlike many of
-        * the other functions in this file, we use the pattern operator's actual
-        * collation for this step.  This is not because we expect the collation
-        * to make a big difference in the selectivity estimate (it seldom would),
-        * but because we want to be sure we cache compiled regexps under the
-        * right cache key, so that they can be re-used at runtime.
-        */
-       patt = (Const *) other;
-       pstatus = pattern_fixed_prefix(patt, ptype, collation,
-                                                                  &prefix, &rest_selec);
-
-       /*
-        * If necessary, coerce the prefix constant to the right type.
-        */
-       if (prefix && prefix->consttype != vartype)
-       {
-               char       *prefixstr;
-
-               switch (prefix->consttype)
-               {
-                       case TEXTOID:
-                               prefixstr = TextDatumGetCString(prefix->constvalue);
-                               break;
-                       case BYTEAOID:
-                               prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
-                                                                                                                               prefix->constvalue));
-                               break;
-                       default:
-                               elog(ERROR, "unrecognized consttype: %u",
-                                        prefix->consttype);
-                               ReleaseVariableStats(vardata);
-                               return result;
-               }
-               prefix = string_to_const(prefixstr, vartype);
-               pfree(prefixstr);
-       }
-
-       if (pstatus == Pattern_Prefix_Exact)
-       {
-               /*
-                * Pattern specifies an exact match, so pretend operator is '='
-                */
-               Oid                     eqopr = get_opfamily_member(opfamily, vartype, vartype,
-                                                                                               BTEqualStrategyNumber);
-
-               if (eqopr == InvalidOid)
-                       elog(ERROR, "no = operator for opfamily %u", opfamily);
-               result = var_eq_const(&vardata, eqopr, prefix->constvalue,
-                                                         false, true, false);
-       }
-       else
-       {
-               /*
-                * Not exact-match pattern.  If we have a sufficiently large
-                * histogram, estimate selectivity for the histogram part of the
-                * population by counting matches in the histogram.  If not, estimate
-                * selectivity of the fixed prefix and remainder of pattern
-                * separately, then combine the two to get an estimate of the
-                * selectivity for the part of the column population represented by
-                * the histogram.  (For small histograms, we combine these
-                * approaches.)
-                *
-                * We then add up data for any most-common-values values; these are
-                * not in the histogram population, and we can get exact answers for
-                * them by applying the pattern operator, so there's no reason to
-                * approximate.  (If the MCVs cover a significant part of the total
-                * population, this gives us a big leg up in accuracy.)
-                */
-               Selectivity selec;
-               int                     hist_size;
-               FmgrInfo        opproc;
-               double          mcv_selec,
-                                       sumcommon;
-
-               /* Try to use the histogram entries to get selectivity */
-               fmgr_info(get_opcode(operator), &opproc);
-
-               selec = histogram_selectivity(&vardata, &opproc, constval, true,
-                                                                         10, 1, &hist_size);
-
-               /* If not at least 100 entries, use the heuristic method */
-               if (hist_size < 100)
-               {
-                       Selectivity heursel;
-                       Selectivity prefixsel;
-
-                       if (pstatus == Pattern_Prefix_Partial)
-                               prefixsel = prefix_selectivity(root, &vardata, vartype,
-                                                                                          opfamily, prefix);
-                       else
-                               prefixsel = 1.0;
-                       heursel = prefixsel * rest_selec;
-
-                       if (selec < 0)          /* fewer than 10 histogram entries? */
-                               selec = heursel;
-                       else
-                       {
-                               /*
-                                * For histogram sizes from 10 to 100, we combine the
-                                * histogram and heuristic selectivities, putting increasingly
-                                * more trust in the histogram for larger sizes.
-                                */
-                               double          hist_weight = hist_size / 100.0;
-
-                               selec = selec * hist_weight + heursel * (1.0 - hist_weight);
-                       }
-               }
-
-               /* In any case, don't believe extremely small or large estimates. */
-               if (selec < 0.0001)
-                       selec = 0.0001;
-               else if (selec > 0.9999)
-                       selec = 0.9999;
-
-               /*
-                * If we have most-common-values info, add up the fractions of the MCV
-                * entries that satisfy MCV OP PATTERN.  These fractions contribute
-                * directly to the result selectivity.  Also add up the total fraction
-                * represented by MCV entries.
-                */
-               mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
-                                                                       &sumcommon);
-
-               /*
-                * Now merge the results from the MCV and histogram calculations,
-                * realizing that the histogram covers only the non-null values that
-                * are not listed in MCV.
-                */
-               selec *= 1.0 - nullfrac - sumcommon;
-               selec += mcv_selec;
-               result = selec;
-       }
-
-       /* now adjust if we wanted not-match rather than match */
-       if (negate)
-               result = 1.0 - result - nullfrac;
-
-       /* result should be in range, but make sure... */
-       CLAMP_PROBABILITY(result);
-
-       if (prefix)
-       {
-               pfree(DatumGetPointer(prefix->constvalue));
-               pfree(prefix);
-       }
-
-       ReleaseVariableStats(vardata);
-
-       return result;
-}
-
-/*
- *             regexeqsel              - Selectivity of regular-expression pattern match.
- */
-Datum
-regexeqsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
-}
-
-/*
- *             icregexeqsel    - Selectivity of case-insensitive regex match.
- */
-Datum
-icregexeqsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
-}
-
-/*
- *             likesel                 - Selectivity of LIKE pattern match.
- */
-Datum
-likesel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
-}
-
-/*
- *             prefixsel                       - selectivity of prefix operator
- */
-Datum
-prefixsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
-}
-
-/*
- *
- *             iclikesel                       - Selectivity of ILIKE pattern match.
- */
-Datum
-iclikesel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
-}
-
-/*
- *             regexnesel              - Selectivity of regular-expression pattern non-match.
- */
-Datum
-regexnesel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
-}
-
-/*
- *             icregexnesel    - Selectivity of case-insensitive regex non-match.
- */
-Datum
-icregexnesel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
-}
-
-/*
- *             nlikesel                - Selectivity of LIKE pattern non-match.
- */
-Datum
-nlikesel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
-}
-
-/*
- *             icnlikesel              - Selectivity of ILIKE pattern non-match.
- */
-Datum
-icnlikesel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
-}
-
 /*
  *             boolvarsel              - Selectivity of Boolean variable.
  *
@@ -2896,123 +2516,33 @@ scalargejoinsel(PG_FUNCTION_ARGS)
        PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 }
 
-/*
- * patternjoinsel              - Generic code for pattern-match join selectivity.
- */
-static double
-patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
-{
-       /* For the moment we just punt. */
-       return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
-}
-
-/*
- *             regexeqjoinsel  - Join selectivity of regular-expression pattern match.
- */
-Datum
-regexeqjoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
-}
-
-/*
- *             icregexeqjoinsel        - Join selectivity of case-insensitive regex match.
- */
-Datum
-icregexeqjoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
-}
-
-/*
- *             likejoinsel                     - Join selectivity of LIKE pattern match.
- */
-Datum
-likejoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
-}
 
 /*
- *             prefixjoinsel                   - Join selectivity of prefix operator
+ * mergejoinscansel                    - Scan selectivity of merge join.
+ *
+ * A merge join will stop as soon as it exhausts either input stream.
+ * Therefore, if we can estimate the ranges of both input variables,
+ * we can estimate how much of the input will actually be read.  This
+ * can have a considerable impact on the cost when using indexscans.
+ *
+ * Also, we can estimate how much of each input has to be read before the
+ * first join pair is found, which will affect the join's startup time.
+ *
+ * clause should be a clause already known to be mergejoinable.  opfamily,
+ * strategy, and nulls_first specify the sort ordering being used.
+ *
+ * The outputs are:
+ *             *leftstart is set to the fraction of the left-hand variable expected
+ *              to be scanned before the first join pair is found (0 to 1).
+ *             *leftend is set to the fraction of the left-hand variable expected
+ *              to be scanned before the join terminates (0 to 1).
+ *             *rightstart, *rightend similarly for the right-hand variable.
  */
-Datum
-prefixjoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
-}
-
-/*
- *             iclikejoinsel                   - Join selectivity of ILIKE pattern match.
- */
-Datum
-iclikejoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
-}
-
-/*
- *             regexnejoinsel  - Join selectivity of regex non-match.
- */
-Datum
-regexnejoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
-}
-
-/*
- *             icregexnejoinsel        - Join selectivity of case-insensitive regex non-match.
- */
-Datum
-icregexnejoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
-}
-
-/*
- *             nlikejoinsel            - Join selectivity of LIKE pattern non-match.
- */
-Datum
-nlikejoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
-}
-
-/*
- *             icnlikejoinsel          - Join selectivity of ILIKE pattern non-match.
- */
-Datum
-icnlikejoinsel(PG_FUNCTION_ARGS)
-{
-       PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
-}
-
-/*
- * mergejoinscansel                    - Scan selectivity of merge join.
- *
- * A merge join will stop as soon as it exhausts either input stream.
- * Therefore, if we can estimate the ranges of both input variables,
- * we can estimate how much of the input will actually be read.  This
- * can have a considerable impact on the cost when using indexscans.
- *
- * Also, we can estimate how much of each input has to be read before the
- * first join pair is found, which will affect the join's startup time.
- *
- * clause should be a clause already known to be mergejoinable.  opfamily,
- * strategy, and nulls_first specify the sort ordering being used.
- *
- * The outputs are:
- *             *leftstart is set to the fraction of the left-hand variable expected
- *              to be scanned before the first join pair is found (0 to 1).
- *             *leftend is set to the fraction of the left-hand variable expected
- *              to be scanned before the join terminates (0 to 1).
- *             *rightstart, *rightend similarly for the right-hand variable.
- */
-void
-mergejoinscansel(PlannerInfo *root, Node *clause,
-                                Oid opfamily, int strategy, bool nulls_first,
-                                Selectivity *leftstart, Selectivity *leftend,
-                                Selectivity *rightstart, Selectivity *rightend)
+void
+mergejoinscansel(PlannerInfo *root, Node *clause,
+                                Oid opfamily, int strategy, bool nulls_first,
+                                Selectivity *leftstart, Selectivity *leftend,
+                                Selectivity *rightstart, Selectivity *rightend)
 {
        Node       *left,
                           *right;
@@ -5716,853 +5246,6 @@ find_join_input_rel(PlannerInfo *root, Relids relids)
 }
 
 
-/*-------------------------------------------------------------------------
- *
- * Pattern analysis functions
- *
- * These routines support analysis of LIKE and regular-expression patterns
- * by the planner/optimizer.  It's important that they agree with the
- * regular-expression code in backend/regex/ and the LIKE code in
- * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
- * must be conservative: if we report a string longer than the true fixed
- * prefix, the query may produce actually wrong answers, rather than just
- * getting a bad selectivity estimate!
- *
- * Note that the prefix-analysis functions are called from
- * backend/optimizer/path/indxpath.c as well as from routines in this file.
- *
- *-------------------------------------------------------------------------
- */
-
-/*
- * Check whether char is a letter (and, hence, subject to case-folding)
- *
- * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem
- * worth trying to convert to wchar_t to use iswalpha.  Instead, just assume
- * any multibyte char is potentially case-varying.
- */
-static int
-pattern_char_isalpha(char c, bool is_multibyte,
-                                        pg_locale_t locale, bool locale_is_c)
-{
-       if (locale_is_c)
-               return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
-       else if (is_multibyte && IS_HIGHBIT_SET(c))
-               return true;
-       else if (locale && locale->provider == COLLPROVIDER_ICU)
-               return IS_HIGHBIT_SET(c) ? true : false;
-#ifdef HAVE_LOCALE_T
-       else if (locale && locale->provider == COLLPROVIDER_LIBC)
-               return isalpha_l((unsigned char) c, locale->info.lt);
-#endif
-       else
-               return isalpha((unsigned char) c);
-}
-
-/*
- * Extract the fixed prefix, if any, for a pattern.
- *
- * *prefix is set to a palloc'd prefix string (in the form of a Const node),
- *     or to NULL if no fixed prefix exists for the pattern.
- * If rest_selec is not NULL, *rest_selec is set to an estimate of the
- *     selectivity of the remainder of the pattern (without any fixed prefix).
- * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
- *
- * The return value distinguishes no fixed prefix, a partial prefix,
- * or an exact-match-only pattern.
- */
-
-static Pattern_Prefix_Status
-like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
-                                 Const **prefix_const, Selectivity *rest_selec)
-{
-       char       *match;
-       char       *patt;
-       int                     pattlen;
-       Oid                     typeid = patt_const->consttype;
-       int                     pos,
-                               match_pos;
-       bool            is_multibyte = (pg_database_encoding_max_length() > 1);
-       pg_locale_t locale = 0;
-       bool            locale_is_c = false;
-
-       /* the right-hand const is type text or bytea */
-       Assert(typeid == BYTEAOID || typeid == TEXTOID);
-
-       if (case_insensitive)
-       {
-               if (typeid == BYTEAOID)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                        errmsg("case insensitive matching not supported on type bytea")));
-
-               /* If case-insensitive, we need locale info */
-               if (lc_ctype_is_c(collation))
-                       locale_is_c = true;
-               else if (collation != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collation))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for ILIKE"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
-                       locale = pg_newlocale_from_collation(collation);
-               }
-       }
-
-       if (typeid != BYTEAOID)
-       {
-               patt = TextDatumGetCString(patt_const->constvalue);
-               pattlen = strlen(patt);
-       }
-       else
-       {
-               bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
-
-               pattlen = VARSIZE_ANY_EXHDR(bstr);
-               patt = (char *) palloc(pattlen);
-               memcpy(patt, VARDATA_ANY(bstr), pattlen);
-               Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
-       }
-
-       match = palloc(pattlen + 1);
-       match_pos = 0;
-       for (pos = 0; pos < pattlen; pos++)
-       {
-               /* % and _ are wildcard characters in LIKE */
-               if (patt[pos] == '%' ||
-                       patt[pos] == '_')
-                       break;
-
-               /* Backslash escapes the next character */
-               if (patt[pos] == '\\')
-               {
-                       pos++;
-                       if (pos >= pattlen)
-                               break;
-               }
-
-               /* Stop if case-varying character (it's sort of a wildcard) */
-               if (case_insensitive &&
-                       pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
-                       break;
-
-               match[match_pos++] = patt[pos];
-       }
-
-       match[match_pos] = '\0';
-
-       if (typeid != BYTEAOID)
-               *prefix_const = string_to_const(match, typeid);
-       else
-               *prefix_const = string_to_bytea_const(match, match_pos);
-
-       if (rest_selec != NULL)
-               *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
-                                                                          case_insensitive);
-
-       pfree(patt);
-       pfree(match);
-
-       /* in LIKE, an empty pattern is an exact match! */
-       if (pos == pattlen)
-               return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
-
-       if (match_pos > 0)
-               return Pattern_Prefix_Partial;
-
-       return Pattern_Prefix_None;
-}
-
-static Pattern_Prefix_Status
-regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
-                                  Const **prefix_const, Selectivity *rest_selec)
-{
-       Oid                     typeid = patt_const->consttype;
-       char       *prefix;
-       bool            exact;
-
-       /*
-        * Should be unnecessary, there are no bytea regex operators defined. As
-        * such, it should be noted that the rest of this function has *not* been
-        * made safe for binary (possibly NULL containing) strings.
-        */
-       if (typeid == BYTEAOID)
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                errmsg("regular-expression matching not supported on type bytea")));
-
-       /* Use the regexp machinery to extract the prefix, if any */
-       prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
-                                                                case_insensitive, collation,
-                                                                &exact);
-
-       if (prefix == NULL)
-       {
-               *prefix_const = NULL;
-
-               if (rest_selec != NULL)
-               {
-                       char       *patt = TextDatumGetCString(patt_const->constvalue);
-
-                       *rest_selec = regex_selectivity(patt, strlen(patt),
-                                                                                       case_insensitive,
-                                                                                       0);
-                       pfree(patt);
-               }
-
-               return Pattern_Prefix_None;
-       }
-
-       *prefix_const = string_to_const(prefix, typeid);
-
-       if (rest_selec != NULL)
-       {
-               if (exact)
-               {
-                       /* Exact match, so there's no additional selectivity */
-                       *rest_selec = 1.0;
-               }
-               else
-               {
-                       char       *patt = TextDatumGetCString(patt_const->constvalue);
-
-                       *rest_selec = regex_selectivity(patt, strlen(patt),
-                                                                                       case_insensitive,
-                                                                                       strlen(prefix));
-                       pfree(patt);
-               }
-       }
-
-       pfree(prefix);
-
-       if (exact)
-               return Pattern_Prefix_Exact;    /* pattern specifies exact match */
-       else
-               return Pattern_Prefix_Partial;
-}
-
-Pattern_Prefix_Status
-pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
-                                        Const **prefix, Selectivity *rest_selec)
-{
-       Pattern_Prefix_Status result;
-
-       switch (ptype)
-       {
-               case Pattern_Type_Like:
-                       result = like_fixed_prefix(patt, false, collation,
-                                                                          prefix, rest_selec);
-                       break;
-               case Pattern_Type_Like_IC:
-                       result = like_fixed_prefix(patt, true, collation,
-                                                                          prefix, rest_selec);
-                       break;
-               case Pattern_Type_Regex:
-                       result = regex_fixed_prefix(patt, false, collation,
-                                                                               prefix, rest_selec);
-                       break;
-               case Pattern_Type_Regex_IC:
-                       result = regex_fixed_prefix(patt, true, collation,
-                                                                               prefix, rest_selec);
-                       break;
-               case Pattern_Type_Prefix:
-                       /* Prefix type work is trivial.  */
-                       result = Pattern_Prefix_Partial;
-                       *rest_selec = 1.0;      /* all */
-                       *prefix = makeConst(patt->consttype,
-                                                               patt->consttypmod,
-                                                               patt->constcollid,
-                                                               patt->constlen,
-                                                               datumCopy(patt->constvalue,
-                                                                                 patt->constbyval,
-                                                                                 patt->constlen),
-                                                               patt->constisnull,
-                                                               patt->constbyval);
-                       break;
-               default:
-                       elog(ERROR, "unrecognized ptype: %d", (int) ptype);
-                       result = Pattern_Prefix_None;   /* keep compiler quiet */
-                       break;
-       }
-       return result;
-}
-
-/*
- * Estimate the selectivity of a fixed prefix for a pattern match.
- *
- * A fixed prefix "foo" is estimated as the selectivity of the expression
- * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
- *
- * The selectivity estimate is with respect to the portion of the column
- * population represented by the histogram --- the caller must fold this
- * together with info about MCVs and NULLs.
- *
- * We use the >= and < operators from the specified btree opfamily to do the
- * estimation.  The given variable and Const must be of the associated
- * datatype.
- *
- * XXX Note: we make use of the upper bound to estimate operator selectivity
- * even if the locale is such that we cannot rely on the upper-bound string.
- * The selectivity only needs to be approximately right anyway, so it seems
- * more useful to use the upper-bound code than not.
- */
-static Selectivity
-prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
-                                  Oid vartype, Oid opfamily, Const *prefixcon)
-{
-       Selectivity prefixsel;
-       Oid                     cmpopr;
-       FmgrInfo        opproc;
-       AttStatsSlot sslot;
-       Const      *greaterstrcon;
-       Selectivity eq_sel;
-
-       cmpopr = get_opfamily_member(opfamily, vartype, vartype,
-                                                                BTGreaterEqualStrategyNumber);
-       if (cmpopr == InvalidOid)
-               elog(ERROR, "no >= operator for opfamily %u", opfamily);
-       fmgr_info(get_opcode(cmpopr), &opproc);
-
-       prefixsel = ineq_histogram_selectivity(root, vardata,
-                                                                                  &opproc, true, true,
-                                                                                  prefixcon->constvalue,
-                                                                                  prefixcon->consttype);
-
-       if (prefixsel < 0.0)
-       {
-               /* No histogram is present ... return a suitable default estimate */
-               return DEFAULT_MATCH_SEL;
-       }
-
-       /*-------
-        * If we can create a string larger than the prefix, say
-        * "x < greaterstr".  We try to generate the string referencing the
-        * collation of the var's statistics, but if that's not available,
-        * use DEFAULT_COLLATION_OID.
-        *-------
-        */
-       if (HeapTupleIsValid(vardata->statsTuple) &&
-               get_attstatsslot(&sslot, vardata->statsTuple,
-                                                STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
-                /* sslot.stacoll is set up */ ;
-       else
-               sslot.stacoll = DEFAULT_COLLATION_OID;
-       cmpopr = get_opfamily_member(opfamily, vartype, vartype,
-                                                                BTLessStrategyNumber);
-       if (cmpopr == InvalidOid)
-               elog(ERROR, "no < operator for opfamily %u", opfamily);
-       fmgr_info(get_opcode(cmpopr), &opproc);
-       greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
-       if (greaterstrcon)
-       {
-               Selectivity topsel;
-
-               topsel = ineq_histogram_selectivity(root, vardata,
-                                                                                       &opproc, false, false,
-                                                                                       greaterstrcon->constvalue,
-                                                                                       greaterstrcon->consttype);
-
-               /* ineq_histogram_selectivity worked before, it shouldn't fail now */
-               Assert(topsel >= 0.0);
-
-               /*
-                * Merge the two selectivities in the same way as for a range query
-                * (see clauselist_selectivity()).  Note that we don't need to worry
-                * about double-exclusion of nulls, since ineq_histogram_selectivity
-                * doesn't count those anyway.
-                */
-               prefixsel = topsel + prefixsel - 1.0;
-       }
-
-       /*
-        * If the prefix is long then the two bounding values might be too close
-        * together for the histogram to distinguish them usefully, resulting in a
-        * zero estimate (plus or minus roundoff error). To avoid returning a
-        * ridiculously small estimate, compute the estimated selectivity for
-        * "variable = 'foo'", and clamp to that. (Obviously, the resultant
-        * estimate should be at least that.)
-        *
-        * We apply this even if we couldn't make a greater string.  That case
-        * suggests that the prefix is near the maximum possible, and thus
-        * probably off the end of the histogram, and thus we probably got a very
-        * small estimate from the >= condition; so we still need to clamp.
-        */
-       cmpopr = get_opfamily_member(opfamily, vartype, vartype,
-                                                                BTEqualStrategyNumber);
-       if (cmpopr == InvalidOid)
-               elog(ERROR, "no = operator for opfamily %u", opfamily);
-       eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
-                                                 false, true, false);
-
-       prefixsel = Max(prefixsel, eq_sel);
-
-       return prefixsel;
-}
-
-
-/*
- * Estimate the selectivity of a pattern of the specified type.
- * Note that any fixed prefix of the pattern will have been removed already,
- * so actually we may be looking at just a fragment of the pattern.
- *
- * For now, we use a very simplistic approach: fixed characters reduce the
- * selectivity a good deal, character ranges reduce it a little,
- * wildcards (such as % for LIKE or .* for regex) increase it.
- */
-
-#define FIXED_CHAR_SEL 0.20    /* about 1/5 */
-#define CHAR_RANGE_SEL 0.25
-#define ANY_CHAR_SEL   0.9             /* not 1, since it won't match end-of-string */
-#define FULL_WILDCARD_SEL 5.0
-#define PARTIAL_WILDCARD_SEL 2.0
-
-static Selectivity
-like_selectivity(const char *patt, int pattlen, bool case_insensitive)
-{
-       Selectivity sel = 1.0;
-       int                     pos;
-
-       /* Skip any leading wildcard; it's already factored into initial sel */
-       for (pos = 0; pos < pattlen; pos++)
-       {
-               if (patt[pos] != '%' && patt[pos] != '_')
-                       break;
-       }
-
-       for (; pos < pattlen; pos++)
-       {
-               /* % and _ are wildcard characters in LIKE */
-               if (patt[pos] == '%')
-                       sel *= FULL_WILDCARD_SEL;
-               else if (patt[pos] == '_')
-                       sel *= ANY_CHAR_SEL;
-               else if (patt[pos] == '\\')
-               {
-                       /* Backslash quotes the next character */
-                       pos++;
-                       if (pos >= pattlen)
-                               break;
-                       sel *= FIXED_CHAR_SEL;
-               }
-               else
-                       sel *= FIXED_CHAR_SEL;
-       }
-       /* Could get sel > 1 if multiple wildcards */
-       if (sel > 1.0)
-               sel = 1.0;
-       return sel;
-}
-
-static Selectivity
-regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
-{
-       Selectivity sel = 1.0;
-       int                     paren_depth = 0;
-       int                     paren_pos = 0;  /* dummy init to keep compiler quiet */
-       int                     pos;
-
-       for (pos = 0; pos < pattlen; pos++)
-       {
-               if (patt[pos] == '(')
-               {
-                       if (paren_depth == 0)
-                               paren_pos = pos;        /* remember start of parenthesized item */
-                       paren_depth++;
-               }
-               else if (patt[pos] == ')' && paren_depth > 0)
-               {
-                       paren_depth--;
-                       if (paren_depth == 0)
-                               sel *= regex_selectivity_sub(patt + (paren_pos + 1),
-                                                                                        pos - (paren_pos + 1),
-                                                                                        case_insensitive);
-               }
-               else if (patt[pos] == '|' && paren_depth == 0)
-               {
-                       /*
-                        * If unquoted | is present at paren level 0 in pattern, we have
-                        * multiple alternatives; sum their probabilities.
-                        */
-                       sel += regex_selectivity_sub(patt + (pos + 1),
-                                                                                pattlen - (pos + 1),
-                                                                                case_insensitive);
-                       break;                          /* rest of pattern is now processed */
-               }
-               else if (patt[pos] == '[')
-               {
-                       bool            negclass = false;
-
-                       if (patt[++pos] == '^')
-                       {
-                               negclass = true;
-                               pos++;
-                       }
-                       if (patt[pos] == ']')   /* ']' at start of class is not special */
-                               pos++;
-                       while (pos < pattlen && patt[pos] != ']')
-                               pos++;
-                       if (paren_depth == 0)
-                               sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
-               }
-               else if (patt[pos] == '.')
-               {
-                       if (paren_depth == 0)
-                               sel *= ANY_CHAR_SEL;
-               }
-               else if (patt[pos] == '*' ||
-                                patt[pos] == '?' ||
-                                patt[pos] == '+')
-               {
-                       /* Ought to be smarter about quantifiers... */
-                       if (paren_depth == 0)
-                               sel *= PARTIAL_WILDCARD_SEL;
-               }
-               else if (patt[pos] == '{')
-               {
-                       while (pos < pattlen && patt[pos] != '}')
-                               pos++;
-                       if (paren_depth == 0)
-                               sel *= PARTIAL_WILDCARD_SEL;
-               }
-               else if (patt[pos] == '\\')
-               {
-                       /* backslash quotes the next character */
-                       pos++;
-                       if (pos >= pattlen)
-                               break;
-                       if (paren_depth == 0)
-                               sel *= FIXED_CHAR_SEL;
-               }
-               else
-               {
-                       if (paren_depth == 0)
-                               sel *= FIXED_CHAR_SEL;
-               }
-       }
-       /* Could get sel > 1 if multiple wildcards */
-       if (sel > 1.0)
-               sel = 1.0;
-       return sel;
-}
-
-static Selectivity
-regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
-                                 int fixed_prefix_len)
-{
-       Selectivity sel;
-
-       /* If patt doesn't end with $, consider it to have a trailing wildcard */
-       if (pattlen > 0 && patt[pattlen - 1] == '$' &&
-               (pattlen == 1 || patt[pattlen - 2] != '\\'))
-       {
-               /* has trailing $ */
-               sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
-       }
-       else
-       {
-               /* no trailing $ */
-               sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
-               sel *= FULL_WILDCARD_SEL;
-       }
-
-       /* If there's a fixed prefix, discount its selectivity */
-       if (fixed_prefix_len > 0)
-               sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
-
-       /* Make sure result stays in range */
-       CLAMP_PROBABILITY(sel);
-       return sel;
-}
-
-
-/*
- * For bytea, the increment function need only increment the current byte
- * (there are no multibyte characters to worry about).
- */
-static bool
-byte_increment(unsigned char *ptr, int len)
-{
-       if (*ptr >= 255)
-               return false;
-       (*ptr)++;
-       return true;
-}
-
-/*
- * Try to generate a string greater than the given string or any
- * string it is a prefix of.  If successful, return a palloc'd string
- * in the form of a Const node; else return NULL.
- *
- * The caller must provide the appropriate "less than" comparison function
- * for testing the strings, along with the collation to use.
- *
- * The key requirement here is that given a prefix string, say "foo",
- * we must be able to generate another string "fop" that is greater than
- * all strings "foobar" starting with "foo".  We can test that we have
- * generated a string greater than the prefix string, but in non-C collations
- * that is not a bulletproof guarantee that an extension of the string might
- * not sort after it; an example is that "foo " is less than "foo!", but it
- * is not clear that a "dictionary" sort ordering will consider "foo!" less
- * than "foo bar".  CAUTION: Therefore, this function should be used only for
- * estimation purposes when working in a non-C collation.
- *
- * To try to catch most cases where an extended string might otherwise sort
- * before the result value, we determine which of the strings "Z", "z", "y",
- * and "9" is seen as largest by the collation, and append that to the given
- * prefix before trying to find a string that compares as larger.
- *
- * To search for a greater string, we repeatedly "increment" the rightmost
- * character, using an encoding-specific character incrementer function.
- * When it's no longer possible to increment the last character, we truncate
- * off that character and start incrementing the next-to-rightmost.
- * For example, if "z" were the last character in the sort order, then we
- * could produce "foo" as a string greater than "fonz".
- *
- * This could be rather slow in the worst case, but in most cases we
- * won't have to try more than one or two strings before succeeding.
- *
- * Note that it's important for the character incrementer not to be too anal
- * about producing every possible character code, since in some cases the only
- * way to get a larger string is to increment a previous character position.
- * So we don't want to spend too much time trying every possible character
- * code at the last position.  A good rule of thumb is to be sure that we
- * don't try more than 256*K values for a K-byte character (and definitely
- * not 256^K, which is what an exhaustive search would approach).
- */
-Const *
-make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
-{
-       Oid                     datatype = str_const->consttype;
-       char       *workstr;
-       int                     len;
-       Datum           cmpstr;
-       char       *cmptxt = NULL;
-       mbcharacter_incrementer charinc;
-
-       /*
-        * Get a modifiable copy of the prefix string in C-string format, and set
-        * up the string we will compare to as a Datum.  In C locale this can just
-        * be the given prefix string, otherwise we need to add a suffix.  Type
-        * BYTEA sorts bytewise so it never needs a suffix either.
-        */
-       if (datatype == BYTEAOID)
-       {
-               bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
-
-               len = VARSIZE_ANY_EXHDR(bstr);
-               workstr = (char *) palloc(len);
-               memcpy(workstr, VARDATA_ANY(bstr), len);
-               Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
-               cmpstr = str_const->constvalue;
-       }
-       else
-       {
-               if (datatype == NAMEOID)
-                       workstr = DatumGetCString(DirectFunctionCall1(nameout,
-                                                                                                                 str_const->constvalue));
-               else
-                       workstr = TextDatumGetCString(str_const->constvalue);
-               len = strlen(workstr);
-               if (lc_collate_is_c(collation) || len == 0)
-                       cmpstr = str_const->constvalue;
-               else
-               {
-                       /* If first time through, determine the suffix to use */
-                       static char suffixchar = 0;
-                       static Oid      suffixcollation = 0;
-
-                       if (!suffixchar || suffixcollation != collation)
-                       {
-                               char       *best;
-
-                               best = "Z";
-                               if (varstr_cmp(best, 1, "z", 1, collation) < 0)
-                                       best = "z";
-                               if (varstr_cmp(best, 1, "y", 1, collation) < 0)
-                                       best = "y";
-                               if (varstr_cmp(best, 1, "9", 1, collation) < 0)
-                                       best = "9";
-                               suffixchar = *best;
-                               suffixcollation = collation;
-                       }
-
-                       /* And build the string to compare to */
-                       if (datatype == NAMEOID)
-                       {
-                               cmptxt = palloc(len + 2);
-                               memcpy(cmptxt, workstr, len);
-                               cmptxt[len] = suffixchar;
-                               cmptxt[len + 1] = '\0';
-                               cmpstr = PointerGetDatum(cmptxt);
-                       }
-                       else
-                       {
-                               cmptxt = palloc(VARHDRSZ + len + 1);
-                               SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
-                               memcpy(VARDATA(cmptxt), workstr, len);
-                               *(VARDATA(cmptxt) + len) = suffixchar;
-                               cmpstr = PointerGetDatum(cmptxt);
-                       }
-               }
-       }
-
-       /* Select appropriate character-incrementer function */
-       if (datatype == BYTEAOID)
-               charinc = byte_increment;
-       else
-               charinc = pg_database_encoding_character_incrementer();
-
-       /* And search ... */
-       while (len > 0)
-       {
-               int                     charlen;
-               unsigned char *lastchar;
-
-               /* Identify the last character --- for bytea, just the last byte */
-               if (datatype == BYTEAOID)
-                       charlen = 1;
-               else
-                       charlen = len - pg_mbcliplen(workstr, len, len - 1);
-               lastchar = (unsigned char *) (workstr + len - charlen);
-
-               /*
-                * Try to generate a larger string by incrementing the last character
-                * (for BYTEA, we treat each byte as a character).
-                *
-                * Note: the incrementer function is expected to return true if it's
-                * generated a valid-per-the-encoding new character, otherwise false.
-                * The contents of the character on false return are unspecified.
-                */
-               while (charinc(lastchar, charlen))
-               {
-                       Const      *workstr_const;
-
-                       if (datatype == BYTEAOID)
-                               workstr_const = string_to_bytea_const(workstr, len);
-                       else
-                               workstr_const = string_to_const(workstr, datatype);
-
-                       if (DatumGetBool(FunctionCall2Coll(ltproc,
-                                                                                          collation,
-                                                                                          cmpstr,
-                                                                                          workstr_const->constvalue)))
-                       {
-                               /* Successfully made a string larger than cmpstr */
-                               if (cmptxt)
-                                       pfree(cmptxt);
-                               pfree(workstr);
-                               return workstr_const;
-                       }
-
-                       /* No good, release unusable value and try again */
-                       pfree(DatumGetPointer(workstr_const->constvalue));
-                       pfree(workstr_const);
-               }
-
-               /*
-                * No luck here, so truncate off the last character and try to
-                * increment the next one.
-                */
-               len -= charlen;
-               workstr[len] = '\0';
-       }
-
-       /* Failed... */
-       if (cmptxt)
-               pfree(cmptxt);
-       pfree(workstr);
-
-       return NULL;
-}
-
-/*
- * Generate a Datum of the appropriate type from a C string.
- * Note that all of the supported types are pass-by-ref, so the
- * returned value should be pfree'd if no longer needed.
- */
-static Datum
-string_to_datum(const char *str, Oid datatype)
-{
-       Assert(str != NULL);
-
-       /*
-        * We cheat a little by assuming that CStringGetTextDatum() will do for
-        * bpchar and varchar constants too...
-        */
-       if (datatype == NAMEOID)
-               return DirectFunctionCall1(namein, CStringGetDatum(str));
-       else if (datatype == BYTEAOID)
-               return DirectFunctionCall1(byteain, CStringGetDatum(str));
-       else
-               return CStringGetTextDatum(str);
-}
-
-/*
- * Generate a Const node of the appropriate type from a C string.
- */
-static Const *
-string_to_const(const char *str, Oid datatype)
-{
-       Datum           conval = string_to_datum(str, datatype);
-       Oid                     collation;
-       int                     constlen;
-
-       /*
-        * We only need to support a few datatypes here, so hard-wire properties
-        * instead of incurring the expense of catalog lookups.
-        */
-       switch (datatype)
-       {
-               case TEXTOID:
-               case VARCHAROID:
-               case BPCHAROID:
-                       collation = DEFAULT_COLLATION_OID;
-                       constlen = -1;
-                       break;
-
-               case NAMEOID:
-                       collation = C_COLLATION_OID;
-                       constlen = NAMEDATALEN;
-                       break;
-
-               case BYTEAOID:
-                       collation = InvalidOid;
-                       constlen = -1;
-                       break;
-
-               default:
-                       elog(ERROR, "unexpected datatype in string_to_const: %u",
-                                datatype);
-                       return NULL;
-       }
-
-       return makeConst(datatype, -1, collation, constlen,
-                                        conval, false, false);
-}
-
-/*
- * Generate a Const node of bytea type from a binary C string and a length.
- */
-static Const *
-string_to_bytea_const(const char *str, size_t str_len)
-{
-       bytea      *bstr = palloc(VARHDRSZ + str_len);
-       Datum           conval;
-
-       memcpy(VARDATA(bstr), str, str_len);
-       SET_VARSIZE(bstr, VARHDRSZ + str_len);
-       conval = PointerGetDatum(bstr);
-
-       return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
-}
-
 /*-------------------------------------------------------------------------
  *
  * Index cost estimation functions
index 087b56f917e7ae32f1e56711cdba5b2e1e5c7a36..8829889f9f52d1578ea2b47ea7498c0be9f4306d 100644 (file)
@@ -15,7 +15,6 @@
 #ifndef SELFUNCS_H
 #define SELFUNCS_H
 
-#include "fmgr.h"
 #include "access/htup.h"
 #include "nodes/pathnodes.h"
 
@@ -85,20 +84,6 @@ typedef struct VariableStatData
        } while(0)
 
 
-typedef enum
-{
-       Pattern_Type_Like,
-       Pattern_Type_Like_IC,
-       Pattern_Type_Regex,
-       Pattern_Type_Regex_IC,
-       Pattern_Type_Prefix
-} Pattern_Type;
-
-typedef enum
-{
-       Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
-} Pattern_Prefix_Status;
-
 /*
  * deconstruct_indexquals is a simple function to examine the indexquals
  * attached to a proposed IndexPath.  It returns a list of IndexQualInfo
@@ -175,14 +160,16 @@ extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                                          Datum constval, bool varonleft,
                                          int min_hist_size, int n_skip,
                                          int *hist_size);
-
-extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
-                                        Pattern_Type ptype,
-                                        Oid collation,
-                                        Const **prefix,
-                                        Selectivity *rest_selec);
-extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
-                                       Oid collation);
+extern double ineq_histogram_selectivity(PlannerInfo *root,
+                                                  VariableStatData *vardata,
+                                                  FmgrInfo *opproc, bool isgt, bool iseq,
+                                                  Datum constval, Oid consttype);
+extern double var_eq_const(VariableStatData *vardata, Oid oproid,
+                        Datum constval, bool constisnull,
+                        bool varonleft, bool negate);
+extern double var_eq_non_const(VariableStatData *vardata, Oid oproid,
+                                Node *other,
+                                bool varonleft, bool negate);
 
 extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
 extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,