]> granicus.if.org Git - postgresql/commitdiff
Add new selectivity estimation functions for pattern-matching operators
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 16 Apr 2000 04:41:03 +0000 (04:41 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 16 Apr 2000 04:41:03 +0000 (04:41 +0000)
(LIKE and regexp matches).  These are not yet referenced in pg_operator,
so by default the system will continue to use eqsel/neqsel.
Also, tweak convert_to_scalar() logic so that common prefixes of strings
are stripped off, allowing better accuracy when all strings in a table
share a common prefix.

doc/src/sgml/xoper.sgml
src/backend/optimizer/path/indxpath.c
src/backend/utils/adt/selfuncs.c
src/include/catalog/pg_proc.h
src/include/utils/builtins.h

index a0513a7051d54a702aac12adde0f271aec288716..2c938148fcde007a68c6c3f101e2dc04c5ff44e7 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/xoper.sgml,v 1.9 2000/03/31 03:27:41 thomas Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/xoper.sgml,v 1.10 2000/04/16 04:41:01 tgl Exp $
 -->
 
  <Chapter Id="xoper">
@@ -254,9 +254,9 @@ SELECT (a + b) AS c FROM test_complex;
    <para>
     You can frequently get away with using either eqsel or neqsel for
     operators that have very high or very low selectivity, even if they
-    aren't really equality or inequality.  For example, the regular expression
-    matching operators (~, ~*, etc) use eqsel on the assumption that they'll
-    usually only match a small fraction of the entries in a table.
+    aren't really equality or inequality.  For example, the
+    approximate-equality geometric operators use eqsel on the assumption that
+    they'll usually only match a small fraction of the entries in a table.
    </para>
 
    <para>
index 98c5112f7c3807882733785388b7fdf30dba7a47..06f9cd0247f0b40e9ae079d168de5dc3359dcc76 100644 (file)
@@ -9,22 +9,20 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.82 2000/04/12 17:15:19 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.83 2000/04/16 04:41:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include <ctype.h>
-#include <math.h>
-
 #include "postgres.h"
 
+#include <math.h>
+
 #include "access/heapam.h"
 #include "access/nbtree.h"
 #include "catalog/catname.h"
 #include "catalog/pg_amop.h"
 #include "catalog/pg_operator.h"
 #include "executor/executor.h"
-#include "mb/pg_wchar.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #define is_indexable_operator(clause,opclass,relam,indexkey_on_left) \
        (indexable_operator(clause,opclass,relam,indexkey_on_left) != InvalidOid)
 
-typedef enum
-{
-       Prefix_None, Prefix_Partial, Prefix_Exact
-} Prefix_Status;
-
 static void match_index_orclauses(RelOptInfo *rel, IndexOptInfo *index,
                                          List *restrictinfo_list);
 static List *match_index_orclause(RelOptInfo *rel, IndexOptInfo *index,
@@ -92,17 +85,11 @@ static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel,
                                           IndexOptInfo *index);
 static bool match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
                                                         bool indexkey_on_left);
-static Prefix_Status like_fixed_prefix(char *patt, char **prefix);
-static Prefix_Status regex_fixed_prefix(char *patt, bool case_insensitive,
-                                  char **prefix);
 static List *prefix_quals(Var *leftop, Oid expr_op,
-                        char *prefix, Prefix_Status pstatus);
-static char *make_greater_string(const char *str, Oid datatype);
+                        char *prefix, Pattern_Prefix_Status pstatus);
 static Oid     find_operator(const char *opname, Oid datatype);
 static Datum string_to_datum(const char *str, Oid datatype);
 static Const *string_to_const(const char *str, Oid datatype);
-static bool string_lessthan(const char *str1, const char *str2,
-                               Oid datatype);
 
 
 /*
@@ -1644,6 +1631,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
        Datum           constvalue;
        char       *patt;
        char       *prefix;
+       char       *rest;
 
        /*
         * Currently, all known special operators require the indexkey on the
@@ -1672,7 +1660,8 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
                case OID_NAME_LIKE_OP:
                        /* the right-hand const is type text for all of these */
                        patt = textout((text *) DatumGetPointer(constvalue));
-                       isIndexable = like_fixed_prefix(patt, &prefix) != Prefix_None;
+                       isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
+                                                                                          &prefix, &rest) != Pattern_Prefix_None;
                        if (prefix)
                                pfree(prefix);
                        pfree(patt);
@@ -1684,7 +1673,8 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
                case OID_NAME_REGEXEQ_OP:
                        /* the right-hand const is type text for all of these */
                        patt = textout((text *) DatumGetPointer(constvalue));
-                       isIndexable = regex_fixed_prefix(patt, false, &prefix) != Prefix_None;
+                       isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+                                                                                          &prefix, &rest) != Pattern_Prefix_None;
                        if (prefix)
                                pfree(prefix);
                        pfree(patt);
@@ -1696,7 +1686,8 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
                case OID_NAME_ICREGEXEQ_OP:
                        /* the right-hand const is type text for all of these */
                        patt = textout((text *) DatumGetPointer(constvalue));
-                       isIndexable = regex_fixed_prefix(patt, true, &prefix) != Prefix_None;
+                       isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+                                                                                          &prefix, &rest) != Pattern_Prefix_None;
                        if (prefix)
                                pfree(prefix);
                        pfree(patt);
@@ -1776,7 +1767,8 @@ expand_indexqual_conditions(List *indexquals)
                Datum           constvalue;
                char       *patt;
                char       *prefix;
-               Prefix_Status pstatus;
+               char       *rest;
+               Pattern_Prefix_Status pstatus;
 
                switch (expr_op)
                {
@@ -1794,7 +1786,8 @@ expand_indexqual_conditions(List *indexquals)
                                /* the right-hand const is type text for all of these */
                                constvalue = ((Const *) rightop)->constvalue;
                                patt = textout((text *) DatumGetPointer(constvalue));
-                               pstatus = like_fixed_prefix(patt, &prefix);
+                               pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
+                                                                                          &prefix, &rest);
                                resultquals = nconc(resultquals,
                                                                        prefix_quals(leftop, expr_op,
                                                                                                 prefix, pstatus));
@@ -1810,7 +1803,8 @@ expand_indexqual_conditions(List *indexquals)
                                /* the right-hand const is type text for all of these */
                                constvalue = ((Const *) rightop)->constvalue;
                                patt = textout((text *) DatumGetPointer(constvalue));
-                               pstatus = regex_fixed_prefix(patt, false, &prefix);
+                               pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+                                                                                          &prefix, &rest);
                                resultquals = nconc(resultquals,
                                                                        prefix_quals(leftop, expr_op,
                                                                                                 prefix, pstatus));
@@ -1826,7 +1820,8 @@ expand_indexqual_conditions(List *indexquals)
                                /* the right-hand const is type text for all of these */
                                constvalue = ((Const *) rightop)->constvalue;
                                patt = textout((text *) DatumGetPointer(constvalue));
-                               pstatus = regex_fixed_prefix(patt, true, &prefix);
+                               pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+                                                                                          &prefix, &rest);
                                resultquals = nconc(resultquals,
                                                                        prefix_quals(leftop, expr_op,
                                                                                                 prefix, pstatus));
@@ -1844,130 +1839,6 @@ expand_indexqual_conditions(List *indexquals)
        return resultquals;
 }
 
-/*
- * Extract the fixed prefix, if any, for a LIKE pattern.
- * *prefix is set to a palloc'd prefix string,
- * or to NULL if no fixed prefix exists for the pattern.
- * The return value distinguishes no fixed prefix, a partial prefix,
- * or an exact-match-only pattern.
- */
-static Prefix_Status
-like_fixed_prefix(char *patt, char **prefix)
-{
-       char       *match;
-       int                     pos,
-                               match_pos;
-
-       *prefix = match = palloc(strlen(patt) + 1);
-       match_pos = 0;
-
-       for (pos = 0; patt[pos]; pos++)
-       {
-               /* % and _ are wildcard characters in LIKE */
-               if (patt[pos] == '%' ||
-                       patt[pos] == '_')
-                       break;
-               /* Backslash quotes the next character */
-               if (patt[pos] == '\\')
-               {
-                       pos++;
-                       if (patt[pos] == '\0')
-                               break;
-               }
-
-               /*
-                * NOTE: this code used to think that %% meant a literal %, but
-                * textlike() itself does not think that, and the SQL92 spec
-                * doesn't say any such thing either.
-                */
-               match[match_pos++] = patt[pos];
-       }
-
-       match[match_pos] = '\0';
-
-       /* in LIKE, an empty pattern is an exact match! */
-       if (patt[pos] == '\0')
-               return Prefix_Exact;    /* reached end of pattern, so exact */
-
-       if (match_pos > 0)
-               return Prefix_Partial;
-       return Prefix_None;
-}
-
-/*
- * Extract the fixed prefix, if any, for a regex pattern.
- * *prefix is set to a palloc'd prefix string,
- * or to NULL if no fixed prefix exists for the pattern.
- * The return value distinguishes no fixed prefix, a partial prefix,
- * or an exact-match-only pattern.
- */
-static Prefix_Status
-regex_fixed_prefix(char *patt, bool case_insensitive,
-                                  char **prefix)
-{
-       char       *match;
-       int                     pos,
-                               match_pos;
-
-       *prefix = NULL;
-
-       /* Pattern must be anchored left */
-       if (patt[0] != '^')
-               return Prefix_None;
-
-       /* Cannot optimize if unquoted | { } is present in pattern */
-       for (pos = 1; patt[pos]; pos++)
-       {
-               if (patt[pos] == '|' ||
-                       patt[pos] == '{' ||
-                       patt[pos] == '}')
-                       return Prefix_None;
-               if (patt[pos] == '\\')
-               {
-                       pos++;
-                       if (patt[pos] == '\0')
-                               break;
-               }
-       }
-
-       /* OK, allocate space for pattern */
-       *prefix = match = palloc(strlen(patt) + 1);
-       match_pos = 0;
-
-       /* note start at pos 1 to skip leading ^ */
-       for (pos = 1; patt[pos]; pos++)
-       {
-               if (patt[pos] == '.' ||
-                       patt[pos] == '?' ||
-                       patt[pos] == '*' ||
-                       patt[pos] == '[' ||
-                       patt[pos] == '$' ||
-
-               /*
-                * XXX I suspect isalpha() is not an adequately locale-sensitive
-                * test for characters that can vary under case folding?
-                */
-                       (case_insensitive && isalpha(patt[pos])))
-                       break;
-               if (patt[pos] == '\\')
-               {
-                       pos++;
-                       if (patt[pos] == '\0')
-                               break;
-               }
-               match[match_pos++] = patt[pos];
-       }
-
-       match[match_pos] = '\0';
-
-       if (patt[pos] == '$' && patt[pos + 1] == '\0')
-               return Prefix_Exact;    /* pattern specifies exact match */
-
-       if (match_pos > 0)
-               return Prefix_Partial;
-       return Prefix_None;
-}
-
 /*
  * Given a fixed prefix that all the "leftop" values must have,
  * generate suitable indexqual condition(s).  expr_op is the original
@@ -1976,7 +1847,7 @@ regex_fixed_prefix(char *patt, bool case_insensitive,
  */
 static List *
 prefix_quals(Var *leftop, Oid expr_op,
-                        char *prefix, Prefix_Status pstatus)
+                        char *prefix, Pattern_Prefix_Status pstatus)
 {
        List       *result;
        Oid                     datatype;
@@ -1986,7 +1857,7 @@ prefix_quals(Var *leftop, Oid expr_op,
        Expr       *expr;
        char       *greaterstr;
 
-       Assert(pstatus != Prefix_None);
+       Assert(pstatus != Pattern_Prefix_None);
 
        switch (expr_op)
        {
@@ -2022,7 +1893,7 @@ prefix_quals(Var *leftop, Oid expr_op,
        /*
         * If we found an exact-match pattern, generate an "=" indexqual.
         */
-       if (pstatus == Prefix_Exact)
+       if (pstatus == Pattern_Prefix_Exact)
        {
                oproid = find_operator("=", datatype);
                if (oproid == InvalidOid)
@@ -2067,68 +1938,6 @@ prefix_quals(Var *leftop, Oid expr_op,
        return result;
 }
 
-/*
- * Try to generate a string greater than the given string or any string it is
- * a prefix of.  If successful, return a palloc'd string; else return NULL.
- *
- * To work correctly in non-ASCII locales with weird collation orders,
- * we cannot simply increment "foo" to "fop" --- we have to check whether
- * we actually produced a string greater than the given one.  If not,
- * increment the righthand byte again and repeat.  If we max out the righthand
- * byte, truncate off the last character and start incrementing the next.
- * For example, if "z" were the last character in the sort order, then we
- * could produce "foo" as a string greater than "fonz".
- *
- * This could be rather slow in the worst case, but in most cases we won't
- * have to try more than one or two strings before succeeding.
- *
- * XXX in a sufficiently weird locale, this might produce incorrect results?
- * For example, in German I believe "ss" is treated specially --- if we are
- * given "foos" and return "foot", will this actually be greater than "fooss"?
- */
-static char *
-make_greater_string(const char *str, Oid datatype)
-{
-       char       *workstr;
-       int                     len;
-
-       /*
-        * Make a modifiable copy, which will be our return value if
-        * successful
-        */
-       workstr = pstrdup((char *) str);
-
-       while ((len = strlen(workstr)) > 0)
-       {
-               unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
-
-               /*
-                * Try to generate a larger string by incrementing the last byte.
-                */
-               while (*lastchar < (unsigned char) 255)
-               {
-                       (*lastchar)++;
-                       if (string_lessthan(str, workstr, datatype))
-                               return workstr; /* Success! */
-               }
-
-               /*
-                * Truncate off the last character, which might be more than 1
-                * byte in MULTIBYTE case.
-                */
-#ifdef MULTIBYTE
-               len = pg_mbcliplen((const unsigned char *) workstr, len, len - 1);
-               workstr[len] = '\0';
-#else
-               *lastchar = '\0';
-#endif
-       }
-
-       /* Failed... */
-       pfree(workstr);
-       return NULL;
-}
-
 /*
  * Handy subroutines for match_special_index_operator() and friends.
  */
@@ -2179,45 +1988,3 @@ string_to_const(const char *str, Oid datatype)
        return makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
                                         conval, false, false, false, false);
 }
-
-/*
- * Test whether two strings are "<" according to the rules of the given
- * datatype.  We do this the hard way, ie, actually calling the type's
- * "<" operator function, to ensure we get the right result...
- */
-static bool
-string_lessthan(const char *str1, const char *str2, Oid datatype)
-{
-       Datum           datum1 = string_to_datum(str1, datatype);
-       Datum           datum2 = string_to_datum(str2, datatype);
-       bool            result;
-
-       switch (datatype)
-       {
-               case TEXTOID:
-                       result = text_lt((text *) datum1, (text *) datum2);
-                       break;
-
-               case BPCHAROID:
-                       result = bpcharlt((char *) datum1, (char *) datum2);
-                       break;
-
-               case VARCHAROID:
-                       result = varcharlt((char *) datum1, (char *) datum2);
-                       break;
-
-               case NAMEOID:
-                       result = namelt((NameData *) datum1, (NameData *) datum2);
-                       break;
-
-               default:
-                       elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
-                       result = false;
-                       break;
-       }
-
-       pfree(DatumGetPointer(datum1));
-       pfree(DatumGetPointer(datum2));
-
-       return result;
-}
index fc6852c14902ebc5b94b100d35a81b4377aebe11..a1405602ee4189f5976749b821c17e562795ec61 100644 (file)
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.64 2000/04/12 17:15:51 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.65 2000/04/16 04:41:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include <ctype.h>
 #include <math.h>
 
 #include "access/heapam.h"
@@ -30,6 +31,7 @@
 #include "catalog/pg_proc.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
 #include "optimizer/cost.h"
 #include "parser/parse_func.h"
 #include "parser/parse_oper.h"
 /* default selectivity estimate for inequalities such as "A < b" */
 #define DEFAULT_INEQ_SEL  (1.0 / 3.0)
 
-static bool convert_string_to_scalar(char *str, int strlength,
-                                                double *scaleval);
+/* default selectivity estimate for pattern-match operators such as LIKE */
+#define DEFAULT_MATCH_SEL      0.01
+
+static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
+                                                         Datum lobound, Datum hibound, Oid boundstypid,
+                                                         double *scaledlobound, double *scaledhibound);
+static double convert_numeric_to_scalar(Datum value, Oid typid);
+static void convert_string_to_scalar(unsigned char *value,
+                                                                        double *scaledvalue,
+                                                                        unsigned char *lobound,
+                                                                        double *scaledlobound,
+                                                                        unsigned char *hibound,
+                                                                        double *scaledhibound);
+static double convert_one_string_to_scalar(unsigned char *value,
+                                                                                  int rangelo, int rangehi);
+static unsigned char * convert_string_datum(Datum value, Oid typid);
+static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static void getattproperties(Oid relid, AttrNumber attnum,
                                 Oid *typid,
                                 int *typlen,
@@ -64,6 +81,15 @@ static bool getattstatistics(Oid relid, AttrNumber attnum,
                                 Datum *commonval,
                                 Datum *loval,
                                 Datum *hival);
+static Selectivity prefix_selectivity(char *prefix,
+                                                                         Oid relid,
+                                                                         AttrNumber attno,
+                                                                         Oid datatype);
+static Selectivity pattern_selectivity(char *patt, Pattern_Type ptype);
+static bool string_lessthan(const char *str1, const char *str2,
+                               Oid datatype);
+static Oid     find_operator(const char *opname, Oid datatype);
+static Datum string_to_datum(const char *str, Oid datatype);
 
 
 /*
@@ -71,9 +97,10 @@ static bool getattstatistics(Oid relid, AttrNumber attnum,
  *
  * Note: this routine is also used to estimate selectivity for some
  * operators that are not "=" but have comparable selectivity behavior,
- * such as "~~" (text LIKE).  Even for "=" we must keep in mind that
- * the left and right datatypes may differ, so the type of the given
- * constant "value" may be different from the type of the attribute.
+ * such as "~=" (geometric approximate-match).  Even for "=", we must
+ * keep in mind that the left and right datatypes may differ, so the type
+ * of the given constant "value" may be different from the type of the
+ * attribute.
  */
 float64
 eqsel(Oid opid,
@@ -255,7 +282,8 @@ scalarltsel(Oid opid,
        {
                HeapTuple       oprtuple;
                Oid                     ltype,
-                                       rtype;
+                                       rtype,
+                                       contype;
                Oid                     typid;
                int                     typlen;
                bool            typbyval;
@@ -277,23 +305,7 @@ scalarltsel(Oid opid,
                        elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
                ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
                rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-
-               /* Convert the constant to a uniform comparison scale. */
-               if (!convert_to_scalar(value,
-                                                          ((flag & SEL_RIGHT) ? rtype : ltype),
-                                                          &val))
-               {
-
-                       /*
-                        * Ideally we'd produce an error here, on the grounds that the
-                        * given operator shouldn't have scalarltsel registered as its
-                        * selectivity func unless we can deal with its operand types.
-                        * But currently, all manner of stuff is invoking scalarltsel,
-                        * so give a default estimate until that can be fixed.
-                        */
-                       *result = DEFAULT_INEQ_SEL;
-                       return result;
-               }
+               contype = (flag & SEL_RIGHT) ? rtype : ltype;
 
                /* Now get info and stats about the attribute */
                getattproperties(relid, attno,
@@ -308,17 +320,24 @@ scalarltsel(Oid opid,
                        return result;
                }
 
-               /* Convert the attribute's loval/hival to common scale. */
-               if (!convert_to_scalar(loval, typid, &low) ||
-                       !convert_to_scalar(hival, typid, &high))
+               /* Convert the values to a uniform comparison scale. */
+               if (!convert_to_scalar(value, contype, &val,
+                                                          loval, hival, typid,
+                                                          &low, &high))
                {
-                       /* See above comments... */
+
+                       /*
+                        * Ideally we'd produce an error here, on the grounds that the
+                        * given operator shouldn't have scalarltsel registered as its
+                        * selectivity func unless we can deal with its operand types.
+                        * But currently, all manner of stuff is invoking scalarltsel,
+                        * so give a default estimate until that can be fixed.
+                        */
                        if (!typbyval)
                        {
                                pfree(DatumGetPointer(hival));
                                pfree(DatumGetPointer(loval));
                        }
-
                        *result = DEFAULT_INEQ_SEL;
                        return result;
                }
@@ -391,6 +410,183 @@ scalargtsel(Oid opid,
        return result;
 }
 
+/*
+ * patternsel                  - Generic code for pattern-match selectivity.
+ *
+ * Common implementation behind the LIKE and regex selectivity functions.
+ *
+ *     opid            pattern-match operator being estimated
+ *     ptype           pattern dialect, passed through to pattern_fixed_prefix()
+ *     relid/attno     relation and attribute the operator is applied to
+ *     value           the other operand (the pattern, when it is a constant)
+ *     flag            SEL_xxx bits describing "value"
+ *
+ * If "value" is not a constant on the right-hand side, the estimate is
+ * DEFAULT_MATCH_SEL.  Otherwise the pattern is split into a fixed prefix
+ * and a remainder:
+ *     - an exact-match pattern is estimated as if the operator were "=";
+ *     - anything else is estimated as (prefix selectivity) * (remainder
+ *       selectivity), clamped to the range 0..1.
+ *
+ * The return value is a pointer to the estimate, to be owned by the caller.
+ */
+static float64
+patternsel(Oid opid,
+                  Pattern_Type ptype,
+                  Oid relid,
+                  AttrNumber attno,
+                  Datum value,
+                  int32 flag)
+{
+       float64         result;
+
+       result = (float64) palloc(sizeof(float64data));
+       /* Must have a constant for the pattern, or cannot learn anything */
+       if ((flag & (SEL_CONSTANT | SEL_RIGHT)) != (SEL_CONSTANT | SEL_RIGHT))
+               *result = DEFAULT_MATCH_SEL;
+       else
+       {
+               HeapTuple       oprtuple;
+               Oid                     ltype,
+                                       rtype;
+               char       *patt;
+               Pattern_Prefix_Status pstatus;
+               char       *prefix;
+               char       *rest;
+
+               /*
+                * Get left and right datatypes of the operator so we know what
+                * type the attribute is.
+                */
+               oprtuple = get_operator_tuple(opid);
+               if (!HeapTupleIsValid(oprtuple))
+                       elog(ERROR, "patternsel: no tuple for operator %u", opid);
+               ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
+               rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
+
+               /* the right-hand const is type text for all supported operators */
+               Assert(rtype == TEXTOID);
+               patt = textout((text *) DatumGetPointer(value));
+
+               /* divide pattern into fixed prefix and remainder */
+               pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);
+
+               if (pstatus == Pattern_Prefix_Exact)
+               {
+                       /* Pattern specifies an exact match, so pretend operator is '=' */
+                       Oid             eqopr = find_operator("=", ltype);
+                       Datum   eqcon;
+
+                       if (eqopr == InvalidOid)
+                               elog(ERROR, "patternsel: no = operator for type %u", ltype);
+                       eqcon = string_to_datum(prefix, ltype);
+
+                       /*
+                        * We return whatever eqsel hands back, so release the buffer
+                        * allocated at function entry first; overwriting the pointer
+                        * without freeing it would leak that buffer.
+                        */
+                       pfree(result);
+                       result = eqsel(eqopr, relid, attno, eqcon, SEL_CONSTANT|SEL_RIGHT);
+                       pfree(DatumGetPointer(eqcon));
+               }
+               else
+               {
+                       /*
+                        * Not exact-match pattern.  We estimate selectivity of the
+                        * fixed prefix and remainder of pattern separately, then
+                        * combine the two.
+                        */
+                       Selectivity prefixsel;
+                       Selectivity restsel;
+                       Selectivity selec;
+
+                       /* with no usable prefix, only the remainder constrains the match */
+                       if (pstatus == Pattern_Prefix_Partial)
+                               prefixsel = prefix_selectivity(prefix, relid, attno, ltype);
+                       else
+                               prefixsel = 1.0;
+                       restsel = pattern_selectivity(rest, ptype);
+                       selec = prefixsel * restsel;
+                       /* result should be in range, but make sure... */
+                       if (selec < 0.0)
+                               selec = 0.0;
+                       else if (selec > 1.0)
+                               selec = 1.0;
+                       *result = (float64data) selec;
+               }
+               /* prefix may be NULL when the pattern has no fixed prefix */
+               if (prefix)
+                       pfree(prefix);
+               pfree(patt);
+       }
+       return result;
+}
+
+/*
+ *             regexeqsel
+ *
+ * Selectivity of a regular-expression pattern match.  All the work is
+ * done by patternsel() with Pattern_Type_Regex.
+ */
+float64
+regexeqsel(Oid opid, Oid relid, AttrNumber attno,
+                  Datum value, int32 flag)
+{
+       float64         matchsel;
+
+       matchsel = patternsel(opid, Pattern_Type_Regex, relid, attno, value, flag);
+       return matchsel;
+}
+
+/*
+ *             icregexeqsel
+ *
+ * Selectivity of a case-insensitive regex match.  All the work is done
+ * by patternsel() with Pattern_Type_Regex_IC.
+ */
+float64
+icregexeqsel(Oid opid, Oid relid, AttrNumber attno,
+                        Datum value, int32 flag)
+{
+       float64         matchsel;
+
+       matchsel = patternsel(opid, Pattern_Type_Regex_IC, relid, attno, value, flag);
+       return matchsel;
+}
+
+/*
+ *             likesel
+ *
+ * Selectivity of a LIKE pattern match.  All the work is done by
+ * patternsel() with Pattern_Type_Like.
+ */
+float64
+likesel(Oid opid, Oid relid, AttrNumber attno,
+               Datum value, int32 flag)
+{
+       float64         matchsel;
+
+       matchsel = patternsel(opid, Pattern_Type_Like, relid, attno, value, flag);
+       return matchsel;
+}
+
+/*
+ *             regexnesel
+ *
+ * Selectivity of a regular-expression pattern non-match: the complement
+ * of the estimate patternsel() gives for Pattern_Type_Regex.
+ */
+float64
+regexnesel(Oid opid, Oid relid, AttrNumber attno,
+                  Datum value, int32 flag)
+{
+       float64         sel = patternsel(opid, Pattern_Type_Regex,
+                                                                relid, attno, value, flag);
+
+       /* flip the match estimate in place and hand the same buffer back */
+       *sel = 1.0 - *sel;
+       return sel;
+}
+
+/*
+ *             icregexnesel
+ *
+ * Selectivity of a case-insensitive regex non-match: the complement of
+ * the estimate patternsel() gives for Pattern_Type_Regex_IC.
+ */
+float64
+icregexnesel(Oid opid, Oid relid, AttrNumber attno,
+                        Datum value, int32 flag)
+{
+       float64         sel = patternsel(opid, Pattern_Type_Regex_IC,
+                                                                relid, attno, value, flag);
+
+       /* flip the match estimate in place and hand the same buffer back */
+       *sel = 1.0 - *sel;
+       return sel;
+}
+
+/*
+ *             nlikesel
+ *
+ * Selectivity of a LIKE pattern non-match: the complement of the
+ * estimate patternsel() gives for Pattern_Type_Like.
+ */
+float64
+nlikesel(Oid opid, Oid relid, AttrNumber attno,
+                Datum value, int32 flag)
+{
+       float64         sel = patternsel(opid, Pattern_Type_Like,
+                                                                relid, attno, value, flag);
+
+       /* flip the match estimate in place and hand the same buffer back */
+       *sel = 1.0 - *sel;
+       return sel;
+}
+
 /*
  *             eqjoinsel               - Join selectivity of "="
  */
@@ -491,9 +687,112 @@ scalargtjoinsel(Oid opid,
        return result;
 }
 
+/*
+ *             regexeqjoinsel  - Join selectivity of regular-expression pattern match.
+ *
+ * None of the arguments are examined; the result is always a freshly
+ * palloc'd copy of DEFAULT_MATCH_SEL.
+ */
+float64
+regexeqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                          Oid relid2, AttrNumber attno2)
+{
+       float64         selec = (float64) palloc(sizeof(float64data));
+
+       /* No per-column analysis here: just hand back the fixed default. */
+       *selec = DEFAULT_MATCH_SEL;
+       return selec;
+}
+
+/*
+ *             icregexeqjoinsel        - Join selectivity of case-insensitive regex match.
+ *
+ * None of the arguments are examined; the result is always a freshly
+ * palloc'd copy of DEFAULT_MATCH_SEL.
+ */
+float64
+icregexeqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                Oid relid2, AttrNumber attno2)
+{
+       float64         selec = (float64) palloc(sizeof(float64data));
+
+       /* No per-column analysis here: just hand back the fixed default. */
+       *selec = DEFAULT_MATCH_SEL;
+       return selec;
+}
+
+/*
+ *             likejoinsel                     - Join selectivity of LIKE pattern match.
+ *
+ * None of the arguments are examined; the result is always a freshly
+ * palloc'd copy of DEFAULT_MATCH_SEL.
+ */
+float64
+likejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                       Oid relid2, AttrNumber attno2)
+{
+       float64         selec = (float64) palloc(sizeof(float64data));
+
+       /* No per-column analysis here: just hand back the fixed default. */
+       *selec = DEFAULT_MATCH_SEL;
+       return selec;
+}
+
+/*
+ *             regexnejoinsel  - Join selectivity of regex non-match.
+ *
+ * Computed as one minus the estimate from regexeqjoinsel().
+ */
+float64
+regexnejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                          Oid relid2, AttrNumber attno2)
+{
+       float64         selec = regexeqjoinsel(opid, relid1, attno1,
+                                                                          relid2, attno2);
+
+       /* Complement the match estimate in place and return the same storage. */
+       *selec = 1.0 - *selec;
+       return selec;
+}
+
+/*
+ *             icregexnejoinsel        - Join selectivity of case-insensitive regex non-match.
+ *
+ * Computed as one minus the estimate from icregexeqjoinsel().
+ */
+float64
+icregexnejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                Oid relid2, AttrNumber attno2)
+{
+       float64         selec = icregexeqjoinsel(opid, relid1, attno1,
+                                                                                relid2, attno2);
+
+       /* Complement the match estimate in place and return the same storage. */
+       *selec = 1.0 - *selec;
+       return selec;
+}
+
+/*
+ *             nlikejoinsel            - Join selectivity of LIKE pattern non-match.
+ *
+ * Computed as one minus the estimate from likejoinsel().
+ */
+float64
+nlikejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                        Oid relid2, AttrNumber attno2)
+{
+       float64         selec = likejoinsel(opid, relid1, attno1,
+                                                                       relid2, attno2);
+
+       /* Complement the match estimate in place and return the same storage. */
+       *selec = 1.0 - *selec;
+       return selec;
+}
+
+
 /*
  * convert_to_scalar
- *       Convert a non-NULL value of the indicated type to the comparison
+ *       Convert non-NULL values of the indicated types to the comparison
  *       scale needed by scalarltsel()/scalargtsel().
  *       Returns "true" if successful.
  *
@@ -501,7 +800,8 @@ scalargtjoinsel(Oid opid,
  * "double" values.
  *
  * String datatypes are converted by convert_string_to_scalar(),
- * which is explained below.
+ * which is explained below.  The reason why this routine deals with
+ * three values at a time, not just one, is that we need it for strings.
  *
  * The several datatypes representing absolute times are all converted
  * to Timestamp, which is actually a double, and then we just use that
@@ -511,237 +811,349 @@ scalargtjoinsel(Oid opid,
  * The several datatypes representing relative times (intervals) are all
  * converted to measurements expressed in seconds.
  */
-bool
-convert_to_scalar(Datum value, Oid typid,
-                                 double *scaleval)
+static bool
+convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
+                                 Datum lobound, Datum hibound, Oid boundstypid,
+                                 double *scaledlobound, double *scaledhibound)
 {
-       switch (typid)
+       switch (valuetypid)
        {
 
-                       /*
-                        * Built-in numeric types
-                        */
-                       case BOOLOID:
-                       *scaleval = (double) DatumGetUInt8(value);
-                       return true;
+               /*
+                * Built-in numeric types
+                */
+               case BOOLOID:
                case INT2OID:
-                       *scaleval = (double) DatumGetInt16(value);
-                       return true;
                case INT4OID:
-                       *scaleval = (double) DatumGetInt32(value);
-                       return true;
                case INT8OID:
-                       *scaleval = (double) (*i8tod((int64 *) DatumGetPointer(value)));
-                       return true;
                case FLOAT4OID:
-                       *scaleval = (double) (*DatumGetFloat32(value));
-                       return true;
                case FLOAT8OID:
-                       *scaleval = (double) (*DatumGetFloat64(value));
-                       return true;
                case NUMERICOID:
-                       *scaleval = (double) (*numeric_float8((Numeric) DatumGetPointer(value)));
-                       return true;
                case OIDOID:
                case REGPROCOID:
-                       /* we can treat OIDs as integers... */
-                       *scaleval = (double) DatumGetObjectId(value);
+                       *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
+                       *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
+                       *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
                        return true;
 
-                       /*
-                        * Built-in string types
-                        */
+               /*
+                * Built-in string types
+                */
                case CHAROID:
-                       {
-                               char            ch = DatumGetChar(value);
-
-                               return convert_string_to_scalar(&ch, 1, scaleval);
-                       }
                case BPCHAROID:
                case VARCHAROID:
                case TEXTOID:
-                       {
-                               char       *str = (char *) VARDATA(DatumGetPointer(value));
-                               int                     strlength = VARSIZE(DatumGetPointer(value)) - VARHDRSZ;
-
-                               return convert_string_to_scalar(str, strlength, scaleval);
-                       }
                case NAMEOID:
-                       {
-                               NameData   *nm = (NameData *) DatumGetPointer(value);
-
-                               return convert_string_to_scalar(NameStr(*nm), strlen(NameStr(*nm)),
-                                                                                               scaleval);
-                       }
+               {
+                       unsigned char *valstr = convert_string_datum(value, valuetypid);
+                       unsigned char *lostr = convert_string_datum(lobound, boundstypid);
+                       unsigned char *histr = convert_string_datum(hibound, boundstypid);
+
+                       convert_string_to_scalar(valstr, scaledvalue,
+                                                                        lostr, scaledlobound,
+                                                                        histr, scaledhibound);
+                       pfree(valstr);
+                       pfree(lostr);
+                       pfree(histr);
+                       return true;
+               }
 
-                       /*
-                        * Built-in absolute-time types
-                        */
+               /*
+                * Built-in time types
+                */
                case TIMESTAMPOID:
-                       *scaleval = *((Timestamp *) DatumGetPointer(value));
-                       return true;
                case ABSTIMEOID:
-                       *scaleval = *abstime_timestamp(value);
-                       return true;
                case DATEOID:
-                       *scaleval = *date_timestamp(value);
-                       return true;
-
-                       /*
-                        * Built-in relative-time types
-                        */
                case INTERVALOID:
-                       {
-                               Interval   *interval = (Interval *) DatumGetPointer(value);
-
-                               /*
-                                * Convert the month part of Interval to days using
-                                * assumed average month length of 365.25/12.0 days.  Not
-                                * too accurate, but plenty good enough for our purposes.
-                                */
-                               *scaleval = interval->time +
-                                       interval->month * (365.25 / 12.0 * 24.0 * 60.0 * 60.0);
-                               return true;
-                       }
                case RELTIMEOID:
-                       *scaleval = (RelativeTime) DatumGetInt32(value);
-                       return true;
                case TINTERVALOID:
-                       {
-                               TimeInterval interval = (TimeInterval) DatumGetPointer(value);
-
-                               if (interval->status != 0)
-                               {
-                                       *scaleval = interval->data[1] - interval->data[0];
-                                       return true;
-                               }
-                               break;
-                       }
                case TIMEOID:
-                       *scaleval = *((TimeADT *) DatumGetPointer(value));
+                       *scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
+                       *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
+                       *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
                        return true;
-
-               default:
-                       {
-
-                               /*
-                                * See whether there is a registered type-conversion
-                                * function, namely a procedure named "float8" with the
-                                * right signature. If so, assume we can convert the value
-                                * to the numeric scale.
-                                *
-                                * NOTE: there are no such procedures in the standard
-                                * distribution, except with argument types that we
-                                * already dealt with above. This code is just here as an
-                                * escape for user-defined types.
-                                */
-                               Oid                     oid_array[FUNC_MAX_ARGS];
-                               HeapTuple       ftup;
-
-                               MemSet(oid_array, 0, FUNC_MAX_ARGS * sizeof(Oid));
-                               oid_array[0] = typid;
-                               ftup = SearchSysCacheTuple(PROCNAME,
-                                                                                  PointerGetDatum("float8"),
-                                                                                  Int32GetDatum(1),
-                                                                                  PointerGetDatum(oid_array),
-                                                                                  0);
-                               if (HeapTupleIsValid(ftup) &&
-                               ((Form_pg_proc) GETSTRUCT(ftup))->prorettype == FLOAT8OID)
-                               {
-                                       RegProcedure convertproc = (RegProcedure) ftup->t_data->t_oid;
-                                       Datum           converted = (Datum) fmgr(convertproc, value);
-
-                                       *scaleval = (double) (*DatumGetFloat64(converted));
-                                       return true;
-                               }
-                               break;
-                       }
        }
        /* Don't know how to convert */
        return false;
 }
 
+/*
+ * Map a Datum of any built-in numeric type onto the "double" scale
+ * on behalf of convert_to_scalar().  Unsupported types raise an error.
+ */
+static double
+convert_numeric_to_scalar(Datum value, Oid typid)
+{
+       double          scaled = 0;
+
+       if (typid == BOOLOID)
+               scaled = (double) DatumGetUInt8(value);
+       else if (typid == INT2OID)
+               scaled = (double) DatumGetInt16(value);
+       else if (typid == INT4OID)
+               scaled = (double) DatumGetInt32(value);
+       else if (typid == INT8OID)
+               scaled = (double) (*i8tod((int64 *) DatumGetPointer(value)));
+       else if (typid == FLOAT4OID)
+               scaled = (double) (*DatumGetFloat32(value));
+       else if (typid == FLOAT8OID)
+               scaled = (double) (*DatumGetFloat64(value));
+       else if (typid == NUMERICOID)
+               scaled = (double) (*numeric_float8((Numeric) DatumGetPointer(value)));
+       else if (typid == OIDOID || typid == REGPROCOID)
+               scaled = (double) DatumGetObjectId(value);      /* OIDs act as integers */
+       else
+       {
+               /* Only reachable when scalarltsel/scalargtsel is attached to an
+                * operator mixing one numeric and one non-numeric operand. */
+               elog(ERROR, "convert_numeric_to_scalar: unsupported type %u", typid);
+       }
+       return scaled;
+}
+
 /*
  * Do convert_to_scalar()'s work for any character-string data type.
  *
- * String datatypes are converted to a scale that ranges from 0 to 1, where
- * we visualize the bytes of the string as fractional base-256 digits.
- * It's sufficient to consider the first few bytes, since double has only
- * limited precision (and we can't expect huge accuracy in our selectivity
- * predictions anyway!)
+ * String datatypes are converted to a scale that ranges from 0 to 1,
+ * where we visualize the bytes of the string as fractional digits.
  *
- * If USE_LOCALE is defined, we must pass the string through strxfrm()
- * before doing the computation, so as to generate correct locale-specific
- * results.
+ * We do not want the base to be 256, however, since that tends to
+ * generate inflated selectivity estimates; few databases will have
+ * occurrences of all 256 possible byte values at each position.
+ * Instead, use the smallest and largest byte values seen in the bounds
+ * as the estimated range for each byte, after some fudging to deal with
+ * the fact that we probably aren't going to see the full range that way.
+ *
+ * An additional refinement is that we discard any common prefix of the
+ * three strings before computing the scaled values.  This allows us to
+ * "zoom in" when we encounter a narrow data range.  An example is a phone
+ * number database where all the values begin with the same area code.
  */
-static bool
-convert_string_to_scalar(char *str, int strlength,
-                                                double *scaleval)
+static void
+convert_string_to_scalar(unsigned char *value,
+                                                double *scaledvalue,
+                                                unsigned char *lobound,
+                                                double *scaledlobound,
+                                                unsigned char *hibound,
+                                                double *scaledhibound)
 {
+       int                     rangelo,
+                               rangehi;
        unsigned char *sptr;
-       int                     slen;
 
+       rangelo = rangehi = hibound[0];
+       for (sptr = lobound; *sptr; sptr++)
+       {
+               if (rangelo > *sptr)
+                       rangelo = *sptr;
+               if (rangehi < *sptr)
+                       rangehi = *sptr;
+       }
+       for (sptr = hibound; *sptr; sptr++)
+       {
+               if (rangelo > *sptr)
+                       rangelo = *sptr;
+               if (rangehi < *sptr)
+                       rangehi = *sptr;
+       }
+       /* If range includes any upper-case ASCII chars, make it include all */
+       if (rangelo <= 'Z' && rangehi >= 'A')
+       {
+               if (rangelo > 'A')
+                       rangelo = 'A';
+               if (rangehi < 'Z')
+                       rangehi = 'Z';
+       }
+       /* Ditto lower-case */
+       if (rangelo <= 'z' && rangehi >= 'a')
+       {
+               if (rangelo > 'a')
+                       rangelo = 'a';
+               if (rangehi < 'z')
+                       rangehi = 'z';
+       }
+       /* Ditto digits */
+       if (rangelo <= '9' && rangehi >= '0')
+       {
+               if (rangelo > '0')
+                       rangelo = '0';
+               if (rangehi < '9')
+                       rangehi = '9';
+       }
+       /* If range includes less than 10 chars, assume we have not got enough
+        * data, and make it include regular ASCII set.
+        */
+       if (rangehi - rangelo < 9)
+       {
+               rangelo = ' ';
+               rangehi = 127;
+       }
+
+       /*
+        * Now strip any common prefix of the three strings.
+        */
+       while (*lobound)
+       {
+               if (*lobound != *hibound || *lobound != *value)
+                       break;
+               lobound++, hibound++, value++;
+       }
+
+       /*
+        * Now we can do the conversions.
+        */
+       *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
+       *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
+       *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
+}
+
+/* Scale one string into a fraction whose digits are bytes offset by rangelo. */
+static double
+convert_one_string_to_scalar(unsigned char *value, int rangelo, int rangehi)
+{
+       /* Since base is at least 10, need not consider more than about 20 chars */
+       const int       maxchars = 20;
+       int                     nchars = strlen((char *) value);
+       int                     i;
+       double          base = rangehi - rangelo + 1;
+       double          fraction = 0.0;
+       double          placevalue = base;
+
+       if (nchars <= 0)
+               return 0.0;                             /* empty string has scalar value 0 */
+       if (nchars > maxchars)
+               nchars = maxchars;
+
+       /* Accumulate one fractional digit per leading byte */
+       for (i = 0; i < nchars; i++)
+       {
+               int             digit = value[i];
+
+               /* Out-of-range bytes land one step outside the digit range */
+               if (digit < rangelo)
+                       digit = rangelo - 1;
+               else if (digit > rangehi)
+                       digit = rangehi + 1;
+
+               fraction += ((double) (digit - rangelo)) / placevalue;
+               placevalue *= base;
+       }
+       return fraction;
+}
+
+/*
+ * Convert a string-type Datum into a palloc'd, null-terminated string.
+ *
+ * If USE_LOCALE is defined, we must pass the string through strxfrm()
+ * before continuing, so as to generate correct locale-specific results.
+ */
+static unsigned char *
+convert_string_datum(Datum value, Oid typid)
+{
+       char       *val;
 #ifdef USE_LOCALE
-       char       *rawstr;
        char       *xfrmstr;
        size_t          xfrmsize;
        size_t          xfrmlen;
-
 #endif
-       double          num,
-                               denom;
 
-       if (strlength <= 0)
+       switch (typid)
        {
-               *scaleval = 0;                  /* empty string has scalar value 0 */
-               return true;
+               case CHAROID:
+                       val = (char *) palloc(2);
+                       val[0] = DatumGetChar(value);
+                       val[1] = '\0';
+                       break;
+               case BPCHAROID:
+               case VARCHAROID:
+               case TEXTOID:
+               {
+                       char       *str = (char *) VARDATA(DatumGetPointer(value));
+                       int                     strlength = VARSIZE(DatumGetPointer(value)) - VARHDRSZ;
+
+                       val = (char *) palloc(strlength+1);
+                       memcpy(val, str, strlength);
+                       val[strlength] = '\0';
+                       break;
+               }
+               case NAMEOID:
+               {
+                       NameData   *nm = (NameData *) DatumGetPointer(value);
+
+                       val = pstrdup(NameStr(*nm));
+                       break;
+               }
+               default:
+                       /* Can't get here unless someone tries to use scalarltsel
+                        * on an operator with one string and one non-string operand.
+                        */
+                       elog(ERROR, "convert_string_datum: unsupported type %u", typid);
+                       return NULL;
        }
 
 #ifdef USE_LOCALE
-       /* Need a null-terminated string to pass to strxfrm() */
-       rawstr = (char *) palloc(strlength + 1);
-       memcpy(rawstr, str, strlength);
-       rawstr[strlength] = '\0';
-
-       /* Guess that transformed string is not much bigger */
-       xfrmsize = strlength + 32;      /* arbitrary pad value here... */
+       /* Guess that transformed string is not much bigger than original */
+       xfrmsize = strlen(val) + 32;            /* arbitrary pad value here... */
        xfrmstr = (char *) palloc(xfrmsize);
-       xfrmlen = strxfrm(xfrmstr, rawstr, xfrmsize);
+       xfrmlen = strxfrm(xfrmstr, val, xfrmsize);
        if (xfrmlen >= xfrmsize)
        {
                /* Oops, didn't make it */
                pfree(xfrmstr);
                xfrmstr = (char *) palloc(xfrmlen + 1);
-               xfrmlen = strxfrm(xfrmstr, rawstr, xfrmlen + 1);
+               xfrmlen = strxfrm(xfrmstr, val, xfrmlen + 1);
        }
-       pfree(rawstr);
-
-       sptr = (unsigned char *) xfrmstr;
-       slen = xfrmlen;
-#else
-       sptr = (unsigned char *) str;
-       slen = strlength;
+       pfree(val);
+       val = xfrmstr;
 #endif
 
-       /* No need to consider more than about 8 bytes (sizeof double) */
-       if (slen > 8)
-               slen = 8;
+       return (unsigned char *) val;
+}
 
-       /* Convert initial characters to fraction */
-       num = 0.0;
-       denom = 256.0;
-       while (slen-- > 0)
+/*
+ * Do convert_to_scalar()'s work for any timevalue data type.
+ */
+static double
+convert_timevalue_to_scalar(Datum value, Oid typid)
+{
+       switch (typid)
        {
-               num += ((double) (*sptr++)) / denom;
-               denom *= 256.0;
-       }
+               case TIMESTAMPOID:
+                       return *((Timestamp *) DatumGetPointer(value));
+               case ABSTIMEOID:
+                       return *abstime_timestamp(value);
+               case DATEOID:
+                       return *date_timestamp(value);
+               case INTERVALOID:
+               {
+                       Interval   *interval = (Interval *) DatumGetPointer(value);
 
-#ifdef USE_LOCALE
-       pfree(xfrmstr);
-#endif
+                       /*
+                        * Convert the month part of Interval to days using
+                        * assumed average month length of 365.25/12.0 days.  Not
+                        * too accurate, but plenty good enough for our purposes.
+                        */
+                       return interval->time +
+                               interval->month * (365.25 / 12.0 * 24.0 * 60.0 * 60.0);
+               }
+               case RELTIMEOID:
+                       return (RelativeTime) DatumGetInt32(value);
+               case TINTERVALOID:
+               {
+                       TimeInterval interval = (TimeInterval) DatumGetPointer(value);
 
-       *scaleval = num;
-       return true;
+                       if (interval->status != 0)
+                               return interval->data[1] - interval->data[0];
+                       return 0;                       /* for lack of a better idea */
+               }
+               case TIMEOID:
+                       return *((TimeADT *) DatumGetPointer(value));
+       }
+       /* Can't get here unless someone tries to use scalarltsel/scalargtsel
+        * on an operator with one timevalue and one non-timevalue operand.
+        */
+       elog(ERROR, "convert_timevalue_to_scalar: unsupported type %u", typid);
+       return 0;
 }
 
 
@@ -914,6 +1326,623 @@ getattstatistics(Oid relid,
        return true;
 }
 
+/*-------------------------------------------------------------------------
+ *
+ * Pattern analysis functions
+ *
+ * These routines support analysis of LIKE and regular-expression patterns
+ * by the planner/optimizer.  It's important that they agree with the
+ * regular-expression code in backend/regex/ and the LIKE code in
+ * backend/utils/adt/like.c.
+ *
+ * Note that the prefix-analysis functions are called from
+ * backend/optimizer/path/indxpath.c as well as from routines in this file.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Extract the fixed prefix, if any, of a LIKE pattern.
+ *
+ * On return, *prefix holds a palloc'd copy of the literal leading text
+ * (with backslash escapes removed), or NULL when Pattern_Prefix_None is
+ * returned.  *rest points at the first pattern character that was not
+ * consumed into the prefix.  The result code tells the caller whether the
+ * pattern has no fixed prefix, a partial prefix, or matches only exactly.
+ */
+
+static Pattern_Prefix_Status
+like_fixed_prefix(char *patt, char **prefix, char **rest)
+{
+       char       *scan = patt;
+       char       *buf;
+       int                     buflen = 0;
+
+       /* The prefix can never be longer than the pattern itself */
+       buf = palloc(strlen(patt) + 1);
+       *prefix = buf;
+
+       while (*scan != '\0')
+       {
+               /* % and _ are wildcard characters in LIKE */
+               if (*scan == '%' || *scan == '_')
+                       break;
+               /* Backslash quotes the next character */
+               if (*scan == '\\')
+               {
+                       scan++;
+                       if (*scan == '\0')
+                               break;          /* trailing backslash: nothing to quote */
+               }
+
+               /*
+                * NOTE: %% is deliberately not treated as a literal %; neither
+                * textlike() nor the SQL92 spec gives it that meaning.
+                */
+               buf[buflen++] = *scan;
+               scan++;
+       }
+
+       buf[buflen] = '\0';
+       *rest = scan;
+
+       /* Ran off the end of the pattern (even an empty one): exact match */
+       if (*scan == '\0')
+               return Pattern_Prefix_Exact;
+       if (buflen > 0)
+               return Pattern_Prefix_Partial;
+
+       /* Pattern starts with a wildcard, so there is no usable prefix */
+       pfree(buf);
+       *prefix = NULL;
+       return Pattern_Prefix_None;
+}
+
+/*
+ * Regex version: a fixed prefix exists only when the pattern starts with ^
+ * and has no unquoted top-level |.  *prefix is palloc'd (NULL if None is
+ * returned); *rest points at the pattern text following the prefix part.
+ */
+static Pattern_Prefix_Status
+regex_fixed_prefix(char *patt, bool case_insensitive,
+                                  char **prefix, char **rest)
+{
+       char       *match;
+       int                     pos,
+                               match_pos,
+                               paren_depth;
+
+       /* Pattern must be anchored left */
+       if (patt[0] != '^')
+       {
+               *prefix = NULL;
+               *rest = patt;
+               return Pattern_Prefix_None;
+       }
+
+       /* If unquoted | is present at paren level 0 in pattern, then there
+        * are multiple alternatives for the start of the string. */
+       paren_depth = 0;
+       for (pos = 1; patt[pos]; pos++)
+       {
+               if (patt[pos] == '|' && paren_depth == 0)
+               {
+                       *prefix = NULL;
+                       *rest = patt;
+                       return Pattern_Prefix_None;
+               }
+               else if (patt[pos] == '(')
+                       paren_depth++;
+               else if (patt[pos] == ')' && paren_depth > 0)
+                       paren_depth--;
+               else if (patt[pos] == '\\')
+               {
+                       /* backslash quotes the next character */
+                       pos++;
+                       if (patt[pos] == '\0')
+                               break;
+               }
+       }
+
+       /* OK, allocate space for pattern */
+       *prefix = match = palloc(strlen(patt) + 1);
+       match_pos = 0;
+
+       /* note start at pos 1 to skip leading ^ */
+       for (pos = 1; patt[pos]; pos++)
+       {
+               /*
+                * Check for characters that indicate multiple possible matches here.
+                * The cast keeps isalpha() well-defined for high-bit chars; XXX it
+                * may still not be an adequately locale-sensitive case-folding test.
+                */
+               if (patt[pos] == '.' || patt[pos] == '(' ||
+                       patt[pos] == '[' || patt[pos] == '$' ||
+                       (case_insensitive && isalpha((unsigned char) patt[pos])))
+                       break;
+               /*
+                * Check for quantifiers.  Except for +, this means the preceding
+                * character is optional, so we must remove it from the prefix too!
+                */
+               if (patt[pos] == '*' || patt[pos] == '?' || patt[pos] == '{')
+               {
+                       if (match_pos > 0)
+                               match_pos--;
+                       pos--;
+                       break;
+               }
+               if (patt[pos] == '+')
+               {
+                       pos--;
+                       break;
+               }
+               if (patt[pos] == '\\')
+               {
+                       /* backslash quotes the next character */
+                       pos++;
+                       if (patt[pos] == '\0')
+                               break;
+               }
+               match[match_pos++] = patt[pos];
+       }
+
+       match[match_pos] = '\0';
+       *rest = &patt[pos];
+
+       if (patt[pos] == '$' && patt[pos + 1] == '\0')
+       {
+               *rest = &patt[pos + 1];
+               return Pattern_Prefix_Exact;    /* pattern specifies exact match */
+       }
+
+       if (match_pos > 0)
+               return Pattern_Prefix_Partial;
+
+       pfree(match);
+       *prefix = NULL;
+       return Pattern_Prefix_None;
+}
+
+/*
+ * Dispatch fixed-prefix extraction to the routine for the given pattern
+ * syntax.  An unrecognized ptype is reported through elog(ERROR).
+ */
+Pattern_Prefix_Status
+pattern_fixed_prefix(char *patt, Pattern_Type ptype,
+                                        char **prefix, char **rest)
+{
+       /* LIKE patterns have their own scanner */
+       if (ptype == Pattern_Type_Like)
+               return like_fixed_prefix(patt, prefix, rest);
+
+       /* The two regex flavors differ only in the case-insensitivity flag */
+       if (ptype == Pattern_Type_Regex)
+               return regex_fixed_prefix(patt, false, prefix, rest);
+       if (ptype == Pattern_Type_Regex_IC)
+               return regex_fixed_prefix(patt, true, prefix, rest);
+
+       /* Unknown pattern type */
+       elog(ERROR, "pattern_fixed_prefix: bogus ptype");
+
+       /* keep compiler quiet */
+       return Pattern_Prefix_None;
+}
+
+/*
+ * Estimate the selectivity of a fixed prefix for a pattern match.
+ *
+ * A fixed prefix "foo" is treated like the range condition
+ * "var >= 'foo' AND var < 'fop'" (see also indxqual.c).  The lower bound
+ * is estimated with scalargtsel().  When make_greater_string() can supply
+ * an upper bound, that bound is estimated with scalarltsel() and the two
+ * estimates are merged; otherwise the lower-bound estimate is returned by
+ * itself.
+ *
+ * elog(ERROR) is raised if a needed ">=" or "<" operator cannot be found
+ * for the datatype.
+ */
+static Selectivity
+prefix_selectivity(char *prefix,
+                                  Oid relid,
+                                  AttrNumber attno,
+                                  Oid datatype)
+{
+       Oid                     ge_opr;
+       Oid                     lt_opr;
+       Datum           bound;
+       Selectivity     lowsel;
+       Selectivity     highsel;
+       Selectivity     rangesel;
+       char       *upper;
+
+       /* Lower bound: "var >= prefix" */
+       ge_opr = find_operator(">=", datatype);
+       if (ge_opr == InvalidOid)
+               elog(ERROR, "prefix_selectivity: no >= operator for type %u",
+                        datatype);
+       bound = string_to_datum(prefix, datatype);
+       /* Assume scalargtsel is appropriate for all supported types */
+       lowsel = * scalargtsel(ge_opr, relid, attno,
+                                                  bound, SEL_CONSTANT|SEL_RIGHT);
+       pfree(DatumGetPointer(bound));
+
+       /* Without a string larger than the prefix there is no upper bound */
+       upper = make_greater_string(prefix, datatype);
+       if (upper == NULL)
+               return lowsel;
+
+       /* Upper bound: "var < upper" */
+       lt_opr = find_operator("<", datatype);
+       if (lt_opr == InvalidOid)
+               elog(ERROR, "prefix_selectivity: no < operator for type %u",
+                        datatype);
+       bound = string_to_datum(upper, datatype);
+       /* Assume scalarltsel is appropriate for all supported types */
+       highsel = * scalarltsel(lt_opr, relid, attno,
+                                                       bound, SEL_CONSTANT|SEL_RIGHT);
+       pfree(DatumGetPointer(bound));
+       pfree(upper);
+
+       /*
+        * Combine the two one-sided estimates the way a range query is
+        * handled (see clauselist_selectivity()).
+        */
+       rangesel = highsel + lowsel - 1.0;
+
+       /*
+        * A zero or slightly negative value most likely means a very tight
+        * range whose estimate was spoiled by roundoff; substitute a tiny
+        * positive number.  A clearly negative value suggests that one or
+        * both sides were default estimates, so fall back to a default that
+        * is small, but not real small.
+        */
+       if (rangesel <= 0.0)
+               rangesel = (rangesel < -0.01) ? 0.01 : 1.0e-10;
+
+       return rangesel;
+}
+
+
+/*
+ * Estimate the selectivity of a pattern of the specified type.
+ * Note that any fixed prefix of the pattern will have been removed already.
+ *
+ * For now, we use a very simplistic approach: fixed characters reduce the
+ * selectivity a good deal, character ranges reduce it a little,
+ * wildcards (such as % for LIKE or .* for regex) increase it.
+ *
+ * The estimate starts at 1.0 and is multiplied by one of the factors below
+ * for each pattern element; the final value is clamped to at most 1.0.
+ *
+ *     FIXED_CHAR_SEL           an ordinary or backslash-quoted character
+ *     CHAR_RANGE_SEL           a regex bracket expression [...]; a negated
+ *                                              one [^...] uses (1.0 - CHAR_RANGE_SEL)
+ *     ANY_CHAR_SEL             "_" in LIKE, "." in a regex
+ *     FULL_WILDCARD_SEL        "%" in LIKE (other than a leading one), and a
+ *                                              regex that does not end with "$"
+ *     PARTIAL_WILDCARD_SEL     the regex quantifiers "*", "?", "+" and "{...}"
+ */
+
+#define FIXED_CHAR_SEL 0.04    /* about 1/25 */
+#define CHAR_RANGE_SEL 0.25
+#define ANY_CHAR_SEL   0.9             /* not 1, since it won't match end-of-string */
+#define FULL_WILDCARD_SEL 5.0
+#define PARTIAL_WILDCARD_SEL 2.0
+
+/*
+ * Estimate the selectivity of a LIKE pattern.
+ *
+ * Starting from 1.0, one factor is multiplied in per pattern element:
+ * FULL_WILDCARD_SEL for "%", ANY_CHAR_SEL for "_", and FIXED_CHAR_SEL for
+ * any other character, including one quoted by a backslash.  A backslash
+ * at the very end of the pattern contributes nothing.  The result is
+ * clamped to at most 1.0.
+ */
+static Selectivity
+like_selectivity(char *patt)
+{
+       Selectivity             result = 1.0;
+       char               *cp = patt;
+
+       /* A leading % is already accounted for by the initial value */
+       if (*cp == '%')
+               cp++;
+
+       for (; *cp != '\0'; cp++)
+       {
+               /* % and _ are the LIKE wildcards */
+               if (*cp == '%')
+               {
+                       result *= FULL_WILDCARD_SEL;
+                       continue;
+               }
+               if (*cp == '_')
+               {
+                       result *= ANY_CHAR_SEL;
+                       continue;
+               }
+               if (*cp == '\\')
+               {
+                       /* Backslash quotes the next character, if there is one */
+                       cp++;
+                       if (*cp == '\0')
+                               break;
+               }
+               result *= FIXED_CHAR_SEL;
+       }
+
+       /* Several wildcards can push the product above 1 */
+       return (result > 1.0) ? 1.0 : result;
+}
+
+/*
+ * Estimate the selectivity of one piece of a regular expression, namely
+ * the first pattlen bytes starting at patt.
+ *
+ * Elements at parenthesis depth zero each multiply one factor into the
+ * estimate.  The contents of a parenthesized group are not counted
+ * directly; when the outermost group closes, the group is estimated by a
+ * recursive call and that result is multiplied in.  An unquoted "|" at
+ * depth zero ends the scan: the remainder is estimated recursively and
+ * added, since the alternatives' probabilities are summed.
+ *
+ * case_insensitive is only passed along to the recursive calls.
+ * The result is clamped to at most 1.0.
+ */
+static Selectivity
+regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
+{
+       Selectivity             result = 1.0;
+       int                             depth = 0;              /* current parenthesis nesting */
+       int                             group_start = 0;        /* dummy init to keep compiler quiet */
+       int                             i;
+
+       for (i = 0; i < pattlen; i++)
+       {
+               char            ch = patt[i];
+               bool            at_top = (depth == 0);
+
+               if (ch == '(')
+               {
+                       /* remember where the outermost group begins */
+                       if (at_top)
+                               group_start = i;
+                       depth++;
+                       continue;
+               }
+
+               if (ch == ')' && !at_top)
+               {
+                       depth--;
+                       if (depth == 0)
+                               result *= regex_selectivity_sub(patt + (group_start + 1),
+                                                                                               i - (group_start + 1),
+                                                                                               case_insensitive);
+                       continue;
+               }
+
+               if (ch == '|' && at_top)
+               {
+                       /*
+                        * Unquoted | outside any parentheses: the rest of the
+                        * pattern is another alternative, so add its probability.
+                        */
+                       result += regex_selectivity_sub(patt + (i + 1),
+                                                                                       pattlen - (i + 1),
+                                                                                       case_insensitive);
+                       break;                          /* rest of pattern is now processed */
+               }
+
+               if (ch == '[')
+               {
+                       bool    negated = false;
+
+                       i++;
+                       if (patt[i] == '^')
+                       {
+                               negated = true;
+                               i++;
+                       }
+                       /* a ']' right at the start of the class is not special */
+                       if (patt[i] == ']')
+                               i++;
+                       /* skip ahead to the closing bracket */
+                       while (i < pattlen && patt[i] != ']')
+                               i++;
+                       if (at_top)
+                               result *= (negated ? (1.0-CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
+                       continue;
+               }
+
+               if (ch == '\\')
+               {
+                       /* backslash quotes the next character */
+                       i++;
+                       if (i >= pattlen)
+                               break;
+                       if (at_top)
+                               result *= FIXED_CHAR_SEL;
+                       continue;
+               }
+
+               switch (ch)
+               {
+                       case '.':
+                               if (at_top)
+                                       result *= ANY_CHAR_SEL;
+                               break;
+
+                       case '*':
+                       case '?':
+                       case '+':
+                               /* Ought to be smarter about quantifiers... */
+                               if (at_top)
+                                       result *= PARTIAL_WILDCARD_SEL;
+                               break;
+
+                       case '{':
+                               /* skip the whole {...} bound */
+                               while (i < pattlen && patt[i] != '}')
+                                       i++;
+                               if (at_top)
+                                       result *= PARTIAL_WILDCARD_SEL;
+                               break;
+
+                       default:
+                               if (at_top)
+                                       result *= FIXED_CHAR_SEL;
+                               break;
+               }
+       }
+
+       /* Several wildcards can push the estimate above 1 */
+       if (result > 1.0)
+               result = 1.0;
+       return result;
+}
+
+/*
+ * Estimate the selectivity of a complete regular expression.
+ *
+ * A pattern whose last character is "$", not directly preceded by a
+ * backslash, is estimated without that "$".  Any other pattern is
+ * considered to have a trailing wildcard: its estimate is multiplied by
+ * FULL_WILDCARD_SEL and clamped to at most 1.0.
+ */
+static Selectivity
+regex_selectivity(char *patt, bool case_insensitive)
+{
+       int                             len = strlen(patt);
+       bool                    anchored;
+       Selectivity             result;
+
+       anchored = (len > 0 && patt[len-1] == '$' &&
+                               (len == 1 || patt[len-2] != '\\'));
+
+       /* has trailing $: estimate everything before it */
+       if (anchored)
+               return regex_selectivity_sub(patt, len-1, case_insensitive);
+
+       /* no trailing $: treat as if followed by a full wildcard */
+       result = regex_selectivity_sub(patt, len, case_insensitive);
+       result *= FULL_WILDCARD_SEL;
+       if (result > 1.0)
+               result = 1.0;
+       return result;
+}
+
+/*
+ * Estimate the selectivity of a pattern by handing it to the estimator
+ * for its pattern type: like_selectivity() for LIKE, regex_selectivity()
+ * for regular expressions (with the case-insensitive flag set only for
+ * Pattern_Type_Regex_IC).  An unrecognized ptype is reported through
+ * elog(ERROR).
+ */
+static Selectivity
+pattern_selectivity(char *patt, Pattern_Type ptype)
+{
+       switch (ptype)
+       {
+               case Pattern_Type_Like:
+                       return like_selectivity(patt);
+               case Pattern_Type_Regex:
+                       return regex_selectivity(patt, false);
+               case Pattern_Type_Regex_IC:
+                       return regex_selectivity(patt, true);
+               default:
+                       break;
+       }
+
+       elog(ERROR, "pattern_selectivity: bogus ptype");
+       return 1.0;                     /* keep compiler quiet */
+}
+
+
+/*
+ * Try to build a string that sorts after the given string and after every
+ * string having it as a prefix.  Returns a palloc'd string on success, or
+ * NULL if no such string could be produced.
+ *
+ * Simply turning "foo" into "fop" is not safe in non-ASCII locales with
+ * unusual collation orders, so every candidate is checked with the
+ * datatype's own "<" comparison.  The last byte is incremented until the
+ * candidate compares greater than the input; if that byte reaches 255
+ * first, the last character is dropped and the process repeats on the
+ * shorter string.  For example, if "z" were the last character in the
+ * sort order, "foo" could be produced as a string greater than "fonz".
+ *
+ * The worst case can be slow, but normally only one or two candidates
+ * have to be tried.
+ *
+ * XXX in a sufficiently weird locale, this might produce incorrect results?
+ * For example, in German I believe "ss" is treated specially --- if we are
+ * given "foos" and return "foot", will this actually be greater than "fooss"?
+ */
+char *
+make_greater_string(const char *str, Oid datatype)
+{
+       char       *candidate;
+       int                     curlen;
+
+       /* Work on a private copy; it becomes the result on success */
+       candidate = pstrdup((char *) str);
+
+       for (curlen = strlen(candidate); curlen > 0; curlen = strlen(candidate))
+       {
+               unsigned char *tail = (unsigned char *) &candidate[curlen - 1];
+
+               /* Keep bumping the final byte until the candidate sorts higher */
+               while (*tail < (unsigned char) 255)
+               {
+                       ++*tail;
+                       if (string_lessthan(str, candidate, datatype))
+                               return candidate;       /* Success! */
+               }
+
+               /*
+                * The final byte is exhausted: chop off the last character,
+                * which may occupy several bytes in the MULTIBYTE case.
+                */
+#ifdef MULTIBYTE
+               curlen = pg_mbcliplen((const unsigned char *) candidate,
+                                                         curlen, curlen - 1);
+               candidate[curlen] = '\0';
+#else
+               *tail = '\0';
+#endif
+       }
+
+       /* Ran out of characters without finding a greater string */
+       pfree(candidate);
+       return NULL;
+}
+
+/*
+ * Report whether str1 sorts before str2 under the "<" rules of the given
+ * datatype.  Both C strings are converted to datums and the type's own
+ * less-than function is called, so the answer matches what the operator
+ * itself would say.  The temporary datums are freed before returning.
+ *
+ * Only text, bpchar, varchar and name are handled; any other datatype is
+ * reported through elog(ERROR).
+ */
+static bool
+string_lessthan(const char *str1, const char *str2, Oid datatype)
+{
+       Datum           lhs = string_to_datum(str1, datatype);
+       Datum           rhs = string_to_datum(str2, datatype);
+       bool            is_less;
+
+       if (datatype == TEXTOID)
+               is_less = text_lt((text *) lhs, (text *) rhs);
+       else if (datatype == BPCHAROID)
+               is_less = bpcharlt((char *) lhs, (char *) rhs);
+       else if (datatype == VARCHAROID)
+               is_less = varcharlt((char *) lhs, (char *) rhs);
+       else if (datatype == NAMEOID)
+               is_less = namelt((NameData *) lhs, (NameData *) rhs);
+       else
+       {
+               elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
+               is_less = false;
+       }
+
+       pfree(DatumGetPointer(lhs));
+       pfree(DatumGetPointer(rhs));
+
+       return is_less;
+}
+
+/*
+ * Look up the binary operator with the given name whose left and right
+ * operands are both of the given datatype.  Returns the operator's OID,
+ * or InvalidOid if the syscache has no such entry.
+ */
+static Oid
+find_operator(const char *opname, Oid datatype)
+{
+       HeapTuple       tup = SearchSysCacheTuple(OPERNAME,
+                                                                                 PointerGetDatum(opname),
+                                                                                 ObjectIdGetDatum(datatype),
+                                                                                 ObjectIdGetDatum(datatype),
+                                                                                 CharGetDatum('b'));
+
+       return HeapTupleIsValid(tup) ? tup->t_data->t_oid : InvalidOid;
+}
+
+/*
+ * Build a Datum of the requested type from a C string.
+ *
+ * Every supported type is pass-by-reference, so the caller should pfree
+ * the result once it is no longer needed.  NAMEOID goes through namein();
+ * everything else goes through textin(), which cheats a little by
+ * assuming textin() is good enough for bpchar and varchar constants too.
+ */
+static Datum
+string_to_datum(const char *str, Oid datatype)
+{
+       char       *cstr = (char *) str;
+
+       return (datatype == NAMEOID)
+               ? PointerGetDatum(namein(cstr))
+               : PointerGetDatum(textin(cstr));
+}
+
 /*-------------------------------------------------------------------------
  *
  * Index cost estimation functions
index 34d4494b238a6b83328c49e7d4e6915c5ab32e71..6995587c0cc64a9dd8cd94d17671299c665e7331 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.132 2000/04/12 17:16:29 momjian Exp $
+ * $Id: pg_proc.h,v 1.133 2000/04/16 04:41:03 tgl Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -2436,6 +2436,32 @@ DESCR("convert text to timestamp");
 DATA(insert OID = 1780 ( to_date                       PGUID 11 f t f 2 f      1082 "25 25" 100 0 0 100  to_date - ));
 DESCR("convert text to date");
 
+/* Selectivity estimators for LIKE and related operators */
+DATA(insert OID = 1818 ( regexeqsel                    PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  regexeqsel - ));
+DESCR("restriction selectivity of regex match");
+DATA(insert OID = 1819 ( likesel                       PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  likesel - ));
+DESCR("restriction selectivity of LIKE");
+DATA(insert OID = 1820 ( icregexeqsel          PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  icregexeqsel - ));
+DESCR("restriction selectivity of case-insensitive regex match");
+DATA(insert OID = 1821 ( regexnesel                    PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  regexnesel - ));
+DESCR("restriction selectivity of regex non-match");
+DATA(insert OID = 1822 ( nlikesel                      PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  nlikesel - ));
+DESCR("restriction selectivity of NOT LIKE");
+DATA(insert OID = 1823 ( icregexnesel          PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  icregexnesel - ));
+DESCR("restriction selectivity of case-insensitive regex non-match");
+DATA(insert OID = 1824 ( regexeqjoinsel                PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100     regexeqjoinsel - ));
+DESCR("join selectivity of regex match");
+DATA(insert OID = 1825 ( likejoinsel           PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100     likejoinsel - ));
+DESCR("join selectivity of LIKE");
+DATA(insert OID = 1826 ( icregexeqjoinsel      PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100     icregexeqjoinsel - ));
+DESCR("join selectivity of case-insensitive regex match");
+DATA(insert OID = 1827 ( regexnejoinsel                PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100     regexnejoinsel - ));
+DESCR("join selectivity of regex non-match");
+DATA(insert OID = 1828 ( nlikejoinsel          PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100     nlikejoinsel - ));
+DESCR("join selectivity of NOT LIKE");
+DATA(insert OID = 1829 ( icregexnejoinsel      PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100     icregexnejoinsel - ));
+DESCR("join selectivity of case-insensitive regex non-match");
+
 
 /*
  * prototypes for functions pg_proc.c
index 80d91aba75a20bcc7d0ad4b6a5f0b4c4a87fc3aa..16bff65054dbf7754a53eb73a811f069050aaaaa 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: builtins.h,v 1.110 2000/04/12 17:16:54 momjian Exp $
+ * $Id: builtins.h,v 1.111 2000/04/16 04:41:03 tgl Exp $
  *
  * NOTES
  *       This should normally only be included by fmgr.h.
@@ -371,15 +371,47 @@ extern char *deparse_expression(Node *expr, List *rangetables,
                                   bool forceprefix);
 
 /* selfuncs.c */
-extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
-extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
-extern float64 scalarltsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
-extern float64 scalargtsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
-extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
-extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
-extern float64 scalarltjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
-extern float64 scalargtjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
-extern bool convert_to_scalar(Datum value, Oid typid, double *scaleval);
+extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno,
+                                        Datum value, int32 flag);
+extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno,
+                                         Datum value, int32 flag);
+extern float64 scalarltsel(Oid opid, Oid relid, AttrNumber attno,
+                                                  Datum value, int32 flag);
+extern float64 scalargtsel(Oid opid, Oid relid, AttrNumber attno,
+                                                  Datum value, int32 flag);
+extern float64 regexeqsel(Oid opid, Oid relid, AttrNumber attno,
+                                                 Datum value, int32 flag);
+extern float64 likesel(Oid opid, Oid relid, AttrNumber attno,
+                                          Datum value, int32 flag);
+extern float64 icregexeqsel(Oid opid, Oid relid, AttrNumber attno,
+                                                       Datum value, int32 flag);
+extern float64 regexnesel(Oid opid, Oid relid, AttrNumber attno,
+                                                 Datum value, int32 flag);
+extern float64 nlikesel(Oid opid, Oid relid, AttrNumber attno,
+                                               Datum value, int32 flag);
+extern float64 icregexnesel(Oid opid, Oid relid, AttrNumber attno,
+                                                       Datum value, int32 flag);
+
+extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                Oid relid2, AttrNumber attno2);
+extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                 Oid relid2, AttrNumber attno2);
+extern float64 scalarltjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                          Oid relid2, AttrNumber attno2);
+extern float64 scalargtjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                          Oid relid2, AttrNumber attno2);
+extern float64 regexeqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                         Oid relid2, AttrNumber attno2);
+extern float64 likejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                  Oid relid2, AttrNumber attno2);
+extern float64 icregexeqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                               Oid relid2, AttrNumber attno2);
+extern float64 regexnejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                         Oid relid2, AttrNumber attno2);
+extern float64 nlikejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                       Oid relid2, AttrNumber attno2);
+extern float64 icregexnejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+                                                               Oid relid2, AttrNumber attno2);
 
 extern void btcostestimate(Query *root, RelOptInfo *rel,
                           IndexOptInfo *index, List *indexQuals,
@@ -402,6 +434,22 @@ extern void gistcostestimate(Query *root, RelOptInfo *rel,
                                 Cost *indexTotalCost,
                                 Selectivity *indexSelectivity);
 
+typedef enum                   /* which pattern syntax a pattern string uses */
+{
+       Pattern_Type_Like, Pattern_Type_Regex, Pattern_Type_Regex_IC
+} Pattern_Type;                        /* LIKE, regex, case-insensitive regex */
+
+typedef enum                   /* status returned by pattern_fixed_prefix() */
+{
+       Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
+} Pattern_Prefix_Status;       /* no fixed prefix / prefix only / exact match */
+
+extern Pattern_Prefix_Status pattern_fixed_prefix(char *patt,
+                                                                                                 Pattern_Type ptype,
+                                                                                                 char **prefix,
+                                                                                                 char **rest);
+extern char *make_greater_string(const char *str, Oid datatype);
+
 /* tid.c */
 extern ItemPointer tidin(const char *str);
 extern char *tidout(ItemPointer itemPtr);