granicus.if.org Git - postgresql/commitdiff
Refactor pattern_fixed_prefix() to avoid dealing in incomplete patterns.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 10 Jul 2012 03:22:55 +0000 (23:22 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 10 Jul 2012 03:22:55 +0000 (23:22 -0400)
Previously, pattern_fixed_prefix() was defined to return whatever fixed
prefix it could extract from the pattern, plus the "rest" of the pattern.
That definition was sensible for LIKE patterns, but not so much for
regexes, where reconstituting a valid pattern minus the prefix could be
quite tricky (certainly the existing code wasn't doing that correctly).
Since the only thing that callers ever did with the "rest" of the pattern
was to pass it to like_selectivity() or regex_selectivity(), let's cut out
the middle-man and just have pattern_fixed_prefix's subroutines do this
directly.  Then pattern_fixed_prefix can return a simple selectivity
number, and the question of how to cope with partial patterns is removed
from its API specification.

While at it, adjust the API spec so that callers who don't actually care
about the pattern's selectivity (which is a lot of them) can pass NULL for
the selectivity pointer to skip doing the work of computing a selectivity
estimate.

This patch is only an API refactoring that doesn't actually change any
processing, other than allowing a little bit of useless work to be skipped.
However, it's necessary infrastructure for my upcoming fix to regex prefix
extraction, because after that change there won't be any simple way to
identify the "rest" of the regex, not even to the low level of fidelity
needed by regex_selectivity.  We can cope with that if regex_fixed_prefix
and regex_selectivity communicate directly, but not if we have to work
within the old API.  Hence, back-patch to all active branches.

src/backend/optimizer/path/indxpath.c
src/backend/utils/adt/selfuncs.c
src/include/utils/selfuncs.h

index 2e8ccd057853ef2aa138cab47d5282157a2620c3..66b68fc71d1a3f192100d1932db3c4000078451f 100644 (file)
@@ -2785,7 +2785,6 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
        Oid                     expr_coll;
        Const      *patt;
        Const      *prefix = NULL;
-       Const      *rest = NULL;
        Pattern_Prefix_Status pstatus = Pattern_Prefix_None;
 
        /*
@@ -2814,13 +2813,13 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
                case OID_NAME_LIKE_OP:
                        /* the right-hand const is type text for all of these */
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        isIndexable = (pstatus != Pattern_Prefix_None);
                        break;
 
                case OID_BYTEA_LIKE_OP:
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        isIndexable = (pstatus != Pattern_Prefix_None);
                        break;
 
@@ -2829,7 +2828,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
                case OID_NAME_ICLIKE_OP:
                        /* the right-hand const is type text for all of these */
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        isIndexable = (pstatus != Pattern_Prefix_None);
                        break;
 
@@ -2838,7 +2837,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
                case OID_NAME_REGEXEQ_OP:
                        /* the right-hand const is type text for all of these */
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        isIndexable = (pstatus != Pattern_Prefix_None);
                        break;
 
@@ -2847,7 +2846,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
                case OID_NAME_ICREGEXEQ_OP:
                        /* the right-hand const is type text for all of these */
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        isIndexable = (pstatus != Pattern_Prefix_None);
                        break;
 
@@ -3115,7 +3114,6 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
        Oid                     expr_coll = ((OpExpr *) clause)->inputcollid;
        Const      *patt = (Const *) rightop;
        Const      *prefix = NULL;
-       Const      *rest = NULL;
        Pattern_Prefix_Status pstatus;
 
        /*
@@ -3135,7 +3133,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
                        if (!op_in_opfamily(expr_op, opfamily))
                        {
                                pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
-                                                                                          &prefix, &rest);
+                                                                                          &prefix, NULL);
                                return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
                        }
                        break;
@@ -3147,7 +3145,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
                        {
                                /* the right-hand const is type text for all of these */
                                pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll,
-                                                                                          &prefix, &rest);
+                                                                                          &prefix, NULL);
                                return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
                        }
                        break;
@@ -3159,7 +3157,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
                        {
                                /* the right-hand const is type text for all of these */
                                pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll,
-                                                                                          &prefix, &rest);
+                                                                                          &prefix, NULL);
                                return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
                        }
                        break;
@@ -3171,7 +3169,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
                        {
                                /* the right-hand const is type text for all of these */
                                pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll,
-                                                                                          &prefix, &rest);
+                                                                                          &prefix, NULL);
                                return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
                        }
                        break;
index dc38034104eb88560f083dbd2adc1db66656c356..7eb64cba4bb039314610f28df35d7c35d40dc0a2 100644 (file)
@@ -192,7 +192,10 @@ static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
 static Selectivity prefix_selectivity(PlannerInfo *root,
                                   VariableStatData *vardata,
                                   Oid vartype, Oid opfamily, Const *prefixcon);
-static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
+static Selectivity like_selectivity(const char *patt, int pattlen,
+                                                                       bool case_insensitive);
+static Selectivity regex_selectivity(const char *patt, int pattlen,
+                                                                        bool case_insensitive);
 static Datum string_to_datum(const char *str, Oid datatype);
 static Const *string_to_const(const char *str, Oid datatype);
 static Const *string_to_bytea_const(const char *str, size_t str_len);
@@ -1115,9 +1118,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
        Oid                     vartype;
        Oid                     opfamily;
        Pattern_Prefix_Status pstatus;
-       Const      *patt = NULL;
+       Const      *patt;
        Const      *prefix = NULL;
-       Const      *rest = NULL;
+       Selectivity     rest_selec = 0;
        double          result;
 
        /*
@@ -1207,8 +1210,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
        }
 
        /*
-        * Divide pattern into fixed prefix and remainder.  Unlike many of the
-        * other functions in this file, we use the pattern operator's actual
+        * Pull out any fixed prefix implied by the pattern, and estimate the
+        * fractional selectivity of the remainder of the pattern.  Unlike many of
+        * the other functions in this file, we use the pattern operator's actual
         * collation for this step.  This is not because we expect the collation
         * to make a big difference in the selectivity estimate (it seldom would),
         * but because we want to be sure we cache compiled regexps under the
@@ -1216,11 +1220,10 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
         */
        patt = (Const *) other;
        pstatus = pattern_fixed_prefix(patt, ptype, collation,
-                                                                  &prefix, &rest);
+                                                                  &prefix, &rest_selec);
 
        /*
-        * If necessary, coerce the prefix constant to the right type. (The "rest"
-        * constant need not be changed.)
+        * If necessary, coerce the prefix constant to the right type.
         */
        if (prefix && prefix->consttype != vartype)
        {
@@ -1294,15 +1297,13 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
                {
                        Selectivity heursel;
                        Selectivity prefixsel;
-                       Selectivity restsel;
 
                        if (pstatus == Pattern_Prefix_Partial)
                                prefixsel = prefix_selectivity(root, &vardata, vartype,
                                                                                           opfamily, prefix);
                        else
                                prefixsel = 1.0;
-                       restsel = pattern_selectivity(rest, ptype);
-                       heursel = prefixsel * restsel;
+                       heursel = prefixsel * rest_selec;
 
                        if (selec < 0)          /* fewer than 10 histogram entries? */
                                selec = heursel;
@@ -5133,9 +5134,9 @@ pattern_char_isalpha(char c, bool is_multibyte,
  *
  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
  *     or to NULL if no fixed prefix exists for the pattern.
- * *rest is set to a palloc'd Const representing the remainder of the pattern
- *     after the portion describing the fixed prefix.
- * Each of these has the same type (TEXT or BYTEA) as the given pattern Const.
+ * If rest_selec is not NULL, *rest_selec is set to an estimate of the
+ *     selectivity of the remainder of the pattern (without any fixed prefix).
+ * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
  *
  * The return value distinguishes no fixed prefix, a partial prefix,
  * or an exact-match-only pattern.
@@ -5143,12 +5144,11 @@ pattern_char_isalpha(char c, bool is_multibyte,
 
 static Pattern_Prefix_Status
 like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
-                                 Const **prefix_const, Const **rest_const)
+                                 Const **prefix_const, Selectivity *rest_selec)
 {
        char       *match;
        char       *patt;
        int                     pattlen;
-       char       *rest;
        Oid                     typeid = patt_const->consttype;
        int                     pos,
                                match_pos;
@@ -5228,18 +5228,15 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
        }
 
        match[match_pos] = '\0';
-       rest = &patt[pos];
 
        if (typeid != BYTEAOID)
-       {
                *prefix_const = string_to_const(match, typeid);
-               *rest_const = string_to_const(rest, typeid);
-       }
        else
-       {
                *prefix_const = string_to_bytea_const(match, match_pos);
-               *rest_const = string_to_bytea_const(rest, pattlen - pos);
-       }
+
+       if (rest_selec != NULL)
+               *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
+                                                                          case_insensitive);
 
        pfree(patt);
        pfree(match);
@@ -5256,7 +5253,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 
 static Pattern_Prefix_Status
 regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
-                                  Const **prefix_const, Const **rest_const)
+                                  Const **prefix_const, Selectivity *rest_selec)
 {
        char       *match;
        int                     pos,
@@ -5318,10 +5315,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
        /* Pattern must be anchored left */
        if (patt[pos] != '^')
        {
-               rest = patt;
-
                *prefix_const = NULL;
-               *rest_const = string_to_const(rest, typeid);
+
+               if (rest_selec != NULL)
+                       *rest_selec = regex_selectivity(patt, strlen(patt),
+                                                                                       case_insensitive);
 
                return Pattern_Prefix_None;
        }
@@ -5335,10 +5333,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
         */
        if (strchr(patt + pos, '|') != NULL)
        {
-               rest = patt;
-
                *prefix_const = NULL;
-               *rest_const = string_to_const(rest, typeid);
+
+               if (rest_selec != NULL)
+                       *rest_selec = regex_selectivity(patt, strlen(patt),
+                                                                                       case_insensitive);
 
                return Pattern_Prefix_None;
        }
@@ -5434,10 +5433,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 
        if (patt[pos] == '$' && patt[pos + 1] == '\0')
        {
-               rest = &patt[pos + 1];
-
                *prefix_const = string_to_const(match, typeid);
-               *rest_const = string_to_const(rest, typeid);
+
+               if (rest_selec != NULL)
+                       *rest_selec = 1.0;
 
                pfree(patt);
                pfree(match);
@@ -5446,7 +5445,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
        }
 
        *prefix_const = string_to_const(match, typeid);
-       *rest_const = string_to_const(rest, typeid);
+
+       if (rest_selec != NULL)
+               *rest_selec = regex_selectivity(rest, strlen(rest),
+                                                                               case_insensitive);
 
        pfree(patt);
        pfree(match);
@@ -5459,23 +5461,27 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 
 Pattern_Prefix_Status
 pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
-                                        Const **prefix, Const **rest)
+                                        Const **prefix, Selectivity *rest_selec)
 {
        Pattern_Prefix_Status result;
 
        switch (ptype)
        {
                case Pattern_Type_Like:
-                       result = like_fixed_prefix(patt, false, collation, prefix, rest);
+                       result = like_fixed_prefix(patt, false, collation,
+                                                                          prefix, rest_selec);
                        break;
                case Pattern_Type_Like_IC:
-                       result = like_fixed_prefix(patt, true, collation, prefix, rest);
+                       result = like_fixed_prefix(patt, true, collation,
+                                                                          prefix, rest_selec);
                        break;
                case Pattern_Type_Regex:
-                       result = regex_fixed_prefix(patt, false, collation, prefix, rest);
+                       result = regex_fixed_prefix(patt, false, collation,
+                                                                               prefix, rest_selec);
                        break;
                case Pattern_Type_Regex_IC:
-                       result = regex_fixed_prefix(patt, true, collation, prefix, rest);
+                       result = regex_fixed_prefix(patt, true, collation,
+                                                                               prefix, rest_selec);
                        break;
                default:
                        elog(ERROR, "unrecognized ptype: %d", (int) ptype);
@@ -5590,7 +5596,8 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
 
 /*
  * Estimate the selectivity of a pattern of the specified type.
- * Note that any fixed prefix of the pattern will have been removed already.
+ * Note that any fixed prefix of the pattern will have been removed already,
+ * so actually we may be looking at just a fragment of the pattern.
  *
  * For now, we use a very simplistic approach: fixed characters reduce the
  * selectivity a good deal, character ranges reduce it a little,
@@ -5604,37 +5611,10 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
 #define PARTIAL_WILDCARD_SEL 2.0
 
 static Selectivity
-like_selectivity(Const *patt_const, bool case_insensitive)
+like_selectivity(const char *patt, int pattlen, bool case_insensitive)
 {
        Selectivity sel = 1.0;
        int                     pos;
-       Oid                     typeid = patt_const->consttype;
-       char       *patt;
-       int                     pattlen;
-
-       /* the right-hand const is type text or bytea */
-       Assert(typeid == BYTEAOID || typeid == TEXTOID);
-
-       if (typeid == BYTEAOID && case_insensitive)
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                  errmsg("case insensitive matching not supported on type bytea")));
-
-       if (typeid != BYTEAOID)
-       {
-               patt = TextDatumGetCString(patt_const->constvalue);
-               pattlen = strlen(patt);
-       }
-       else
-       {
-               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
-
-               pattlen = VARSIZE(bstr) - VARHDRSZ;
-               patt = (char *) palloc(pattlen);
-               memcpy(patt, VARDATA(bstr), pattlen);
-               if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
-                       pfree(bstr);
-       }
 
        /* Skip any leading wildcard; it's already factored into initial sel */
        for (pos = 0; pos < pattlen; pos++)
@@ -5664,13 +5644,11 @@ like_selectivity(Const *patt_const, bool case_insensitive)
        /* Could get sel > 1 if multiple wildcards */
        if (sel > 1.0)
                sel = 1.0;
-
-       pfree(patt);
        return sel;
 }
 
 static Selectivity
-regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
+regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
 {
        Selectivity sel = 1.0;
        int                     paren_depth = 0;
@@ -5763,26 +5741,9 @@ regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
 }
 
 static Selectivity
-regex_selectivity(Const *patt_const, bool case_insensitive)
+regex_selectivity(const char *patt, int pattlen, bool case_insensitive)
 {
        Selectivity sel;
-       char       *patt;
-       int                     pattlen;
-       Oid                     typeid = patt_const->consttype;
-
-       /*
-        * Should be unnecessary, there are no bytea regex operators defined. As
-        * such, it should be noted that the rest of this function has *not* been
-        * made safe for binary (possibly NULL containing) strings.
-        */
-       if (typeid == BYTEAOID)
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("regular-expression matching not supported on type bytea")));
-
-       /* the right-hand const is type text for all of these */
-       patt = TextDatumGetCString(patt_const->constvalue);
-       pattlen = strlen(patt);
 
        /* If patt doesn't end with $, consider it to have a trailing wildcard */
        if (pattlen > 0 && patt[pattlen - 1] == '$' &&
@@ -5802,33 +5763,6 @@ regex_selectivity(Const *patt_const, bool case_insensitive)
        return sel;
 }
 
-static Selectivity
-pattern_selectivity(Const *patt, Pattern_Type ptype)
-{
-       Selectivity result;
-
-       switch (ptype)
-       {
-               case Pattern_Type_Like:
-                       result = like_selectivity(patt, false);
-                       break;
-               case Pattern_Type_Like_IC:
-                       result = like_selectivity(patt, true);
-                       break;
-               case Pattern_Type_Regex:
-                       result = regex_selectivity(patt, false);
-                       break;
-               case Pattern_Type_Regex_IC:
-                       result = regex_selectivity(patt, true);
-                       break;
-               default:
-                       elog(ERROR, "unrecognized ptype: %d", (int) ptype);
-                       result = 1.0;           /* keep compiler quiet */
-                       break;
-       }
-       return result;
-}
-
 
 /*
  * For bytea, the increment function need only increment the current byte
index 87c6554b327d45cc2d82ab50c3a97341333939a2..aa5ee69d8bb7b414538896fa83aab2b7c5971e14 100644 (file)
@@ -134,7 +134,7 @@ extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
                                         Pattern_Type ptype,
                                         Oid collation,
                                         Const **prefix,
-                                        Const **rest);
+                                        Selectivity *rest_selec);
 extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
                                        Oid collation);