granicus.if.org Git - postgresql/commitdiff
Refactor pattern_fixed_prefix() to avoid dealing in incomplete patterns.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 10 Jul 2012 03:23:28 +0000 (23:23 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 10 Jul 2012 03:23:28 +0000 (23:23 -0400)
Previously, pattern_fixed_prefix() was defined to return whatever fixed
prefix it could extract from the pattern, plus the "rest" of the pattern.
That definition was sensible for LIKE patterns, but not so much for
regexes, where reconstituting a valid pattern minus the prefix could be
quite tricky (certainly the existing code wasn't doing that correctly).
Since the only thing that callers ever did with the "rest" of the pattern
was to pass it to like_selectivity() or regex_selectivity(), let's cut out
the middle-man and just have pattern_fixed_prefix's subroutines do this
directly.  Then pattern_fixed_prefix can return a simple selectivity
number, and the question of how to cope with partial patterns is removed
from its API specification.

While at it, adjust the API spec so that callers who don't actually care
about the pattern's selectivity (which is a lot of them) can pass NULL for
the selectivity pointer to skip doing the work of computing a selectivity
estimate.

This patch is only an API refactoring that doesn't actually change any
processing, other than allowing a little bit of useless work to be skipped.
However, it's necessary infrastructure for my upcoming fix to regex prefix
extraction, because after that change there won't be any simple way to
identify the "rest" of the regex, not even to the low level of fidelity
needed by regex_selectivity.  We can cope with that if regex_fixed_prefix
and regex_selectivity communicate directly, but not if we have to work
within the old API.  Hence, back-patch to all active branches.

src/backend/optimizer/path/indxpath.c
src/backend/utils/adt/selfuncs.c
src/include/utils/selfuncs.h

index 856e399fa2ec6fc35e835fdd41f0606ae303c567..d598d66dd738a08743d9568c44edca97920aeeda 100644 (file)
@@ -2081,7 +2081,6 @@ match_special_index_operator(Expr *clause, Oid opfamily,
        Oid                     expr_op;
        Const      *patt;
        Const      *prefix = NULL;
-       Const      *rest = NULL;
 
        /*
         * Currently, all known special operators require the indexkey on the
@@ -2108,12 +2107,12 @@ match_special_index_operator(Expr *clause, Oid opfamily,
                case OID_NAME_LIKE_OP:
                        /* the right-hand const is type text for all of these */
                        isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
-                                                                         &prefix, &rest) != Pattern_Prefix_None;
+                                                                         &prefix, NULL) != Pattern_Prefix_None;
                        break;
 
                case OID_BYTEA_LIKE_OP:
                        isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
-                                                                         &prefix, &rest) != Pattern_Prefix_None;
+                                                                         &prefix, NULL) != Pattern_Prefix_None;
                        break;
 
                case OID_TEXT_ICLIKE_OP:
@@ -2121,7 +2120,7 @@ match_special_index_operator(Expr *clause, Oid opfamily,
                case OID_NAME_ICLIKE_OP:
                        /* the right-hand const is type text for all of these */
                        isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
-                                                                         &prefix, &rest) != Pattern_Prefix_None;
+                                                                         &prefix, NULL) != Pattern_Prefix_None;
                        break;
 
                case OID_TEXT_REGEXEQ_OP:
@@ -2129,7 +2128,7 @@ match_special_index_operator(Expr *clause, Oid opfamily,
                case OID_NAME_REGEXEQ_OP:
                        /* the right-hand const is type text for all of these */
                        isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex,
-                                                                         &prefix, &rest) != Pattern_Prefix_None;
+                                                                         &prefix, NULL) != Pattern_Prefix_None;
                        break;
 
                case OID_TEXT_ICREGEXEQ_OP:
@@ -2137,7 +2136,7 @@ match_special_index_operator(Expr *clause, Oid opfamily,
                case OID_NAME_ICREGEXEQ_OP:
                        /* the right-hand const is type text for all of these */
                        isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
-                                                                         &prefix, &rest) != Pattern_Prefix_None;
+                                                                         &prefix, NULL) != Pattern_Prefix_None;
                        break;
 
                case OID_INET_SUB_OP:
@@ -2380,7 +2379,6 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
        Oid                     expr_op = ((OpExpr *) clause)->opno;
        Const      *patt = (Const *) rightop;
        Const      *prefix = NULL;
-       Const      *rest = NULL;
        Pattern_Prefix_Status pstatus;
        List       *result;
 
@@ -2396,7 +2394,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
                case OID_NAME_LIKE_OP:
                case OID_BYTEA_LIKE_OP:
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        result = prefix_quals(leftop, opfamily, prefix, pstatus);
                        break;
 
@@ -2405,7 +2403,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
                case OID_NAME_ICLIKE_OP:
                        /* the right-hand const is type text for all of these */
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        result = prefix_quals(leftop, opfamily, prefix, pstatus);
                        break;
 
@@ -2414,7 +2412,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
                case OID_NAME_REGEXEQ_OP:
                        /* the right-hand const is type text for all of these */
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        result = prefix_quals(leftop, opfamily, prefix, pstatus);
                        break;
 
@@ -2423,7 +2421,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
                case OID_NAME_ICREGEXEQ_OP:
                        /* the right-hand const is type text for all of these */
                        pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
-                                                                                  &prefix, &rest);
+                                                                                  &prefix, NULL);
                        result = prefix_quals(leftop, opfamily, prefix, pstatus);
                        break;
 
index 1455116ce9debe0a094a0356612b0c4e7afed049..8217628ab96c044ad600cf05be503c29228e0dfd 100644 (file)
@@ -132,7 +132,10 @@ static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
                                         Oid sortop, Datum *min, Datum *max);
 static Selectivity prefix_selectivity(VariableStatData *vardata,
                                   Oid vartype, Oid opfamily, Const *prefixcon);
-static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
+static Selectivity like_selectivity(const char *patt, int pattlen,
+                                                                       bool case_insensitive);
+static Selectivity regex_selectivity(const char *patt, int pattlen,
+                                                                        bool case_insensitive);
 static Datum string_to_datum(const char *str, Oid datatype);
 static Const *string_to_const(const char *str, Oid datatype);
 static Const *string_to_bytea_const(const char *str, size_t str_len);
@@ -916,9 +919,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
        Oid                     vartype;
        Oid                     opfamily;
        Pattern_Prefix_Status pstatus;
-       Const      *patt = NULL;
+       Const      *patt;
        Const      *prefix = NULL;
-       Const      *rest = NULL;
+       Selectivity     rest_selec = 0;
        double          result;
 
        /*
@@ -1008,13 +1011,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
                        return result;
        }
 
-       /* divide pattern into fixed prefix and remainder */
+       /*
+        * Pull out any fixed prefix implied by the pattern, and estimate the
+        * fractional selectivity of the remainder of the pattern.
+        */
        patt = (Const *) other;
-       pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);
+       pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest_selec);
 
        /*
-        * If necessary, coerce the prefix constant to the right type. (The "rest"
-        * constant need not be changed.)
+        * If necessary, coerce the prefix constant to the right type.
         */
        if (prefix && prefix->consttype != vartype)
        {
@@ -1088,15 +1093,13 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
                {
                        /* Nope, so fake it with the heuristic method */
                        Selectivity prefixsel;
-                       Selectivity restsel;
 
                        if (pstatus == Pattern_Prefix_Partial)
                                prefixsel = prefix_selectivity(&vardata, vartype,
                                                                                           opfamily, prefix);
                        else
                                prefixsel = 1.0;
-                       restsel = pattern_selectivity(rest, ptype);
-                       selec = prefixsel * restsel;
+                       selec = prefixsel * rest_selec;
                }
                else
                {
@@ -4092,9 +4095,9 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
  *
  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
  *     or to NULL if no fixed prefix exists for the pattern.
- * *rest is set to a palloc'd Const representing the remainder of the pattern
- *     after the portion describing the fixed prefix.
- * Each of these has the same type (TEXT or BYTEA) as the given pattern Const.
+ * If rest_selec is not NULL, *rest_selec is set to an estimate of the
+ *     selectivity of the remainder of the pattern (without any fixed prefix).
+ * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
  *
  * The return value distinguishes no fixed prefix, a partial prefix,
  * or an exact-match-only pattern.
@@ -4102,12 +4105,11 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
 
 static Pattern_Prefix_Status
 like_fixed_prefix(Const *patt_const, bool case_insensitive,
-                                 Const **prefix_const, Const **rest_const)
+                                 Const **prefix_const, Selectivity *rest_selec)
 {
        char       *match;
        char       *patt;
        int                     pattlen;
-       char       *rest;
        Oid                     typeid = patt_const->consttype;
        int                     pos,
                                match_pos;
@@ -4175,18 +4177,15 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
        }
 
        match[match_pos] = '\0';
-       rest = &patt[pos];
 
        if (typeid != BYTEAOID)
-       {
                *prefix_const = string_to_const(match, typeid);
-               *rest_const = string_to_const(rest, typeid);
-       }
        else
-       {
                *prefix_const = string_to_bytea_const(match, match_pos);
-               *rest_const = string_to_bytea_const(rest, pattlen - pos);
-       }
+
+       if (rest_selec != NULL)
+               *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
+                                                                          case_insensitive);
 
        pfree(patt);
        pfree(match);
@@ -4203,7 +4202,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
 
 static Pattern_Prefix_Status
 regex_fixed_prefix(Const *patt_const, bool case_insensitive,
-                                  Const **prefix_const, Const **rest_const)
+                                  Const **prefix_const, Selectivity *rest_selec)
 {
        char       *match;
        int                     pos,
@@ -4244,10 +4243,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
        /* Pattern must be anchored left */
        if (patt[pos] != '^')
        {
-               rest = patt;
-
                *prefix_const = NULL;
-               *rest_const = string_to_const(rest, typeid);
+
+               if (rest_selec != NULL)
+                       *rest_selec = regex_selectivity(patt, strlen(patt),
+                                                                                       case_insensitive);
 
                return Pattern_Prefix_None;
        }
@@ -4261,10 +4261,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
         */
        if (strchr(patt + pos, '|') != NULL)
        {
-               rest = patt;
-
                *prefix_const = NULL;
-               *rest_const = string_to_const(rest, typeid);
+
+               if (rest_selec != NULL)
+                       *rest_selec = regex_selectivity(patt, strlen(patt),
+                                                                                       case_insensitive);
 
                return Pattern_Prefix_None;
        }
@@ -4376,10 +4377,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
 
        if (patt[pos] == '$' && patt[pos + 1] == '\0')
        {
-               rest = &patt[pos + 1];
-
                *prefix_const = string_to_const(match, typeid);
-               *rest_const = string_to_const(rest, typeid);
+
+               if (rest_selec != NULL)
+                       *rest_selec = 1.0;
 
                pfree(patt);
                pfree(match);
@@ -4388,7 +4389,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
        }
 
        *prefix_const = string_to_const(match, typeid);
-       *rest_const = string_to_const(rest, typeid);
+
+       if (rest_selec != NULL)
+               *rest_selec = regex_selectivity(rest, strlen(rest),
+                                                                               case_insensitive);
 
        pfree(patt);
        pfree(match);
@@ -4401,23 +4405,23 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
 
 Pattern_Prefix_Status
 pattern_fixed_prefix(Const *patt, Pattern_Type ptype,
-                                        Const **prefix, Const **rest)
+                                        Const **prefix, Selectivity *rest_selec)
 {
        Pattern_Prefix_Status result;
 
        switch (ptype)
        {
                case Pattern_Type_Like:
-                       result = like_fixed_prefix(patt, false, prefix, rest);
+                       result = like_fixed_prefix(patt, false, prefix, rest_selec);
                        break;
                case Pattern_Type_Like_IC:
-                       result = like_fixed_prefix(patt, true, prefix, rest);
+                       result = like_fixed_prefix(patt, true, prefix, rest_selec);
                        break;
                case Pattern_Type_Regex:
-                       result = regex_fixed_prefix(patt, false, prefix, rest);
+                       result = regex_fixed_prefix(patt, false, prefix, rest_selec);
                        break;
                case Pattern_Type_Regex_IC:
-                       result = regex_fixed_prefix(patt, true, prefix, rest);
+                       result = regex_fixed_prefix(patt, true, prefix, rest_selec);
                        break;
                default:
                        elog(ERROR, "unrecognized ptype: %d", (int) ptype);
@@ -4517,7 +4521,8 @@ prefix_selectivity(VariableStatData *vardata,
 
 /*
  * Estimate the selectivity of a pattern of the specified type.
- * Note that any fixed prefix of the pattern will have been removed already.
+ * Note that any fixed prefix of the pattern will have been removed already,
+ * so actually we may be looking at just a fragment of the pattern.
  *
  * For now, we use a very simplistic approach: fixed characters reduce the
  * selectivity a good deal, character ranges reduce it a little,
@@ -4531,37 +4536,10 @@ prefix_selectivity(VariableStatData *vardata,
 #define PARTIAL_WILDCARD_SEL 2.0
 
 static Selectivity
-like_selectivity(Const *patt_const, bool case_insensitive)
+like_selectivity(const char *patt, int pattlen, bool case_insensitive)
 {
        Selectivity sel = 1.0;
        int                     pos;
-       Oid                     typeid = patt_const->consttype;
-       char       *patt;
-       int                     pattlen;
-
-       /* the right-hand const is type text or bytea */
-       Assert(typeid == BYTEAOID || typeid == TEXTOID);
-
-       if (typeid == BYTEAOID && case_insensitive)
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                  errmsg("case insensitive matching not supported on type bytea")));
-
-       if (typeid != BYTEAOID)
-       {
-               patt = DatumGetCString(DirectFunctionCall1(textout, patt_const->constvalue));
-               pattlen = strlen(patt);
-       }
-       else
-       {
-               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
-
-               pattlen = VARSIZE(bstr) - VARHDRSZ;
-               patt = (char *) palloc(pattlen);
-               memcpy(patt, VARDATA(bstr), pattlen);
-               if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
-                       pfree(bstr);
-       }
 
        /* Skip any leading wildcard; it's already factored into initial sel */
        for (pos = 0; pos < pattlen; pos++)
@@ -4591,13 +4569,11 @@ like_selectivity(Const *patt_const, bool case_insensitive)
        /* Could get sel > 1 if multiple wildcards */
        if (sel > 1.0)
                sel = 1.0;
-
-       pfree(patt);
        return sel;
 }
 
 static Selectivity
-regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
+regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
 {
        Selectivity sel = 1.0;
        int                     paren_depth = 0;
@@ -4690,26 +4666,9 @@ regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
 }
 
 static Selectivity
-regex_selectivity(Const *patt_const, bool case_insensitive)
+regex_selectivity(const char *patt, int pattlen, bool case_insensitive)
 {
        Selectivity sel;
-       char       *patt;
-       int                     pattlen;
-       Oid                     typeid = patt_const->consttype;
-
-       /*
-        * Should be unnecessary, there are no bytea regex operators defined. As
-        * such, it should be noted that the rest of this function has *not* been
-        * made safe for binary (possibly NULL containing) strings.
-        */
-       if (typeid == BYTEAOID)
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("regular-expression matching not supported on type bytea")));
-
-       /* the right-hand const is type text for all of these */
-       patt = DatumGetCString(DirectFunctionCall1(textout, patt_const->constvalue));
-       pattlen = strlen(patt);
 
        /* If patt doesn't end with $, consider it to have a trailing wildcard */
        if (pattlen > 0 && patt[pattlen - 1] == '$' &&
@@ -4729,33 +4688,6 @@ regex_selectivity(Const *patt_const, bool case_insensitive)
        return sel;
 }
 
-static Selectivity
-pattern_selectivity(Const *patt, Pattern_Type ptype)
-{
-       Selectivity result;
-
-       switch (ptype)
-       {
-               case Pattern_Type_Like:
-                       result = like_selectivity(patt, false);
-                       break;
-               case Pattern_Type_Like_IC:
-                       result = like_selectivity(patt, true);
-                       break;
-               case Pattern_Type_Regex:
-                       result = regex_selectivity(patt, false);
-                       break;
-               case Pattern_Type_Regex_IC:
-                       result = regex_selectivity(patt, true);
-                       break;
-               default:
-                       elog(ERROR, "unrecognized ptype: %d", (int) ptype);
-                       result = 1.0;           /* keep compiler quiet */
-                       break;
-       }
-       return result;
-}
-
 
 /*
  * Try to generate a string greater than the given string or any
index 7efe32b60ea3a9d5dad6fee748d86cce1bf57654..7ca940b7b5aeb16936659f29181f63e9b43b6760 100644 (file)
@@ -117,7 +117,7 @@ extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
 extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
                                         Pattern_Type ptype,
                                         Const **prefix,
-                                        Const **rest);
+                                        Selectivity *rest_selec);
 extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc);
 
 extern Datum eqsel(PG_FUNCTION_ARGS);