bool isIndexable = false;
Node *rightop;
Oid expr_op;
+ Oid expr_coll;
Const *patt;
Const *prefix = NULL;
Const *rest = NULL;
/* we know these will succeed */
rightop = get_rightop(clause);
expr_op = ((OpExpr *) clause)->opno;
+ expr_coll = ((OpExpr *) clause)->inputcollid;
/* again, required for all current special ops: */
if (!IsA(rightop, Const) ||
case OID_BPCHAR_LIKE_OP:
case OID_NAME_LIKE_OP:
/* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
&prefix, &rest);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
case OID_BYTEA_LIKE_OP:
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
&prefix, &rest);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
case OID_BPCHAR_ICLIKE_OP:
case OID_NAME_ICLIKE_OP:
/* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll,
&prefix, &rest);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
case OID_BPCHAR_REGEXEQ_OP:
case OID_NAME_REGEXEQ_OP:
/* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll,
&prefix, &rest);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
case OID_BPCHAR_ICREGEXEQ_OP:
case OID_NAME_ICREGEXEQ_OP:
/* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll,
&prefix, &rest);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
*
* The non-pattern opclasses will not sort the way we need in most non-C
* locales. We can use such an index anyway for an exact match (simple
- * equality), but not for prefix-match cases. Note that we are looking at
- * the index's collation, not the expression's collation -- this test is
- * not dependent on the LIKE/regex operator's collation (which would only
- * affect case folding behavior of ILIKE, anyway).
+ * equality), but not for prefix-match cases. Note that here we are
+ * looking at the index's collation, not the expression's collation --
+ * this test is *not* dependent on the LIKE/regex operator's collation.
*/
switch (expr_op)
{
isIndexable =
(opfamily == TEXT_PATTERN_BTREE_FAM_OID) ||
(opfamily == TEXT_BTREE_FAM_OID &&
- (pstatus == Pattern_Prefix_Exact || lc_collate_is_c(idxcollation)));
+ (pstatus == Pattern_Prefix_Exact ||
+ lc_collate_is_c(idxcollation)));
break;
case OID_BPCHAR_LIKE_OP:
isIndexable =
(opfamily == BPCHAR_PATTERN_BTREE_FAM_OID) ||
(opfamily == BPCHAR_BTREE_FAM_OID &&
- (pstatus == Pattern_Prefix_Exact || lc_collate_is_c(idxcollation)));
+ (pstatus == Pattern_Prefix_Exact ||
+ lc_collate_is_c(idxcollation)));
break;
case OID_NAME_LIKE_OP:
Node *leftop = get_leftop(clause);
Node *rightop = get_rightop(clause);
Oid expr_op = ((OpExpr *) clause)->opno;
+ Oid expr_coll = ((OpExpr *) clause)->inputcollid;
Const *patt = (Const *) rightop;
Const *prefix = NULL;
Const *rest = NULL;
case OID_BYTEA_LIKE_OP:
if (!op_in_opfamily(expr_op, opfamily))
{
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
&prefix, &rest);
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
}
if (!op_in_opfamily(expr_op, opfamily))
{
/* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll,
&prefix, &rest);
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
}
if (!op_in_opfamily(expr_op, opfamily))
{
/* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll,
&prefix, &rest);
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
}
if (!op_in_opfamily(expr_op, opfamily))
{
/* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll,
&prefix, &rest);
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
}
return result;
}
- /* divide pattern into fixed prefix and remainder */
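+ /*
+ * Divide pattern into fixed prefix and remainder. XXX we have to assume
+ * the default collation here, because we don't have access to the actual
+ * input collation for the operator. FIXME: this means case-insensitive
+ * patterns are examined under the default collation's rules rather than
+ * the operator's; the operator's input collation should be passed down
+ * to this point.
+ */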
patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);
+ pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
+ &prefix, &rest);
/*
* If necessary, coerce the prefix constant to the right type. (The "rest"
*-------------------------------------------------------------------------
*/
+/*
+ * Check whether char is a letter (and, hence, subject to case-folding)
+ *
+ * In multibyte character sets, we can't use isalpha, and it does not seem
+ * worth trying to convert to wchar_t to use iswalpha. Instead, just assume
+ * any multibyte char is potentially case-varying.
+ *
+ * Arguments:
+ *	c: the single byte of the pattern to classify
+ *	is_multibyte: when true, any byte with the high bit set is reported as
+ *		a letter without consulting the locale
+ *	locale: locale handle passed to isalpha_l(); only consulted when
+ *		HAVE_LOCALE_T is defined and the handle is nonzero
+ *	locale_is_c: when true, only the ASCII ranges A-Z and a-z are letters;
+ *		this test takes precedence over all the others
+ *
+ * If none of the above applies (in particular, when locale is zero), we
+ * fall back to plain isalpha().
+ *
+ * The result is zero for "not a letter" and nonzero for "letter". Callers
+ * should test only for nonzero, since the isalpha()/isalpha_l() results are
+ * passed through unchanged rather than being normalized to 0/1.
+ */
+static int
+pattern_char_isalpha(char c, bool is_multibyte,
+ pg_locale_t locale, bool locale_is_c)
+{
+ if (locale_is_c) /* ASCII letters only; no ctype lookup */
+ return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+ else if (is_multibyte && IS_HIGHBIT_SET(c)) /* assume case-varying */
+ return true;
+#ifdef HAVE_LOCALE_T
+ else if (locale) /* have an explicit locale handle */
+ return isalpha_l((unsigned char) c, locale);
+#endif
+ else
+ return isalpha((unsigned char) c);
+}
+
/*
* Extract the fixed prefix, if any, for a pattern.
*
*/
static Pattern_Prefix_Status
-like_fixed_prefix(Const *patt_const, bool case_insensitive,
+like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Const **prefix_const, Const **rest_const)
{
char *match;
int pos,
match_pos;
bool is_multibyte = (pg_database_encoding_max_length() > 1);
+ pg_locale_t locale = 0;
+ bool locale_is_c = false;
/* the right-hand const is type text or bytea */
Assert(typeid == BYTEAOID || typeid == TEXTOID);
- if (typeid == BYTEAOID && case_insensitive)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ if (case_insensitive)
+ {
+ if (typeid == BYTEAOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("case insensitive matching not supported on type bytea")));
+ /* If case-insensitive, we need locale info */
+ if (lc_ctype_is_c(collation))
+ locale_is_c = true;
+ else if (collation != DEFAULT_COLLATION_OID)
+ {
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for ILIKE"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+ locale = pg_newlocale_from_collation(collation);
+ }
+ }
+
if (typeid != BYTEAOID)
{
patt = TextDatumGetCString(patt_const->constvalue);
break;
}
- /*
- * XXX In multibyte character sets, we can't trust isalpha, so assume
- * any multibyte char is potentially case-varying.
- */
- if (case_insensitive)
- {
- if (is_multibyte && (unsigned char) patt[pos] >= 0x80)
- break;
- if (isalpha((unsigned char) patt[pos]))
- break;
- }
+ /* Stop if case-varying character (it's sort of a wildcard) */
+ if (case_insensitive &&
+ pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
+ break;
- /*
- * NOTE: this code used to think that %% meant a literal %, but
- * textlike() itself does not think that, and the SQL92 spec doesn't
- * say any such thing either.
- */
match[match_pos++] = patt[pos];
}
}
static Pattern_Prefix_Status
-regex_fixed_prefix(Const *patt_const, bool case_insensitive,
+regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Const **prefix_const, Const **rest_const)
{
char *match;
char *rest;
Oid typeid = patt_const->consttype;
bool is_multibyte = (pg_database_encoding_max_length() > 1);
+ pg_locale_t locale = 0;
+ bool locale_is_c = false;
/*
* Should be unnecessary, there are no bytea regex operators defined. As
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("regular-expression matching not supported on type bytea")));
+ if (case_insensitive)
+ {
+ /* If case-insensitive, we need locale info */
+ if (lc_ctype_is_c(collation))
+ locale_is_c = true;
+ else if (collation != DEFAULT_COLLATION_OID)
+ {
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for regular expression"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+ locale = pg_newlocale_from_collation(collation);
+ }
+ }
+
/* the right-hand const is type text for all of these */
patt = TextDatumGetCString(patt_const->constvalue);
patt[pos] == '$')
break;
- /*
- * XXX In multibyte character sets, we can't trust isalpha, so assume
- * any multibyte char is potentially case-varying.
- */
- if (case_insensitive)
- {
- if (is_multibyte && (unsigned char) patt[pos] >= 0x80)
- break;
- if (isalpha((unsigned char) patt[pos]))
- break;
- }
+ /* Stop if case-varying character (it's sort of a wildcard) */
+ if (case_insensitive &&
+ pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
+ break;
/*
* Check for quantifiers. Except for +, this means the preceding
* backslash followed by alphanumeric is an escape, not a quoted
* character. Must treat it as having multiple possible matches.
* Note: since only ASCII alphanumerics are escapes, we don't have to
- * be paranoid about multibyte here.
+ * be paranoid about multibyte or collations here.
*/
if (patt[pos] == '\\')
{
}
Pattern_Prefix_Status
-pattern_fixed_prefix(Const *patt, Pattern_Type ptype,
+pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
Const **prefix, Const **rest)
{
Pattern_Prefix_Status result;
switch (ptype)
{
case Pattern_Type_Like:
- result = like_fixed_prefix(patt, false, prefix, rest);
+ result = like_fixed_prefix(patt, false, collation, prefix, rest);
break;
case Pattern_Type_Like_IC:
- result = like_fixed_prefix(patt, true, prefix, rest);
+ result = like_fixed_prefix(patt, true, collation, prefix, rest);
break;
case Pattern_Type_Regex:
- result = regex_fixed_prefix(patt, false, prefix, rest);
+ result = regex_fixed_prefix(patt, false, collation, prefix, rest);
break;
case Pattern_Type_Regex_IC:
- result = regex_fixed_prefix(patt, true, prefix, rest);
+ result = regex_fixed_prefix(patt, true, collation, prefix, rest);
break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);