From 6497a18e6c1b5874566a77737ec3d381fded3ec2 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 19 Mar 2018 18:49:53 -0400
Subject: [PATCH] Fix some corner-case issues in REFRESH MATERIALIZED VIEW CONCURRENTLY.

refresh_by_match_merge() has some issues in the way it builds a SQL
query to construct the "diff" table:

1. It doesn't require the selected unique index(es) to be indimmediate.
2. It doesn't pay attention to the particular equality semantics enforced
by a given index, but just assumes that they must be those of the column
datatype's default btree opclass.
3. It doesn't check that the indexes are btrees.
4. It's insufficiently careful to ensure that the parser will pick the
intended operator when parsing the query. (This would have been a
security bug before CVE-2018-1058.)
5. It's not careful about indexes on system columns.

The way to fix #4 is to make use of the existing code in ri_triggers.c
for generating an arbitrary binary operator clause. I chose to move
that to ruleutils.c, since that seems a more reasonable place to be
exporting such functionality from than ri_triggers.c.

While #1, #3, and #5 are just latent given existing feature restrictions,
and #2 doesn't arise in the core system for lack of alternate opclasses
with different equality behaviors, #4 seems like an issue worth
back-patching. That's the bulk of the change anyway, so just back-patch
the whole thing to 9.4 where this code was introduced.

Discussion: https://postgr.es/m/13836.1521413227@sss.pgh.pa.us --- src/backend/commands/matview.c | 181 ++++++++++++++++++---------- src/backend/utils/adt/ri_triggers.c | 69 ++--------- src/backend/utils/adt/ruleutils.c | 80 ++++++++++++ src/include/utils/builtins.h | 4 + 4 files changed, 207 insertions(+), 127 deletions(-) diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 2ff23abbe9..23892b1b81 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -21,6 +21,8 @@ #include "catalog/catalog.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/pg_am.h" +#include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" #include "commands/cluster.h" #include "commands/matview.h" @@ -40,7 +42,6 @@ #include "utils/rel.h" #include "utils/snapmgr.h" #include "utils/syscache.h" -#include "utils/typcache.h" typedef struct @@ -62,14 +63,11 @@ static void transientrel_shutdown(DestReceiver *self); static void transientrel_destroy(DestReceiver *self); static uint64 refresh_matview_datafill(DestReceiver *dest, Query *query, const char *queryString); - static char *make_temptable_name_n(char *tempname, int n); -static void mv_GenerateOper(StringInfo buf, Oid opoid); - static void refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, int save_sec_context); static void refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence); - +static bool is_usable_unique_index(Relation indexRel); static void OpenMatViewIncrementalMaintenance(void); static void CloseMatViewIncrementalMaintenance(void); @@ -230,23 +228,12 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, { Oid indexoid = lfirst_oid(indexoidscan); Relation indexRel; - Form_pg_index indexStruct; indexRel = index_open(indexoid, AccessShareLock); - indexStruct = indexRel->rd_index; - - if (indexStruct->indisunique && - IndexIsValid(indexStruct) && - RelationGetIndexExpressions(indexRel) == NIL 
&& - RelationGetIndexPredicate(indexRel) == NIL && - indexStruct->indnatts > 0) - { - hasUniqueIndex = true; - index_close(indexRel, AccessShareLock); - break; - } - + hasUniqueIndex = is_usable_unique_index(indexRel); index_close(indexRel, AccessShareLock); + if (hasUniqueIndex) + break; } list_free(indexoidlist); @@ -557,25 +544,6 @@ make_temptable_name_n(char *tempname, int n) return namebuf.data; } -static void -mv_GenerateOper(StringInfo buf, Oid opoid) -{ - HeapTuple opertup; - Form_pg_operator operform; - - opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opoid)); - if (!HeapTupleIsValid(opertup)) - elog(ERROR, "cache lookup failed for operator %u", opoid); - operform = (Form_pg_operator) GETSTRUCT(opertup); - Assert(operform->oprkind == 'b'); - - appendStringInfo(buf, "OPERATOR(%s.%s)", - quote_identifier(get_namespace_name(operform->oprnamespace)), - NameStr(operform->oprname)); - - ReleaseSysCache(opertup); -} - /* * refresh_by_match_merge * @@ -623,7 +591,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, List *indexoidlist; ListCell *indexoidscan; int16 relnatts; - bool *usedForQual; + Oid *opUsedForQual; initStringInfo(&querybuf); matviewRel = heap_open(matviewOid, NoLock); @@ -635,7 +603,6 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, diffname = make_temptable_name_n(tempname, 2); relnatts = matviewRel->rd_rel->relnatts; - usedForQual = (bool *) palloc0(sizeof(bool) * relnatts); /* Open SPI context. */ if (SPI_connect() != SPI_OK_CONNECT) @@ -699,46 +666,82 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, * include all rows. 
*/ tupdesc = matviewRel->rd_att; + opUsedForQual = (Oid *) palloc0(sizeof(Oid) * relnatts); foundUniqueIndex = false; + indexoidlist = RelationGetIndexList(matviewRel); foreach(indexoidscan, indexoidlist) { Oid indexoid = lfirst_oid(indexoidscan); Relation indexRel; - Form_pg_index indexStruct; indexRel = index_open(indexoid, RowExclusiveLock); - indexStruct = indexRel->rd_index; - - /* - * We're only interested if it is unique, valid, contains no - * expressions, and is not partial. - */ - if (indexStruct->indisunique && - IndexIsValid(indexStruct) && - RelationGetIndexExpressions(indexRel) == NIL && - RelationGetIndexPredicate(indexRel) == NIL) + if (is_usable_unique_index(indexRel)) { + Form_pg_index indexStruct = indexRel->rd_index; int numatts = indexStruct->indnatts; + oidvector *indclass; + Datum indclassDatum; + bool isnull; int i; + /* Must get indclass the hard way. */ + indclassDatum = SysCacheGetAttr(INDEXRELID, + indexRel->rd_indextuple, + Anum_pg_index_indclass, + &isnull); + Assert(!isnull); + indclass = (oidvector *) DatumGetPointer(indclassDatum); + /* Add quals for all columns from this index. */ for (i = 0; i < numatts; i++) { int attnum = indexStruct->indkey.values[i]; + Oid opclass = indclass->values[i]; Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); - Oid type; + Oid attrtype = attr->atttypid; + HeapTuple cla_ht; + Form_pg_opclass cla_tup; + Oid opfamily; + Oid opcintype; Oid op; - const char *colname; + const char *leftop; + const char *rightop; /* - * Only include the column once regardless of how many times - * it shows up in how many indexes. + * Identify the equality operator associated with this index + * column. First we need to look up the column's opclass. 
*/ - if (usedForQual[attnum - 1]) + cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(cla_ht)) + elog(ERROR, "cache lookup failed for opclass %u", opclass); + cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht); + Assert(cla_tup->opcmethod == BTREE_AM_OID); + opfamily = cla_tup->opcfamily; + opcintype = cla_tup->opcintype; + ReleaseSysCache(cla_ht); + + op = get_opfamily_member(opfamily, opcintype, opcintype, + BTEqualStrategyNumber); + if (!OidIsValid(op)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + BTEqualStrategyNumber, opcintype, opcintype, opfamily); + + /* + * If we find the same column with the same equality semantics + * in more than one index, we only need to emit the equality + * clause once. + * + * Since we only remember the last equality operator, this + * code could be fooled into emitting duplicate clauses given + * multiple indexes with several different opclasses ... but + * that's so unlikely it doesn't seem worth spending extra + * code to avoid. + */ + if (opUsedForQual[attnum - 1] == op) continue; - usedForQual[attnum - 1] = true; + opUsedForQual[attnum - 1] = op; /* * Actually add the qual, ANDed with any others. 
@@ -746,12 +749,15 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, if (foundUniqueIndex) appendStringInfoString(&querybuf, " AND "); - colname = quote_identifier(NameStr(attr->attname)); - appendStringInfo(&querybuf, "newdata.%s ", colname); - type = attnumTypeId(matviewRel, attnum); - op = lookup_type_cache(type, TYPECACHE_EQ_OPR)->eq_opr; - mv_GenerateOper(&querybuf, op); - appendStringInfo(&querybuf, " mv.%s", colname); + leftop = quote_qualified_identifier("newdata", + NameStr(attr->attname)); + rightop = quote_qualified_identifier("mv", + NameStr(attr->attname)); + + generate_operator_clause(&querybuf, + leftop, attrtype, + op, + rightop, attrtype); foundUniqueIndex = true; } @@ -764,11 +770,11 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, list_free(indexoidlist); /* - * There must be at least one unique index on the matview. + * There must be at least one usable unique index on the matview. * * ExecRefreshMatView() checks that after taking the exclusive lock on the * matview. So at least one unique index is guaranteed to exist here - * because the lock is still being held. + * because the lock is still being held; so an Assert seems sufficient. */ Assert(foundUniqueIndex); @@ -845,6 +851,51 @@ refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence) RecentXmin, ReadNextMultiXactId(), relpersistence); } +/* + * Check whether specified index is usable for match merge. + */ +static bool +is_usable_unique_index(Relation indexRel) +{ + Form_pg_index indexStruct = indexRel->rd_index; + + /* + * Must be unique, valid, immediate, non-partial, and be defined over + * plain user columns (not expressions). We also require it to be a + * btree. Even if we had any other unique index kinds, we'd not know how + * to identify the corresponding equality operator, nor could we be sure + * that the planner could implement the required FULL JOIN with non-btree + * operators. 
+ */ + if (indexStruct->indisunique && + indexStruct->indimmediate && + indexRel->rd_rel->relam == BTREE_AM_OID && + IndexIsValid(indexStruct) && + RelationGetIndexPredicate(indexRel) == NIL && + indexStruct->indnatts > 0) + { + /* + * The point of groveling through the index columns individually is to + * reject both index expressions and system columns. Currently, + * matviews couldn't have OID columns so there's no way to create an + * index on a system column; but maybe someday that wouldn't be true, + * so let's be safe. + */ + int numatts = indexStruct->indnatts; + int i; + + for (i = 0; i < numatts; i++) + { + int attnum = indexStruct->indkey.values[i]; + + if (attnum <= 0) + return false; + } + return true; + } + return false; +} + /* * This should be used to test whether the backend is in a context where it is diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 8faae1d069..4d7fee0ecb 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -203,7 +203,6 @@ static void ri_GenerateQual(StringInfo buf, const char *leftop, Oid leftoptype, Oid opoid, const char *rightop, Oid rightoptype); -static void ri_add_cast_to(StringInfo buf, Oid typid); static void ri_GenerateQualCollation(StringInfo buf, Oid collation); static int ri_NullCheck(HeapTuple tup, const RI_ConstraintInfo *riinfo, bool rel_is_pk); @@ -2134,13 +2133,10 @@ quoteRelationName(char *buffer, Relation rel) /* * ri_GenerateQual --- generate a WHERE clause equating two variables * - * The idea is to append " sep leftop op rightop" to buf. The complexity - * comes from needing to be sure that the parser will select the desired - * operator. We always name the operator using OPERATOR(schema.op) syntax - * (readability isn't a big priority here), so as to avoid search-path - * uncertainties. 
We have to emit casts too, if either input isn't already - * the input type of the operator; else we are at the mercy of the parser's - * heuristics for ambiguous-operator resolution. + * This basically appends " sep leftop op rightop" to buf, adding casts + * and schema qualification as needed to ensure that the parser will select + * the operator we specify. leftop and rightop should be parenthesized + * if they aren't variables or parameters. */ static void ri_GenerateQual(StringInfo buf, @@ -2149,60 +2145,9 @@ ri_GenerateQual(StringInfo buf, Oid opoid, const char *rightop, Oid rightoptype) { - HeapTuple opertup; - Form_pg_operator operform; - char *oprname; - char *nspname; - - opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opoid)); - if (!HeapTupleIsValid(opertup)) - elog(ERROR, "cache lookup failed for operator %u", opoid); - operform = (Form_pg_operator) GETSTRUCT(opertup); - Assert(operform->oprkind == 'b'); - oprname = NameStr(operform->oprname); - - nspname = get_namespace_name(operform->oprnamespace); - - appendStringInfo(buf, " %s %s", sep, leftop); - if (leftoptype != operform->oprleft) - ri_add_cast_to(buf, operform->oprleft); - appendStringInfo(buf, " OPERATOR(%s.", quote_identifier(nspname)); - appendStringInfoString(buf, oprname); - appendStringInfo(buf, ") %s", rightop); - if (rightoptype != operform->oprright) - ri_add_cast_to(buf, operform->oprright); - - ReleaseSysCache(opertup); -} - -/* - * Add a cast specification to buf. We spell out the type name the hard way, - * intentionally not using format_type_be(). This is to avoid corner cases - * for CHARACTER, BIT, and perhaps other types, where specifying the type - * using SQL-standard syntax results in undesirable data truncation. By - * doing it this way we can be certain that the cast will have default (-1) - * target typmod. 
- */ -static void -ri_add_cast_to(StringInfo buf, Oid typid) -{ - HeapTuple typetup; - Form_pg_type typform; - char *typname; - char *nspname; - - typetup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); - if (!HeapTupleIsValid(typetup)) - elog(ERROR, "cache lookup failed for type %u", typid); - typform = (Form_pg_type) GETSTRUCT(typetup); - - typname = NameStr(typform->typname); - nspname = get_namespace_name(typform->typnamespace); - - appendStringInfo(buf, "::%s.%s", - quote_identifier(nspname), quote_identifier(typname)); - - ReleaseSysCache(typetup); + appendStringInfo(buf, " %s ", sep); + generate_operator_clause(buf, leftop, leftoptype, opoid, + rightop, rightoptype); } /* diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index b58ee3c387..2cd54ec33f 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -462,6 +462,7 @@ static char *generate_function_name(Oid funcid, int nargs, bool has_variadic, bool *use_variadic_p, ParseExprKind special_exprkind); static char *generate_operator_name(Oid operid, Oid arg1, Oid arg2); +static void add_cast_to(StringInfo buf, Oid typid); static char *generate_qualified_type_name(Oid typid); static text *string_to_text(char *str); static char *flatten_reloptions(Oid relid); @@ -10849,6 +10850,85 @@ generate_operator_name(Oid operid, Oid arg1, Oid arg2) return buf.data; } +/* + * generate_operator_clause --- generate a binary-operator WHERE clause + * + * This is used for internally-generated-and-executed SQL queries, where + * precision is essential and readability is secondary. The basic + * requirement is to append "leftop op rightop" to buf, where leftop and + * rightop are given as strings and are assumed to yield types leftoptype + * and rightoptype; the operator is identified by OID. The complexity + * comes from needing to be sure that the parser will select the desired + * operator when the query is parsed. 
We always name the operator using + * OPERATOR(schema.op) syntax, so as to avoid search-path uncertainties. + * We have to emit casts too, if either input isn't already the input type + * of the operator; else we are at the mercy of the parser's heuristics for + * ambiguous-operator resolution. The caller must ensure that leftop and + * rightop are suitable arguments for a cast operation; it's best to insert + * parentheses if they aren't just variables or parameters. + */ +void +generate_operator_clause(StringInfo buf, + const char *leftop, Oid leftoptype, + Oid opoid, + const char *rightop, Oid rightoptype) +{ + HeapTuple opertup; + Form_pg_operator operform; + char *oprname; + char *nspname; + + opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opoid)); + if (!HeapTupleIsValid(opertup)) + elog(ERROR, "cache lookup failed for operator %u", opoid); + operform = (Form_pg_operator) GETSTRUCT(opertup); + Assert(operform->oprkind == 'b'); + oprname = NameStr(operform->oprname); + + nspname = get_namespace_name(operform->oprnamespace); + + appendStringInfoString(buf, leftop); + if (leftoptype != operform->oprleft) + add_cast_to(buf, operform->oprleft); + appendStringInfo(buf, " OPERATOR(%s.", quote_identifier(nspname)); + appendStringInfoString(buf, oprname); + appendStringInfo(buf, ") %s", rightop); + if (rightoptype != operform->oprright) + add_cast_to(buf, operform->oprright); + + ReleaseSysCache(opertup); +} + +/* + * Add a cast specification to buf. We spell out the type name the hard way, + * intentionally not using format_type_be(). This is to avoid corner cases + * for CHARACTER, BIT, and perhaps other types, where specifying the type + * using SQL-standard syntax results in undesirable data truncation. By + * doing it this way we can be certain that the cast will have default (-1) + * target typmod. 
+ */ +static void +add_cast_to(StringInfo buf, Oid typid) +{ + HeapTuple typetup; + Form_pg_type typform; + char *typname; + char *nspname; + + typetup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(typetup)) + elog(ERROR, "cache lookup failed for type %u", typid); + typform = (Form_pg_type) GETSTRUCT(typetup); + + typname = NameStr(typform->typname); + nspname = get_namespace_name(typform->typnamespace); + + appendStringInfo(buf, "::%s.%s", + quote_identifier(nspname), quote_identifier(typname)); + + ReleaseSysCache(typetup); +} + /* * generate_qualified_type_name * Compute the name to display for a type specified by OID diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 01e76aea09..d0416e90fc 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -78,6 +78,10 @@ extern bool quote_all_identifiers; extern const char *quote_identifier(const char *ident); extern char *quote_qualified_identifier(const char *qualifier, const char *ident); +extern void generate_operator_clause(fmStringInfo buf, + const char *leftop, Oid leftoptype, + Oid opoid, + const char *rightop, Oid rightoptype); /* varchar.c */ extern int bpchartruelen(char *s, int len); -- 2.40.0