From 258cef12540fa1cb244881a0f019cefd698c809e Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 11 Apr 2017 09:08:36 -0400 Subject: [PATCH] Fix possibile deadlock when dropping partitions. heap_drop_with_catalog and RangeVarCallbackForDropRelation should lock the parent before locking the target relation. Amit Langote Discussion: http://postgr.es/m/29588799-a8ce-b0a2-3dae-f39ff6d35922@lab.ntt.co.jp --- src/backend/catalog/heap.c | 30 +++++++++++++++++------------- src/backend/commands/tablecmds.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index a264f1b9eb..4a5f545dc6 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1759,29 +1759,33 @@ void heap_drop_with_catalog(Oid relid) { Relation rel; + HeapTuple tuple; Oid parentOid; Relation parent = NULL; /* - * Open and lock the relation. + * To drop a partition safely, we must grab exclusive lock on its parent, + * because another backend might be about to execute a query on the parent + * table. If it relies on previously cached partition descriptor, then + * it could attempt to access the just-dropped relation as its partition. + * We must therefore take a table lock strong enough to prevent all + * queries on the table from proceeding until we commit and send out a + * shared-cache-inval notice that will make them update their index lists. */ - rel = relation_open(relid, AccessExclusiveLock); - - /* - * If the relation is a partition, we must grab exclusive lock on its - * parent because we need to update its partition descriptor. We must take - * a table lock strong enough to prevent all queries on the parent from - * proceeding until we commit and send out a shared-cache-inval notice - * that will make them update their partition descriptor. Sometimes, doing - * this is cycles spent uselessly, especially if the parent will be - * dropped as part of the same command anyway. - */ - if (rel->rd_rel->relispartition) + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (((Form_pg_class) GETSTRUCT(tuple))->relispartition) { parentOid = get_partition_parent(relid); parent = heap_open(parentOid, AccessExclusiveLock); } + ReleaseSysCache(tuple); + + /* + * Open and lock the relation. + */ + rel = relation_open(relid, AccessExclusiveLock); + /* * There can no longer be anyone *else* touching the relation, but we * might still have open queries or cursors, or pending trigger events, in diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index abb262b851..a6a9f54b13 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -271,6 +271,7 @@ struct DropRelationCallbackState { char relkind; Oid heapOid; + Oid partParentOid; bool concurrent; }; @@ -1049,6 +1050,7 @@ RemoveRelations(DropStmt *drop) /* Look up the appropriate relation using namespace search. */ state.relkind = relkind; state.heapOid = InvalidOid; + state.partParentOid = InvalidOid; state.concurrent = drop->concurrent; relOid = RangeVarGetRelidExtended(rel, lockmode, true, false, @@ -1078,6 +1080,8 @@ RemoveRelations(DropStmt *drop) /* * Before acquiring a table lock, check whether we have sufficient rights. * In the case of DROP INDEX, also try to lock the table before the index. + * Also, if the table to be dropped is a partition, we try to lock the parent + * first. */ static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, @@ -1087,6 +1091,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, struct DropRelationCallbackState *state; char relkind; char expected_relkind; + bool is_partition; Form_pg_class classform; LOCKMODE heap_lockmode; @@ -1106,6 +1111,17 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, state->heapOid = InvalidOid; } + /* + * Similarly, if we previously locked some other partition's heap, and + * the name we're looking up no longer refers to that relation, release + * the now-useless lock. + */ + if (relOid != oldRelOid && OidIsValid(state->partParentOid)) + { + UnlockRelationOid(state->partParentOid, AccessExclusiveLock); + state->partParentOid = InvalidOid; + } + /* Didn't find a relation, so no need for locking or permission checks. */ if (!OidIsValid(relOid)) return; @@ -1114,6 +1130,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, if (!HeapTupleIsValid(tuple)) return; /* concurrently dropped, so nothing to do */ classform = (Form_pg_class) GETSTRUCT(tuple); + is_partition = classform->relispartition; /* * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE, @@ -1157,6 +1174,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, if (OidIsValid(state->heapOid)) LockRelationOid(state->heapOid, heap_lockmode); } + + /* + * Similarly, if the relation is a partition, we must acquire lock on its + * parent before locking the partition. That's because queries lock the + * parent before its partitions, so we risk deadlock it we do it the other + * way around. + */ + if (is_partition && relOid != oldRelOid) + { + state->partParentOid = get_partition_parent(relOid); + if (OidIsValid(state->partParentOid)) + LockRelationOid(state->partParentOid, AccessExclusiveLock); + } } /* -- 2.40.0