From 279628a0a7cf582f7dfb68e25b7b76183dd8ff2f Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 17 Jan 2013 15:55:10 -0300 Subject: [PATCH] Accelerate end-of-transaction dropping of relations MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When relations are dropped, at end of transaction we need to remove the files and clean the buffer pool of buffers containing pages of those relations. Previously we would scan the buffer pool once per relation to clean up buffers. When there are many relations to drop, the repeated scans make this process slow; so we now instead pass a list of relations to drop and scan the pool once, checking each buffer against the passed list. When the number of relations is larger than a threshold (which as of this patch is being set to 20 relations) we sort the array before starting, and bsearch the array; when it's smaller, we simply scan the array linearly each time, because that's faster. The exact optimal threshold value depends on many factors, but the difference is not likely to be significant enough to justify making it user-settable. This has been measured to be a significant win (a 15x win when dropping 100,000 relations; an extreme case, but reportedly a real one). 
Author: Tomas Vondra, some tweaks by me Reviewed by: Robert Haas, Shigeru Hanada, Andres Freund, Álvaro Herrera --- src/backend/catalog/storage.c | 26 ++++++- src/backend/storage/buffer/bufmgr.c | 109 +++++++++++++++++++++++++--- src/backend/storage/smgr/smgr.c | 82 ++++++++++++++++++++- src/include/storage/bufmgr.h | 2 +- src/include/storage/smgr.h | 1 + 5 files changed, 206 insertions(+), 14 deletions(-) diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 95eda7af47..c43bebce85 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -312,6 +312,10 @@ smgrDoPendingDeletes(bool isCommit) PendingRelDelete *pending; PendingRelDelete *prev; PendingRelDelete *next; + int nrels = 0, + i = 0, + maxrels = 8; + SMgrRelation *srels = palloc(maxrels * sizeof(SMgrRelation)); prev = NULL; for (pending = pendingDeletes; pending != NULL; pending = next) @@ -335,14 +339,32 @@ smgrDoPendingDeletes(bool isCommit) SMgrRelation srel; srel = smgropen(pending->relnode, pending->backend); - smgrdounlink(srel, false); - smgrclose(srel); + + /* extend the array if needed (double the size) */ + if (maxrels <= nrels) + { + maxrels *= 2; + srels = repalloc(srels, sizeof(SMgrRelation) * maxrels); + } + + srels[nrels++] = srel; } /* must explicitly free the list entry */ pfree(pending); /* prev does not change */ } } + + if (nrels > 0) + { + smgrdounlinkall(srels, nrels, false); + + for (i = 0; i < nrels; i++) + smgrclose(srels[i]); + } + + pfree(srels); + } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 03ed41dc15..13b80aefc5 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -62,6 +62,7 @@ #define BUF_WRITTEN 0x01 #define BUF_REUSABLE 0x02 +#define DROP_RELS_BSEARCH_THRESHOLD 20 /* GUC variables */ bool zero_damaged_pages = false; @@ -107,6 +108,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, bool *foundPtr); static void 
FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln); static void AtProcExit_Buffers(int code, Datum arg); +static int rnode_comparator(const void *p1, const void *p2); /* @@ -2086,43 +2088,103 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, } /* --------------------------------------------------------------------- - * DropRelFileNodeAllBuffers + * DropRelFileNodesAllBuffers * * This function removes from the buffer pool all the pages of all - * forks of the specified relation. It's equivalent to calling - * DropRelFileNodeBuffers once per fork with firstDelBlock = 0. + * forks of the specified relations. It's equivalent to calling + * DropRelFileNodeBuffers once per fork per relation with + * firstDelBlock = 0. * -------------------------------------------------------------------- */ void -DropRelFileNodeAllBuffers(RelFileNodeBackend rnode) +DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes) { - int i; + int i, + n = 0; + RelFileNode *nodes; + bool use_bsearch; + + if (nnodes == 0) + return; + + nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */ /* If it's a local relation, it's localbuf.c's problem. */ - if (RelFileNodeBackendIsTemp(rnode)) + for (i = 0; i < nnodes; i++) { - if (rnode.backend == MyBackendId) - DropRelFileNodeAllLocalBuffers(rnode.node); + if (RelFileNodeBackendIsTemp(rnodes[i])) + { + if (rnodes[i].backend == MyBackendId) + DropRelFileNodeAllLocalBuffers(rnodes[i].node); + } + else + nodes[n++] = rnodes[i].node; + } + + /* + * If there are no non-local relations, then we're done. Release the memory + * and return. + */ + if (n == 0) + { + pfree(nodes); return; } + /* + * For low number of relations to drop just use a simple walk through, to + * save the bsearch overhead. The threshold to use is rather a guess than an + * exactly determined value, as it depends on many factors (CPU and RAM + * speeds, amount of shared buffers etc.). 
+ */ + use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD; + + /* sort the list of rnodes if necessary */ + if (use_bsearch) + pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator); + for (i = 0; i < NBuffers; i++) { + RelFileNode *rnode = NULL; volatile BufferDesc *bufHdr = &BufferDescriptors[i]; /* * As in DropRelFileNodeBuffers, an unlocked precheck should be safe * and saves some cycles. */ - if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node)) + + if (!use_bsearch) + { + int j; + + for (j = 0; j < n; j++) + { + if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j])) + { + rnode = &nodes[j]; + break; + } + } + } + else + { + rnode = bsearch((const void *) &(bufHdr->tag.rnode), + nodes, n, sizeof(RelFileNode), + rnode_comparator); + } + + /* buffer doesn't belong to any of the given relfilenodes; skip it */ + if (rnode == NULL) continue; LockBufHdr(bufHdr); - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node)) + if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode))) InvalidateBuffer(bufHdr); /* releases spinlock */ else UnlockBufHdr(bufHdr); } + + pfree(nodes); } /* --------------------------------------------------------------------- @@ -2953,3 +3015,30 @@ local_buffer_write_error_callback(void *arg) pfree(path); } } + +/* + * RelFileNode qsort/bsearch comparator; see RelFileNodeEquals. 
+ */ +static int +rnode_comparator(const void *p1, const void *p2) +{ + RelFileNode n1 = *(RelFileNode *) p1; + RelFileNode n2 = *(RelFileNode *) p2; + + if (n1.relNode < n2.relNode) + return -1; + else if (n1.relNode > n2.relNode) + return 1; + + if (n1.dbNode < n2.dbNode) + return -1; + else if (n1.dbNode > n2.dbNode) + return 1; + + if (n1.spcNode < n2.spcNode) + return -1; + else if (n1.spcNode > n2.spcNode) + return 1; + else + return 0; +} diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 925238cd89..3aa6325481 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -390,7 +390,7 @@ smgrdounlink(SMgrRelation reln, bool isRedo) * Get rid of any remaining buffers for the relation. bufmgr will just * drop them without bothering to write the contents. */ - DropRelFileNodeAllBuffers(rnode); + DropRelFileNodesAllBuffers(&rnode, 1); /* * It'd be nice to tell the stats collector to forget it immediately, too. @@ -419,6 +419,86 @@ smgrdounlink(SMgrRelation reln, bool isRedo) (*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo); } +/* + * smgrdounlinkall() -- Immediately unlink all forks of all given relations + * + * All forks of all given relations are removed from the store. This + * should not be used during transactional operations, since it can't be + * undone. + * + * If isRedo is true, it is okay for the underlying file(s) to be gone + * already. + * + * This is equivalent to calling smgrdounlink for each relation, but it's + * significantly quicker so should be preferred when possible. 
+ */ +void +smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) +{ + int i = 0; + RelFileNodeBackend *rnodes; + ForkNumber forknum; + + if (nrels == 0) + return; + + /* + * create an array which contains all relations to be dropped, and + * close each relation's forks at the smgr level while at it + */ + rnodes = palloc(sizeof(RelFileNodeBackend) * nrels); + for (i = 0; i < nrels; i++) + { + RelFileNodeBackend rnode = rels[i]->smgr_rnode; + int which = rels[i]->smgr_which; + + rnodes[i] = rnode; + + /* Close the forks at smgr level */ + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + (*(smgrsw[which].smgr_close)) (rels[i], forknum); + } + + /* + * Get rid of any remaining buffers for the relations. bufmgr will just + * drop them without bothering to write the contents. + */ + DropRelFileNodesAllBuffers(rnodes, nrels); + + /* + * It'd be nice to tell the stats collector to forget them immediately, too. + * But we can't because we don't know the OIDs. + */ + + /* + * Send a shared-inval message to force other backends to close any + * dangling smgr references they may have for these rels. We should do + * this before starting the actual unlinking, in case we fail partway + * through that step. Note that the sinval messages will eventually come + * back to this backend, too, and thereby provide a backstop that we closed + * our own smgr rel. + */ + for (i = 0; i < nrels; i++) + CacheInvalidateSmgr(rnodes[i]); + + /* + * Delete the physical file(s). + * + * Note: smgr_unlink must treat deletion failure as a WARNING, not an + * ERROR, because we've already decided to commit or abort the current + * xact. + */ + + for (i = 0; i < nrels; i++) + { + int which = rels[i]->smgr_which; + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + (*(smgrsw[which].smgr_unlink)) (rnodes[i], forknum, isRedo); + } + + pfree(rnodes); +} + /* * smgrdounlinkfork() -- Immediately unlink one fork of a relation. 
* diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index d34034bcb7..2ad536b745 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -188,7 +188,7 @@ extern void FlushRelationBuffers(Relation rel); extern void FlushDatabaseBuffers(Oid dbid); extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock); -extern void DropRelFileNodeAllBuffers(RelFileNodeBackend rnode); +extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes); extern void DropDatabaseBuffers(Oid dbid); #define RelationGetNumberOfBlocks(reln) \ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 4547a0f518..98b6f13137 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -85,6 +85,7 @@ extern void smgrcloseall(void); extern void smgrclosenode(RelFileNodeBackend rnode); extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrdounlink(SMgrRelation reln, bool isRedo); +extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo); extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); -- 2.40.0