From a4d6d6a25c12ca9a19a5e7ec6f76115bced69521 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Mon, 19 Nov 2018 13:40:50 +1300 Subject: [PATCH] Don't forget about failed fsync() requests. If fsync() fails, md.c must keep the request in its bitmap, so that future attempts will try again. Back-patch to all supported releases. Author: Thomas Munro Reviewed-by: Amit Kapila Reported-by: Andrew Gierth Discussion: https://postgr.es/m/87y3i1ia4w.fsf%40news-spur.riddles.org.uk --- src/backend/storage/smgr/md.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index bfa065d9fb..91a290c357 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -1148,10 +1148,8 @@ mdsync(void) * The bitmap manipulations are slightly tricky, because we can call * AbsorbFsyncRequests() inside the loop and that could result in * bms_add_member() modifying and even re-palloc'ing the bitmapsets. - * This is okay because we unlink each bitmapset from the hashtable - * entry before scanning it. That means that any incoming fsync - * requests will be processed now if they reach the table before we - * begin to scan their fork. + * So we detach it, but if we fail we'll merge it with any new + * requests that have arrived in the meantime. */ for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) { @@ -1161,7 +1159,8 @@ mdsync(void) entry->requests[forknum] = NULL; entry->canceled[forknum] = false; - while ((segno = bms_first_member(requests)) >= 0) + segno = -1; + while ((segno = bms_next_member(requests, segno)) >= 0) { int failures; @@ -1242,6 +1241,7 @@ mdsync(void) longest = elapsed; total_elapsed += elapsed; processed++; + requests = bms_del_member(requests, segno); if (log_checkpoints) elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec", processed, @@ -1270,10 +1270,23 @@ mdsync(void) */ if (!FILE_POSSIBLY_DELETED(errno) || failures > 0) + { + Bitmapset *new_requests; + + /* + * We need to merge these unsatisfied requests with + * any others that have arrived since we started. + */ + new_requests = entry->requests[forknum]; + entry->requests[forknum] = + bms_join(new_requests, requests); + + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", path))); + } else ereport(DEBUG1, (errcode_for_file_access(), -- 2.40.0