+
+ /*
+ * The fsync table could contain requests to fsync segments
+ * that have been deleted (unlinked) by the time we get to
+ * them. Rather than just hoping an ENOENT (or EACCES on
+ * Windows) error can be ignored, what we do on error is
+ * absorb pending requests and then retry. Since mdunlink()
+ * queues a "cancel" message before actually unlinking, the
+ * fsync request is guaranteed to be marked canceled after the
+ * absorb if it really was this case. DROP DATABASE likewise
+ * has to tell us to forget fsync requests before it starts
+ * deletions.
+ */
+ for (failures = 0;; failures++) /* loop exits at "break"; failures = failed attempts so far */
+ {
+ SMgrRelation reln; /* smgr entry obtained for entry->rnode */
+ MdfdVec *seg; /* result of _mdfd_getseg(); checked for NULL before use */
+ char *path; /* segment path, used only in the messages below */
+ int save_errno; /* errno left by the failed open/fsync attempt */
+
+ /*
+ * Find or create an smgr hash entry for this relation.
+ * This may seem a bit unclean -- md calling smgr? But
+ * it's really the best solution. It ensures that the
+ * open file reference isn't permanently leaked if we get
+ * an error here. (You may say "but an unreferenced
+ * SMgrRelation is still a leak!" Not really, because the
+ * only case in which a checkpoint is done by a process
+ * that isn't about to shut down is in the checkpointer,
+ * and it will periodically do smgrcloseall(). This fact
+ * justifies our not closing the reln in the success path
+ * either, which is a good thing since in non-checkpointer
+ * cases we couldn't safely do that.)
+ */
+ reln = smgropen(entry->rnode, InvalidBackendId);
+
+ /*
+ * Attempt to open and fsync the target segment. The segment
+ * lookup is asked for the block at which segment "segno" starts
+ * (segno * RELSEG_SIZE); a NULL result is treated below exactly
+ * like a failed FileSync().
+ */
+ seg = _mdfd_getseg(reln, forknum,
+ (BlockNumber) segno * (BlockNumber) RELSEG_SIZE,
+ false,
+ EXTENSION_RETURN_NULL
+ | EXTENSION_DONT_CHECK_SIZE);
+
+ INSTR_TIME_SET_CURRENT(sync_start); /* taken after the open, so only FileSync() is timed */
+
+ if (seg != NULL &&
+ FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) >= 0)
+ {
+ /*
+ * Success; update statistics about sync timing: track the
+ * longest single sync, the running total, and the number of
+ * files processed, then drop this segment from the set of
+ * outstanding requests.
+ */
+ INSTR_TIME_SET_CURRENT(sync_end);
+ sync_diff = sync_end;
+ INSTR_TIME_SUBTRACT(sync_diff, sync_start);
+ elapsed = INSTR_TIME_GET_MICROSEC(sync_diff); /* microseconds */
+ if (elapsed > longest)
+ longest = elapsed;
+ total_elapsed += elapsed;
+ processed++;
+ requests = bms_del_member(requests, segno); /* segno is done */
+ if (log_checkpoints)
+ elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
+ processed,
+ FilePathName(seg->mdfd_vfd),
+ (double) elapsed / 1000); /* usec -> msec */
+
+ break; /* out of retry loop */
+ }
+
+ /*
+ * Compute file name for use in message. errno is saved first
+ * and put back afterwards, so that the FILE_POSSIBLY_DELETED()
+ * test and the %m in the messages below report the error left
+ * by the failed attempt above, whatever _mdfd_segpath() may do
+ * to errno.
+ */
+ save_errno = errno;
+ path = _mdfd_segpath(reln, forknum, (BlockNumber) segno);
+ errno = save_errno;
+
+ /*
+ * It is possible that the relation has been dropped or
+ * truncated since the fsync request was entered.
+ * Therefore, allow ENOENT, but only if we didn't fail
+ * already on this file. This applies both for
+ * _mdfd_getseg() and for FileSync, since fd.c might have
+ * closed the file behind our back.
+ *
+ * XXX is there any point in allowing more than one retry?
+ * Don't see one at the moment, but easy to change the
+ * test here if so.
+ *
+ * NOTE(review): the code tests FILE_POSSIBLY_DELETED(errno)
+ * rather than ENOENT directly; presumably the macro also
+ * accepts EACCES on Windows -- confirm against its definition.
+ */
+ if (!FILE_POSSIBLY_DELETED(errno) ||
+ failures > 0)
+ {
+ Bitmapset *new_requests;
+
+ /*
+ * We need to merge these unsatisfied requests with
+ * any others that have arrived since we started.
+ * This is done before reporting the error, so the
+ * entry is already updated when ereport() is called.
+ */
+ new_requests = entry->requests[forknum];
+ entry->requests[forknum] =
+ bms_join(new_requests, requests);
+
+ errno = save_errno; /* make sure %m below shows the original failure */
+ ereport(data_sync_elevel(ERROR),
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m",
+ path)));
+ }
+ else /* first failure with a "possibly deleted" errno: log at DEBUG1, then check for a cancel */
+ ereport(DEBUG1,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\" but retrying: %m",
+ path)));
+ pfree(path); /* the messages above were its only use */
+
+ /*
+ * Absorb incoming requests and check to see if a cancel
+ * arrived for this relation fork. If one did, stop retrying;
+ * otherwise go around the loop again with failures incremented.
+ */
+ AbsorbFsyncRequests();
+ absorb_counter = FSYNCS_PER_ABSORB; /* we just absorbed, so might as well reset the counter */
+
+ if (entry->canceled[forknum])
+ break; /* canceled: give up on this segment instead of retrying */
+ } /* end retry loop */