#include "postgres_fe.h"
+#include "access/visibilitymap.h"
#include "pg_upgrade.h"
+#include "storage/bufpage.h"
+#include "storage/checksum.h"
+#include "storage/checksum_impl.h"
+#include <sys/stat.h>
#include <fcntl.h>
+#define BITS_PER_HEAPBLOCK_OLD 1
#ifndef WIN32
#endif
+/*
+ * rewriteVisibilityMap()
+ *
+ * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
+ * visibility map included one bit per heap page; it now includes two.
+ * When upgrading a cluster from before that time to a current PostgreSQL
+ * version, we could refuse to copy visibility maps from the old cluster
+ * to the new cluster; the next VACUUM would recreate them, but at the
+ * price of scanning the entire table.  So, instead, we rewrite the old
+ * visibility maps in the new format.  That way, the all-visible bit
+ * remains set for the pages for which it was set previously.  The
+ * all-frozen bit is never set by this conversion; we leave that to
+ * VACUUM.
+ *
+ * fromfile: path of the old-format visibility map fork to read.
+ * tofile:   path of the new-format file to create.
+ * force:    if false, the destination is opened with O_EXCL, so an
+ *           already-existing tofile is an error; if true it is reused.
+ *
+ * Returns NULL on success, otherwise an error message.  Both file
+ * descriptors are closed on every path that opened them.  A read failure
+ * or a trailing partial page in fromfile is reported as an error rather
+ * than being silently treated as end-of-file.
+ *
+ * NOTE(review): the open() calls pass no binary-mode flag; confirm whether
+ * one is needed on Windows builds.
+ */
+const char *
+rewriteVisibilityMap(const char *fromfile, const char *tofile, bool force)
+{
+	int			src_fd;
+	int			dst_fd;
+	char		buffer[BLCKSZ];
+	char		new_vmbuf[BLCKSZ];
+	off_t		src_filesize;
+	off_t		totalBytesRead = 0;
+	int			rewriteVmBytesPerPage;
+	BlockNumber new_blkno = 0;
+	struct stat statbuf;
+	const char *msg = NULL;
+
+	if ((fromfile == NULL) || (tofile == NULL))
+		return "Invalid old file or new file";
+
+	/*
+	 * Number of old-format payload bytes that fill one new-format page: each
+	 * old byte expands to two new bytes, so half of a page's payload.
+	 */
+	rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
+
+	if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0)
+		return getErrorText();
+
+	/*
+	 * On the failure paths below, fetch the error text *before* close(), so
+	 * that close() cannot disturb whatever error state getErrorText()
+	 * reports.
+	 */
+	if (fstat(src_fd, &statbuf) != 0)
+	{
+		msg = getErrorText();
+		close(src_fd);
+		return msg;
+	}
+
+	if ((dst_fd = open(tofile, O_RDWR | O_CREAT | (force ? 0 : O_EXCL), S_IRUSR | S_IWUSR)) < 0)
+	{
+		msg = getErrorText();
+		close(src_fd);
+		return msg;
+	}
+
+	/* Save old file size */
+	src_filesize = statbuf.st_size;
+
+	/*
+	 * Turn each visibility map page into 2 pages one by one.  Each new page
+	 * has the same page header as the old one.  If the last section of the
+	 * last page is empty, we skip it, mostly to avoid turning one-page
+	 * visibility maps for small relations into two pages needlessly.
+	 */
+	while (totalBytesRead < src_filesize)
+	{
+		ssize_t		bytesRead;
+		int			old_off;
+		int			old_break;
+		bool		old_lastblk;
+		PageHeaderData pageheader;
+
+		bytesRead = read(src_fd, buffer, BLCKSZ);
+		if (bytesRead != BLCKSZ)
+		{
+			/* Don't mistake an I/O error or a torn page for a clean EOF */
+			if (bytesRead < 0)
+				msg = getErrorText();
+			else
+				msg = "partial page found in visibility map file";
+			goto done;
+		}
+
+		/*
+		 * Detect the last old page from how much of the *old* file has been
+		 * consumed.  The count of new pages written must not be used here:
+		 * it advances up to twice as fast as the old block number.
+		 */
+		totalBytesRead += BLCKSZ;
+		old_lastblk = (totalBytesRead == src_filesize);
+
+		/* Save the page header data */
+		memcpy(&pageheader, buffer, SizeOfPageHeaderData);
+
+		/*
+		 * old_off is the current read offset within the old page; old_break
+		 * is the offset at which the new page currently being built is
+		 * complete.  Offsets are used instead of pointers so that advancing
+		 * old_break past the end of the buffer is well-defined.
+		 */
+		old_off = (int) SizeOfPageHeaderData;
+		old_break = old_off + rewriteVmBytesPerPage;
+
+		while (old_break <= BLCKSZ)
+		{
+			char	   *new_cur;
+			bool		empty = true;
+			bool		old_lastpart;
+
+			/* First, copy old page header to new page */
+			memcpy(new_vmbuf, &pageheader, SizeOfPageHeaderData);
+			new_cur = new_vmbuf + SizeOfPageHeaderData;
+
+			/* Rewriting the last part of the last old page? */
+			old_lastpart = old_lastblk && (old_break == BLCKSZ);
+
+			/* Process old page bytes one by one, and turn it into new page. */
+			while (old_off < old_break)
+			{
+				uint8		byte = (uint8) buffer[old_off];
+				uint16		new_vmbits = 0;
+				int			i;
+
+				/* Generate new format bits while keeping old information */
+				for (i = 0; i < BITS_PER_BYTE; i++)
+				{
+					if (byte & (1 << (BITS_PER_HEAPBLOCK_OLD * i)))
+					{
+						empty = false;
+						new_vmbits |= 1 << (BITS_PER_HEAPBLOCK * i);
+					}
+				}
+
+				/*
+				 * Store the two output bytes low-order byte first, so that
+				 * old bits 0..3 land in the first new byte and 4..7 in the
+				 * second regardless of host byte order.  (A memcpy of the
+				 * uint16 would swap them on big-endian machines.)
+				 */
+				new_cur[0] = (char) (new_vmbits & 0xFF);
+				new_cur[1] = (char) (new_vmbits >> 8);
+
+				old_off += BITS_PER_HEAPBLOCK_OLD;
+				new_cur += BITS_PER_HEAPBLOCK;
+			}
+
+			/* If the last part of the last page is empty, skip writing it */
+			if (old_lastpart && empty)
+				break;
+
+			/*
+			 * Set new checksum for the visibility map page.  The page is
+			 * written for the new cluster, so only the new cluster's
+			 * checksum setting decides whether one is needed.
+			 */
+			if (new_cluster.controldata.data_checksum_version != 0)
+				((PageHeader) new_vmbuf)->pd_checksum =
+					pg_checksum_page(new_vmbuf, new_blkno);
+
+			errno = 0;
+			if (write(dst_fd, new_vmbuf, BLCKSZ) != BLCKSZ)
+			{
+				/*
+				 * A short write need not set errno; assume the problem is
+				 * lack of disk space.  (Presumes getErrorText() reports
+				 * errno -- confirm.)
+				 */
+				if (errno == 0)
+					errno = ENOSPC;
+				msg = getErrorText();
+				goto done;
+			}
+
+			old_break += rewriteVmBytesPerPage;
+			new_blkno++;
+		}
+	}
+
+done:
+	/* Close files */
+	close(dst_fd);
+	close(src_fd);
+
+	return msg;
+}
+
void
check_hard_link(void)
{
*/
#define VISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031
+/*
+ * The format of the visibility map changed in 9.6, as of this catalog
+ * version: it now stores two bits per heap page instead of one.
+ */
+#define VISIBILITY_MAP_FROZEN_BIT_CAT_VER 201603011
/*
* pg_multixact format changed in 9.3 commit 0ac5ad5134f2769ccbaefec73844f85,
* ("Improve concurrency of foreign key locking") which also updated catalog
const char *copyFile(const char *src, const char *dst, bool force);
const char *linkFile(const char *src, const char *dst);
+const char *rewriteVisibilityMap(const char *fromfile, const char *tofile,
+ bool force);
void check_hard_link(void);
FILE *fopen_priv(const char *path, const char *mode);
#include "pg_upgrade.h"
+#include <sys/stat.h>
#include "catalog/pg_class.h"
#include "access/transam.h"
static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(FileNameMap *map, const char *suffix);
+static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
/*
{
int mapnum;
bool vm_crashsafe_match = true;
+ bool vm_must_add_frozenbit = false;
/*
* Do the old and new cluster disagree on the crash-safetiness of the vm
new_cluster.controldata.cat_ver >= VISIBILITY_MAP_CRASHSAFE_CAT_VER)
vm_crashsafe_match = false;
+ /*
+ * Do we need to rewrite visibilitymap?
+ */
+ if (old_cluster.controldata.cat_ver < VISIBILITY_MAP_FROZEN_BIT_CAT_VER &&
+ new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
+ vm_must_add_frozenbit = true;
+
for (mapnum = 0; mapnum < size; mapnum++)
{
if (old_tablespace == NULL ||
strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
{
/* transfer primary file */
- transfer_relfile(&maps[mapnum], "");
+ transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit);
/* fsm/vm files added in PG 8.4 */
if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
/*
* Copy/link any fsm and vm files, if they exist
*/
- transfer_relfile(&maps[mapnum], "_fsm");
+ transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
if (vm_crashsafe_match)
- transfer_relfile(&maps[mapnum], "_vm");
+ transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
}
}
}
/*
* transfer_relfile()
*
- * Copy or link file from old cluster to new one.
+ * Copy or link file from old cluster to new one. If vm_must_add_frozenbit
+ * is true, visibility map forks are converted and rewritten, even in link
+ * mode.
*/
static void
-transfer_relfile(FileNameMap *map, const char *type_suffix)
+transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit)
{
const char *msg;
char old_file[MAXPGPATH];
char new_file[MAXPGPATH];
- int fd;
int segno;
char extent_suffix[65];
+ struct stat statbuf;
/*
* Now copy/link any related segments as well. Remember, PG breaks large
if (type_suffix[0] != '\0' || segno != 0)
{
/* Did file open fail? */
- if ((fd = open(old_file, O_RDONLY, 0)) == -1)
+ if (stat(old_file, &statbuf) != 0)
{
/* File does not exist? That's OK, just return */
if (errno == ENOENT)
map->nspname, map->relname, old_file, new_file,
getErrorText());
}
- close(fd);
+
+ /* If file is empty, just return */
+ if (statbuf.st_size == 0)
+ return;
}
unlink(new_file);
{
pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
- if ((msg = copyFile(old_file, new_file, true)) != NULL)
+ /* Rewrite visibility map if needed */
+ if (vm_must_add_frozenbit && (strcmp(type_suffix, "_vm") == 0))
+ msg = rewriteVisibilityMap(old_file, new_file, true);
+ else
+ msg = copyFile(old_file, new_file, true);
+
+ if (msg)
pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
map->nspname, map->relname, old_file, new_file, msg);
}
{
pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file);
- if ((msg = linkFile(old_file, new_file)) != NULL)
+ /* Rewrite visibility map if needed */
+ if (vm_must_add_frozenbit && (strcmp(type_suffix, "_vm") == 0))
+ msg = rewriteVisibilityMap(old_file, new_file, true);
+ else
+ msg = linkFile(old_file, new_file);
+
+ if (msg)
pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
map->nspname, map->relname, old_file, new_file, msg);
}