From 8694cc96b52a967a49725f32be7aa77fd3b6ac25 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Fri, 23 Mar 2018 19:14:12 +0300 Subject: [PATCH] Exclude unlogged tables from base backups Exclude unlogged tables from base backup entirely except init fork which marks created unlogged table. The next question is do not backup temp table but it's a story for separate patch. Author: David Steele Review by: Adam Brightwell, Masahiko Sawada Discussion: https://www.postgresql.org/message-id/flat/04791bab-cb04-ba43-e9c0-664a4c1ffb2c@pgmasters.net --- doc/src/sgml/protocol.sgml | 6 ++ src/backend/replication/basebackup.c | 64 ++++++++++++++++++++ src/backend/storage/file/reinit.c | 4 +- src/bin/pg_basebackup/t/010_pg_basebackup.pl | 42 ++++++++++++- src/include/storage/reinit.h | 2 + 5 files changed, 113 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 366b684293..472bd3ef69 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2568,6 +2568,12 @@ The commands accepted in replication mode are: with pgsql_tmp. + + + Unlogged relations, except for the init fork which is required to + recreate the (empty) unlogged relation on recovery. + + pg_wal, including subdirectories. If the backup is run diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 185f32a5f9..eb6eb7206d 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -26,6 +26,7 @@ #include "nodes/pg_list.h" #include "pgtar.h" #include "pgstat.h" +#include "port.h" #include "postmaster/syslogger.h" #include "replication/basebackup.h" #include "replication/walsender.h" @@ -33,6 +34,7 @@ #include "storage/dsm_impl.h" #include "storage/fd.h" #include "storage/ipc.h" +#include "storage/reinit.h" #include "utils/builtins.h" #include "utils/ps_status.h" #include "utils/relcache.h" @@ -958,12 +960,44 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, char pathbuf[MAXPGPATH * 2]; struct stat statbuf; int64 size = 0; + const char *lastDir; /* Split last dir from parent path. */ + bool isDbDir = false; /* Does this directory contain relations? */ + + /* + * Determine if the current path is a database directory that can + * contain relations. + * + * Start by finding the location of the delimiter between the parent + * path and the current path. + */ + lastDir = last_dir_separator(path); + + /* Does this path look like a database path (i.e. all digits)? */ + if (lastDir != NULL && + strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1)) + { + /* Part of path that contains the parent directory. */ + int parentPathLen = lastDir - path; + + /* + * Mark path as a database directory if the parent path is either + * $PGDATA/base or a tablespace version path. + */ + if (strncmp(path, "./base", parentPathLen) == 0 || + (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) && + strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1), + TABLESPACE_VERSION_DIRECTORY, + sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0)) + isDbDir = true; + } dir = AllocateDir(path); while ((de = ReadDir(dir, path)) != NULL) { int excludeIdx; bool excludeFound; + ForkNumber relForkNum; /* Type of fork if file is a relation */ + int relOidChars; /* Chars in filename that are the rel oid */ /* Skip special stuff */ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) @@ -1007,6 +1041,36 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, if (excludeFound) continue; + /* Exclude all forks for unlogged tables except the init fork */ + if (isDbDir && + parse_filename_for_nontemp_relation(de->d_name, &relOidChars, + &relForkNum)) + { + /* Never exclude init forks */ + if (relForkNum != INIT_FORKNUM) + { + char initForkFile[MAXPGPATH]; + char relOid[OIDCHARS + 1]; + + /* + * If any other type of fork, check if there is an init fork + * with the same OID. If so, the file can be excluded. + */ + strncpy(relOid, de->d_name, relOidChars); + snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init", + path, relOid); + + if (lstat(initForkFile, &statbuf) == 0) + { + elog(DEBUG2, + "unlogged relation file \"%s\" excluded from backup", + de->d_name); + + continue; + } + } + } + snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name); /* Skip pg_control here to back up it last */ diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index 92363ae6ad..7b8a1253c4 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -28,8 +28,6 @@ static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op); static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op); -static bool parse_filename_for_nontemp_relation(const char *name, - int *oidchars, ForkNumber *fork); typedef struct { @@ -373,7 +371,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) * portion of the filename. This is critical to protect against a possible * buffer overrun. */ -static bool +bool parse_filename_for_nontemp_relation(const char *name, int *oidchars, ForkNumber *fork) { diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index cdf4f5be37..455c7fca0d 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -4,7 +4,7 @@ use Cwd; use Config; use PostgresNode; use TestLib; -use Test::More tests => 79; +use Test::More tests => 87; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -66,6 +66,16 @@ foreach my $filename ( # positive. $node->safe_psql('postgres', 'SELECT 1;'); +# Create an unlogged table to test that forks other than init are not copied. +$node->safe_psql('postgres', 'CREATE UNLOGGED TABLE base_unlogged (id int)'); + +my $baseUnloggedPath = $node->safe_psql('postgres', + q{select pg_relation_filepath('base_unlogged')}); + +# Make sure main and init forks exist +ok(-f "$pgdata/${baseUnloggedPath}_init", 'unlogged init fork in base'); +ok(-f "$pgdata/$baseUnloggedPath", 'unlogged main fork in base'); + $node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backup", '-X', 'none' ], 'pg_basebackup runs'); ok(-f "$tempdir/backup/PG_VERSION", 'backup was created'); @@ -96,6 +106,12 @@ foreach my $filename ( ok(!-f "$tempdir/backup/$filename", "$filename not copied"); } +# Unlogged relation forks other than init should not be copied +ok(-f "$tempdir/backup/${baseUnloggedPath}_init", + 'unlogged init fork in backup'); +ok(!-f "$tempdir/backup/$baseUnloggedPath", + 'unlogged main fork not in backup'); + # Make sure existing backup_label was ignored. isnt(slurp_file("$tempdir/backup/backup_label"), 'DONOTCOPY', 'existing backup_label not copied'); @@ -147,7 +163,7 @@ unlink "$pgdata/$superlongname"; # skip on Windows. SKIP: { - skip "symlinks not supported on Windows", 11 if ($windows_os); + skip "symlinks not supported on Windows", 15 if ($windows_os); # Move pg_replslot out of $pgdata and create a symlink to it. $node->stop; @@ -177,6 +193,19 @@ SKIP: my @tblspc_tars = glob "$tempdir/tarbackup2/[0-9]*.tar"; is(scalar(@tblspc_tars), 1, 'one tablespace tar was created'); + # Create an unlogged table to test that forks other than init are not copied. + $node->safe_psql('postgres', + 'CREATE UNLOGGED TABLE tblspc1_unlogged (id int) TABLESPACE tblspc1;'); + + my $tblspc1UnloggedPath = $node->safe_psql( + 'postgres', q{select pg_relation_filepath('tblspc1_unlogged')}); + + # Make sure main and init forks exist + ok(-f "$pgdata/${tblspc1UnloggedPath}_init", + 'unlogged init fork in tablespace'); + ok(-f "$pgdata/$tblspc1UnloggedPath", + 'unlogged main fork in tablespace'); + $node->command_fails( [ 'pg_basebackup', '-D', "$tempdir/backup1", '-Fp' ], 'plain format with tablespaces fails without tablespace mapping'); @@ -195,11 +224,20 @@ SKIP: "tablespace symlink was updated"); closedir $dh; + # Unlogged relation forks other than init should not be copied + my ($tblspc1UnloggedBackupPath) = $tblspc1UnloggedPath =~ /[^\/]*\/[^\/]*\/[^\/]*$/g; + + ok(-f "$tempdir/tbackup/tblspc1/${tblspc1UnloggedBackupPath}_init", + 'unlogged init fork in tablespace backup'); + ok(!-f "$tempdir/tbackup/tblspc1/$tblspc1UnloggedBackupPath", + 'unlogged main fork not in tablespace backup'); + ok( -d "$tempdir/backup1/pg_replslot", 'pg_replslot symlink copied as directory'); mkdir "$tempdir/tbl=spc2"; $node->safe_psql('postgres', "DROP TABLE test1;"); + $node->safe_psql('postgres', "DROP TABLE tblspc1_unlogged;"); $node->safe_psql('postgres', "DROP TABLESPACE tblspc1;"); $node->safe_psql('postgres', "CREATE TABLESPACE tblspc2 LOCATION '$shorter_tempdir/tbl=spc2';"); diff --git a/src/include/storage/reinit.h b/src/include/storage/reinit.h index addda2c0ea..5c34f88b95 100644 --- a/src/include/storage/reinit.h +++ b/src/include/storage/reinit.h @@ -16,6 +16,8 @@ #define REINIT_H extern void ResetUnloggedRelations(int op); +extern bool parse_filename_for_nontemp_relation( + const char *name, int *oidchars, ForkNumber *fork); #define UNLOGGED_RELATION_CLEANUP 0x0001 #define UNLOGGED_RELATION_INIT 0x0002 -- 2.40.0