]> granicus.if.org Git - postgresql/commitdiff
Use whitelist to choose files scanned with pg_verify_checksums
authorMichael Paquier <michael@paquier.xyz>
Fri, 19 Oct 2018 13:44:12 +0000 (22:44 +0900)
committerMichael Paquier <michael@paquier.xyz>
Fri, 19 Oct 2018 13:44:12 +0000 (22:44 +0900)
The original implementation of pg_verify_checksums used a blacklist to
decide which files should be skipped for scanning as they do not include
data checksums, like pg_internal.init or pg_control.  However, this
missed two things:
- Some files are created within builds of EXEC_BACKEND and these were
not listed, causing failures on Windows.
- Extensions may create custom files in data folders, causing the tool
to equally fail.

This commit switches to a whitelist-like method instead by checking if
the files to scan are authorized relation files.  This is close to a
reverse-engineering of what is defined in relpath.c in charge of
building the relation paths, and we could consider refactoring what this
patch does so as all routines are in a single place.  This is left for
later.

This is based on a suggestion from Andres Freund.  TAP tests are updated
so as multiple file patterns are tested.  The bug has been spotted by
various buildfarm members as a result of b34e84f which has introduced
the TAP tests of pg_verify_checksums.

Author: Michael Paquier
Reviewed-by: Andrew Dunstan, Michael Banck
Discussion: https://postgr.es/m/20181012005614.GC26424@paquier.xyz
Backpatch-through: 11

src/bin/pg_verify_checksums/pg_verify_checksums.c
src/bin/pg_verify_checksums/t/002_actions.pl

index 1bc020ab6cab1359e78c4f734bc76c240ecadd32..f0e09bea20212fb389fa32e92e591e7b6fff6600 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "catalog/pg_control.h"
 #include "common/controldata_utils.h"
+#include "common/relpath.h"
 #include "getopt_long.h"
 #include "pg_getopt.h"
 #include "storage/bufpage.h"
@@ -49,27 +50,69 @@ usage(void)
        printf(_("Report bugs to <pgsql-bugs@postgresql.org>.\n"));
 }
 
-static const char *const skip[] = {
-       "pg_control",
-       "pg_filenode.map",
-       "pg_internal.init",
-       "PG_VERSION",
-       NULL,
-};
-
+/*
+ * isRelFileName
+ *
+ * Check if the given file name is authorized for checksum verification.
+ */
 static bool
-skipfile(const char *fn)
+isRelFileName(const char *fn)
 {
-       const char *const *f;
-
-       if (strcmp(fn, ".") == 0 ||
-               strcmp(fn, "..") == 0)
+       int                     pos;
+
+       /*----------
+        * Only files including data checksums are authorized for verification.
+        * This is guessed based on the file name by reverse-engineering
+        * GetRelationPath() so make sure to update both code paths if any
+        * updates are done.  The following file name formats are allowed:
+        * <digits>
+        * <digits>.<segment>
+        * <digits>_<forkname>
+        * <digits>_<forkname>.<segment>
+        *
+        * Note that temporary files, beginning with 't', are also skipped.
+        *
+        *----------
+        */
+
+       /* A non-empty string of digits should follow */
+       for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos)
+               ;
+       /* leave if no digits */
+       if (pos == 0)
+               return false;
+       /* good to go if only digits */
+       if (fn[pos] == '\0')
                return true;
 
-       for (f = skip; *f; f++)
-               if (strcmp(*f, fn) == 0)
-                       return true;
-       return false;
+       /* Authorized fork files can be scanned */
+       if (fn[pos] == '_')
+       {
+               int                     forkchar = forkname_chars(&fn[pos + 1], NULL);
+
+               if (forkchar <= 0)
+                       return false;
+
+               pos += forkchar + 1;
+       }
+
+       /* Check for an optional segment number */
+       if (fn[pos] == '.')
+       {
+               int                     segchar;
+
+               for (segchar = 1; isdigit((unsigned char) fn[pos + segchar]); ++segchar)
+                       ;
+
+               if (segchar <= 1)
+                       return false;
+               pos += segchar;
+       }
+
+       /* Now this should be the end */
+       if (fn[pos] != '\0')
+               return false;
+       return true;
 }
 
 static void
@@ -146,7 +189,7 @@ scan_directory(const char *basedir, const char *subdir)
                char            fn[MAXPGPATH];
                struct stat st;
 
-               if (skipfile(de->d_name))
+               if (!isRelFileName(de->d_name))
                        continue;
 
                snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
index dc29da09af111765be03dfb5b4e42fa268149424..a9003b14cc88857df355b5ee73f591ec551de22b 100644 (file)
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 12;
+use Test::More tests => 36;
 
 # Initialize node with checksums enabled.
 my $node = get_new_node('node_checksum');
@@ -17,6 +17,31 @@ command_like(['pg_controldata', $pgdata],
             qr/Data page checksum version:.*1/,
                 'checksums enabled in control file');
 
+# Add set of dummy files with some contents.  These should not be scanned
+# by the tool.
+append_to_file "$pgdata/global/123.", "foo";
+append_to_file "$pgdata/global/123_", "foo";
+append_to_file "$pgdata/global/123_.", "foo";
+append_to_file "$pgdata/global/123.12t", "foo";
+append_to_file "$pgdata/global/foo", "foo2";
+append_to_file "$pgdata/global/t123", "bar";
+append_to_file "$pgdata/global/123a", "bar2";
+append_to_file "$pgdata/global/.123", "foobar";
+append_to_file "$pgdata/global/_fsm", "foobar2";
+append_to_file "$pgdata/global/_init", "foobar3";
+append_to_file "$pgdata/global/_vm.123", "foohoge";
+append_to_file "$pgdata/global/123_vm.123t", "foohoge2";
+
+# Those are correct but empty files, so they should pass through.
+append_to_file "$pgdata/global/99999", "";
+append_to_file "$pgdata/global/99999.123", "";
+append_to_file "$pgdata/global/99999_fsm", "";
+append_to_file "$pgdata/global/99999_init", "";
+append_to_file "$pgdata/global/99999_vm", "";
+append_to_file "$pgdata/global/99999_init.123", "";
+append_to_file "$pgdata/global/99999_fsm.123", "";
+append_to_file "$pgdata/global/99999_vm.123", "";
+
 # Checksums pass on a newly-created cluster
 command_ok(['pg_verify_checksums',  '-D', $pgdata],
                   "succeeds with offline cluster");
@@ -67,3 +92,32 @@ $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata, '-r',
                                                  [qr/Bad checksums:.*1/],
                                                  [qr/checksum verification failed/],
                                                  'fails for corrupted data on single relfilenode');
+
+# Utility routine to check that pg_verify_checksums is able to detect
+# correctly-named relation files filled with some corrupted data.
+sub fail_corrupt
+{
+       my $node = shift;
+       my $file = shift;
+       my $pgdata = $node->data_dir;
+
+       # Create the file with some dummy data in it.
+       append_to_file "$pgdata/global/$file", "foo";
+
+       $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata],
+                                                 1,
+                                                 [qr/^$/],
+                                                 [qr/could not read block/],
+                                                 "fails for corrupted data in $file");
+}
+
+# Authorized relation files filled with corrupted data cause the
+# checksum checks to fail.
+fail_corrupt($node, "99999");
+fail_corrupt($node, "99999.123");
+fail_corrupt($node, "99999_fsm");
+fail_corrupt($node, "99999_init");
+fail_corrupt($node, "99999_vm");
+fail_corrupt($node, "99999_init.123");
+fail_corrupt($node, "99999_fsm.123");
+fail_corrupt($node, "99999_vm.123");