granicus.if.org Git - postgresql/commitdiff
During pg_upgrade, conditionally skip transfer of FSMs.
author: Amit Kapila <akapila@postgresql.org>
Fri, 15 Mar 2019 02:55:57 +0000 (08:25 +0530)
committer: Amit Kapila <akapila@postgresql.org>
Fri, 15 Mar 2019 02:55:57 +0000 (08:25 +0530)
If a heap on the old cluster has 4 pages or fewer, and the old cluster
was PG v11 or earlier, don't copy or link the FSM. This will shrink
space usage for installations with large numbers of small tables.

This will allow pg_upgrade to take advantage of commit b0eaa4c51b, which
avoids creating a free space map for small heap relations.

Author: John Naylor
Reviewed-by: Amit Kapila
Discussion: https://postgr.es/m/CACPNZCu4cOdm3uGnNEGXivy7Gz8UWyQjynDpdkPGabQ18_zK6g%40mail.gmail.com

doc/src/sgml/ref/pgupgrade.sgml
src/bin/pg_upgrade/info.c
src/bin/pg_upgrade/pg_upgrade.h
src/bin/pg_upgrade/relfilenode.c

index 7e1afaf0a50be25309a0fc5609d2ff6ef676abde..c896882dd12174ccd7b64fdacbac55155e32e434 100644 (file)
@@ -792,6 +792,13 @@ psql --username=postgres --file=script.sql postgres
    is down.
   </para>
 
+  <para>
+   In <productname>PostgreSQL</productname> 12 and later, small tables by
+   default don't have a free space map, as a space optimization.  If you are
+   upgrading a pre-12 cluster, the free space maps of small tables will
+   likewise not be transferred to the new cluster.
+  </para>
+
  </refsect1>
 
  <refsect1>
index 2f925f086c31a9eaaf3981b78b85674128ebd3c1..902bfc647e9d492b0658ea35a22568fa2d3c3a8d 100644 (file)
@@ -200,6 +200,8 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 
        map->old_db_oid = old_db->db_oid;
        map->new_db_oid = new_db->db_oid;
+       map->relpages = old_rel->relpages;
+       map->relkind = old_rel->relkind;
 
        /*
         * old_relfilenode might differ from pg_class.oid (and hence
@@ -418,6 +420,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
        char       *nspname = NULL;
        char       *relname = NULL;
        char       *tablespace = NULL;
+       char       *relkind = NULL;
        int                     i_spclocation,
                                i_nspname,
                                i_relname,
@@ -425,7 +428,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
                                i_indtable,
                                i_toastheap,
                                i_relfilenode,
-                               i_reltablespace;
+                               i_reltablespace,
+                               i_relpages,
+                               i_relkind;
        char            query[QUERY_ALLOC];
        char       *last_namespace = NULL,
                           *last_tablespace = NULL;
@@ -494,7 +499,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
         */
        snprintf(query + strlen(query), sizeof(query) - strlen(query),
                         "SELECT all_rels.*, n.nspname, c.relname, "
-                        "  c.relfilenode, c.reltablespace, %s "
+                        "  c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s "
                         "FROM (SELECT * FROM regular_heap "
                         "      UNION ALL "
                         "      SELECT * FROM toast_heap "
@@ -525,6 +530,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
        i_relname = PQfnumber(res, "relname");
        i_relfilenode = PQfnumber(res, "relfilenode");
        i_reltablespace = PQfnumber(res, "reltablespace");
+       i_relpages = PQfnumber(res, "relpages");
+       i_relkind = PQfnumber(res, "relkind");
        i_spclocation = PQfnumber(res, "spclocation");
 
        for (relnum = 0; relnum < ntups; relnum++)
@@ -556,6 +563,11 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
                curr->relname = pg_strdup(relname);
 
                curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
+               curr->relpages = atoi(PQgetvalue(res, relnum, i_relpages));
+
+               relkind = PQgetvalue(res, relnum, i_relkind);
+               curr->relkind = relkind[0];
+
                curr->tblsp_alloc = false;
 
                /* Is the tablespace oid non-default? */
index 2f67eee22b82287b0a41293504e5c8a0cca50067..baeb8ff0f855d939ae2805532a332f5d5b455093 100644 (file)
@@ -147,6 +147,8 @@ typedef struct
        char       *tablespace;         /* tablespace path; "" for cluster default */
        bool            nsp_alloc;              /* should nspname be freed? */
        bool            tblsp_alloc;    /* should tablespace be freed? */
+       int32           relpages;               /* # of pages -- see pg_class.h */
+       char            relkind;                /* relation kind -- see pg_class.h */
 } RelInfo;
 
 typedef struct
@@ -173,6 +175,10 @@ typedef struct
         */
        Oid                     old_relfilenode;
        Oid                     new_relfilenode;
+
+       int32           relpages;               /* # of pages -- see pg_class.h */
+       char            relkind;                /* relation kind -- see pg_class.h */
+
        /* the rest are used only for logging and error reporting */
        char       *nspname;            /* namespaces */
        char       *relname;
index 0c78073f0eb08b8efc0916920a681ced0bc7e7f0..dd3c8cefe429767e47ee7510004ff4ff9dceface 100644 (file)
 #include <sys/stat.h>
 #include "catalog/pg_class_d.h"
 #include "access/transam.h"
+#include "storage/freespace.h"
 
 
 static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
 static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
+static bool new_cluster_needs_fsm(FileNameMap *map);
 
 
 /*
@@ -174,7 +176,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
                                /*
                                 * Copy/link any fsm and vm files, if they exist
                                 */
-                               transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
+                               if (new_cluster_needs_fsm(&maps[mapnum]))
+                                       transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
                                if (vm_crashsafe_match)
                                        transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
                        }
@@ -278,3 +281,61 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
                        }
        }
 }
+
+/*
+ * new_cluster_needs_fsm()
+ *
+ * Return false only when upgrading from PG <= 11 to PG >= 12 (where small
+ * heaps have no FSM by default) and the heap/TOAST rel is within threshold.
+ */
+static bool
+new_cluster_needs_fsm(FileNameMap *map)
+{
+       char            old_primary_file[MAXPGPATH];
+       struct stat statbuf;
+
+       /* fsm/vm files added in PG 8.4 */
+       Assert(GET_MAJOR_VERSION(old_cluster.major_version) >= 804);
+
+       if (!(GET_MAJOR_VERSION(old_cluster.major_version) <= 1100 &&
+                 GET_MAJOR_VERSION(new_cluster.major_version) >= 1200))
+               return true;
+
+       /* Always transfer FSMs of non-heap relations. */
+       if (map->relkind != RELKIND_RELATION &&
+               map->relkind != RELKIND_TOASTVALUE)
+               return true;
+
+       /*
+        * If pg_class.relpages falsely reports that the heap is above the
+        * threshold, we will transfer a FSM when we don't need to, but this is
+        * harmless.
+        */
+       if (map->relpages > HEAP_FSM_CREATION_THRESHOLD)
+               return true;
+
+       /* Build the path of the relation's primary file in the old cluster. */
+       snprintf(old_primary_file, sizeof(old_primary_file), "%s%s/%u/%u",
+                        map->old_tablespace,
+                        map->old_tablespace_suffix,
+                        map->old_db_oid,
+                        map->old_relfilenode);
+
+       /*
+        * If pg_class.relpages falsely reports that the heap is below the
+        * threshold, a FSM would be skipped when we actually need it.  To guard
+        * against this, we verify the size of the primary file.
+        */
+       if (stat(old_primary_file, &statbuf) != 0)
+       {
+               pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\"): %s\n",
+                                map->nspname, map->relname, old_primary_file, strerror(errno));
+
+               /* Keep compiler quiet. */
+               return false;
+       }
+       else if (statbuf.st_size > HEAP_FSM_CREATION_THRESHOLD * BLCKSZ)
+               return true;
+       else
+               return false;
+}