From c92c3d50d7fbe7391b5fc864b444346b2db29ffe Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter_e@gmx.net>
Date: Mon, 14 Apr 2014 23:15:05 -0400
Subject: [PATCH] vacuumdb: Add option --analyze-in-stages

Add vacuumdb option --analyze-in-stages which runs ANALYZE three times
with different configuration settings, adopting the logic from the
analyze_new_cluster.sh script that pg_upgrade generates.  That way,
users of pg_dump/pg_restore can also use that functionality.

Change pg_upgrade to create the script so that it calls vacuumdb instead
of implementing the logic itself.
---
 contrib/pg_upgrade/check.c               | 58 ++------------------
 doc/src/sgml/ref/vacuumdb.sgml           | 20 +++++++
 src/bin/scripts/t/102_vacuumdb_stages.pl | 16 ++++++
 src/bin/scripts/vacuumdb.c               | 70 ++++++++++++++++++------
 4 files changed, 94 insertions(+), 70 deletions(-)
 create mode 100644 src/bin/scripts/t/102_vacuumdb_stages.pl

diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
index acf8660044..d22b6d38e2 100644
--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
@@ -565,62 +565,12 @@ create_script_for_cluster_analyze(char **analyze_script_file_name)
 			"--analyze-only" : "--analyze", ECHO_QUOTE);
 	fprintf(script, "echo%s\n\n", ECHO_BLANK);
 
-#ifndef WIN32
-	fprintf(script, "sleep 2\n");
-	fprintf(script, "PGOPTIONS='-c default_statistics_target=1 -c vacuum_cost_delay=0'\n");
-	/* only need to export once */
-	fprintf(script, "export PGOPTIONS\n");
-#else
-	fprintf(script, "REM simulate sleep 2\n");
-	fprintf(script, "PING 1.1.1.1 -n 1 -w 2000 > nul\n");
-	fprintf(script, "SET PGOPTIONS=-c default_statistics_target=1 -c vacuum_cost_delay=0\n");
-#endif
-
-	fprintf(script, "echo %sGenerating minimal optimizer statistics (1 target)%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo %s--------------------------------------------------%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "\"%s/vacuumdb\" %s--all --analyze-only\n",
-			new_cluster.bindir, user_specification);
-	fprintf(script, "echo%s\n", ECHO_BLANK);
-	fprintf(script, "echo %sThe server is now available with minimal optimizer statistics.%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo %sQuery performance will be optimal once this script completes.%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo%s\n\n", ECHO_BLANK);
-
-#ifndef WIN32
-	fprintf(script, "sleep 2\n");
-	fprintf(script, "PGOPTIONS='-c default_statistics_target=10'\n");
-#else
-	fprintf(script, "REM simulate sleep\n");
-	fprintf(script, "PING 1.1.1.1 -n 1 -w 2000 > nul\n");
-	fprintf(script, "SET PGOPTIONS=-c default_statistics_target=10\n");
-#endif
-
-	fprintf(script, "echo %sGenerating medium optimizer statistics (10 targets)%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo %s---------------------------------------------------%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "\"%s/vacuumdb\" %s--all --analyze-only\n",
+	fprintf(script, "\"%s/vacuumdb\" %s--all --analyze-in-stages\n",
 			new_cluster.bindir, user_specification);
-	fprintf(script, "echo%s\n\n", ECHO_BLANK);
-
-#ifndef WIN32
-	fprintf(script, "unset PGOPTIONS\n");
-#else
-	fprintf(script, "SET PGOPTIONS\n");
-#endif
-
-	fprintf(script, "echo %sGenerating default (full) optimizer statistics (100 targets?)%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo %s-------------------------------------------------------------%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "\"%s/vacuumdb\" %s--all %s\n", new_cluster.bindir,
-			user_specification,
 	/* Did we copy the free space files? */
-			(GET_MAJOR_VERSION(old_cluster.major_version) >= 804) ?
-			"--analyze-only" : "--analyze");
+	if (GET_MAJOR_VERSION(old_cluster.major_version) < 804)
+		fprintf(script, "\"%s/vacuumdb\" %s--all\n", new_cluster.bindir,
+				user_specification);
 
 	fprintf(script, "echo%s\n\n", ECHO_BLANK);
 	fprintf(script, "echo %sDone%s\n",
diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml
index 1c519f5944..3ecd999981 100644
--- a/doc/src/sgml/ref/vacuumdb.sgml
+++ b/doc/src/sgml/ref/vacuumdb.sgml
@@ -203,6 +203,26 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--analyze-in-stages</option></term>
+      <listitem>
+       <para>
+        Only calculate statistics for use by the optimizer (no vacuum),
+        like <option>--analyze-only</option>.  Run several (currently three)
+        stages of analyze with different configuration settings, to produce
+        usable statistics faster.
+       </para>
+
+       <para>
+        This option is useful to analyze a database that was newly populated
+        from a restored dump or by <command>pg_upgrade</command>.  This option
+        will try to create some statistics as fast as possible, to make the
+        database usable, and then produce full statistics in the subsequent
+        stages.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
        <term><option>-?</></term>
        <term><option>--help</></term>
diff --git a/src/bin/scripts/t/102_vacuumdb_stages.pl b/src/bin/scripts/t/102_vacuumdb_stages.pl
new file mode 100644
index 0000000000..743743edba
--- /dev/null
+++ b/src/bin/scripts/t/102_vacuumdb_stages.pl
@@ -0,0 +1,16 @@
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 1;
+
+my $tempdir = tempdir;
+start_test_server $tempdir;
+
+issues_sql_like(['vacuumdb', '--analyze-in-stages', 'postgres'],
+				qr/.*statement:\ SET\ default_statistics_target=1;\ SET\ vacuum_cost_delay=0;
+                   .*statement:\ ANALYZE.*
+                   .*statement:\ SET\ default_statistics_target=10;\ RESET\ vacuum_cost_delay;
+                   .*statement:\ ANALYZE.*
+                   .*statement:\ RESET\ default_statistics_target;
+                   .*statement:\ ANALYZE/sx,
+				'analyze three times');
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
index 07a60d0b6d..1e288ef978 100644
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -16,12 +16,12 @@
 
 
 static void vacuum_one_database(const char *dbname, bool full, bool verbose,
-					bool and_analyze, bool analyze_only, bool freeze,
+					bool and_analyze, bool analyze_only, bool analyze_in_stages, bool freeze,
 					const char *table, const char *host, const char *port,
 					const char *username, enum trivalue prompt_password,
 					const char *progname, bool echo);
 static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
-					 bool analyze_only, bool freeze,
+					 bool analyze_only, bool analyze_in_stages, bool freeze,
 					 const char *maintenance_db,
 					 const char *host, const char *port,
 					 const char *username, enum trivalue prompt_password,
@@ -50,6 +50,7 @@ main(int argc, char *argv[])
 		{"full", no_argument, NULL, 'f'},
 		{"verbose", no_argument, NULL, 'v'},
 		{"maintenance-db", required_argument, NULL, 2},
+		{"analyze-in-stages", no_argument, NULL, 3},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -67,6 +68,7 @@ main(int argc, char *argv[])
 	bool		quiet = false;
 	bool		and_analyze = false;
 	bool		analyze_only = false;
+	bool		analyze_in_stages = false;
 	bool		freeze = false;
 	bool		alldb = false;
 	bool		full = false;
@@ -130,6 +132,9 @@ main(int argc, char *argv[])
 			case 2:
 				maintenance_db = pg_strdup(optarg);
 				break;
+			case 3:
+				analyze_in_stages = analyze_only = true;
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -189,7 +194,7 @@ main(int argc, char *argv[])
 			exit(1);
 		}
 
-		vacuum_all_databases(full, verbose, and_analyze, analyze_only, freeze,
+		vacuum_all_databases(full, verbose, and_analyze, analyze_only, analyze_in_stages, freeze,
 							 maintenance_db, host, port, username,
 							 prompt_password, progname, echo, quiet);
 	}
@@ -212,7 +217,7 @@ main(int argc, char *argv[])
 			for (cell = tables.head; cell; cell = cell->next)
 			{
 				vacuum_one_database(dbname, full, verbose, and_analyze,
-									analyze_only,
+									analyze_only, analyze_in_stages,
 									freeze, cell->val,
 									host, port, username, prompt_password,
 									progname, echo);
@@ -220,7 +225,7 @@ main(int argc, char *argv[])
 		}
 		else
 			vacuum_one_database(dbname, full, verbose, and_analyze,
-								analyze_only,
+								analyze_only, analyze_in_stages,
 								freeze, NULL,
 								host, port, username, prompt_password,
 								progname, echo);
@@ -230,9 +235,26 @@ main(int argc, char *argv[])
 }
 
 
+static void
+run_vacuum_command(PGconn *conn, const char *sql, bool echo, const char *dbname, const char *table, const char *progname)
+{
+	if (!executeMaintenanceCommand(conn, sql, echo))
+	{
+		if (table)
+			fprintf(stderr, _("%s: vacuuming of table \"%s\" in database \"%s\" failed: %s"),
+					progname, table, dbname, PQerrorMessage(conn));
+		else
+			fprintf(stderr, _("%s: vacuuming of database \"%s\" failed: %s"),
+					progname, dbname, PQerrorMessage(conn));
+		PQfinish(conn);
+		exit(1);
+	}
+}
+
+
 static void
 vacuum_one_database(const char *dbname, bool full, bool verbose, bool and_analyze,
-					bool analyze_only, bool freeze, const char *table,
+					bool analyze_only, bool analyze_in_stages, bool freeze, const char *table,
 					const char *host, const char *port,
 					const char *username, enum trivalue prompt_password,
 					const char *progname, bool echo)
@@ -300,17 +322,30 @@ vacuum_one_database(const char *dbname, bool full, bool verbose, bool and_analyz
 		appendPQExpBuffer(&sql, " %s", table);
 	appendPQExpBufferStr(&sql, ";");
 
-	if (!executeMaintenanceCommand(conn, sql.data, echo))
+	if (analyze_in_stages)
 	{
-		if (table)
-			fprintf(stderr, _("%s: vacuuming of table \"%s\" in database \"%s\" failed: %s"),
-					progname, table, dbname, PQerrorMessage(conn));
-		else
-			fprintf(stderr, _("%s: vacuuming of database \"%s\" failed: %s"),
-					progname, dbname, PQerrorMessage(conn));
-		PQfinish(conn);
-		exit(1);
+		const char *stage_commands[] = {
+			"SET default_statistics_target=1; SET vacuum_cost_delay=0;",
+			"SET default_statistics_target=10; RESET vacuum_cost_delay;",
+			"RESET default_statistics_target;"
+		};
+		const char *stage_messages[] = {
+			gettext_noop("Generating minimal optimizer statistics (1 target)"),
+			gettext_noop("Generating medium optimizer statistics (10 targets)"),
+			gettext_noop("Generating default (full) optimizer statistics")
+		};
+		int i;
+
+		for (i = 0; i < 3; i++)
+		{
+			puts(gettext(stage_messages[i]));
+			executeCommand(conn, stage_commands[i], progname, echo);
+			run_vacuum_command(conn, sql.data, echo, dbname, table, progname);
+		}
 	}
+	else
+		run_vacuum_command(conn, sql.data, echo, dbname, NULL, progname);
+
 	PQfinish(conn);
 	termPQExpBuffer(&sql);
 }
@@ -318,7 +353,7 @@ vacuum_one_database(const char *dbname, bool full, bool verbose, bool and_analyz
 
 static void
 vacuum_all_databases(bool full, bool verbose, bool and_analyze, bool analyze_only,
-					 bool freeze, const char *maintenance_db,
+					 bool analyze_in_stages, bool freeze, const char *maintenance_db,
 					 const char *host, const char *port,
 					 const char *username, enum trivalue prompt_password,
 					 const char *progname, bool echo, bool quiet)
@@ -343,6 +378,7 @@ vacuum_all_databases(bool full, bool verbose, bool and_analyze, bool analyze_onl
 		}
 
 		vacuum_one_database(dbname, full, verbose, and_analyze, analyze_only,
+							analyze_in_stages,
 						 freeze, NULL, host, port, username, prompt_password,
 							progname, echo);
 	}
@@ -369,6 +405,8 @@ help(const char *progname)
 	printf(_("  -V, --version                   output version information, then exit\n"));
 	printf(_("  -z, --analyze                   update optimizer statistics\n"));
 	printf(_("  -Z, --analyze-only              only update optimizer statistics\n"));
+	printf(_("      --analyze-in-stages         only update optimizer statistics, in multiple\n"
+			 "                                  stages for faster results\n"));
 	printf(_("  -?, --help                      show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -h, --host=HOSTNAME       database server host or socket directory\n"));
-- 
2.40.0