From be182e4f9e899a531094bee83b14fd434e52f7cb Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 28 Aug 2019 11:47:35 +0900 Subject: [PATCH] Disable timeouts when running pg_rewind with online source cluster In this case, the transfer uses a libpq connection, which is subject to the timeout parameters set at system level, and this can cause the rewind operation to be canceled unexpectedly, which is not good for automation. One workaround for such issues would be to use PGOPTIONS to enforce the wanted timeout parameters, but that's annoying, and for example, pg_dump, which can run potentially long-running queries, disables all types of timeouts. lock_timeout and statement_timeout are the ones which can cause problems now. Note that pg_rewind does not use transactions, so disabling idle_in_transaction_session_timeout is optional, but it feels safer to do so for the future. This is back-patched down to 9.5. idle_in_transaction_session_timeout has only been present since 9.6. Author: Alexander Kukushkin Discussion: https://postgr.es/m/CAFh8B=krcVXksxiwVQh1SoY+ziJ-JC=6FcuoBL3yce_40Es5_g@mail.gmail.com Backpatch-through: 9.5 --- src/bin/pg_rewind/libpq_fetch.c | 37 +++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/src/bin/pg_rewind/libpq_fetch.c b/src/bin/pg_rewind/libpq_fetch.c index 37eccc3126..002776f696 100644 --- a/src/bin/pg_rewind/libpq_fetch.c +++ b/src/bin/pg_rewind/libpq_fetch.c @@ -39,6 +39,7 @@ static PGconn *conn = NULL; static void receiveFileChunks(const char *sql); static void execute_pagemap(datapagemap_t *pagemap, const char *path); static char *run_simple_query(const char *sql); +static void run_simple_command(const char *sql); void libpqConnect(const char *connstr) @@ -54,6 +55,11 @@ libpqConnect(const char *connstr) if (showprogress) pg_log_info("connected to server"); + /* disable all types of timeouts */ + run_simple_command("SET statement_timeout = 0"); + run_simple_command("SET lock_timeout = 0"); +
run_simple_command("SET idle_in_transaction_session_timeout = 0"); + res = PQexec(conn, ALWAYS_SECURE_SEARCH_PATH_SQL); if (PQresultStatus(res) != PGRES_TUPLES_OK) pg_fatal("could not clear search_path: %s", @@ -88,11 +94,7 @@ libpqConnect(const char *connstr) * replication, and replication isn't working for some reason, we don't * want to get stuck, waiting for it to start working again. */ - res = PQexec(conn, "SET synchronous_commit = off"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("could not set up connection context: %s", - PQresultErrorMessage(res)); - PQclear(res); + run_simple_command("SET synchronous_commit = off"); } /* @@ -122,6 +124,24 @@ run_simple_query(const char *sql) return result; } +/* + * Runs a command. + * In the event of a failure, exit immediately. + */ +static void +run_simple_command(const char *sql) +{ + PGresult *res; + + res = PQexec(conn, sql); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pg_fatal("error running query (%s) in source server: %s", + sql, PQresultErrorMessage(res)); + + PQclear(res); +} + /* * Calls pg_current_wal_insert_lsn() function */ @@ -427,12 +447,7 @@ libpq_executeFileMap(filemap_t *map) * need to fetch. */ sql = "CREATE TEMPORARY TABLE fetchchunks(path text, begin int8, len int4);"; - res = PQexec(conn, sql); - - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("could not create temporary table: %s", - PQresultErrorMessage(res)); - PQclear(res); + run_simple_command(sql); sql = "COPY fetchchunks FROM STDIN"; res = PQexec(conn, sql); -- 2.40.0