From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Wed, 24 Oct 2007 19:08:25 +0000 (+0000)
Subject: Minor changes to autovacuum worker: change error handling so that it continues
X-Git-Tag: REL8_3_BETA2~25
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2627c83561d8a0ff1f07d85426a389c5eb82980f;p=postgresql

Minor changes to autovacuum worker: change error handling so that, on any
error (not just on query cancel), it continues with the next table on
schedule instead of exiting.

Add an errcontext() line indicating the activity of the worker to the error
message when it is cancelled.

Change the WorkerInfo struct to contain a pointer to the worker's PGPROC
instead of just the PID.

Add forgotten post-auth delays to the autovacuum worker and launcher, per Simon Riggs.
---

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index bce0ba4323..9e472d02ff 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -55,7 +55,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.61 2007/09/24 04:12:01 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.62 2007/10/24 19:08:25 alvherre Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -182,7 +182,7 @@ typedef struct autovac_table
  * wi_links		entry into free list or running list
  * wi_dboid		OID of the database this worker is supposed to work on
  * wi_tableoid	OID of the table currently being vacuumed
- * wi_workerpid	PID of the running worker, 0 if not yet started
+ * wi_proc		pointer to PGPROC of the running worker, NULL if not started
  * wi_launchtime Time at which this worker was launched
  * wi_cost_*	Vacuum cost-based delay parameters current in this worker
  *
@@ -196,7 +196,7 @@ typedef struct WorkerInfoData
 	SHM_QUEUE	wi_links;
 	Oid			wi_dboid;
 	Oid			wi_tableoid;
-	int			wi_workerpid;
+	PGPROC	   *wi_proc;
 	TimestampTz	wi_launchtime;
 	int			wi_cost_delay;
 	int			wi_cost_limit;
@@ -395,6 +395,9 @@ AutoVacLauncherMain(int argc, char *argv[])
 	/* Identify myself via ps */
 	init_ps_display("autovacuum launcher process", "", "", "");
 
+	if (PostAuthDelay)
+		pg_usleep(PostAuthDelay * 1000000L);
+
 	SetProcessingMode(InitProcessing);
 
 	/*
@@ -694,7 +697,7 @@ AutoVacLauncherMain(int argc, char *argv[])
 					worker = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
 					worker->wi_dboid = InvalidOid;
 					worker->wi_tableoid = InvalidOid;
-					worker->wi_workerpid = 0;
+					worker->wi_proc = NULL;
 					worker->wi_launchtime = 0;
 					worker->wi_links.next = AutoVacuumShmem->av_freeWorkers;
 					AutoVacuumShmem->av_freeWorkers = MAKE_OFFSET(worker);
@@ -1198,7 +1201,7 @@ do_start_worker(void)
 		AutoVacuumShmem->av_freeWorkers = worker->wi_links.next;
 
 		worker->wi_dboid = avdb->adw_datid;
-		worker->wi_workerpid = 0;
+		worker->wi_proc = NULL;
 		worker->wi_launchtime = GetCurrentTimestamp();
 
 		AutoVacuumShmem->av_startingWorker = sworker;
@@ -1437,6 +1440,9 @@ AutoVacWorkerMain(int argc, char *argv[])
 	/* Identify myself via ps */
 	init_ps_display("autovacuum worker process", "", "", "");
 
+	if (PostAuthDelay)
+		pg_usleep(PostAuthDelay * 1000000L);
+
 	SetProcessingMode(InitProcessing);
 
 	/*
@@ -1542,7 +1548,7 @@ AutoVacWorkerMain(int argc, char *argv[])
 	{
 		MyWorkerInfo = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
 		dbid = MyWorkerInfo->wi_dboid;
-		MyWorkerInfo->wi_workerpid = MyProcPid;
+		MyWorkerInfo->wi_proc = MyProc;
 
 		/* insert into the running list */
 		SHMQueueInsertBefore(&AutoVacuumShmem->av_runningWorkers, 
@@ -1637,7 +1643,7 @@ FreeWorkerInfo(int code, Datum arg)
 		MyWorkerInfo->wi_links.next = AutoVacuumShmem->av_freeWorkers;
 		MyWorkerInfo->wi_dboid = InvalidOid;
 		MyWorkerInfo->wi_tableoid = InvalidOid;
-		MyWorkerInfo->wi_workerpid = 0;
+		MyWorkerInfo->wi_proc = NULL;
 		MyWorkerInfo->wi_launchtime = 0;
 		MyWorkerInfo->wi_cost_delay = 0;
 		MyWorkerInfo->wi_cost_limit = 0;
@@ -1701,7 +1707,7 @@ autovac_balance_cost(void)
 									   offsetof(WorkerInfoData, wi_links));
 	while (worker)
 	{
-		if (worker->wi_workerpid != 0 &&
+		if (worker->wi_proc != NULL &&
 			worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
 			cost_total +=
 				(double) worker->wi_cost_limit_base / worker->wi_cost_delay;
@@ -1724,7 +1730,7 @@ autovac_balance_cost(void)
 									   offsetof(WorkerInfoData, wi_links));
 	while (worker)
 	{
-		if (worker->wi_workerpid != 0 &&
+		if (worker->wi_proc != NULL &&
 			worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
 		{
 			int     limit = (int)
@@ -1737,7 +1743,7 @@ autovac_balance_cost(void)
 			worker->wi_cost_limit = Max(Min(limit, worker->wi_cost_limit_base), 1);
 
 			elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)",
-				 worker->wi_workerpid, worker->wi_dboid,
+				 worker->wi_proc->pid, worker->wi_dboid,
 				 worker->wi_tableoid, worker->wi_cost_limit, worker->wi_cost_delay);
 		}
 
@@ -2062,25 +2068,27 @@ next_worker:
 		VacuumCostDelay = tab->at_vacuum_cost_delay;
 		VacuumCostLimit = tab->at_vacuum_cost_limit;
 
-		/*
-		 * Advertise my cost delay parameters for the balancing algorithm, and
-		 * do a balance
-		 */
+		/* Last fixups before actually starting to work */
 		LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
+
+		/* advertise my cost delay parameters for the balancing algorithm */
 		MyWorkerInfo->wi_cost_delay = tab->at_vacuum_cost_delay;
 		MyWorkerInfo->wi_cost_limit = tab->at_vacuum_cost_limit;
 		MyWorkerInfo->wi_cost_limit_base = tab->at_vacuum_cost_limit;
+
+		/* do a balance */
 		autovac_balance_cost();
+
+		/* done */
 		LWLockRelease(AutovacuumLock);
 
 		/* clean up memory before each iteration */
 		MemoryContextResetAndDeleteChildren(PortalContext);
 
 		/*
-		 * We will abort vacuuming the current table if we are interrupted, and
-		 * continue with the next one in schedule; but if anything else
-		 * happens, we will do our usual error handling which is to cause the
-		 * worker process to exit.
+		 * We will abort vacuuming the current table if something errors out,
+		 * and continue with the next one in schedule; in particular, this
+		 * happens if we are interrupted with SIGINT.
 		 */
 		PG_TRY();
 		{
@@ -2094,39 +2102,40 @@ next_worker:
 		}
 		PG_CATCH();
 		{
-			ErrorData	   *errdata;
-
-			MemoryContextSwitchTo(TopTransactionContext);
-			errdata = CopyErrorData();
-
 			/*
-			 * If we errored out due to a cancel request, abort and restart the
-			 * transaction and go to the next table.  Otherwise rethrow the
-			 * error so that the outermost handler deals with it.
+			 * Abort the transaction, start a new one, and proceed with the
+			 * next table in our list.
 			 */
-			if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED)
-			{
-				HOLD_INTERRUPTS();
-				elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"",
-					 get_database_name(MyDatabaseId),
-					 get_namespace_name(get_rel_namespace(tab->at_relid)),
-					 get_rel_name(tab->at_relid));
-
-				AbortOutOfAnyTransaction();
-				FlushErrorState();
-				MemoryContextResetAndDeleteChildren(PortalContext);
-
-				/* restart our transaction for the following operations */
-				StartTransactionCommand();
-				RESUME_INTERRUPTS();
-			}
+			HOLD_INTERRUPTS();
+			if (tab->at_dovacuum)
+				errcontext("automatic vacuum of table \"%s.%s.%s\"",
+						   get_database_name(MyDatabaseId),
+						   get_namespace_name(get_rel_namespace(tab->at_relid)),
+						   get_rel_name(tab->at_relid));
 			else
-				PG_RE_THROW();
+				errcontext("automatic analyze of table \"%s.%s.%s\"",
+						   get_database_name(MyDatabaseId),
+						   get_namespace_name(get_rel_namespace(tab->at_relid)),
+						   get_rel_name(tab->at_relid));
+			EmitErrorReport();
+
+			AbortOutOfAnyTransaction();
+			FlushErrorState();
+			MemoryContextResetAndDeleteChildren(PortalContext);
+
+			/* restart our transaction for the following operations */
+			StartTransactionCommand();
+			RESUME_INTERRUPTS();
 		}
 		PG_END_TRY();
 
 		/* be tidy */
 		pfree(tab);
+
+		/* remove my info from shared memory */
+		LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
+		MyWorkerInfo->wi_tableoid = InvalidOid;
+		LWLockRelease(AutovacuumLock);
 	}
 
 	/*