]> granicus.if.org Git - postgresql/commitdiff
Minor changes to autovacuum worker: change error handling so that it continues
author: Alvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 24 Oct 2007 19:08:25 +0000 (19:08 +0000)
committer: Alvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 24 Oct 2007 19:08:25 +0000 (19:08 +0000)
with the next table on schedule instead of exiting, in all cases instead of
just on query cancel.

Add an errcontext() line, indicating the activity of the worker, to the error
message when it is cancelled.

Change the WorkerInfo struct to contain a pointer to the worker's PGPROC
instead of just the PID.

Add forgotten post-auth delays, per Simon Riggs.  Also add one to the autovac launcher.

src/backend/postmaster/autovacuum.c

index bce0ba4323ff02b17b5acf6a43897db4e930eb79..9e472d02ff6bd20fd22e4c11cf683e522069c6cc 100644 (file)
@@ -55,7 +55,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.61 2007/09/24 04:12:01 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.62 2007/10/24 19:08:25 alvherre Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -182,7 +182,7 @@ typedef struct autovac_table
  * wi_links            entry into free list or running list
  * wi_dboid            OID of the database this worker is supposed to work on
  * wi_tableoid OID of the table currently being vacuumed
- * wi_workerpid        PID of the running worker, 0 if not yet started
+ * wi_proc             pointer to PGPROC of the running worker, NULL if not started
  * wi_launchtime Time at which this worker was launched
  * wi_cost_*   Vacuum cost-based delay parameters current in this worker
  *
@@ -196,7 +196,7 @@ typedef struct WorkerInfoData
        SHM_QUEUE       wi_links;
        Oid                     wi_dboid;
        Oid                     wi_tableoid;
-       int                     wi_workerpid;
+       PGPROC     *wi_proc;
        TimestampTz     wi_launchtime;
        int                     wi_cost_delay;
        int                     wi_cost_limit;
@@ -395,6 +395,9 @@ AutoVacLauncherMain(int argc, char *argv[])
        /* Identify myself via ps */
        init_ps_display("autovacuum launcher process", "", "", "");
 
+       if (PostAuthDelay)
+               pg_usleep(PostAuthDelay * 1000000L);
+
        SetProcessingMode(InitProcessing);
 
        /*
@@ -694,7 +697,7 @@ AutoVacLauncherMain(int argc, char *argv[])
                                        worker = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
                                        worker->wi_dboid = InvalidOid;
                                        worker->wi_tableoid = InvalidOid;
-                                       worker->wi_workerpid = 0;
+                                       worker->wi_proc = NULL;
                                        worker->wi_launchtime = 0;
                                        worker->wi_links.next = AutoVacuumShmem->av_freeWorkers;
                                        AutoVacuumShmem->av_freeWorkers = MAKE_OFFSET(worker);
@@ -1198,7 +1201,7 @@ do_start_worker(void)
                AutoVacuumShmem->av_freeWorkers = worker->wi_links.next;
 
                worker->wi_dboid = avdb->adw_datid;
-               worker->wi_workerpid = 0;
+               worker->wi_proc = NULL;
                worker->wi_launchtime = GetCurrentTimestamp();
 
                AutoVacuumShmem->av_startingWorker = sworker;
@@ -1437,6 +1440,9 @@ AutoVacWorkerMain(int argc, char *argv[])
        /* Identify myself via ps */
        init_ps_display("autovacuum worker process", "", "", "");
 
+       if (PostAuthDelay)
+               pg_usleep(PostAuthDelay * 1000000L);
+
        SetProcessingMode(InitProcessing);
 
        /*
@@ -1542,7 +1548,7 @@ AutoVacWorkerMain(int argc, char *argv[])
        {
                MyWorkerInfo = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
                dbid = MyWorkerInfo->wi_dboid;
-               MyWorkerInfo->wi_workerpid = MyProcPid;
+               MyWorkerInfo->wi_proc = MyProc;
 
                /* insert into the running list */
                SHMQueueInsertBefore(&AutoVacuumShmem->av_runningWorkers, 
@@ -1637,7 +1643,7 @@ FreeWorkerInfo(int code, Datum arg)
                MyWorkerInfo->wi_links.next = AutoVacuumShmem->av_freeWorkers;
                MyWorkerInfo->wi_dboid = InvalidOid;
                MyWorkerInfo->wi_tableoid = InvalidOid;
-               MyWorkerInfo->wi_workerpid = 0;
+               MyWorkerInfo->wi_proc = NULL;
                MyWorkerInfo->wi_launchtime = 0;
                MyWorkerInfo->wi_cost_delay = 0;
                MyWorkerInfo->wi_cost_limit = 0;
@@ -1701,7 +1707,7 @@ autovac_balance_cost(void)
                                                                           offsetof(WorkerInfoData, wi_links));
        while (worker)
        {
-               if (worker->wi_workerpid != 0 &&
+               if (worker->wi_proc != NULL &&
                        worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
                        cost_total +=
                                (double) worker->wi_cost_limit_base / worker->wi_cost_delay;
@@ -1724,7 +1730,7 @@ autovac_balance_cost(void)
                                                                           offsetof(WorkerInfoData, wi_links));
        while (worker)
        {
-               if (worker->wi_workerpid != 0 &&
+               if (worker->wi_proc != NULL &&
                        worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
                {
                        int     limit = (int)
@@ -1737,7 +1743,7 @@ autovac_balance_cost(void)
                        worker->wi_cost_limit = Max(Min(limit, worker->wi_cost_limit_base), 1);
 
                        elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)",
-                                worker->wi_workerpid, worker->wi_dboid,
+                                worker->wi_proc->pid, worker->wi_dboid,
                                 worker->wi_tableoid, worker->wi_cost_limit, worker->wi_cost_delay);
                }
 
@@ -2062,25 +2068,27 @@ next_worker:
                VacuumCostDelay = tab->at_vacuum_cost_delay;
                VacuumCostLimit = tab->at_vacuum_cost_limit;
 
-               /*
-                * Advertise my cost delay parameters for the balancing algorithm, and
-                * do a balance
-                */
+               /* Last fixups before actually starting to work */
                LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
+
+               /* advertise my cost delay parameters for the balancing algorithm */
                MyWorkerInfo->wi_cost_delay = tab->at_vacuum_cost_delay;
                MyWorkerInfo->wi_cost_limit = tab->at_vacuum_cost_limit;
                MyWorkerInfo->wi_cost_limit_base = tab->at_vacuum_cost_limit;
+
+               /* do a balance */
                autovac_balance_cost();
+
+               /* done */
                LWLockRelease(AutovacuumLock);
 
                /* clean up memory before each iteration */
                MemoryContextResetAndDeleteChildren(PortalContext);
 
                /*
-                * We will abort vacuuming the current table if we are interrupted, and
-                * continue with the next one in schedule; but if anything else
-                * happens, we will do our usual error handling which is to cause the
-                * worker process to exit.
+                * We will abort vacuuming the current table if something errors out,
+                * and continue with the next one in schedule; in particular, this
+                * happens if we are interrupted with SIGINT.
                 */
                PG_TRY();
                {
@@ -2094,39 +2102,40 @@ next_worker:
                }
                PG_CATCH();
                {
-                       ErrorData          *errdata;
-
-                       MemoryContextSwitchTo(TopTransactionContext);
-                       errdata = CopyErrorData();
-
                        /*
-                        * If we errored out due to a cancel request, abort and restart the
-                        * transaction and go to the next table.  Otherwise rethrow the
-                        * error so that the outermost handler deals with it.
+                        * Abort the transaction, start a new one, and proceed with the
+                        * next table in our list.
                         */
-                       if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED)
-                       {
-                               HOLD_INTERRUPTS();
-                               elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"",
-                                        get_database_name(MyDatabaseId),
-                                        get_namespace_name(get_rel_namespace(tab->at_relid)),
-                                        get_rel_name(tab->at_relid));
-
-                               AbortOutOfAnyTransaction();
-                               FlushErrorState();
-                               MemoryContextResetAndDeleteChildren(PortalContext);
-
-                               /* restart our transaction for the following operations */
-                               StartTransactionCommand();
-                               RESUME_INTERRUPTS();
-                       }
+                       HOLD_INTERRUPTS();
+                       if (tab->at_dovacuum)
+                               errcontext("automatic vacuum of table \"%s.%s.%s\"",
+                                                  get_database_name(MyDatabaseId),
+                                                  get_namespace_name(get_rel_namespace(tab->at_relid)),
+                                                  get_rel_name(tab->at_relid));
                        else
-                               PG_RE_THROW();
+                               errcontext("automatic analyze of table \"%s.%s.%s\"",
+                                                  get_database_name(MyDatabaseId),
+                                                  get_namespace_name(get_rel_namespace(tab->at_relid)),
+                                                  get_rel_name(tab->at_relid));
+                       EmitErrorReport();
+
+                       AbortOutOfAnyTransaction();
+                       FlushErrorState();
+                       MemoryContextResetAndDeleteChildren(PortalContext);
+
+                       /* restart our transaction for the following operations */
+                       StartTransactionCommand();
+                       RESUME_INTERRUPTS();
                }
                PG_END_TRY();
 
                /* be tidy */
                pfree(tab);
+
+               /* remove my info from shared memory */
+               LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
+               MyWorkerInfo->wi_tableoid = InvalidOid;
+               LWLockRelease(AutovacuumLock);
        }
 
        /*