From: Tom Lane Date: Sat, 20 Nov 2010 03:28:20 +0000 (-0500) Subject: Fix leakage of cost_limit when multiple autovacuum workers are active. X-Git-Tag: REL9_1_ALPHA3~157 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b58c25055ef6d7097618c680f6768689a110d529;p=postgresql Fix leakage of cost_limit when multiple autovacuum workers are active. When using default autovacuum_vac_cost_limit, autovac_balance_cost relied on VacuumCostLimit to contain the correct global value ... but after the first time through in a particular worker process, it didn't, because we'd trashed it in previous iterations. Depending on the state of other autovac workers, this could result in a steady reduction of the effective cost_limit setting as a particular worker processed more and more tables, causing it to go slower and slower. Spotted by Simon Poole (bug #5759). Fix by saving and restoring the GUC variables in the loop in do_autovacuum. In passing, improve a few comments. Back-patch to 8.3 ... the cost rebalancing code has been buggy since it was put in. --- diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index a617b882a3..c7d704dc8c 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -190,7 +190,7 @@ typedef struct autovac_table * * wi_links entry into free list or running list * wi_dboid OID of the database this worker is supposed to work on - * wi_tableoid OID of the table currently being vacuumed + * wi_tableoid OID of the table currently being vacuumed, if any * wi_proc pointer to PGPROC of the running worker, NULL if not started * wi_launchtime Time at which this worker was launched * wi_cost_* Vacuum cost-based delay parameters current in this worker @@ -1629,7 +1629,7 @@ FreeWorkerInfo(int code, Datum arg) * limit setting of the remaining workers. * * We somewhat ignore the risk that the launcher changes its PID - * between we reading it and the actual kill; we expect ProcKill to be + * between us reading it and the actual kill; we expect ProcKill to be * called shortly after us, and we assume that PIDs are not reused too * quickly after a process exits. */ @@ -1673,16 +1673,18 @@ AutoVacuumUpdateDelay(void) /* * autovac_balance_cost - * Recalculate the cost limit setting for each active workers. + * Recalculate the cost limit setting for each active worker. * * Caller must hold the AutovacuumLock in exclusive mode. */ static void autovac_balance_cost(void) { - WorkerInfo worker; - /* + * The idea here is that we ration out I/O equally. The amount of I/O + * that a worker can consume is determined by cost_limit/cost_delay, so + * we try to equalize those ratios rather than the raw limit settings. + * * note: in cost_limit, zero also means use value from elsewhere, because * zero is not a valid value. */ @@ -1692,6 +1694,7 @@ autovac_balance_cost(void) autovacuum_vac_cost_delay : VacuumCostDelay); double cost_total; double cost_avail; + WorkerInfo worker; /* not set? nothing to do */ if (vac_cost_limit <= 0 || vac_cost_delay <= 0) @@ -1718,7 +1721,7 @@ autovac_balance_cost(void) return; /* - * Adjust each cost limit of active workers to balance the total of cost + * Adjust cost limit of each active worker to balance the total of cost * limit to autovacuum_vacuum_cost_limit. */ cost_avail = (double) vac_cost_limit / vac_cost_delay; @@ -1734,14 +1737,19 @@ autovac_balance_cost(void) (cost_avail * worker->wi_cost_limit_base / cost_total); /* - * We put a lower bound of 1 to the cost_limit, to avoid division- - * by-zero in the vacuum code. + * We put a lower bound of 1 on the cost_limit, to avoid division- + * by-zero in the vacuum code. Also, in case of roundoff trouble + * in these calculations, let's be sure we don't ever set + * cost_limit to more than the base value. */ - worker->wi_cost_limit = Max(Min(limit, worker->wi_cost_limit_base), 1); - - elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)", - worker->wi_proc->pid, worker->wi_dboid, - worker->wi_tableoid, worker->wi_cost_limit, worker->wi_cost_delay); + worker->wi_cost_limit = Max(Min(limit, + worker->wi_cost_limit_base), + 1); + + elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_limit_base=%d, cost_delay=%d)", + worker->wi_proc->pid, worker->wi_dboid, worker->wi_tableoid, + worker->wi_cost_limit, worker->wi_cost_limit_base, + worker->wi_cost_delay); } worker = (WorkerInfo) SHMQueueNext(&AutoVacuumShmem->av_runningWorkers, @@ -2125,6 +2133,8 @@ do_autovacuum(void) autovac_table *tab; WorkerInfo worker; bool skipit; + int stdVacuumCostDelay; + int stdVacuumCostLimit; CHECK_FOR_INTERRUPTS(); @@ -2198,11 +2208,15 @@ do_autovacuum(void) MyWorkerInfo->wi_tableoid = relid; LWLockRelease(AutovacuumScheduleLock); - /* Set the initial vacuum cost parameters for this table */ - VacuumCostDelay = tab->at_vacuum_cost_delay; - VacuumCostLimit = tab->at_vacuum_cost_limit; + /* + * Remember the prevailing values of the vacuum cost GUCs. We have + * to restore these at the bottom of the loop, else we'll compute + * wrong values in the next iteration of autovac_balance_cost(). + */ + stdVacuumCostDelay = VacuumCostDelay; + stdVacuumCostLimit = VacuumCostLimit; - /* Last fixups before actually starting to work */ + /* Must hold AutovacuumLock while mucking with cost balance info */ LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); /* advertise my cost delay parameters for the balancing algorithm */ @@ -2213,6 +2227,9 @@ do_autovacuum(void) /* do a balance */ autovac_balance_cost(); + /* set the active cost parameters from the result of that */ + AutoVacuumUpdateDelay(); + /* done */ LWLockRelease(AutovacuumLock); @@ -2290,10 +2307,20 @@ deleted: pfree(tab->at_relname); pfree(tab); - /* remove my info from shared memory */ + /* + * Remove my info from shared memory. We could, but intentionally + * don't, clear wi_cost_limit and friends --- this is on the + * assumption that we probably have more to do with similar cost + * settings, so we don't want to give up our share of I/O for a very + * short interval and thereby thrash the global balance. + */ LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); MyWorkerInfo->wi_tableoid = InvalidOid; LWLockRelease(AutovacuumLock); + + /* restore vacuum cost GUCs for the next iteration */ + VacuumCostDelay = stdVacuumCostDelay; + VacuumCostLimit = stdVacuumCostLimit; } /*