From: Thibault Godouet Date: Thu, 9 Jun 2016 08:36:43 +0000 (+0100) Subject: Fixed occasional 1s slippage. Disable fcrondyn if we don't have gettimeofday() (or... X-Git-Tag: ver3_2_1~3 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=46dbfcb1bb011a703a0f7b9c1da83e756ca50855;p=fcron Fixed occasional 1s slippage. Disable fcrondyn if we don't have gettimeofday() (or it won't work anyway) This was due to a race condition between when time_to_sleep is run and when we compute how long to sleep for, which could happen in the following second: if that happened we end up sleeping for 1s instead of not sleeping at all. The fix was to replace time_to_sleep() by next_wake_time(). --- diff --git a/configure.in b/configure.in index 37142a5..2f51af8 100644 --- a/configure.in +++ b/configure.in @@ -116,6 +116,7 @@ AC_CHECK_FUNCS(setlinebuf) AC_CHECK_FUNCS(signal) AC_CHECK_FUNCS(sigset) +AC_CHECK_FUNCS(gettimeofday, [gettimeofday=1], [gettimeofday=0]) AC_CHECK_FUNCS(seteuid, [seteuid=1], [seteuid=0]) AC_CHECK_FUNCS(setegid, [setegid=1], [setegid=0]) AC_CHECK_FUNCS(setresuid, [setresuid=1], [setresuid=0]) @@ -473,7 +474,12 @@ WARNING : if test "$fcrondyn" = ""; then - fcrondyn=1 + dnl As it stands gettimeofday() is required to have fcrondyn + if test "$gettimeofday" = 1; then + fcrondyn=1 + else + fcrondyn=0 + fi fi AC_MSG_CHECKING(if fcrondyn should be compiled) AC_ARG_WITH(fcrondyn, @@ -484,18 +490,18 @@ AC_ARG_WITH(fcrondyn, AC_MSG_RESULT(no) ;; yes) - if test "$crypt" -eq 1; then + if test "$crypt" -eq 1 || test "$gettimeofday" -eq 1; then fcrondyn=1 AC_MSG_RESULT(yes) else - AC_MSG_ERROR(Need a crypt() function.) + AC_MSG_ERROR(Need a crypt() and gettimeofday() function to enable fcrondyn.) fi ;; *) AC_MSG_ERROR(Must be set to either "yes" or "no".) ;; esac ], - if test "$fcrondyn" = "1" && test "$crypt" -eq 1; then + if test "$fcrondyn" = "1" && test "$crypt" -eq 1 && test "$gettimeofday" -eq 1; then AC_MSG_RESULT(yes) else fcrondyn=0 diff --git a/database.c b/database.c index c76e629..c672e6e 100644 --- a/database.c +++ b/database.c @@ -1457,9 +1457,9 @@ mail_notrun(cl_t * line, char context, struct tm *since) time_t check_lavg(time_t lim) /* run a job based on system load average if one should be run - * and return the time to sleep */ + * and return the next time fcron should wake */ { - time_t tts = 0; + time_t nwt = now; lavg_t *l = NULL; #ifdef NOLOADAVG @@ -1470,8 +1470,8 @@ check_lavg(time_t lim) } - tts = time_to_sleep(lim); - return tts; + nwt = next_wake_time(lim); + return nwt; #else int i = 0; @@ -1506,10 +1506,10 @@ check_lavg(time_t lim) } /* we do this set here as the nextexe of lavg line may change before */ - tts = time_to_sleep(lim); + nwt = next_wake_time(lim); if (lavg_list->num_entries == 0) - return tts; + return nwt; if ((i = getloadavg(l_avg, 3)) != 3) debug("got only %d lavg values", i); @@ -1549,35 +1549,34 @@ check_lavg(time_t lim) } - if (lavg_list->num_entries == 0) - return tts; - else - return (LAVG_SLEEP < tts) ? LAVG_SLEEP : tts; + if (lavg_list->num_entries == 0) { + return nwt; + } + else { + return tmin(now + LAVG_SLEEP, nwt); + } #endif /* def NOLOADAVG */ } time_t -time_to_sleep(time_t lim) - /* return the time to sleep until next task have to be executed. */ +next_wake_time(time_t lim) + /* return the earliest between lim (e.g. when to save to disk) + * and the next time fcron should run a job */ { - /* we set tts to a big value, unless some problems can occurs - * with files without any line */ - time_t tts = lim; - time_t ti = time(NULL); + time_t nwt = lim; /* note : jobs in queue_base are sorted */ if (queue_base != NULL) { if (queue_base->j_line->cl_nextexe < lim) - tts = queue_base->j_line->cl_nextexe; + nwt = queue_base->j_line->cl_nextexe; } - tts = tts - ti; - if (tts < 0) - tts = 0; + if (nwt < now) + nwt = now; /* debug("Time to sleep: %lds", tts); */ - return tts; + return nwt; } diff --git a/database.h b/database.h index d3fb5eb..44ae4e7 100644 --- a/database.h +++ b/database.h @@ -29,7 +29,7 @@ extern void test_jobs(void); extern void wait_chld(void); extern void wait_all(int *counter); -extern time_t time_to_sleep(time_t lim); +extern time_t next_wake_time(time_t lim); extern time_t check_lavg(time_t lim); extern void set_next_exe(struct cl_t *line, char option, int info_fd); #define NO_GOTO 1 /* set_next_exe() : no goto_non_matching() */ diff --git a/doc/en/changes.sgml b/doc/en/changes.sgml index 99ca004..ca8e436 100644 --- a/doc/en/changes.sgml +++ b/doc/en/changes.sgml @@ -17,6 +17,9 @@ A copy of the license is included in gfdl.sgml. @-line can now be run every second (minimum every 10s previously) + + Fixed occasional 1s slippage. This was due to a race condition between when time_to_sleep is run and when we compute how long to sleep for, which could happen in the following second: if that happened we end up sleeping for 1s instead of not sleeping at all. The fix was to replace time_to_sleep() by next_wake_time(). + add From: header to emails. Similarly to other crons, use: "From: %s (fcron)" with %s being either the user the job is run as or the value of MAILFROM. diff --git a/fcron.c b/fcron.c index fb425ed..5e9bee0 100644 --- a/fcron.c +++ b/fcron.c @@ -58,6 +58,7 @@ char foreground = 0; /* set to 1 when we are on foreground, else 0 */ #endif time_t first_sleep = FIRST_SLEEP; +const time_t min_sleep_usec = 1000; /* 1ms -- we won't sleep for less than this time */ time_t save_time = SAVE; char once = 0; /* set to 1 if fcron shall return immediately after running * all jobs that are due at the time when fcron is started */ @@ -191,7 +192,7 @@ xexit(int exit_value) { cf_t *f = NULL; - now = time(NULL); + now = my_time(); /* we save all files now and after having waiting for all * job being executed because we might get a SIGKILL @@ -650,7 +651,8 @@ main(int argc, char **argv) siginterrupt(SIGUSR1, 0); signal(SIGUSR2, sigusr2_handler); siginterrupt(SIGUSR2, 0); - /* we don't want SIGPIPE to kill fcron, and don't need to handle it */ + /* we don't want SIGPIPE to kill fcron, and don't need to handle it as when ignored + * write() on a pipe closed at the other end will return EPIPE */ signal(SIGPIPE, SIG_IGN); #elif HAVE_SIGSET sigset(SIGTERM, sigterm_handler); @@ -772,71 +774,119 @@ main_loop() * sleep, and then test all jobs and execute if needed. */ { time_t save; /* time remaining until next save */ - time_t stime; /* time to sleep until next job - * execution */ + time_t nwt; /* next wake time */ #ifdef HAVE_GETTIMEOFDAY - struct timeval tv; /* we use usec field to get more precision */ + struct timeval now_tv; /* we use usec field to get more precision */ #endif -#ifdef FCRONDYN +#if defined(FCRONDYN) && defined(HAVE_GETTIMEOFDAY) int retcode = 0; + struct timeval sleep_tv; /* we use usec field to get more precision */ #endif debug("Entering main loop"); - now = time(NULL); + now = my_time(); synchronize_dir(".", is_system_startup()); /* synchronize save with jobs execution */ save = now + save_time; - if (serial_num > 0 || once) - stime = first_sleep; - else if ((stime = time_to_sleep(save)) < first_sleep) + if (serial_num > 0 || once) { + nwt = now + first_sleep; + } + else { /* force first execution after first_sleep sec : execution of jobs * during system boot time is not what we want */ - stime = first_sleep; - debug("initial sleep time : %ld", stime); + nwt = tmax(next_wake_time(save), now + first_sleep); + } + debug("initial wake time : %s", ctime(&nwt)); for (;;) { #ifdef HAVE_GETTIMEOFDAY #ifdef FCRONDYN - gettimeofday(&tv, NULL); - tv.tv_sec = (stime > 1) ? stime - 1 : 0; - /* we set tv_usec to slightly more than necessary so as - * we don't wake up too early, in which case we would - * have to sleep again for some time */ - tv.tv_usec = 1001000 - tv.tv_usec; - /* On some systems (BSD, etc), tv_usec cannot be greater than 999999 */ - if (tv.tv_usec > 999999) - tv.tv_usec = 999999; + gettimeofday(&now_tv, NULL); + debug("now gettimeofday tv_sec=%ld, tv_usec=%ld %s", now_tv.tv_sec, + now_tv.tv_usec, ctime(&nwt)); + + if (nwt <= now_tv.tv_sec) { + sleep_tv.tv_sec = 0; + sleep_tv.tv_usec = 0; + } + else { + /* compensate for any time spent in the loop, + * so as we wake up exactly at the beginning of the second */ + sleep_tv.tv_sec = nwt - now_tv.tv_sec - 1; + /* we set tv_usec to slightly more than necessary so as we don't wake + * up too early (e.g. due to rounding to the system clock granularity), + * in which case we would have to go back to sleep again */ + sleep_tv.tv_usec = 1000000 + min_sleep_usec - now_tv.tv_usec; + } + + if (sleep_tv.tv_usec > 999999) { + /* On some systems (BSD, etc), tv_usec cannot be greater than 999999 */ + sleep_tv.tv_usec = 999999; + } + else if (sleep_tv.tv_sec == 0 && sleep_tv.tv_usec < min_sleep_usec) { + /* to prevent any infinite loop, sleep at least 1ms */ + debug + ("We'll sleep for a tiny bit to avoid any risk of infinite loop"); + sleep_tv.tv_usec = min_sleep_usec; + } /* note: read_set is set in socket.c */ - if ((retcode = select(set_max_fd + 1, &read_set, NULL, NULL, &tv)) < 0 + debug("nwt=%s, sleep sec=%ld, usec=%ld", ctime(&nwt), sleep_tv.tv_sec, + sleep_tv.tv_usec); + if ((retcode = + select(set_max_fd + 1, &read_set, NULL, NULL, &sleep_tv)) < 0 && errno != EINTR) die_e("select returned %d", errno); -#else - if (stime > 1) - sleep(stime - 1); - gettimeofday(&tv, NULL); +#else /* FCRONDYN */ + if (nwt - now > 0) { + sleep(nwt - now - 1); + } + gettimeofday(&now_tv, NULL); /* we set tv_usec to slightly more than necessary to avoid * infinite loop */ - usleep(1001000 - tv.tv_usec); + usleep(1000000 + min_sleep_usec - now_tv.tv_usec); #endif /* FCRONDYN */ -#else - sleep(stime); +#else /* HAVE_GETTIMEOFDAY */ + if (nwt - now > 0) { + sleep(nwt - now); + } #endif /* HAVE_GETTIMEOFDAY */ - now = time(NULL); + debug("\n"); + now = my_time(); +#ifdef HAVE_GETTIMEOFDAY + if (debug_opt) { + gettimeofday(&now_tv, NULL); + debug("now=%ld now_tv sec=%ld usec=%ld", now, now_tv.tv_sec, + now_tv.tv_usec); + } +#endif check_signal(); - - debug("\n"); +#ifdef HAVE_GETTIMEOFDAY + if (debug_opt) { + gettimeofday(&now_tv, NULL); + debug("after check_signal: now_tv sec=%ld usec=%ld", now_tv.tv_sec, + now_tv.tv_usec); + } +#endif test_jobs(); +#ifdef HAVE_GETTIMEOFDAY + if (debug_opt) { + gettimeofday(&now_tv, NULL); + debug("after test_jobs: now_tv sec=%ld usec=%ld", now_tv.tv_sec, + now_tv.tv_usec); + } +#endif - while (serial_num > 0 && serial_running < serial_max_running) + while (serial_num > 0 && serial_running < serial_max_running) { run_serial_job(); + } if (once) { explain("Running with option once : exiting ... "); @@ -848,15 +898,29 @@ main_loop() /* save all files */ save_file(NULL); } +#ifdef HAVE_GETTIMEOFDAY + if (debug_opt) { + gettimeofday(&now_tv, NULL); + debug("after save: now_tv sec=%ld usec=%ld", now_tv.tv_sec, + now_tv.tv_usec); + } +#endif -#ifdef FCRONDYN +#if defined(FCRONDYN) && defined(HAVE_GETTIMEOFDAY) /* check if there's a new connection, a new command to answer, etc ... */ /* we do that *after* other checks, to avoid Denial Of Service attacks */ check_socket(retcode); #endif - stime = check_lavg(save); - debug("next sleep time : %ld", stime); + nwt = check_lavg(save); +#ifdef HAVE_GETTIMEOFDAY + if (debug_opt) { + gettimeofday(&now_tv, NULL); + debug("after check_lavg: now_tv sec=%ld usec=%ld", now_tv.tv_sec, + now_tv.tv_usec); + } +#endif + debug("next wake time : %s", ctime(&nwt)); check_signal();