1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * htcacheclean.c: simple program for cleaning of
19 * the disk cache of the Apache HTTP server
21 * Contributed by Andreas Steinmetz <ast domdv.de>
27 #include "apr_strings.h"
28 #include "apr_file_io.h"
29 #include "apr_file_info.h"
30 #include "apr_pools.h"
32 #include "apr_thread_proc.h"
33 #include "apr_signal.h"
34 #include "apr_getopt.h"
37 #include "apr_buckets.h"
38 #include "../modules/cache/mod_disk_cache.h"
47 /* define the following for debugging */
51 * Note: on Linux delays <= 2ms are busy waits without
52 * scheduling, so never use a delay <= 2ms below
/*
 * Tuning constants and file-type flags used by the functions below.
 * NICE_DELAY and STAT_DELAY are handed to apr_sleep().
 */
55 #define NICE_DELAY 10000 /* usecs */
56 #define DELETE_NICE 10 /* be nice after this amount of delete ops */
57 #define STAT_ATTEMPTS 10 /* maximum stat attempts for a file */
58 #define STAT_DELAY 5000 /* usecs */
59 #define HEADER 1 /* headers file */
60 #define DATA 2 /* body file */
61 #define TEMP 4 /* temporary file */
/* both the HEADER and the DATA bit set */
62 #define HEADERDATA (HEADER|DATA)
/* tolerated distance between a file's mtime and the current time; converted
 * to microseconds in process_dir() */
63 #define MAXDEVIATION 3600 /* secs */
/* main() multiplies the -d interval by this to turn minutes into seconds */
64 #define SECS_PER_MIN 60
/* 2^30 */
67 #define GBYTE 1073741824
/* field mask passed to apr_stat() for cache files and directories */
69 #define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_LINK)
/*
 * One directory entry as seen by process_dir(). Instances are allocated
 * zeroed from the per-directory pool, appended to a local ring while the
 * directory is read, and later stored in a hash table keyed by basename.
 */
71 typedef struct _direntry {
72 APR_RING_ENTRY(_direntry) link;
73 int type; /* type of file/fileset: TEMP, HEADER, DATA, HEADERDATA */
74 apr_time_t htime; /* headers file modification time */
75 apr_time_t dtime; /* body file modification time */
76 apr_off_t hsize; /* headers file size */
77 apr_off_t dsize; /* body or temporary file size */
78 char *basename; /* file/fileset base name */
/*
 * One cache entry whose headers file could be read by process_dir().
 * Entries are appended to the file-scope ring `root` and are examined and
 * removed from that ring by purge().
 */
81 typedef struct _entry {
82 APR_RING_ENTRY(_entry) link;
83 apr_time_t expire; /* cache entry expiration time */
84 apr_time_t response_time; /* cache entry time of last response to client */
85 apr_time_t htime; /* headers file modification time */
86 apr_time_t dtime; /* body file modification time */
87 apr_off_t hsize; /* headers file size */
88 apr_off_t dsize; /* body or temporary file size */
89 char *basename; /* fileset base name */
/*
 * File-scope state shared by all functions in this file.
 * `now` and `root` are re-initialised by main() at the start of every
 * cleaning run; `baselen` is set once from the length of the cache path.
 */
93 static int delcount; /* file deletion count for nice mode */
94 static int interrupted; /* flag: true if SIGINT or SIGTERM occurred */
95 static int realclean; /* flag: true means user said apache is not running */
96 static int verbose; /* flag: true means print statistics */
97 static int benice; /* flag: true means nice mode is activated */
98 static int dryrun; /* flag: true means dry run, don't actually delete
100 static int deldirs; /* flag: true means directories should be deleted */
101 static int baselen; /* string length of the path to the proxy directory */
102 static apr_time_t now; /* start time of this processing run */
104 static apr_file_t *errfile; /* stderr file handle */
105 static apr_off_t unsolicited; /* file size summary for deleted unsolicited
107 static APR_RING_ENTRY(_entry) root; /* ENTRY ring anchor */
/* main() replaces the default below with the file name part of argv[0] */
109 /* short program name as called */
110 static const char *shortname = "htcacheclean";
114 * fake delete for debug purposes
/*
 * The macro below redirects every apr_file_remove() call that follows it in
 * this file to fake_file_remove().
 * NOTE(review): the condition under which this override is compiled in is
 * not visible in this excerpt - presumably a debug-only build; confirm.
 */
116 #define apr_file_remove fake_file_remove
/*
 * Stand-in for apr_file_remove(): stats `pathname` and writes a
 * "would delete" line to errfile. No removal call is visible in this
 * excerpt.
 *
 * pathname - file that would have been removed
 * p        - pool handed to apr_stat()
 */
117 static void fake_file_remove(char *pathname, apr_pool_t *p)
121 /* stat and printing to simulate some deletion system load and to
122 display what would actually have happened */
123 apr_stat(&info, pathname, DIRINFO, p);
124 apr_file_printf(errfile, "would delete %s" APR_EOL_STR, pathname);
129 * called on SIGINT or SIGTERM
/*
 * Signal handler; main() installs it for SIGINT and SIGTERM through
 * apr_signal(). The parameter is not referenced in the visible lines.
 * NOTE(review): presumably this also raises the `interrupted` flag that the
 * processing loops test - the assignment is not visible in this excerpt.
 */
131 static void setterm(int unused)
134 apr_file_printf(errfile, "interrupt" APR_EOL_STR);
140 * called in out of memory condition
/*
 * Abort callback; main() registers it on the top-level pool with
 * apr_pool_abort_set(). The function-static `called` flag exists so that the
 * exit path is taken only once even if the callback fires again.
 * NOTE(review): the message printed and the value returned are not visible
 * in this excerpt.
 */
142 static int oom(int unused)
144 static int called = 0;
146 /* be careful to call exit() only once */
155 * print purge statistics
/*
 * Writes the statistics of a cleaning run to errfile.
 *
 * total   - cache size before purging ("total size was")
 * sum     - cache size after purging ("total size now")
 * max     - size limit; presumably the value printed as "size limit" (the
 *           argument line of that printf is not visible here)
 * etotal  - number of entries before purging
 * entries - number of entries after purging
 *
 * A size is divided by KBYTE, and once more if the result is still
 * >= KBYTE. The matching *frag variable keeps one decimal digit of the value
 * and the matching *type character is printed as the unit suffix.
 */
157 static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max,
158 apr_off_t etotal, apr_off_t entries)
160 char ttype, stype, mtype, utype;
161 apr_off_t tfrag, sfrag, ufrag;
168 tfrag = ((total * 10) / KBYTE) % 10;
170 if (total >= KBYTE) {
172 tfrag = ((total * 10) / KBYTE) % 10;
177 sfrag = ((sum * 10) / KBYTE) % 10;
181 sfrag = ((sum * 10) / KBYTE) % 10;
192 apr_file_printf(errfile, "Statistics:" APR_EOL_STR);
/* NOTE(review): the file-scope counter `unsolicited` is scaled in place
 * below, so its original value is no longer available after this call */
195 ufrag = ((unsolicited * 10) / KBYTE) % 10;
196 unsolicited /= KBYTE;
197 if (unsolicited >= KBYTE) {
199 ufrag = ((unsolicited * 10) / KBYTE) % 10;
200 unsolicited /= KBYTE;
/* both the integer part and the fraction are zero */
202 if (!unsolicited && !ufrag) {
205 apr_file_printf(errfile, "unsolicited size %d.%d%c" APR_EOL_STR,
206 (int)(unsolicited), (int)(ufrag), utype);
208 apr_file_printf(errfile, "size limit %d.0%c" APR_EOL_STR,
210 apr_file_printf(errfile, "total size was %d.%d%c, total size now "
211 "%d.%d%c" APR_EOL_STR,
212 (int)(total), (int)(tfrag), ttype, (int)(sum),
213 (int)(sfrag), stype);
214 apr_file_printf(errfile, "total entries was %d, total entries now %d"
215 APR_EOL_STR, (int)(etotal), (int)(entries));
219 * delete a single file
/*
 * Removes the file named path "/" basename.
 *
 * path     - directory part of the file name
 * basename - file name inside `path`
 * pool     - parent of the short-lived sub-pool used to build the full name
 *
 * delcount is incremented here; once it reaches DELETE_NICE the function
 * sleeps for NICE_DELAY microseconds.
 * NOTE(review): any guards around the removal and the sleep (dry run, nice
 * mode) and the sub-pool cleanup are not visible in this excerpt.
 */
221 static void delete_file(char *path, char *basename, apr_pool_t *pool)
230 /* temp pool, otherwise lots of memory could be allocated */
231 apr_pool_create(&p, pool);
232 nextpath = apr_pstrcat(p, path, "/", basename, NULL);
233 apr_file_remove(nextpath, p);
237 if (++delcount >= DELETE_NICE) {
238 apr_sleep(NICE_DELAY);
245 * delete cache file set
/*
 * Removes both files of a cache file set: path "/" basename with
 * CACHE_HEADER_SUFFIX appended, then the same name with CACHE_DATA_SUFFIX.
 * The results of the two apr_file_remove() calls are not checked.
 *
 * path     - directory part of the file names
 * basename - file set name without suffix
 * pool     - parent of the short-lived sub-pool used to build the names
 *
 * Sleeps for NICE_DELAY microseconds once delcount has reached DELETE_NICE.
 * NOTE(review): how delcount is advanced for the two removals is not visible
 * in this excerpt.
 */
247 static void delete_entry(char *path, char *basename, apr_pool_t *pool)
256 /* temp pool, otherwise lots of memory could be allocated */
257 apr_pool_create(&p, pool);
259 nextpath = apr_pstrcat(p, path, "/", basename, CACHE_HEADER_SUFFIX, NULL);
260 apr_file_remove(nextpath, p);
262 nextpath = apr_pstrcat(p, path, "/", basename, CACHE_DATA_SUFFIX, NULL);
263 apr_file_remove(nextpath, p);
269 if (delcount >= DELETE_NICE) {
270 apr_sleep(NICE_DELAY);
277 * walk the cache directory tree
/*
 * Scans one cache directory and recurses into its sub-directories.
 *
 * The work is done in three passes:
 *   1. read every directory entry and queue it on a local ring;
 *   2. stat each queued entry, recurse into directories, and file temp,
 *      header and data files in a hash table keyed by basename;
 *   3. walk the hash table: a readable headers file in the current disk
 *      format becomes an ENTRY on the file-scope ring `root`; file sets
 *      that cannot be used are deleted and their sizes are added to
 *      `unsolicited`, either in realclean mode or when their mtime is
 *      further than MAXDEVIATION seconds from the current time.
 *
 * path - directory to scan; the buffer is cut back to `baselen` characters
 *        before pass 3
 * pool - parent pool; per-directory data lives in a sub-pool, while ENTRY
 *        records and their names are allocated from `pool` itself
 *
 * Callers treat a zero return value as success.
 * NOTE(review): the return statements and the dispatch on the entry type in
 * pass 3 are not visible in this excerpt.
 */
279 static int process_dir(char *path, apr_pool_t *pool)
289 apr_time_t current, deviation;
290 char *nextpath, *base, *ext, *orig_basename;
291 APR_RING_ENTRY(_direntry) anchor;
295 disk_cache_info_t disk_info;
297 APR_RING_INIT(&anchor, _direntry, link);
298 apr_pool_create(&p, pool);
299 h = apr_hash_make(p);
/* apr_time_t values are compared below, so express the window in usecs */
302 deviation = MAXDEVIATION * APR_USEC_PER_SEC;
304 if (apr_dir_open(&dir, path, p) != APR_SUCCESS) {
/* pass 1: queue every entry except "." and ".."; at this point basename
 * holds the full path of the entry */
308 while (apr_dir_read(&info, 0, dir) == APR_SUCCESS && !interrupted) {
309 if (!strcmp(info.name, ".") || !strcmp(info.name, "..")) {
312 d = apr_pcalloc(p, sizeof(DIRENTRY));
313 d->basename = apr_pstrcat(p, path, "/", info.name, NULL);
314 APR_RING_INSERT_TAIL(&anchor, d, _direntry, link);
/* pass 2: classify each queued entry */
325 for (d = APR_RING_FIRST(&anchor);
326 !interrupted && d != APR_RING_SENTINEL(&anchor, _direntry, link);
328 n = APR_RING_NEXT(d, link);
329 base = strrchr(d->basename, '/');
333 ext = strchr(base, '.');
335 /* there may be temporary files which may be gone before
336 * processing, always skip these if not in realclean mode
338 if (!ext && !realclean) {
339 if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
340 && strlen(base) == AP_TEMPFILE_NAMELEN) {
345 /* this may look strange but apr_stat() may return errno which
346 * is system dependent and there may be transient failures,
347 * so just blindly retry for a short while
349 retries = STAT_ATTEMPTS;
350 status = APR_SUCCESS;
352 if (status != APR_SUCCESS) {
353 apr_sleep(STAT_DELAY);
355 status = apr_stat(&info, d->basename, DIRINFO, p);
356 } while (status != APR_SUCCESS && !interrupted && --retries);
358 /* what may happen here is that apache did create a file which
359 * we did detect but then does delete the file before we can
360 * get file information, so if we don't get any file information
361 * we will ignore the file in this case
363 if (status != APR_SUCCESS) {
364 if (!realclean && !interrupted) {
370 if (info.filetype == APR_DIR) {
371 /* Make a copy of the basename, as process_dir modifies it */
372 orig_basename = apr_pstrdup(pool, d->basename);
373 if (process_dir(d->basename, pool)) {
377 /* If asked to delete dirs, do so now. We don't care if it fails.
378 * If it fails, it likely means there was something else there.
380 if (deldirs && !dryrun) {
381 apr_dir_remove(orig_basename, pool);
386 if (info.filetype != APR_REG) {
391 if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
392 && strlen(base) == AP_TEMPFILE_NAMELEN) {
395 d->dsize = info.size;
396 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
401 if (!strcasecmp(ext, CACHE_HEADER_SUFFIX)) {
404 /* if a user manually creates a '.header' file */
405 if (d->basename[0] == '\0') {
/* look for an entry already filed under the same basename */
408 t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
413 d->htime = info.mtime;
414 d->hsize = info.size;
415 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
419 if (!strcasecmp(ext, CACHE_DATA_SUFFIX)) {
422 /* if a user manually creates a '.data' file */
423 if (d->basename[0] == '\0') {
/* look for an entry already filed under the same basename */
426 t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
431 d->dtime = info.mtime;
432 d->dsize = info.size;
433 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
/* cut the caller's buffer back to the length of the cache root path; this
 * is why the recursion above works on a copy of the directory name */
441 path[baselen] = '\0';
/* pass 3: evaluate every file set collected in the hash table */
443 for (i = apr_hash_first(p, h); i && !interrupted; i = apr_hash_next(i)) {
447 apr_hash_this(i, NULL, NULL, &hvalue);
452 nextpath = apr_pstrcat(p, path, "/", d->basename,
453 CACHE_HEADER_SUFFIX, NULL);
454 if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY,
455 APR_OS_DEFAULT, p) == APR_SUCCESS) {
/* the headers file starts with a format word that selects the layout */
456 len = sizeof(format);
457 if (apr_file_read_full(fd, &format, len,
458 &len) == APR_SUCCESS) {
459 if (format == DISK_FORMAT_VERSION) {
/* rewind: the format word was consumed above and disk_cache_info_t is read
 * from the start of the file */
460 apr_off_t offset = 0;
462 apr_file_seek(fd, APR_SET, &offset);
464 len = sizeof(disk_cache_info_t);
466 if (apr_file_read_full(fd, &disk_info, len,
467 &len) == APR_SUCCESS) {
/* allocated from the caller's pool, not the per-directory sub-pool */
469 e = apr_palloc(pool, sizeof(ENTRY));
470 APR_RING_INSERT_TAIL(&root, e, _entry, link);
471 e->expire = disk_info.expire;
472 e->response_time = disk_info.response_time;
477 e->basename = apr_pstrdup(pool, d->basename);
484 else if (format == VARY_FORMAT_VERSION) {
485 /* This must be a URL that added Vary headers later,
486 * so kill the orphaned .data file
489 apr_file_remove(apr_pstrcat(p, path, "/", d->basename,
490 CACHE_DATA_SUFFIX, NULL),
500 /* we have a somehow unreadable headers file which is associated
501 * with a data file. this may be caused by apache currently
502 * rewriting the headers file. thus we may delete the file set
503 * either in realclean mode or if the headers file modification
504 * timestamp is not within a specified positive or negative offset
505 * to the current time.
507 current = apr_time_now();
508 if (realclean || d->htime < current - deviation
509 || d->htime > current + deviation) {
510 delete_entry(path, d->basename, p);
511 unsolicited += d->hsize;
512 unsolicited += d->dsize;
516 /* single data and header files may be deleted either in realclean
517 * mode or if their modification timestamp is not within a
518 * specified positive or negative offset to the current time.
519 * this handling is necessary due to possible race conditions
520 * between apache and this process
523 current = apr_time_now();
524 nextpath = apr_pstrcat(p, path, "/", d->basename,
525 CACHE_HEADER_SUFFIX, NULL);
526 if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY,
527 APR_OS_DEFAULT, p) == APR_SUCCESS) {
528 len = sizeof(format);
529 if (apr_file_read_full(fd, &format, len,
530 &len) == APR_SUCCESS) {
531 if (format == VARY_FORMAT_VERSION) {
534 len = sizeof(expires);
536 if (apr_file_read_full(fd, &expires, len,
537 &len) == APR_SUCCESS) {
541 if (expires < current) {
542 delete_entry(path, d->basename, p);
551 if (realclean || d->htime < current - deviation
552 || d->htime > current + deviation) {
553 delete_entry(path, d->basename, p);
554 unsolicited += d->hsize;
559 current = apr_time_now();
560 if (realclean || d->dtime < current - deviation
561 || d->dtime > current + deviation) {
562 delete_entry(path, d->basename, p);
563 unsolicited += d->dsize;
567 /* temp files may only be deleted in realclean mode which
568 * is asserted above if a tempfile is in the hash array
571 delete_file(path, d->basename, p);
572 unsolicited += d->dsize;
584 apr_sleep(NICE_DELAY);
595 * purge cache entries
/*
 * Deletes cache entries from the file-scope ring `root`.
 *
 * The passes visible here run in this order:
 *   1. entries whose response time or file modification times lie after
 *      `now` are deleted;
 *   2. entries whose expire value is not APR_DATE_BAD and lies before `now`
 *      are deleted;
 *   3. while the remaining size `sum` exceeds `max`, the entry with the
 *      smallest dtime is deleted and its header and body sizes are
 *      subtracted from `sum`.
 * Every deleted entry is also unlinked from the ring. All loops that delete
 * stop as soon as `interrupted` is set. printstats() is called at several
 * points with the running totals.
 *
 * path - cache root directory handed to delete_entry()
 * pool - pool handed to delete_entry()
 * max  - size limit that `sum` is compared against
 *
 * NOTE(review): how sum/total/entries/etotal are initialised and the early
 * exits between the passes are not visible in this excerpt.
 */
597 static void purge(char *path, apr_pool_t *pool, apr_off_t max)
599 apr_off_t sum, total, entries, etotal;
600 ENTRY *e, *n, *oldest;
605 for (e = APR_RING_FIRST(&root);
606 e != APR_RING_SENTINEL(&root, _entry, link);
607 e = APR_RING_NEXT(e, link)) {
617 printstats(total, sum, max, etotal, entries);
621 /* process all entries with a timestamp in the future, this may
622 * happen if a wrong system time is corrected
625 for (e = APR_RING_FIRST(&root);
626 e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
627 n = APR_RING_NEXT(e, link);
628 if (e->response_time > now || e->htime > now || e->dtime > now) {
629 delete_entry(path, e->basename, pool);
633 APR_RING_REMOVE(e, link);
636 printstats(total, sum, max, etotal, entries);
648 /* process all entries which are expired */
649 for (e = APR_RING_FIRST(&root);
650 e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
/* the successor is fetched before e may be unlinked below */
651 n = APR_RING_NEXT(e, link);
652 if (e->expire != APR_DATE_BAD && e->expire < now) {
653 delete_entry(path, e->basename, pool);
657 APR_RING_REMOVE(e, link);
660 printstats(total, sum, max, etotal, entries);
672 /* process remaining entries oldest to newest, the check for an emtpy
673 * ring actually isn't necessary except when the compiler does
674 * corrupt 64bit arithmetics which happend to me once, so better safe
677 while (sum > max && !interrupted && !APR_RING_EMPTY(&root, _entry, link)) {
678 oldest = APR_RING_FIRST(&root);
680 for (e = APR_RING_NEXT(oldest, link);
681 e != APR_RING_SENTINEL(&root, _entry, link);
682 e = APR_RING_NEXT(e, link)) {
683 if (e->dtime < oldest->dtime) {
688 delete_entry(path, oldest->basename, pool);
689 sum -= oldest->hsize;
690 sum -= oldest->dsize;
692 APR_RING_REMOVE(oldest, link);
696 printstats(total, sum, max, etotal, entries);
/* NL is shorthand for the platform line ending used in the help text */
703 #define NL APR_EOL_STR
/*
 * Writes an error message and the command line help to errfile.
 *
 * error - text printed after the program name in the "error:" line
 *
 * NOTE(review): the "error:" line ends in a literal "\n" while every other
 * line uses NL / APR_EOL_STR. Whether the function terminates the program
 * afterwards is not visible in this excerpt.
 */
704 static void usage(const char *error)
707 apr_file_printf(errfile, "%s error: %s\n", shortname, error);
709 apr_file_printf(errfile,
710 "%s -- program for cleaning the disk cache." NL
711 "Usage: %s [-Dvtrn] -pPATH -lLIMIT" NL
712 " %s [-nti] -dINTERVAL -pPATH -lLIMIT" NL
715 " -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL
716 " This option is mutually exclusive with the -D, -v and -r" NL
719 " -D Do a dry run and don't delete anything. This option is mutually" NL
720 " exclusive with the -d option." NL
722 " -v Be verbose and print statistics. This option is mutually" NL
723 " exclusive with the -d option." NL
725 " -r Clean thoroughly. This assumes that the Apache web server is " NL
726 " not running. This option is mutually exclusive with the -d" NL
727 " option and implies -t." NL
729 " -n Be nice. This causes slower processing in favour of other" NL
732 " -t Delete all empty directories. By default only cache files are" NL
733 " removed, however with some configurations the large number of" NL
734 " directories created may require attention." NL
736 " -p Specify PATH as the root directory of the disk cache." NL
738 " -l Specify LIMIT as the total disk cache size limit. Attach 'K'" NL
739 " or 'M' to the number for specifying KBytes or MBytes." NL
741 " -i Be intelligent and run only when there was a modification of" NL
742 " the disk cache. This option is only possible together with the" NL
/*
 * Program entry point.
 *
 * Initialises APR, parses the command line, changes into the cache
 * directory given with -p and then runs process_dir() followed by purge(),
 * either once or, with -d, repeatedly every INTERVAL minutes until
 * interrupted. With -i a run is only started when the modification time of
 * the cache directory differs from the one recorded after the previous run.
 *
 * NOTE(review): the option switch, several declarations and the return
 * statements are not visible in this excerpt.
 */
756 int main(int argc, const char * const argv[])
759 apr_time_t current, repeat, delay, previous;
761 apr_pool_t *pool, *instance;
764 int retries, isdaemon, limit_found, intelligent, dowork;
767 char *proxypath, *path;
781 previous = 0; /* avoid compiler warning */
784 if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
787 atexit(apr_terminate);
/* report errors under the name the program was invoked with */
790 shortname = apr_filepath_name_get(argv[0]);
793 if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
796 apr_pool_abort_set(oom, pool);
797 apr_file_open_stderr(&errfile, pool);
798 apr_signal(SIGINT, setterm);
799 apr_signal(SIGTERM, setterm);
801 apr_getopt_init(&o, pool, argc, argv);
804 status = apr_getopt(o, "iDnvrtd:l:L:p:", &opt, &arg);
805 if (status == APR_EOF) {
808 else if (status != APR_SUCCESS) {
815 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
822 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
829 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
836 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
843 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
850 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
858 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
/* INTERVAL is given in minutes; keep it in microseconds like apr_time_t */
861 repeat = apr_atoi64(arg);
862 repeat *= SECS_PER_MIN;
863 repeat *= APR_USEC_PER_SEC;
868 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
/* LIMIT: a number followed by at most one unit character (K, M, G or B,
 * either case) and nothing else */
876 rv = apr_strtoff(&max, arg, &end, 10);
877 if (rv == APR_SUCCESS) {
878 if ((*end == 'K' || *end == 'k') && !end[1]) {
881 else if ((*end == 'M' || *end == 'm') && !end[1]) {
884 else if ((*end == 'G' || *end == 'g') && !end[1]) {
887 else if (*end && /* neither empty nor [Bb] */
888 ((*end != 'B' && *end != 'b') || end[1])) {
892 if (rv != APR_SUCCESS) {
893 usage(apr_psprintf(pool, "Invalid limit: %s"
894 APR_EOL_STR APR_EOL_STR, arg));
901 usage(apr_psprintf(pool, "The option '%c' cannot be specified more than once", (int)opt));
/* make the cache directory the current directory of the process */
903 proxypath = apr_pstrdup(pool, arg);
904 if ((status = apr_filepath_set(proxypath, pool)) != APR_SUCCESS) {
905 usage(apr_psprintf(pool, "Could not set filepath to '%s': %s",
906 proxypath, apr_strerror(status, errmsg, sizeof errmsg)));
917 if (o->ind != argc) {
918 usage("Additional parameters specified on the command line, aborting");
921 if (isdaemon && repeat <= 0) {
922 usage("Option -d must be greater than zero");
925 if (isdaemon && (verbose || realclean || dryrun)) {
926 usage("Option -d cannot be used with -v, -r or -D");
929 if (!isdaemon && intelligent) {
930 usage("Option -i cannot be used without -d");
934 usage("Option -p must be specified");
938 usage("Option -l must be greater than zero");
/* capture the result so that the message reports the failure of this call
 * and not a stale status left over from option parsing */
941 if ((status = apr_filepath_get(&path, 0, pool)) != APR_SUCCESS) {
942 usage(apr_psprintf(pool, "Could not get the filepath: %s",
943 apr_strerror(status, errmsg, sizeof errmsg)));
/* process_dir() truncates its path buffer back to this length */
945 baselen = strlen(path);
949 apr_file_close(errfile);
950 apr_proc_detach(APR_PROC_DETACH_DAEMONIZE);
/* everything allocated during one run lives in `instance` and is released
 * when that pool is destroyed at the end of the run */
955 apr_pool_create(&instance, pool);
957 now = apr_time_now();
958 APR_RING_INIT(&root, _entry, link);
963 switch (intelligent) {
969 retries = STAT_ATTEMPTS;
970 status = APR_SUCCESS;
973 if (status != APR_SUCCESS) {
974 apr_sleep(STAT_DELAY);
976 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
977 } while (status != APR_SUCCESS && !interrupted && --retries);
979 if (status == APR_SUCCESS) {
980 previous = info.mtime;
987 retries = STAT_ATTEMPTS;
988 status = APR_SUCCESS;
991 if (status != APR_SUCCESS) {
992 apr_sleep(STAT_DELAY);
994 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
995 } while (status != APR_SUCCESS && !interrupted && --retries);
997 if (status == APR_SUCCESS) {
998 if (previous != info.mtime) {
1001 previous = info.mtime;
1009 if (dowork && !interrupted) {
1010 if (!process_dir(path, instance) && !interrupted) {
1011 purge(path, instance, max);
1013 else if (!isdaemon && !interrupted) {
1014 apr_file_printf(errfile, "An error occurred, cache cleaning "
1015 "aborted." APR_EOL_STR);
/* remember the directory mtime as it is after this run's own changes */
1019 if (intelligent && !interrupted) {
1020 retries = STAT_ATTEMPTS;
1021 status = APR_SUCCESS;
1023 if (status != APR_SUCCESS) {
1024 apr_sleep(STAT_DELAY);
1026 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
1027 } while (status != APR_SUCCESS && !interrupted && --retries);
1029 if (status == APR_SUCCESS) {
1030 previous = info.mtime;
1039 apr_pool_destroy(instance);
1041 current = apr_time_now();
1042 if (current < now) {
1045 else if (current - now >= repeat) {
1049 delay = now + repeat - current;
1052 /* we can't sleep the whole delay time here apiece as this is racy
1053 * with respect to interrupt delivery - think about what happens
1054 * if we have tested for an interrupt, then get scheduled
1055 * before the apr_sleep() call and while waiting for the cpu
1056 * we do get an interrupt
1059 while (delay && !interrupted) {
1060 if (delay > APR_USEC_PER_SEC) {
1061 apr_sleep(APR_USEC_PER_SEC);
1062 delay -= APR_USEC_PER_SEC;
1070 } while (isdaemon && !interrupted);
1072 if (!isdaemon && interrupted) {
1073 apr_file_printf(errfile, "Cache cleaning aborted due to user "
1074 "request." APR_EOL_STR);