1 /* Copyright 2001-2004 The Apache Software Foundation
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * htcacheclean.c: simple program for cleaning of
18 * the disk cache of the Apache HTTP server
20 * Contributed by Andreas Steinmetz <ast domdv.de>
26 #include "apr_strings.h"
27 #include "apr_file_io.h"
28 #include "apr_file_info.h"
29 #include "apr_pools.h"
31 #include "apr_thread_proc.h"
32 #include "apr_signal.h"
33 #include "apr_getopt.h"
44 /* mod_disk_cache.c extract start */
46 #define DISK_FORMAT_VERSION 0
48 /* Indicates the format of the header struct stored on-disk. */
50 /* The HTTP status code returned for this response. */
52 /* The size of the entity name that follows. */
54 /* The number of times we've cached this entity. */
55 apr_size_t entity_version;
56 /* Miscellaneous time values. */
59 apr_time_t request_time;
60 apr_time_t response_time;
63 #define CACHE_HEADER_SUFFIX ".header"
64 #define CACHE_DATA_SUFFIX ".data"
65 /* mod_disk_cache.c extract end */
67 /* mod_disk_cache.c related definitions start */
70 * this is based on #define AP_TEMPFILE "/aptmpXXXXXX"
72 * the above definition could be reworked into the following:
74 * #define AP_TEMPFILE_PREFIX "/"
75 * #define AP_TEMPFILE_BASE "aptmp"
76 * #define AP_TEMPFILE_SUFFIX "XXXXXX"
77 * #define AP_TEMPFILE_BASELEN strlen(AP_TEMPFILE_BASE)
78 * #define AP_TEMPFILE_NAMELEN strlen(AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX)
79 * #define AP_TEMPFILE AP_TEMPFILE_PREFIX AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX
81 * these definitions would then match the definitions below:
84 #define AP_TEMPFILE_BASE "aptmp"
85 #define AP_TEMPFILE_SUFFIX "XXXXXX"
86 #define AP_TEMPFILE_BASELEN strlen(AP_TEMPFILE_BASE)
87 #define AP_TEMPFILE_NAMELEN strlen(AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX)
89 /* mod_disk_cache.c related definitions end */
91 /* define the following for debugging */
95 * Note: on Linux delays <= 2ms are busy waits without
96 * scheduling, so never use a delay <= 2ms below
99 #define NICE_DELAY 10000 /* usecs */
100 #define DELETE_NICE 10 /* be nice after this amount of delete ops */
101 #define STAT_ATTEMPTS 10 /* maximum stat attempts for a file */
102 #define STAT_DELAY 5000 /* usecs */
103 #define HEADER 1 /* headers file */
104 #define DATA 2 /* body file */
105 #define TEMP 4 /* temporary file */
106 #define HEADERDATA (HEADER|DATA)
107 #define MAXDEVIATION 3600 /* secs */
108 #define SECS_PER_MIN 60
110 #define MBYTE 1048576
112 #define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_LINK)
114 typedef struct _direntry {
115 APR_RING_ENTRY(_direntry) link;
116 int type; /* type of file/fileset: TEMP, HEADER, DATA, HEADERDATA */
117 apr_time_t htime; /* headers file modification time */
118 apr_time_t dtime; /* body file modification time */
119 apr_off_t hsize; /* headers file size */
120 apr_off_t dsize; /* body or temporary file size */
121 char *basename; /* file/fileset base name */
124 typedef struct _entry {
125 APR_RING_ENTRY(_entry) link;
126 apr_time_t expire; /* cache entry expiration time */
127 apr_time_t response_time; /* cache entry time of last response to client */
128 apr_time_t htime; /* headers file modification time */
129 apr_time_t dtime; /* body file modification time */
130 apr_off_t hsize; /* headers file size */
131 apr_off_t dsize; /* body or temporary file size */
132 char *basename; /* fileset base name */
136 static int delcount; /* file deletion count for nice mode */
137 static int interrupted; /* flag: true if SIGINT or SIGTERM occurred */
138 static int realclean; /* flag: true means user said apache is not running */
139 static int verbose; /* flag: true means print statistics */
140 static int benice; /* flag: true means nice mode is activated */
141 static int dryrun; /* flag: true means dry run, don't actually delete
143 static int baselen; /* string length of the path to the proxy directory */
144 static apr_time_t now; /* start time of this processing run */
146 static apr_file_t *errfile; /* stderr file handle */
147 static apr_off_t unsolicited; /* file size summary for deleted unsolicited
149 static APR_RING_ENTRY(_entry) root; /* ENTRY ring anchor */
151 /* short program name as called */
152 static const char *shortname = "htcacheclean";
156 * fake delete for debug purposes
158 #define apr_file_remove fake_file_remove
159 static void fake_file_remove(char *pathname, apr_pool_t *p)
163 /* stat and printing to simulate some deletion system load and to
164 display what would actually have happened */
165 apr_stat(&info, pathname, DIRINFO, p);
166 apr_file_printf(errfile, "would delete %s" APR_EOL_STR, pathname);
171 * called on SIGINT or SIGTERM
173 static void setterm(int unused)
176 apr_file_printf(errfile, "interrupt" APR_EOL_STR);
182 * called in out of memory condition
184 static int oom(int unused)
186 static int called = 0;
188 /* be careful to call exit() only once */
197 * print purge statistics
199 static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max,
200 apr_off_t etotal, apr_off_t entries)
202 char ttype, stype, mtype, utype;
203 apr_off_t tfrag, sfrag, ufrag;
210 tfrag = ((total * 10) / KBYTE) % 10;
212 if (total >= KBYTE) {
214 tfrag = ((total * 10) / KBYTE) % 10;
219 sfrag = ((sum * 10) / KBYTE) % 10;
223 sfrag = ((sum * 10) / KBYTE) % 10;
234 apr_file_printf(errfile, "Statistics:" APR_EOL_STR);
237 ufrag = ((unsolicited * 10) / KBYTE) % 10;
238 unsolicited /= KBYTE;
239 if (unsolicited >= KBYTE) {
241 ufrag = ((unsolicited * 10) / KBYTE) % 10;
242 unsolicited /= KBYTE;
244 if (!unsolicited && !ufrag) {
247 apr_file_printf(errfile, "unsolicited size %d.%d%c" APR_EOL_STR,
248 (int)(unsolicited), (int)(ufrag), utype);
250 apr_file_printf(errfile, "size limit %d.0%c" APR_EOL_STR,
252 apr_file_printf(errfile, "total size was %d.%d%c, total size now "
253 "%d.%d%c" APR_EOL_STR,
254 (int)(total), (int)(tfrag), ttype, (int)(sum),
255 (int)(sfrag), stype);
256 apr_file_printf(errfile, "total entries was %d, total entries now %d"
257 APR_EOL_STR, (int)(etotal), (int)(entries));
261 * delete a single file
263 static void delete_file(char *path, char *basename, apr_pool_t *pool)
272 /* temp pool, otherwise lots of memory could be allocated */
273 apr_pool_create(&p, pool);
274 nextpath = apr_pstrcat(p, path, "/", basename, NULL);
275 apr_file_remove(nextpath, p);
279 if (++delcount >= DELETE_NICE) {
280 apr_sleep(NICE_DELAY);
287 * delete cache file set
289 static void delete_entry(char *path, char *basename, apr_pool_t *pool)
298 /* temp pool, otherwise lots of memory could be allocated */
299 apr_pool_create(&p, pool);
301 nextpath = apr_pstrcat(p, path, "/", basename, CACHE_HEADER_SUFFIX, NULL);
302 apr_file_remove(nextpath, p);
304 nextpath = apr_pstrcat(p, path, "/", basename, CACHE_DATA_SUFFIX, NULL);
305 apr_file_remove(nextpath, p);
311 if (delcount >= DELETE_NICE) {
312 apr_sleep(NICE_DELAY);
319 * walk the cache directory tree
321 static int process_dir(char *path, apr_pool_t *pool)
331 apr_time_t current, deviation;
332 char *nextpath, *base, *ext;
333 APR_RING_ENTRY(_direntry) anchor;
337 disk_cache_info_t disk_info;
339 APR_RING_INIT(&anchor, _direntry, link);
340 apr_pool_create(&p, pool);
341 h = apr_hash_make(p);
344 deviation = MAXDEVIATION * APR_USEC_PER_SEC;
346 if (apr_dir_open(&dir, path, p) != APR_SUCCESS) {
350 while (apr_dir_read(&info, 0, dir) == APR_SUCCESS && !interrupted) {
351 /* skip first two entries which will always be '.' and '..' */
356 d = apr_pcalloc(p, sizeof(DIRENTRY));
357 d->basename = apr_pstrcat(p, path, "/", info.name, NULL);
358 APR_RING_INSERT_TAIL(&anchor, d, _direntry, link);
369 for (d = APR_RING_FIRST(&anchor);
370 !interrupted && d != APR_RING_SENTINEL(&anchor, _direntry, link);
372 n = APR_RING_NEXT(d, link);
373 base = strrchr(d->basename, '/');
377 ext = strchr(base, '.');
379 /* there may be temporary files which may be gone before
380 * processing, always skip these if not in realclean mode
382 if (!ext && !realclean) {
383 if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
384 && strlen(base) == AP_TEMPFILE_NAMELEN) {
389 /* this may look strange but apr_stat() may return errno which
390 * is system dependent and there may be transient failures,
391 * so just blindly retry for a short while
393 retries = STAT_ATTEMPTS;
394 status = APR_SUCCESS;
396 if (status != APR_SUCCESS) {
397 apr_sleep(STAT_DELAY);
399 status = apr_stat(&info, d->basename, DIRINFO, p);
400 } while (status != APR_SUCCESS && !interrupted && --retries);
402 /* what may happen here is that apache did create a file which
403 * we did detect but then does delete the file before we can
404 * get file information, so if we don't get any file information
405 * we will ignore the file in this case
407 if (status != APR_SUCCESS) {
408 if (!realclean && !interrupted) {
414 if (info.filetype == APR_DIR) {
415 if (process_dir(d->basename, pool)) {
421 if (info.filetype != APR_REG) {
426 if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
427 && strlen(base) == AP_TEMPFILE_NAMELEN) {
430 d->dsize = info.size;
431 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
436 if (!strcasecmp(ext, CACHE_HEADER_SUFFIX)) {
439 /* if a user manually creates a '.header' file */
440 if (d->basename[0] == '\0') {
443 t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
448 d->htime = info.mtime;
449 d->hsize = info.size;
450 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
454 if (!strcasecmp(ext, CACHE_DATA_SUFFIX)) {
457 /* if a user manually creates a '.data' file */
458 if (d->basename[0] == '\0') {
461 t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
466 d->dtime = info.mtime;
467 d->dsize = info.size;
468 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
476 path[baselen] = '\0';
478 for (i = apr_hash_first(p, h); i && !interrupted; i = apr_hash_next(i)) {
481 apr_hash_this(i, NULL, NULL, &hvalue);
486 nextpath = apr_pstrcat(p, path, "/", d->basename,
487 CACHE_HEADER_SUFFIX, NULL);
488 if (apr_file_open(&fd, nextpath, APR_READ, APR_OS_DEFAULT,
490 len = sizeof(disk_cache_info_t);
491 if (apr_file_read_full(fd, &disk_info, len,
492 &len) == APR_SUCCESS) {
494 if (disk_info.format == DISK_FORMAT_VERSION) {
495 e = apr_palloc(pool, sizeof(ENTRY));
496 APR_RING_INSERT_TAIL(&root, e, _entry, link);
497 e->expire = disk_info.expire;
498 e->response_time = disk_info.response_time;
503 e->basename = apr_palloc(pool,
504 strlen(d->basename) + 1);
505 strcpy(e->basename, d->basename);
513 /* we have a somehow unreadable headers file which is associated
514 * with a data file. this may be caused by apache currently
515 * rewriting the headers file. thus we may delete the file set
516 * either in realclean mode or if the headers file modification
517 * timestamp is not within a specified positive or negative offset
518 * to the current time.
520 current = apr_time_now();
521 if (realclean || d->htime < current - deviation
522 || d->htime > current + deviation) {
523 delete_entry(path, d->basename, p);
524 unsolicited += d->hsize;
525 unsolicited += d->dsize;
529 /* single data and header files may be deleted either in realclean
530 * mode or if their modification timestamp is not within a
531 * specified positive or negative offset to the current time.
532 * this handling is necessary due to possible race conditions
533 * between apache and this process
536 current = apr_time_now();
537 if (realclean || d->htime < current - deviation
538 || d->htime > current + deviation) {
539 delete_entry(path, d->basename, p);
540 unsolicited += d->hsize;
545 current = apr_time_now();
546 if (realclean || d->dtime < current - deviation
547 || d->dtime > current + deviation) {
548 delete_entry(path, d->basename, p);
549 unsolicited += d->dsize;
553 /* temp files may only be deleted in realclean mode which
554 * is asserted above if a tempfile is in the hash array
557 delete_file(path, d->basename, p);
558 unsolicited += d->dsize;
570 apr_sleep(NICE_DELAY);
581 * purge cache entries
583 static void purge(char *path, apr_pool_t *pool, apr_off_t max)
585 apr_off_t sum, total, entries, etotal;
586 ENTRY *e, *n, *oldest;
591 for (e = APR_RING_FIRST(&root);
592 e != APR_RING_SENTINEL(&root, _entry, link);
593 e = APR_RING_NEXT(e, link)) {
603 printstats(total, sum, max, etotal, entries);
607 /* process all entries with a timestamp in the future, this may
608 * happen if a wrong system time is corrected
611 for (e = APR_RING_FIRST(&root);
612 e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
613 n = APR_RING_NEXT(e, link);
614 if (e->response_time > now || e->htime > now || e->dtime > now) {
615 delete_entry(path, e->basename, pool);
619 APR_RING_REMOVE(e, link);
622 printstats(total, sum, max, etotal, entries);
634 /* process all entries which are expired */
635 for (e = APR_RING_FIRST(&root);
636 e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
637 n = APR_RING_NEXT(e, link);
638 if (e->expire != APR_DATE_BAD && e->expire < now) {
639 delete_entry(path, e->basename, pool);
643 APR_RING_REMOVE(e, link);
646 printstats(total, sum, max, etotal, entries);
658 /* process remaining entries oldest to newest, the check for an empty
659 * ring actually isn't necessary except when the compiler does
660 * corrupt 64bit arithmetic which happened to me once, so better safe
663 while (sum > max && !interrupted && !APR_RING_EMPTY(&root, _entry, link)) {
664 oldest = APR_RING_FIRST(&root);
666 for (e = APR_RING_NEXT(oldest, link);
667 e != APR_RING_SENTINEL(&root, _entry, link);
668 e = APR_RING_NEXT(e, link)) {
669 if (e->dtime < oldest->dtime) {
674 delete_entry(path, oldest->basename, pool);
675 sum -= oldest->hsize;
676 sum -= oldest->dsize;
678 APR_RING_REMOVE(oldest, link);
682 printstats(total, sum, max, etotal, entries);
689 #define NL APR_EOL_STR
690 static void usage(void)
692 apr_file_printf(errfile,
693 "%s -- program for cleaning the disk cache." NL
694 "Usage: %s [-Dvrn] -pPATH -lLIMIT" NL
695 " %s [-ni] -dINTERVAL -pPATH -lLIMIT" NL
698 " -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL
699 " This option is mutually exclusive with the -D, -v and -r" NL
702 " -D Do a dry run and don't delete anything. This option is mutually" NL
703 " exclusive with the -d option." NL
705 " -v Be verbose and print statistics. This option is mutually" NL
706 " exclusive with the -d option." NL
708 " -r Clean thoroughly. This assumes that the Apache web server is " NL
709 " not running. This option is mutually exclusive with the -d" NL
712 " -n Be nice. This causes slower processing in favour of other" NL
715 " -p Specify PATH as the root directory of the disk cache." NL
717 " -l Specify LIMIT as the total disk cache size limit. Attach 'K'" NL
718 " or 'M' to the number for specifying KBytes or MBytes." NL
720 " -i Be intelligent and run only when there was a modification of" NL
721 " the disk cache. This option is only possible together with the" NL
735 int main(int argc, const char * const argv[])
738 apr_time_t current, repeat, delay, previous;
740 apr_pool_t *pool, *instance;
743 int retries, isdaemon, limit_found, intelligent, dowork;
746 char *proxypath, *path;
758 previous = 0; /* avoid compiler warning */
761 if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
764 atexit(apr_terminate);
767 shortname = apr_filepath_name_get(argv[0]);
770 if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
773 apr_pool_abort_set(oom, pool);
774 apr_file_open_stderr(&errfile, pool);
775 apr_signal(SIGINT, setterm);
776 apr_signal(SIGTERM, setterm);
778 apr_getopt_init(&o, pool, argc, argv);
781 status = apr_getopt(o, "iDnvrd:l:L:p:", &opt, &arg);
782 if (status == APR_EOF) {
785 else if (status != APR_SUCCESS) {
830 repeat = apr_atoi64(arg);
831 repeat *= SECS_PER_MIN;
832 repeat *= APR_USEC_PER_SEC;
845 rv = apr_strtoff(&max, arg, &end, 10);
846 if (rv == APR_SUCCESS) {
847 if ((*end == 'K' || *end == 'k') && !end[1]) {
850 else if ((*end == 'M' || *end == 'm') && !end[1]) {
853 else if (*end && /* neither empty nor [Bb] */
854 ((*end != 'B' && *end != 'b') || end[1])) {
858 if (rv != APR_SUCCESS) {
859 apr_file_printf(errfile, "Invalid limit: %s"
860 APR_EOL_STR APR_EOL_STR, arg);
870 proxypath = apr_pstrdup(pool, arg);
871 if (apr_filepath_set(proxypath, pool) != APR_SUCCESS) {
879 if (o->ind != argc) {
883 if (isdaemon && (repeat <= 0 || verbose || realclean || dryrun)) {
887 if (!isdaemon && intelligent) {
891 if (!proxypath || max <= 0) {
895 if (apr_filepath_get(&path, 0, pool) != APR_SUCCESS) {
898 baselen = strlen(path);
902 apr_file_close(errfile);
903 apr_proc_detach(APR_PROC_DETACH_DAEMONIZE);
908 apr_pool_create(&instance, pool);
910 now = apr_time_now();
911 APR_RING_INIT(&root, _entry, link);
916 switch (intelligent) {
922 retries = STAT_ATTEMPTS;
923 status = APR_SUCCESS;
926 if (status != APR_SUCCESS) {
927 apr_sleep(STAT_DELAY);
929 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
930 } while (status != APR_SUCCESS && !interrupted && --retries);
932 if (status == APR_SUCCESS) {
933 previous = info.mtime;
940 retries = STAT_ATTEMPTS;
941 status = APR_SUCCESS;
944 if (status != APR_SUCCESS) {
945 apr_sleep(STAT_DELAY);
947 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
948 } while (status != APR_SUCCESS && !interrupted && --retries);
950 if (status == APR_SUCCESS) {
951 if (previous != info.mtime) {
954 previous = info.mtime;
962 if (dowork && !interrupted) {
963 if (!process_dir(path, instance) && !interrupted) {
964 purge(path, instance, max);
966 else if (!isdaemon && !interrupted) {
967 apr_file_printf(errfile, "An error occurred, cache cleaning "
968 "aborted." APR_EOL_STR);
972 if (intelligent && !interrupted) {
973 retries = STAT_ATTEMPTS;
974 status = APR_SUCCESS;
976 if (status != APR_SUCCESS) {
977 apr_sleep(STAT_DELAY);
979 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
980 } while (status != APR_SUCCESS && !interrupted && --retries);
982 if (status == APR_SUCCESS) {
983 previous = info.mtime;
992 apr_pool_destroy(instance);
994 current = apr_time_now();
998 else if (current - now >= repeat) {
1002 delay = now + repeat - current;
1005 /* we can't sleep the whole delay time here in one piece as this is racy
1006 * with respect to interrupt delivery - think about what happens
1007 * if we have tested for an interrupt, then get scheduled
1008 * before the apr_sleep() call and while waiting for the cpu
1009 * we do get an interrupt
1012 while (delay && !interrupted) {
1013 if (delay > APR_USEC_PER_SEC) {
1014 apr_sleep(APR_USEC_PER_SEC);
1015 delay -= APR_USEC_PER_SEC;
1023 } while (isdaemon && !interrupted);
1025 if (!isdaemon && interrupted) {
1026 apr_file_printf(errfile, "Cache cleaning aborted due to user "
1027 "request." APR_EOL_STR);