/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * htcacheclean.c: simple program for cleaning of
 * the disk cache of the Apache HTTP server
 *
 * Contributed by Andreas Steinmetz <ast domdv.de>
 */
#include <signal.h>
#include <stdlib.h>
#include <string.h>

#include "apr.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_thread_proc.h"
#include "apr_signal.h"
#include "apr_getopt.h"
#include "apr_md5.h"
#include "apr_ring.h"
#include "apr_date.h"
#include "apr_buckets.h"

#include "../modules/cache/cache_common.h"
#include "../modules/cache/cache_disk_common.h"

#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif
/* define the following for debugging */
#undef DEBUG

/*
 * Note: on Linux delays <= 2ms are busy waits without
 *       scheduling, so never use a delay <= 2ms below
 */

#define NICE_DELAY    10000     /* usecs */
#define DELETE_NICE   10        /* be nice after this amount of delete ops */
#define STAT_ATTEMPTS 10        /* maximum stat attempts for a file */
#define STAT_DELAY    5000      /* usecs */
#define HEADER        1         /* headers file */
#define DATA          2         /* body file */
#define TEMP          4         /* temporary file */
#define HEADERDATA    (HEADER|DATA)
#define MAXDEVIATION  3600      /* secs */
#define SECS_PER_MIN  60
#define KBYTE         1024      /* divisor used when scaling sizes for display */
#define MBYTE         1048576
#define GBYTE         1073741824

/* file information requested from apr_stat() while walking the cache */
#define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_LINK)
74 typedef struct _direntry {
75 APR_RING_ENTRY(_direntry) link;
76 int type; /* type of file/fileset: TEMP, HEADER, DATA, HEADERDATA */
77 apr_time_t htime; /* headers file modification time */
78 apr_time_t dtime; /* body file modification time */
79 apr_off_t hsize; /* headers file size */
80 apr_off_t dsize; /* body or temporary file size */
81 char *basename; /* file/fileset base name */
84 typedef struct _entry {
85 APR_RING_ENTRY(_entry) link;
86 apr_time_t expire; /* cache entry exiration time */
87 apr_time_t response_time; /* cache entry time of last response to client */
88 apr_time_t htime; /* headers file modification time */
89 apr_time_t dtime; /* body file modification time */
90 apr_off_t hsize; /* headers file size */
91 apr_off_t dsize; /* body or temporary file size */
92 char *basename; /* fileset base name */
96 static int delcount; /* file deletion count for nice mode */
97 static int interrupted; /* flag: true if SIGINT or SIGTERM occurred */
98 static int realclean; /* flag: true means user said apache is not running */
99 static int verbose; /* flag: true means print statistics */
100 static int benice; /* flag: true means nice mode is activated */
101 static int dryrun; /* flag: true means dry run, don't actually delete
103 static int deldirs; /* flag: true means directories should be deleted */
104 static int listurls; /* flag: true means list cached urls */
105 static int listextended;/* flag: true means list cached urls */
106 static int baselen; /* string length of the path to the proxy directory */
107 static apr_time_t now; /* start time of this processing run */
109 static apr_file_t *errfile; /* stderr file handle */
110 static apr_file_t *outfile; /* stdout file handle */
111 static apr_off_t unsolicited; /* file size summary for deleted unsolicited
113 static APR_RING_ENTRY(_entry) root; /* ENTRY ring anchor */
115 /* short program name as called */
116 static const char *shortname = "htcacheclean";
118 /* what did we clean? */
#ifdef DEBUG
/*
 * fake delete for debug purposes
 *
 * Replaces apr_file_remove() in debug builds only.  The signature mirrors
 * the real call (const path, apr_status_t result) because callers in this
 * file pass const strings and test the returned status.
 *
 * Returns the status of the apr_stat() probe, so a file that cannot be
 * stat'ed is reported to the caller as a failed removal.
 */
#define apr_file_remove fake_file_remove
static apr_status_t fake_file_remove(const char *pathname, apr_pool_t *p)
{
    apr_finfo_t info;
    apr_status_t rv;

    /* stat and printing to simulate some deletion system load and to
       display what would actually have happened */
    rv = apr_stat(&info, pathname, DIRINFO, p);
    apr_file_printf(errfile, "would delete %s" APR_EOL_STR, pathname);

    return rv;
}
#endif
151 * called on SIGINT or SIGTERM
153 static void setterm(int unused)
156 apr_file_printf(errfile, "interrupt" APR_EOL_STR);
162 * called in out of memory condition
164 static int oom(int unused)
166 static int called = 0;
168 /* be careful to call exit() only once */
177 * print purge statistics
179 static void printstats(char *path, struct stats *s)
181 char ttype, stype, mtype, utype;
182 apr_off_t tfrag, sfrag, ufrag;
189 tfrag = ((s->total * 10) / KBYTE) % 10;
191 if (s->total >= KBYTE) {
193 tfrag = ((s->total * 10) / KBYTE) % 10;
198 sfrag = ((s->sum * 10) / KBYTE) % 10;
200 if (s->sum >= KBYTE) {
202 sfrag = ((s->sum * 10) / KBYTE) % 10;
208 if (s->max >= KBYTE) {
213 apr_file_printf(errfile, "Cleaned %s. Statistics:" APR_EOL_STR, path);
216 ufrag = ((unsolicited * 10) / KBYTE) % 10;
217 unsolicited /= KBYTE;
218 if (unsolicited >= KBYTE) {
220 ufrag = ((unsolicited * 10) / KBYTE) % 10;
221 unsolicited /= KBYTE;
223 if (!unsolicited && !ufrag) {
226 apr_file_printf(errfile, "unsolicited size %d.%d%c" APR_EOL_STR,
227 (int)(unsolicited), (int)(ufrag), utype);
229 apr_file_printf(errfile, "size limit %" APR_OFF_T_FMT ".0%c" APR_EOL_STR,
231 apr_file_printf(errfile, "inodes limit %" APR_OFF_T_FMT APR_EOL_STR,
235 "total size was %" APR_OFF_T_FMT ".%" APR_OFF_T_FMT "%c, total size now "
236 "%" APR_OFF_T_FMT ".%" APR_OFF_T_FMT "%c" APR_EOL_STR, s->total,
237 tfrag, ttype, s->sum, sfrag, stype);
238 apr_file_printf(errfile, "total inodes was %" APR_OFF_T_FMT
239 ", total %sinodes now "
240 "%" APR_OFF_T_FMT APR_EOL_STR, s->ntotal, dryrun && deldirs ? "estimated "
244 "total entries was %" APR_OFF_T_FMT ", total entries now %" APR_OFF_T_FMT
245 APR_EOL_STR, s->etotal, s->entries);
248 "%" APR_OFF_T_FMT " entries deleted (%" APR_OFF_T_FMT " from future, %"
249 APR_OFF_T_FMT " expired, %" APR_OFF_T_FMT " fresh)" APR_EOL_STR,
250 (s->etotal - s->entries), s->dfuture, s->dexpired, s->dfresh);
254 * Round the value up to the given threshold.
256 static apr_size_t round_up(apr_size_t val, apr_off_t round) {
258 return (apr_size_t)(((val + round - 1) / round) * round);
264 * delete parent directories
266 static void delete_parent(const char *path, const char *basename,
267 apr_off_t *nodes, apr_pool_t *pool)
269 char *nextpath, *name;
272 /* temp pool, otherwise lots of memory could be allocated */
273 apr_pool_create(&p, pool);
274 name = apr_pstrdup(p, basename);
276 /* If asked to delete dirs, do so now. We don't care if it fails.
277 * If it fails, it likely means there was something else there.
279 if (deldirs && !dryrun) {
281 char *end = strrchr(name, '/');
285 /* remove the directory */
286 nextpath = apr_pstrcat(p, path, "/", name, NULL);
287 if (!apr_dir_remove(nextpath, p)) {
290 /* vary directory found? */
291 vary = strstr(name, CACHE_VDIR_SUFFIX);
292 if (vary && !vary[sizeof(CACHE_VDIR_SUFFIX) - 1]) {
293 nextpath = apr_pstrcat(p, path, "/", apr_pstrndup(p, name, vary
295 if (!apr_file_remove(nextpath, p)) {
304 end = strrchr(name, '/');
311 if (++delcount >= DELETE_NICE) {
312 apr_sleep(NICE_DELAY);
320 * delete a single file
322 static void delete_file(char *path, char *basename, apr_off_t *nodes,
328 /* temp pool, otherwise lots of memory could be allocated */
329 apr_pool_create(&p, pool);
330 nextpath = apr_pstrcat(p, path, "/", basename, NULL);
334 if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) {
338 else if (!apr_file_remove(nextpath, p)) {
345 if (++delcount >= DELETE_NICE) {
346 apr_sleep(NICE_DELAY);
351 delete_parent(path, basename, nodes, pool);
356 * delete cache file set
358 static void delete_entry(char *path, char *basename, apr_off_t *nodes,
364 /* temp pool, otherwise lots of memory could be allocated */
365 apr_pool_create(&p, pool);
367 nextpath = apr_pstrcat(p, path, "/", basename, CACHE_HEADER_SUFFIX, NULL);
370 if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) {
374 else if (!apr_file_remove(nextpath, p)) {
378 nextpath = apr_pstrcat(p, path, "/", basename, CACHE_DATA_SUFFIX, NULL);
381 if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) {
385 else if (!apr_file_remove(nextpath, p)) {
393 if (delcount >= DELETE_NICE) {
394 apr_sleep(NICE_DELAY);
399 delete_parent(path, basename, nodes, pool);
404 * list the cache directory tree
406 static int list_urls(char *path, apr_pool_t *pool, apr_off_t round)
413 const char *ext, *nextpath;
416 disk_cache_info_t disk_info;
418 apr_pool_create(&p, pool);
420 if (apr_dir_open(&dir, path, p) != APR_SUCCESS) {
424 while (apr_dir_read(&info, APR_FINFO_TYPE, dir) == APR_SUCCESS && !interrupted) {
426 if (info.filetype == APR_DIR) {
427 if (!strcmp(info.name, ".") || !strcmp(info.name, "..")) {
431 if (list_urls(apr_pstrcat(p, path, "/", info.name, NULL), pool, round)) {
436 else if (info.filetype == APR_REG) {
438 ext = strchr(info.name, '.');
440 if (ext && !strcasecmp(ext, CACHE_HEADER_SUFFIX)) {
442 nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
444 if (apr_file_open(&fd, nextpath, APR_FOPEN_READ
445 | APR_FOPEN_BINARY, APR_OS_DEFAULT, p) == APR_SUCCESS) {
446 len = sizeof(format);
447 if (apr_file_read_full(fd, &format, len, &len)
449 if (format == DISK_FORMAT_VERSION) {
450 apr_off_t offset = 0;
452 apr_file_seek(fd, APR_SET, &offset);
454 len = sizeof(disk_cache_info_t);
456 if (apr_file_read_full(fd, &disk_info, len, &len)
458 len = disk_info.name_len;
459 url = apr_palloc(p, len + 1);
462 if (apr_file_read_full(fd, url, len, &len)
466 apr_finfo_t hinfo, dinfo;
468 /* stat the header file */
469 if (APR_SUCCESS != apr_file_info_get(
470 &hinfo, APR_FINFO_SIZE, fd)) {
471 /* ignore the file */
473 else if (disk_info.has_body && APR_SUCCESS
490 /* ignore the file */
492 else if (disk_info.has_body && (dinfo.device
495 != disk_info.inode)) {
496 /* ignore the file */
502 "%s %" APR_SIZE_T_FMT
504 " %d %" APR_SIZE_T_FMT
511 round_up((apr_size_t)hinfo.size, round),
513 disk_info.has_body ? (apr_size_t)dinfo.size
516 disk_info.entity_version,
519 disk_info.request_time,
520 disk_info.response_time,
522 disk_info.header_only);
528 /* stat the data file */
529 if (disk_info.has_body && APR_SUCCESS
546 /* ignore the file */
548 else if (disk_info.has_body && (dinfo.device
551 != disk_info.inode)) {
552 /* ignore the file */
555 apr_file_printf(outfile, "%s\n",
582 apr_sleep(NICE_DELAY);
593 * walk the cache directory tree
595 static int process_dir(char *path, apr_pool_t *pool, apr_off_t *nodes)
605 apr_time_t current, deviation;
606 char *nextpath, *base, *ext;
607 APR_RING_ENTRY(_direntry) anchor;
611 disk_cache_info_t disk_info;
613 APR_RING_INIT(&anchor, _direntry, link);
614 apr_pool_create(&p, pool);
615 h = apr_hash_make(p);
617 deviation = MAXDEVIATION * APR_USEC_PER_SEC;
619 if (apr_dir_open(&dir, path, p) != APR_SUCCESS) {
623 while (apr_dir_read(&info, 0, dir) == APR_SUCCESS && !interrupted) {
624 if (!strcmp(info.name, ".") || !strcmp(info.name, "..")) {
627 d = apr_pcalloc(p, sizeof(DIRENTRY));
628 d->basename = apr_pstrcat(p, path, "/", info.name, NULL);
629 APR_RING_INSERT_TAIL(&anchor, d, _direntry, link);
641 for (d = APR_RING_FIRST(&anchor);
642 !interrupted && d != APR_RING_SENTINEL(&anchor, _direntry, link);
644 n = APR_RING_NEXT(d, link);
645 base = strrchr(d->basename, '/');
649 ext = strchr(base, '.');
651 /* there may be temporary files which may be gone before
652 * processing, always skip these if not in realclean mode
654 if (!ext && !realclean) {
655 if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
656 && strlen(base) == AP_TEMPFILE_NAMELEN) {
661 /* this may look strange but apr_stat() may return an error which
662 * is system dependent and there may be transient failures,
663 * so just blindly retry for a short while
665 retries = STAT_ATTEMPTS;
666 status = APR_SUCCESS;
668 if (status != APR_SUCCESS) {
669 apr_sleep(STAT_DELAY);
671 status = apr_stat(&info, d->basename, DIRINFO, p);
672 } while (status != APR_SUCCESS && !interrupted && --retries);
674 /* what may happen here is that apache did create a file which
675 * we did detect but then does delete the file before we can
676 * get file information, so if we don't get any file information
677 * we will ignore the file in this case
679 if (status != APR_SUCCESS) {
680 if (!realclean && !interrupted) {
686 if (info.filetype == APR_DIR) {
687 if (process_dir(d->basename, pool, nodes)) {
693 if (info.filetype != APR_REG) {
698 if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
699 && strlen(base) == AP_TEMPFILE_NAMELEN) {
702 d->dsize = info.size;
703 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
708 if (!strcasecmp(ext, CACHE_HEADER_SUFFIX)) {
711 /* if a user manually creates a '.header' file */
712 if (d->basename[0] == '\0') {
715 t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
720 d->htime = info.mtime;
721 d->hsize = info.size;
722 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
726 if (!strcasecmp(ext, CACHE_DATA_SUFFIX)) {
729 /* if a user manually creates a '.data' file */
730 if (d->basename[0] == '\0') {
733 t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
738 d->dtime = info.mtime;
739 d->dsize = info.size;
740 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
748 path[baselen] = '\0';
750 for (i = apr_hash_first(p, h); i && !interrupted; i = apr_hash_next(i)) {
754 apr_hash_this(i, NULL, NULL, &hvalue);
759 nextpath = apr_pstrcat(p, path, "/", d->basename,
760 CACHE_HEADER_SUFFIX, NULL);
761 if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY,
762 APR_OS_DEFAULT, p) == APR_SUCCESS) {
763 len = sizeof(format);
764 if (apr_file_read_full(fd, &format, len,
765 &len) == APR_SUCCESS) {
766 if (format == DISK_FORMAT_VERSION) {
767 apr_off_t offset = 0;
769 apr_file_seek(fd, APR_SET, &offset);
771 len = sizeof(disk_cache_info_t);
773 if (apr_file_read_full(fd, &disk_info, len,
774 &len) == APR_SUCCESS) {
776 e = apr_palloc(pool, sizeof(ENTRY));
777 APR_RING_INSERT_TAIL(&root, e, _entry, link);
778 e->expire = disk_info.expire;
779 e->response_time = disk_info.response_time;
784 e->basename = apr_pstrdup(pool, d->basename);
785 if (!disk_info.has_body) {
786 delete_file(path, apr_pstrcat(p, path, "/",
787 d->basename, CACHE_DATA_SUFFIX, NULL),
796 else if (format == VARY_FORMAT_VERSION) {
799 /* This must be a URL that added Vary headers later,
800 * so kill the orphaned .data file
804 if (apr_stat(&finfo, apr_pstrcat(p, nextpath,
805 CACHE_VDIR_SUFFIX, NULL), APR_FINFO_TYPE, p)
806 || finfo.filetype != APR_DIR) {
807 delete_entry(path, d->basename, nodes, p);
810 delete_file(path, apr_pstrcat(p, path, "/",
811 d->basename, CACHE_DATA_SUFFIX, NULL),
817 /* We didn't recognise the format, kill the files */
819 delete_entry(path, d->basename, nodes, p);
828 /* we have a somehow unreadable headers file which is associated
829 * with a data file. this may be caused by apache currently
830 * rewriting the headers file. thus we may delete the file set
831 * either in realclean mode or if the headers file modification
832 * timestamp is not within a specified positive or negative offset
833 * to the current time.
835 current = apr_time_now();
836 if (realclean || d->htime < current - deviation
837 || d->htime > current + deviation) {
838 delete_entry(path, d->basename, nodes, p);
839 unsolicited += d->hsize;
840 unsolicited += d->dsize;
844 /* single data and header files may be deleted either in realclean
845 * mode or if their modification timestamp is not within a
846 * specified positive or negative offset to the current time.
847 * this handling is necessary due to possible race conditions
848 * between apache and this process
851 current = apr_time_now();
852 nextpath = apr_pstrcat(p, path, "/", d->basename,
853 CACHE_HEADER_SUFFIX, NULL);
854 if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY,
855 APR_OS_DEFAULT, p) == APR_SUCCESS) {
856 len = sizeof(format);
857 if (apr_file_read_full(fd, &format, len,
858 &len) == APR_SUCCESS) {
859 if (format == VARY_FORMAT_VERSION) {
862 len = sizeof(expires);
864 if (apr_file_read_full(fd, &expires, len,
865 &len) == APR_SUCCESS) {
870 if (apr_stat(&finfo, apr_pstrcat(p, nextpath,
871 CACHE_VDIR_SUFFIX, NULL), APR_FINFO_TYPE, p)
872 || finfo.filetype != APR_DIR) {
873 delete_entry(path, d->basename, nodes, p);
875 else if (expires < current) {
876 delete_entry(path, d->basename, nodes, p);
882 else if (format == DISK_FORMAT_VERSION) {
883 apr_off_t offset = 0;
885 apr_file_seek(fd, APR_SET, &offset);
887 len = sizeof(disk_cache_info_t);
889 if (apr_file_read_full(fd, &disk_info, len,
890 &len) == APR_SUCCESS) {
892 e = apr_palloc(pool, sizeof(ENTRY));
893 APR_RING_INSERT_TAIL(&root, e, _entry, link);
894 e->expire = disk_info.expire;
895 e->response_time = disk_info.response_time;
900 e->basename = apr_pstrdup(pool, d->basename);
909 delete_entry(path, d->basename, nodes, p);
918 if (realclean || d->htime < current - deviation
919 || d->htime > current + deviation) {
920 delete_entry(path, d->basename, nodes, p);
921 unsolicited += d->hsize;
926 current = apr_time_now();
927 if (realclean || d->dtime < current - deviation
928 || d->dtime > current + deviation) {
929 delete_entry(path, d->basename, nodes, p);
930 unsolicited += d->dsize;
934 /* temp files may only be deleted in realclean mode which
935 * is asserted above if a tempfile is in the hash array
938 delete_file(path, d->basename, nodes, p);
939 unsolicited += d->dsize;
951 apr_sleep(NICE_DELAY);
962 * purge cache entries
964 static void purge(char *path, apr_pool_t *pool, apr_off_t max,
965 apr_off_t inodes, apr_off_t nodes, apr_off_t round)
967 ENTRY *e, *n, *oldest;
980 for (e = APR_RING_FIRST(&root);
981 e != APR_RING_SENTINEL(&root, _entry, link);
982 e = APR_RING_NEXT(e, link)) {
983 s.sum += round_up((apr_size_t)e->hsize, round);
984 s.sum += round_up((apr_size_t)e->dsize, round);
989 s.etotal = s.entries;
991 if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) {
992 printstats(path, &s);
996 /* process all entries with a timestamp in the future, this may
997 * happen if a wrong system time is corrected
1000 for (e = APR_RING_FIRST(&root);
1001 e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
1002 n = APR_RING_NEXT(e, link);
1003 if (e->response_time > now || e->htime > now || e->dtime > now) {
1004 delete_entry(path, e->basename, &s.nodes, pool);
1005 s.sum -= round_up((apr_size_t)e->hsize, round);
1006 s.sum -= round_up((apr_size_t)e->dsize, round);
1009 APR_RING_REMOVE(e, link);
1010 if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) {
1012 printstats(path, &s);
1024 /* process all entries with are expired */
1025 for (e = APR_RING_FIRST(&root);
1026 e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
1027 n = APR_RING_NEXT(e, link);
1028 if (e->expire != APR_DATE_BAD && e->expire < now) {
1029 delete_entry(path, e->basename, &s.nodes, pool);
1030 s.sum -= round_up((apr_size_t)e->hsize, round);
1031 s.sum -= round_up((apr_size_t)e->dsize, round);
1034 APR_RING_REMOVE(e, link);
1035 if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) {
1037 printstats(path, &s);
1049 /* process remaining entries oldest to newest, the check for an empty
1050 * ring actually isn't necessary except when the compiler does
1051 * corrupt 64bit arithmetics which happened to me once, so better safe
1054 while (!((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes))
1055 && !interrupted && !APR_RING_EMPTY(&root, _entry, link)) {
1056 oldest = APR_RING_FIRST(&root);
1058 for (e = APR_RING_NEXT(oldest, link);
1059 e != APR_RING_SENTINEL(&root, _entry, link);
1060 e = APR_RING_NEXT(e, link)) {
1061 if (e->dtime < oldest->dtime) {
1066 delete_entry(path, oldest->basename, &s.nodes, pool);
1067 s.sum -= round_up((apr_size_t)oldest->hsize, round);
1068 s.sum -= round_up((apr_size_t)oldest->dsize, round);
1071 APR_RING_REMOVE(oldest, link);
1075 printstats(path, &s);
1079 static apr_status_t remove_directory(apr_pool_t *pool, const char *dir)
1085 rv = apr_dir_open(&dirp, dir, pool);
1086 if (APR_STATUS_IS_ENOENT(rv)) {
1089 if (rv != APR_SUCCESS) {
1090 apr_file_printf(errfile, "Could not open directory %s: %pm" APR_EOL_STR,
1095 while (apr_dir_read(&dirent, APR_FINFO_DIRENT | APR_FINFO_TYPE, dirp)
1097 if (dirent.filetype == APR_DIR) {
1098 if (strcmp(dirent.name, ".") && strcmp(dirent.name, "..")) {
1099 rv = remove_directory(pool, apr_pstrcat(pool, dir, "/",
1100 dirent.name, NULL));
1101 /* tolerate the directory not being empty, the cache may have
1102 * attempted to recreate the directory in the mean time.
1104 if (APR_SUCCESS != rv && APR_ENOTEMPTY != rv) {
1109 const char *file = apr_pstrcat(pool, dir, "/", dirent.name, NULL);
1110 rv = apr_file_remove(file, pool);
1111 if (APR_SUCCESS != rv) {
1112 apr_file_printf(errfile,
1113 "Could not remove file '%s': %pm" APR_EOL_STR, file,
1120 apr_dir_close(dirp);
1122 if (rv == APR_SUCCESS) {
1123 rv = apr_dir_remove(dir, pool);
1124 if (APR_ENOTEMPTY == rv) {
1127 if (rv != APR_SUCCESS) {
1128 apr_file_printf(errfile, "Could not remove directory %s: %pm" APR_EOL_STR,
1136 static apr_status_t find_directory(apr_pool_t *pool, const char *base,
1142 int found = 0, files = 0;
1143 const char *header = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX, NULL);
1144 const char *data = apr_pstrcat(pool, rest, CACHE_DATA_SUFFIX, NULL);
1145 const char *vdir = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX,
1146 CACHE_VDIR_SUFFIX, NULL);
1147 const char *dirname = NULL;
1149 rv = apr_dir_open(&dirp, base, pool);
1150 if (rv != APR_SUCCESS) {
1151 apr_file_printf(errfile, "Could not open directory %s: %pm" APR_EOL_STR,
1158 while (apr_dir_read(&dirent, APR_FINFO_DIRENT | APR_FINFO_TYPE, dirp)
1160 int len = strlen(dirent.name);
1161 int restlen = strlen(rest);
1162 if (dirent.filetype == APR_DIR && !strncmp(rest, dirent.name, len)) {
1163 dirname = apr_pstrcat(pool, base, "/", dirent.name, NULL);
1164 rv = find_directory(pool, dirname, rest + (len < restlen ? len
1166 if (APR_SUCCESS == rv) {
1170 if (dirent.filetype == APR_DIR) {
1171 if (!strcmp(dirent.name, vdir)) {
1175 if (dirent.filetype == APR_REG) {
1176 if (!strcmp(dirent.name, header) || !strcmp(dirent.name, data)) {
1182 apr_dir_close(dirp);
1188 apr_status_t status;
1190 remove = apr_pstrcat(pool, base, "/", header, NULL);
1191 status = apr_file_remove(remove, pool);
1192 if (status != APR_SUCCESS && !APR_STATUS_IS_ENOENT(status)) {
1193 apr_file_printf(errfile, "Could not remove file %s: %pm" APR_EOL_STR,
1198 remove = apr_pstrcat(pool, base, "/", data, NULL);
1199 status = apr_file_remove(remove, pool);
1200 if (status != APR_SUCCESS && !APR_STATUS_IS_ENOENT(status)) {
1201 apr_file_printf(errfile, "Could not remove file %s: %pm" APR_EOL_STR,
1206 status = remove_directory(pool, apr_pstrcat(pool, base, "/", vdir, NULL));
1207 if (status != APR_SUCCESS && !APR_STATUS_IS_ENOENT(status)) {
1213 /* If asked to delete dirs, do so now. We don't care if it fails.
1214 * If it fails, it likely means there was something else there.
1216 if (dirname && deldirs && !dryrun) {
1217 apr_dir_remove(dirname, pool);
1228 * Delete a specific URL from the cache.
1230 static apr_status_t delete_url(apr_pool_t *pool, const char *proxypath, const char *url)
1232 apr_md5_ctx_t context;
1233 unsigned char digest[16];
1237 static const char enc_table[64] =
1238 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
1240 apr_md5_init(&context);
1241 apr_md5_update(&context, (const unsigned char *) url, strlen(url));
1242 apr_md5_final(digest, &context);
1244 /* encode 128 bits as 22 characters, using a modified uuencoding
1245 * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
1246 * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
1248 for (i = 0, k = 0; i < 15; i += 3) {
1249 x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
1250 tmp[k++] = enc_table[x >> 18];
1251 tmp[k++] = enc_table[(x >> 12) & 0x3f];
1252 tmp[k++] = enc_table[(x >> 6) & 0x3f];
1253 tmp[k++] = enc_table[x & 0x3f];
1258 tmp[k++] = enc_table[x >> 2]; /* use up 6 bits */
1259 tmp[k++] = enc_table[(x << 4) & 0x3f];
1262 /* automatically find the directory levels */
1263 return find_directory(pool, proxypath, tmp);
1269 #define NL APR_EOL_STR
1270 static void usage(const char *error)
1273 apr_file_printf(errfile, "%s error: %s\n", shortname, error);
1275 apr_file_printf(errfile,
1276 "%s -- program for cleaning the disk cache." NL
1277 "Usage: %s [-Dvtrn] -pPATH [-lLIMIT|-LLIMIT] [-PPIDFILE]" NL
1278 " %s [-nti] -dINTERVAL -pPATH [-lLIMIT|-LLIMIT] [-PPIDFILE]" NL
1279 " %s [-Dvt] -pPATH URL ..." NL
1282 " -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL
1283 " This option is mutually exclusive with the -D, -v and -r" NL
1286 " -D Do a dry run and don't delete anything. This option is mutually" NL
1287 " exclusive with the -d option. When doing a dry run and deleting" NL
1288 " directories with -t, the inodes reported deleted in the stats" NL
1289 " cannot take into account the directories deleted, and will be" NL
1290 " marked as an estimate." NL
1292 " -v Be verbose and print statistics. This option is mutually" NL
1293 " exclusive with the -d option." NL
1295 " -r Clean thoroughly. This assumes that the Apache web server is " NL
1296 " not running. This option is mutually exclusive with the -d" NL
1297 " option and implies -t." NL
1299 " -n Be nice. This causes slower processing in favour of other" NL
1302 " -t Delete all empty directories. By default only cache files are" NL
1303 " removed, however with some configurations the large number of" NL
1304 " directories created may require attention." NL
1306 " -p Specify PATH as the root directory of the disk cache." NL
1308 " -P Specify PIDFILE as the file to write the pid to." NL
1310 " -R Specify amount to round sizes up to." NL
1312 " -l Specify LIMIT as the total disk cache size limit. Attach 'K'" NL
1313 " or 'M' to the number for specifying KBytes or MBytes." NL
1315 " -L Specify LIMIT as the total disk cache inode limit." NL
1317 " -i Be intelligent and run only when there was a modification of" NL
1318 " the disk cache. This option is only possible together with the" NL
1321 " -a List the URLs currently stored in the cache. Variants of the" NL
1322 " same URL will be listed once for each variant." NL
1324 " -A List the URLs currently stored in the cache, along with their" NL
1325 " attributes in the following order: url, header size, body size," NL
1326 " status, entity version, date, expiry, request time," NL
1327 " response time, body present, head request." NL
1329 "Should an URL be provided on the command line, the URL will be" NL
1330 "deleted from the cache. A reverse proxied URL is made up as follows:" NL
1331 "http://<hostname>:<port><path>?[query]. So, for the path \"/\" on the" NL
1332 "host \"localhost\" and port 80, the URL to delete becomes" NL
1333 "\"http://localhost:80/?\". Note the '?' in the URL must always be" NL
1334 "specified explicitly, whether a query string is present or not." NL,
1345 static void usage_repeated_arg(apr_pool_t *pool, char option) {
1346 usage(apr_psprintf(pool,
1347 "The option '%c' cannot be specified more than once",
1351 static void log_pid(apr_pool_t *pool, const char *pidfilename, apr_file_t **pidfile)
1353 apr_status_t status;
1354 pid_t mypid = getpid();
1356 if (APR_SUCCESS == (status = apr_file_open(pidfile, pidfilename,
1357 APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE |
1358 APR_FOPEN_DELONCLOSE, APR_FPROT_UREAD | APR_FPROT_UWRITE |
1359 APR_FPROT_GREAD | APR_FPROT_WREAD, pool))) {
1360 apr_file_printf(*pidfile, "%" APR_PID_T_FMT APR_EOL_STR, mypid);
1364 apr_file_printf(errfile,
1365 "Could not write the pid file '%s': %pm" APR_EOL_STR,
1366 pidfilename, &status);
1375 int main(int argc, const char * const argv[])
1377 apr_off_t max, inodes, round;
1378 apr_time_t current, repeat, delay, previous;
1379 apr_status_t status;
1380 apr_pool_t *pool, *instance;
1383 apr_file_t *pidfile;
1384 int retries, isdaemon, limit_found, inodes_found, intelligent, dowork;
1387 char *proxypath, *path, *pidfilename;
1403 previous = 0; /* avoid compiler warning */
1407 if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
1410 atexit(apr_terminate);
1413 shortname = apr_filepath_name_get(argv[0]);
1416 if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
1419 apr_pool_abort_set(oom, pool);
1420 apr_file_open_stderr(&errfile, pool);
1421 apr_file_open_stdout(&outfile, pool);
1422 apr_signal(SIGINT, setterm);
1423 apr_signal(SIGTERM, setterm);
1425 apr_getopt_init(&o, pool, argc, argv);
1428 status = apr_getopt(o, "iDnvrtd:l:L:p:P:R:aA", &opt, &arg);
1429 if (status == APR_EOF) {
1432 else if (status != APR_SUCCESS) {
1441 usage_repeated_arg(pool, opt);
1448 usage_repeated_arg(pool, opt);
1455 usage_repeated_arg(pool, opt);
1462 usage_repeated_arg(pool, opt);
1469 usage_repeated_arg(pool, opt);
1476 usage_repeated_arg(pool, opt);
1484 usage_repeated_arg(pool, opt);
1487 repeat = apr_atoi64(arg);
1488 repeat *= SECS_PER_MIN;
1489 repeat *= APR_USEC_PER_SEC;
1494 usage_repeated_arg(pool, opt);
1499 rv = apr_strtoff(&max, arg, &end, 10);
1500 if (rv == APR_SUCCESS) {
1501 if ((*end == 'K' || *end == 'k') && !end[1]) {
1504 else if ((*end == 'M' || *end == 'm') && !end[1]) {
1507 else if ((*end == 'G' || *end == 'g') && !end[1]) {
1510 else if (*end && /* neither empty nor [Bb] */
1511 ((*end != 'B' && *end != 'b') || end[1])) {
1515 if (rv != APR_SUCCESS) {
1516 usage(apr_psprintf(pool, "Invalid limit: %s"
1517 APR_EOL_STR APR_EOL_STR, arg));
1524 usage_repeated_arg(pool, opt);
1529 rv = apr_strtoff(&inodes, arg, &end, 10);
1530 if (rv == APR_SUCCESS) {
1531 if ((*end == 'K' || *end == 'k') && !end[1]) {
1534 else if ((*end == 'M' || *end == 'm') && !end[1]) {
1537 else if ((*end == 'G' || *end == 'g') && !end[1]) {
1540 else if (*end && /* neither empty nor [Bb] */
1541 ((*end != 'B' && *end != 'b') || end[1])) {
1545 if (rv != APR_SUCCESS) {
1546 usage(apr_psprintf(pool, "Invalid limit: %s"
1547 APR_EOL_STR APR_EOL_STR, arg));
1554 usage_repeated_arg(pool, opt);
1561 usage_repeated_arg(pool, opt);
1569 usage_repeated_arg(pool, opt);
1571 proxypath = apr_pstrdup(pool, arg);
1572 if ((status = apr_filepath_set(proxypath, pool)) != APR_SUCCESS) {
1573 usage(apr_psprintf(pool, "Could not set filepath to '%s': %pm",
1574 proxypath, &status));
1580 usage_repeated_arg(pool, opt);
1582 pidfilename = apr_pstrdup(pool, arg);
1587 usage_repeated_arg(pool, opt);
1589 rv = apr_strtoff(&round, arg, &end, 10);
1590 if (rv == APR_SUCCESS) {
1592 usage(apr_psprintf(pool, "Invalid round value: %s"
1593 APR_EOL_STR APR_EOL_STR, arg));
1595 else if (round < 0) {
1596 usage(apr_psprintf(pool, "Round value must be positive: %s"
1597 APR_EOL_STR APR_EOL_STR, arg));
1600 if (rv != APR_SUCCESS) {
1601 usage(apr_psprintf(pool, "Invalid round value: %s"
1602 APR_EOL_STR APR_EOL_STR, arg));
1615 usage("Option -p must be specified");
1618 if (o->ind < argc) {
1622 usage("Option -d cannot be used with URL arguments, aborting");
1625 usage("Option -i cannot be used with URL arguments, aborting");
1628 usage("Option -l cannot be used with URL arguments, aborting");
1630 while (o->ind < argc) {
1631 status = delete_url(pool, proxypath, argv[o->ind]);
1632 if (APR_SUCCESS == status) {
1634 apr_file_printf(errfile, "Removed: %s" APR_EOL_STR,
1639 else if (APR_ENOENT == status) {
1641 apr_file_printf(errfile, "Not cached: %s" APR_EOL_STR,
1647 apr_file_printf(errfile, "Error while removed: %s" APR_EOL_STR,
1654 return error ? 1 : deleted ? 0 : 2;
1657 if (isdaemon && repeat <= 0) {
1658 usage("Option -d must be greater than zero");
1661 if (isdaemon && (verbose || realclean || dryrun || listurls)) {
1662 usage("Option -d cannot be used with -v, -r, -L or -D");
1665 if (!isdaemon && intelligent) {
1666 usage("Option -i cannot be used without -d");
1669 if (!listurls && max <= 0 && inodes <= 0) {
1670 usage("At least one of option -l or -L must be greater than zero");
1673 if (apr_filepath_get(&path, 0, pool) != APR_SUCCESS) {
1674 usage(apr_psprintf(pool, "Could not get the filepath: %pm", &status));
1676 baselen = strlen(path);
1679 log_pid(pool, pidfilename, &pidfile); /* before daemonizing, so we
1685 list_urls(path, pool, round);
1686 return (interrupted != 0);
1691 apr_file_close(errfile);
1694 apr_file_close(pidfile); /* delete original pidfile only in parent */
1696 apr_proc_detach(APR_PROC_DETACH_DAEMONIZE);
1698 log_pid(pool, pidfilename, &pidfile);
1704 apr_pool_create(&instance, pool);
1706 now = apr_time_now();
1707 APR_RING_INIT(&root, _entry, link);
1712 switch (intelligent) {
1718 retries = STAT_ATTEMPTS;
1719 status = APR_SUCCESS;
1722 if (status != APR_SUCCESS) {
1723 apr_sleep(STAT_DELAY);
1725 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
1726 } while (status != APR_SUCCESS && !interrupted && --retries);
1728 if (status == APR_SUCCESS) {
1729 previous = info.mtime;
1736 retries = STAT_ATTEMPTS;
1737 status = APR_SUCCESS;
1740 if (status != APR_SUCCESS) {
1741 apr_sleep(STAT_DELAY);
1743 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
1744 } while (status != APR_SUCCESS && !interrupted && --retries);
1746 if (status == APR_SUCCESS) {
1747 if (previous != info.mtime) {
1750 previous = info.mtime;
1758 if (dowork && !interrupted) {
1759 apr_off_t nodes = 0;
1760 if (!process_dir(path, instance, &nodes) && !interrupted) {
1761 purge(path, instance, max, inodes, nodes, round);
1763 else if (!isdaemon && !interrupted) {
1764 apr_file_printf(errfile, "An error occurred, cache cleaning "
1765 "aborted." APR_EOL_STR);
1769 if (intelligent && !interrupted) {
1770 retries = STAT_ATTEMPTS;
1771 status = APR_SUCCESS;
1773 if (status != APR_SUCCESS) {
1774 apr_sleep(STAT_DELAY);
1776 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
1777 } while (status != APR_SUCCESS && !interrupted && --retries);
1779 if (status == APR_SUCCESS) {
1780 previous = info.mtime;
1789 apr_pool_destroy(instance);
1791 current = apr_time_now();
1792 if (current < now) {
1795 else if (current - now >= repeat) {
1799 delay = now + repeat - current;
1802 /* we can't sleep the whole delay time here apiece as this is racy
1803 * with respect to interrupt delivery - think about what happens
1804 * if we have tested for an interrupt, then get scheduled
1805 * before the apr_sleep() call and while waiting for the cpu
1806 * we do get an interrupt
1809 while (delay && !interrupted) {
1810 if (delay > APR_USEC_PER_SEC) {
1811 apr_sleep(APR_USEC_PER_SEC);
1812 delay -= APR_USEC_PER_SEC;
1820 } while (isdaemon && !interrupted);
1822 if (!isdaemon && interrupted) {
1823 apr_file_printf(errfile, "Cache cleaning aborted due to user "
1824 "request." APR_EOL_STR);