1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * htcacheclean.c: simple program for cleaning of
19 * the disk cache of the Apache HTTP server
21 * Contributed by Andreas Steinmetz <ast domdv.de>
27 #include "apr_strings.h"
28 #include "apr_file_io.h"
29 #include "apr_file_info.h"
30 #include "apr_pools.h"
32 #include "apr_thread_proc.h"
33 #include "apr_signal.h"
34 #include "apr_getopt.h"
38 #include "apr_buckets.h"
40 #include "../modules/cache/cache_common.h"
41 #include "../modules/cache/disk_cache_common.h"
51 /* XXX: Maybe we should link util_debug into htcacheclean? */
57 /* define the following for debugging */
61 * Note: on Linux delays <= 2ms are busy waits without
62 * scheduling, so never use a delay <= 2ms below
65 #define NICE_DELAY 10000 /* usecs */
66 #define DELETE_NICE 10 /* be nice after this amount of delete ops */
67 #define STAT_ATTEMPTS 10 /* maximum stat attempts for a file */
68 #define STAT_DELAY 5000 /* usecs */
69 #define HEADER 1 /* headers file */
70 #define DATA 2 /* body file */
71 #define TEMP 4 /* temporary file */
72 #define HEADERDATA (HEADER|DATA)
73 #define MAXDEVIATION 3600 /* secs */
74 #define SECS_PER_MIN 60
77 #define GBYTE 1073741824
79 #define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_LINK)
81 typedef struct _direntry {
82 APR_RING_ENTRY(_direntry) link;
83 int type; /* type of file/fileset: TEMP, HEADER, DATA, HEADERDATA */
84 apr_time_t htime; /* headers file modification time */
85 apr_time_t dtime; /* body file modification time */
86 apr_off_t hsize; /* headers file size */
87 apr_off_t dsize; /* body or temporary file size */
88 char *basename; /* file/fileset base name */
91 typedef struct _entry {
92 APR_RING_ENTRY(_entry) link;
93 apr_time_t expire; /* cache entry expiration time */
94 apr_time_t response_time; /* cache entry time of last response to client */
95 apr_time_t htime; /* headers file modification time */
96 apr_time_t dtime; /* body file modification time */
97 apr_off_t hsize; /* headers file size */
98 apr_off_t dsize; /* body or temporary file size */
99 char *basename; /* fileset base name */
103 static int delcount; /* file deletion count for nice mode */
104 static int interrupted; /* flag: true if SIGINT or SIGTERM occurred */
105 static int realclean; /* flag: true means user said apache is not running */
106 static int verbose; /* flag: true means print statistics */
107 static int benice; /* flag: true means nice mode is activated */
108 static int dryrun; /* flag: true means dry run, don't actually delete
110 static int deldirs; /* flag: true means directories should be deleted */
111 static int listurls; /* flag: true means list cached urls */
112 static int listextended;/* flag: true means list cached urls */
113 static int baselen; /* string length of the path to the proxy directory */
114 static apr_time_t now; /* start time of this processing run */
116 static apr_file_t *errfile; /* stderr file handle */
117 static apr_file_t *outfile; /* stdout file handle */
118 static apr_off_t unsolicited; /* file size summary for deleted unsolicited
120 static APR_RING_ENTRY(_entry) root; /* ENTRY ring anchor */
122 /* short program name as called */
123 static const char *shortname = "htcacheclean";
125 /* what did we clean? */
143 * fake delete for debug purposes
145 #define apr_file_remove fake_file_remove
146 static void fake_file_remove(char *pathname, apr_pool_t *p)
150 /* stat and printing to simulate some deletion system load and to
151 display what would actually have happened */
152 apr_stat(&info, pathname, DIRINFO, p);
153 apr_file_printf(errfile, "would delete %s" APR_EOL_STR, pathname);
158 * called on SIGINT or SIGTERM
160 static void setterm(int unused)
163 apr_file_printf(errfile, "interrupt" APR_EOL_STR);
169 * called in out of memory condition
171 static int oom(int unused)
173 static int called = 0;
175 /* be careful to call exit() only once */
184 * print purge statistics
186 static void printstats(char *path, struct stats *s)
188 char ttype, stype, mtype, utype;
189 apr_off_t tfrag, sfrag, ufrag;
196 tfrag = ((s->total * 10) / KBYTE) % 10;
198 if (s->total >= KBYTE) {
200 tfrag = ((s->total * 10) / KBYTE) % 10;
205 sfrag = ((s->sum * 10) / KBYTE) % 10;
207 if (s->sum >= KBYTE) {
209 sfrag = ((s->sum * 10) / KBYTE) % 10;
215 if (s->max >= KBYTE) {
220 apr_file_printf(errfile, "Cleaned %s. Statistics:" APR_EOL_STR, path);
223 ufrag = ((unsolicited * 10) / KBYTE) % 10;
224 unsolicited /= KBYTE;
225 if (unsolicited >= KBYTE) {
227 ufrag = ((unsolicited * 10) / KBYTE) % 10;
228 unsolicited /= KBYTE;
230 if (!unsolicited && !ufrag) {
233 apr_file_printf(errfile, "unsolicited size %d.%d%c" APR_EOL_STR,
234 (int)(unsolicited), (int)(ufrag), utype);
236 apr_file_printf(errfile, "size limit %" APR_OFF_T_FMT ".0%c" APR_EOL_STR,
238 apr_file_printf(errfile, "inodes limit %" APR_OFF_T_FMT APR_EOL_STR,
242 "total size was %" APR_OFF_T_FMT ".%" APR_OFF_T_FMT "%c, total size now "
243 "%" APR_OFF_T_FMT ".%" APR_OFF_T_FMT "%c" APR_EOL_STR, s->total,
244 tfrag, ttype, s->sum, sfrag, stype);
245 apr_file_printf(errfile, "total inodes was %" APR_OFF_T_FMT
246 ", total %sinodes now "
247 "%" APR_OFF_T_FMT APR_EOL_STR, s->ntotal, dryrun && deldirs ? "estimated "
251 "total entries was %" APR_OFF_T_FMT ", total entries now %" APR_OFF_T_FMT
252 APR_EOL_STR, s->etotal, s->entries);
255 "%" APR_OFF_T_FMT " entries deleted (%" APR_OFF_T_FMT " from future, %"
256 APR_OFF_T_FMT " expired, %" APR_OFF_T_FMT " fresh)" APR_EOL_STR,
257 (s->etotal - s->entries), s->dfuture, s->dexpired, s->dfresh);
261 * Round the value up to the given threshold.
263 static apr_size_t round_up(apr_size_t val, apr_off_t round) {
265 return ((val + round - 1) / round) * round;
271 * delete parent directories
273 static void delete_parent(const char *path, const char *basename,
274 apr_off_t *nodes, apr_pool_t *pool)
276 char *nextpath, *name;
279 /* temp pool, otherwise lots of memory could be allocated */
280 apr_pool_create(&p, pool);
281 name = apr_pstrdup(p, basename);
283 /* If asked to delete dirs, do so now. We don't care if it fails.
284 * If it fails, it likely means there was something else there.
286 if (deldirs && !dryrun) {
288 char *end = strrchr(name, '/');
292 /* remove the directory */
293 nextpath = apr_pstrcat(p, path, "/", name, NULL);
294 if (!apr_dir_remove(nextpath, p)) {
297 /* vary directory found? */
298 vary = strstr(name, CACHE_VDIR_SUFFIX);
299 if (vary && !vary[sizeof(CACHE_VDIR_SUFFIX) - 1]) {
300 nextpath = apr_pstrcat(p, path, "/", apr_pstrndup(p, name, vary
302 if (!apr_file_remove(nextpath, p)) {
311 end = strrchr(name, '/');
318 if (++delcount >= DELETE_NICE) {
319 apr_sleep(NICE_DELAY);
327 * delete a single file
329 static void delete_file(char *path, char *basename, apr_off_t *nodes,
335 /* temp pool, otherwise lots of memory could be allocated */
336 apr_pool_create(&p, pool);
337 nextpath = apr_pstrcat(p, path, "/", basename, NULL);
341 if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) {
345 else if (!apr_file_remove(nextpath, p)) {
352 if (++delcount >= DELETE_NICE) {
353 apr_sleep(NICE_DELAY);
358 delete_parent(path, basename, nodes, pool);
363 * delete cache file set
365 static void delete_entry(char *path, char *basename, apr_off_t *nodes,
371 /* temp pool, otherwise lots of memory could be allocated */
372 apr_pool_create(&p, pool);
374 nextpath = apr_pstrcat(p, path, "/", basename, CACHE_HEADER_SUFFIX, NULL);
377 if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) {
381 else if (!apr_file_remove(nextpath, p)) {
385 nextpath = apr_pstrcat(p, path, "/", basename, CACHE_DATA_SUFFIX, NULL);
388 if (!apr_stat(&finfo, nextpath, APR_FINFO_NLINK, p)) {
392 else if (!apr_file_remove(nextpath, p)) {
400 if (delcount >= DELETE_NICE) {
401 apr_sleep(NICE_DELAY);
406 delete_parent(path, basename, nodes, pool);
411 * list the cache directory tree
413 static int list_urls(char *path, apr_pool_t *pool, apr_off_t round)
420 const char *ext, *nextpath;
423 disk_cache_info_t disk_info;
425 apr_pool_create(&p, pool);
427 if (apr_dir_open(&dir, path, p) != APR_SUCCESS) {
431 while (apr_dir_read(&info, 0, dir) == APR_SUCCESS && !interrupted) {
433 if (info.filetype == APR_DIR) {
434 if (!strcmp(info.name, ".") || !strcmp(info.name, "..")) {
438 if (list_urls(apr_pstrcat(p, path, "/", info.name, NULL), pool, round)) {
443 else if (info.filetype == APR_REG) {
445 ext = strchr(info.name, '.');
447 if (ext && !strcasecmp(ext, CACHE_HEADER_SUFFIX)) {
449 nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
451 if (apr_file_open(&fd, nextpath, APR_FOPEN_READ
452 | APR_FOPEN_BINARY, APR_OS_DEFAULT, p) == APR_SUCCESS) {
453 len = sizeof(format);
454 if (apr_file_read_full(fd, &format, len, &len)
456 if (format == DISK_FORMAT_VERSION) {
457 apr_off_t offset = 0;
459 apr_file_seek(fd, APR_SET, &offset);
461 len = sizeof(disk_cache_info_t);
463 if (apr_file_read_full(fd, &disk_info, len, &len)
465 len = disk_info.name_len;
466 url = apr_palloc(p, len + 1);
469 if (apr_file_read_full(fd, url, len, &len)
473 apr_finfo_t hinfo, dinfo;
475 /* stat the header file */
476 if (APR_SUCCESS != apr_file_info_get(
477 &hinfo, APR_FINFO_SIZE, fd)) {
478 /* ignore the file */
480 else if (disk_info.has_body && APR_SUCCESS
497 /* ignore the file */
499 else if (disk_info.has_body && (dinfo.device
502 != disk_info.inode)) {
503 /* ignore the file */
509 "%s %" APR_SIZE_T_FMT
511 " %d %" APR_SIZE_T_FMT
518 round_up(hinfo.size, round),
520 disk_info.has_body ? dinfo.size
523 disk_info.entity_version,
526 disk_info.request_time,
527 disk_info.response_time,
529 disk_info.header_only);
535 /* stat the data file */
536 if (disk_info.has_body && APR_SUCCESS
553 /* ignore the file */
555 else if (disk_info.has_body && (dinfo.device
558 != disk_info.inode)) {
559 /* ignore the file */
562 apr_file_printf(outfile, "%s\n",
589 apr_sleep(NICE_DELAY);
600 * walk the cache directory tree
602 static int process_dir(char *path, apr_pool_t *pool, apr_off_t *nodes)
612 apr_time_t current, deviation;
613 char *nextpath, *base, *ext;
614 APR_RING_ENTRY(_direntry) anchor;
618 disk_cache_info_t disk_info;
620 APR_RING_INIT(&anchor, _direntry, link);
621 apr_pool_create(&p, pool);
622 h = apr_hash_make(p);
625 deviation = MAXDEVIATION * APR_USEC_PER_SEC;
627 if (apr_dir_open(&dir, path, p) != APR_SUCCESS) {
631 while (apr_dir_read(&info, 0, dir) == APR_SUCCESS && !interrupted) {
632 if (!strcmp(info.name, ".") || !strcmp(info.name, "..")) {
635 d = apr_pcalloc(p, sizeof(DIRENTRY));
636 d->basename = apr_pstrcat(p, path, "/", info.name, NULL);
637 APR_RING_INSERT_TAIL(&anchor, d, _direntry, link);
649 for (d = APR_RING_FIRST(&anchor);
650 !interrupted && d != APR_RING_SENTINEL(&anchor, _direntry, link);
652 n = APR_RING_NEXT(d, link);
653 base = strrchr(d->basename, '/');
657 ext = strchr(base, '.');
659 /* there may be temporary files which may be gone before
660 * processing, always skip these if not in realclean mode
662 if (!ext && !realclean) {
663 if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
664 && strlen(base) == AP_TEMPFILE_NAMELEN) {
669 /* this may look strange but apr_stat() may return an error which
670 * is system dependent and there may be transient failures,
671 * so just blindly retry for a short while
673 retries = STAT_ATTEMPTS;
674 status = APR_SUCCESS;
676 if (status != APR_SUCCESS) {
677 apr_sleep(STAT_DELAY);
679 status = apr_stat(&info, d->basename, DIRINFO, p);
680 } while (status != APR_SUCCESS && !interrupted && --retries);
682 /* what may happen here is that apache did create a file which
683 * we did detect but then does delete the file before we can
684 * get file information, so if we don't get any file information
685 * we will ignore the file in this case
687 if (status != APR_SUCCESS) {
688 if (!realclean && !interrupted) {
694 if (info.filetype == APR_DIR) {
695 if (process_dir(d->basename, pool, nodes)) {
701 if (info.filetype != APR_REG) {
706 if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
707 && strlen(base) == AP_TEMPFILE_NAMELEN) {
710 d->dsize = info.size;
711 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
716 if (!strcasecmp(ext, CACHE_HEADER_SUFFIX)) {
719 /* if a user manually creates a '.header' file */
720 if (d->basename[0] == '\0') {
723 t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
728 d->htime = info.mtime;
729 d->hsize = info.size;
730 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
734 if (!strcasecmp(ext, CACHE_DATA_SUFFIX)) {
737 /* if a user manually creates a '.data' file */
738 if (d->basename[0] == '\0') {
741 t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
746 d->dtime = info.mtime;
747 d->dsize = info.size;
748 apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
756 path[baselen] = '\0';
758 for (i = apr_hash_first(p, h); i && !interrupted; i = apr_hash_next(i)) {
762 apr_hash_this(i, NULL, NULL, &hvalue);
767 nextpath = apr_pstrcat(p, path, "/", d->basename,
768 CACHE_HEADER_SUFFIX, NULL);
769 if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY,
770 APR_OS_DEFAULT, p) == APR_SUCCESS) {
771 len = sizeof(format);
772 if (apr_file_read_full(fd, &format, len,
773 &len) == APR_SUCCESS) {
774 if (format == DISK_FORMAT_VERSION) {
775 apr_off_t offset = 0;
777 apr_file_seek(fd, APR_SET, &offset);
779 len = sizeof(disk_cache_info_t);
781 if (apr_file_read_full(fd, &disk_info, len,
782 &len) == APR_SUCCESS) {
784 e = apr_palloc(pool, sizeof(ENTRY));
785 APR_RING_INSERT_TAIL(&root, e, _entry, link);
786 e->expire = disk_info.expire;
787 e->response_time = disk_info.response_time;
792 e->basename = apr_pstrdup(pool, d->basename);
793 if (!disk_info.has_body) {
794 delete_file(path, apr_pstrcat(p, path, "/",
795 d->basename, CACHE_DATA_SUFFIX, NULL),
804 else if (format == VARY_FORMAT_VERSION) {
807 /* This must be a URL that added Vary headers later,
808 * so kill the orphaned .data file
812 if (apr_stat(&finfo, apr_pstrcat(p, nextpath,
813 CACHE_VDIR_SUFFIX, NULL), APR_FINFO_TYPE, p)
814 || finfo.filetype != APR_DIR) {
815 delete_entry(path, d->basename, nodes, p);
818 delete_file(path, apr_pstrcat(p, path, "/",
819 d->basename, CACHE_DATA_SUFFIX, NULL),
825 /* We didn't recognise the format, kill the files */
827 delete_entry(path, d->basename, nodes, p);
836 /* we have a somehow unreadable headers file which is associated
837 * with a data file. this may be caused by apache currently
838 * rewriting the headers file. thus we may delete the file set
839 * either in realclean mode or if the headers file modification
840 * timestamp is not within a specified positive or negative offset
841 * to the current time.
843 current = apr_time_now();
844 if (realclean || d->htime < current - deviation
845 || d->htime > current + deviation) {
846 delete_entry(path, d->basename, nodes, p);
847 unsolicited += d->hsize;
848 unsolicited += d->dsize;
852 /* single data and header files may be deleted either in realclean
853 * mode or if their modification timestamp is not within a
854 * specified positive or negative offset to the current time.
855 * this handling is necessary due to possible race conditions
856 * between apache and this process
859 current = apr_time_now();
860 nextpath = apr_pstrcat(p, path, "/", d->basename,
861 CACHE_HEADER_SUFFIX, NULL);
862 if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY,
863 APR_OS_DEFAULT, p) == APR_SUCCESS) {
864 len = sizeof(format);
865 if (apr_file_read_full(fd, &format, len,
866 &len) == APR_SUCCESS) {
867 if (format == VARY_FORMAT_VERSION) {
870 len = sizeof(expires);
872 if (apr_file_read_full(fd, &expires, len,
873 &len) == APR_SUCCESS) {
878 if (apr_stat(&finfo, apr_pstrcat(p, nextpath,
879 CACHE_VDIR_SUFFIX, NULL), APR_FINFO_TYPE, p)
880 || finfo.filetype != APR_DIR) {
881 delete_entry(path, d->basename, nodes, p);
883 else if (expires < current) {
884 delete_entry(path, d->basename, nodes, p);
890 else if (format == DISK_FORMAT_VERSION) {
891 apr_off_t offset = 0;
893 apr_file_seek(fd, APR_SET, &offset);
895 len = sizeof(disk_cache_info_t);
897 if (apr_file_read_full(fd, &disk_info, len,
898 &len) == APR_SUCCESS) {
900 e = apr_palloc(pool, sizeof(ENTRY));
901 APR_RING_INSERT_TAIL(&root, e, _entry, link);
902 e->expire = disk_info.expire;
903 e->response_time = disk_info.response_time;
908 e->basename = apr_pstrdup(pool, d->basename);
917 delete_entry(path, d->basename, nodes, p);
926 if (realclean || d->htime < current - deviation
927 || d->htime > current + deviation) {
928 delete_entry(path, d->basename, nodes, p);
929 unsolicited += d->hsize;
934 current = apr_time_now();
935 if (realclean || d->dtime < current - deviation
936 || d->dtime > current + deviation) {
937 delete_entry(path, d->basename, nodes, p);
938 unsolicited += d->dsize;
942 /* temp files may only be deleted in realclean mode which
943 * is asserted above if a tempfile is in the hash array
946 delete_file(path, d->basename, nodes, p);
947 unsolicited += d->dsize;
959 apr_sleep(NICE_DELAY);
970 * purge cache entries
972 static void purge(char *path, apr_pool_t *pool, apr_off_t max,
973 apr_off_t inodes, apr_off_t nodes, apr_off_t round)
975 ENTRY *e, *n, *oldest;
988 for (e = APR_RING_FIRST(&root);
989 e != APR_RING_SENTINEL(&root, _entry, link);
990 e = APR_RING_NEXT(e, link)) {
991 s.sum += round_up(e->hsize, round);
992 s.sum += round_up(e->dsize, round);
997 s.etotal = s.entries;
999 if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) {
1000 printstats(path, &s);
1004 /* process all entries with a timestamp in the future, this may
1005 * happen if a wrong system time is corrected
1008 for (e = APR_RING_FIRST(&root);
1009 e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
1010 n = APR_RING_NEXT(e, link);
1011 if (e->response_time > now || e->htime > now || e->dtime > now) {
1012 delete_entry(path, e->basename, &s.nodes, pool);
1013 s.sum -= round_up(e->hsize, round);
1014 s.sum -= round_up(e->dsize, round);
1017 APR_RING_REMOVE(e, link);
1018 if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) {
1020 printstats(path, &s);
1032 /* process all entries which are expired */
1033 for (e = APR_RING_FIRST(&root);
1034 e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
1035 n = APR_RING_NEXT(e, link);
1036 if (e->expire != APR_DATE_BAD && e->expire < now) {
1037 delete_entry(path, e->basename, &s.nodes, pool);
1038 s.sum -= round_up(e->hsize, round);
1039 s.sum -= round_up(e->dsize, round);
1042 APR_RING_REMOVE(e, link);
1043 if ((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes)) {
1045 printstats(path, &s);
1057 /* process remaining entries oldest to newest, the check for an empty
1058 * ring actually isn't necessary except when the compiler does
1059 * corrupt 64bit arithmetics which happened to me once, so better safe
1062 while (!((!s.max || s.sum <= s.max) && (!s.inodes || s.nodes <= s.inodes))
1063 && !interrupted && !APR_RING_EMPTY(&root, _entry, link)) {
1064 oldest = APR_RING_FIRST(&root);
1066 for (e = APR_RING_NEXT(oldest, link);
1067 e != APR_RING_SENTINEL(&root, _entry, link);
1068 e = APR_RING_NEXT(e, link)) {
1069 if (e->dtime < oldest->dtime) {
1074 delete_entry(path, oldest->basename, &s.nodes, pool);
1075 s.sum -= round_up(oldest->hsize, round);
1076 s.sum -= round_up(oldest->dsize, round);
1079 APR_RING_REMOVE(oldest, link);
1083 printstats(path, &s);
1087 static apr_status_t remove_directory(apr_pool_t *pool, const char *dir)
1093 rv = apr_dir_open(&dirp, dir, pool);
1094 if (rv == APR_ENOENT) {
1097 if (rv != APR_SUCCESS) {
1099 apr_file_printf(errfile, "Could not open directory %s: %s" APR_EOL_STR,
1100 dir, apr_strerror(rv, errmsg, sizeof errmsg));
1104 while (apr_dir_read(&dirent, APR_FINFO_DIRENT | APR_FINFO_TYPE, dirp)
1106 if (dirent.filetype == APR_DIR) {
1107 if (strcmp(dirent.name, ".") && strcmp(dirent.name, "..")) {
1108 rv = remove_directory(pool, apr_pstrcat(pool, dir, "/",
1109 dirent.name, NULL));
1110 /* tolerate the directory not being empty, the cache may have
1111 * attempted to recreate the directory in the mean time.
1113 if (APR_SUCCESS != rv && APR_ENOTEMPTY != rv) {
1118 const char *file = apr_pstrcat(pool, dir, "/", dirent.name, NULL);
1119 rv = apr_file_remove(file, pool);
1120 if (APR_SUCCESS != rv) {
1122 apr_file_printf(errfile,
1123 "Could not remove file '%s': %s" APR_EOL_STR, file,
1124 apr_strerror(rv, errmsg, sizeof errmsg));
1130 apr_dir_close(dirp);
1132 if (rv == APR_SUCCESS) {
1133 rv = apr_dir_remove(dir, pool);
1134 if (APR_ENOTEMPTY == rv) {
1137 if (rv != APR_SUCCESS) {
1139 apr_file_printf(errfile, "Could not remove directory %s: %s" APR_EOL_STR,
1140 dir, apr_strerror(rv, errmsg, sizeof errmsg));
1147 static apr_status_t find_directory(apr_pool_t *pool, const char *base,
1153 int found = 0, files = 0;
1154 const char *header = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX, NULL);
1155 const char *data = apr_pstrcat(pool, rest, CACHE_DATA_SUFFIX, NULL);
1156 const char *vdir = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX,
1157 CACHE_VDIR_SUFFIX, NULL);
1158 const char *dirname = NULL;
1160 rv = apr_dir_open(&dirp, base, pool);
1161 if (rv != APR_SUCCESS) {
1163 apr_file_printf(errfile, "Could not open directory %s: %s" APR_EOL_STR,
1164 base, apr_strerror(rv, errmsg, sizeof errmsg));
1170 while (apr_dir_read(&dirent, APR_FINFO_DIRENT | APR_FINFO_TYPE, dirp)
1172 int len = strlen(dirent.name);
1173 int restlen = strlen(rest);
1174 if (dirent.filetype == APR_DIR && !strncmp(rest, dirent.name, len)) {
1175 dirname = apr_pstrcat(pool, base, "/", dirent.name, NULL);
1176 rv = find_directory(pool, dirname, rest + (len < restlen ? len
1178 if (APR_SUCCESS == rv) {
1182 if (dirent.filetype == APR_DIR) {
1183 if (!strcmp(dirent.name, vdir)) {
1187 if (dirent.filetype == APR_REG) {
1188 if (!strcmp(dirent.name, header) || !strcmp(dirent.name, data)) {
1194 apr_dir_close(dirp);
1200 apr_status_t status;
1202 remove = apr_pstrcat(pool, base, "/", header, NULL);
1203 status = apr_file_remove(remove, pool);
1204 if (status != APR_SUCCESS && status != APR_ENOENT) {
1206 apr_file_printf(errfile, "Could not remove file %s: %s" APR_EOL_STR,
1207 remove, apr_strerror(status, errmsg, sizeof errmsg));
1211 remove = apr_pstrcat(pool, base, "/", data, NULL);
1212 status = apr_file_remove(remove, pool);
1213 if (status != APR_SUCCESS && status != APR_ENOENT) {
1215 apr_file_printf(errfile, "Could not remove file %s: %s" APR_EOL_STR,
1216 remove, apr_strerror(status, errmsg, sizeof errmsg));
1220 status = remove_directory(pool, apr_pstrcat(pool, base, "/", vdir, NULL));
1221 if (status != APR_SUCCESS && status != APR_ENOENT) {
1227 /* If asked to delete dirs, do so now. We don't care if it fails.
1228 * If it fails, it likely means there was something else there.
1230 if (dirname && deldirs && !dryrun) {
1231 apr_dir_remove(dirname, pool);
1242 * Delete a specific URL from the cache.
1244 static apr_status_t delete_url(apr_pool_t *pool, const char *proxypath, const char *url)
1246 apr_md5_ctx_t context;
1247 unsigned char digest[16];
1251 static const char enc_table[64] =
1252 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
1254 apr_md5_init(&context);
1255 apr_md5_update(&context, (const unsigned char *) url, strlen(url));
1256 apr_md5_final(digest, &context);
1258 /* encode 128 bits as 22 characters, using a modified uuencoding
1259 * the encoding is 3 bytes -> 4 characters, i.e. 128 bits is
1260 * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
1262 for (i = 0, k = 0; i < 15; i += 3) {
1263 x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
1264 tmp[k++] = enc_table[x >> 18];
1265 tmp[k++] = enc_table[(x >> 12) & 0x3f];
1266 tmp[k++] = enc_table[(x >> 6) & 0x3f];
1267 tmp[k++] = enc_table[x & 0x3f];
1272 tmp[k++] = enc_table[x >> 2]; /* use up 6 bits */
1273 tmp[k++] = enc_table[(x << 4) & 0x3f];
1276 /* automatically find the directory levels */
1277 return find_directory(pool, proxypath, tmp);
1283 #define NL APR_EOL_STR
1284 static void usage(const char *error)
1287 apr_file_printf(errfile, "%s error: %s\n", shortname, error);
1289 apr_file_printf(errfile,
1290 "%s -- program for cleaning the disk cache." NL
1291 "Usage: %s [-Dvtrn] -pPATH [-lLIMIT|-LLIMIT] [-PPIDFILE]" NL
1292 " %s [-nti] -dINTERVAL -pPATH [-lLIMIT|-LLIMIT] [-PPIDFILE]" NL
1293 " %s [-Dvt] -pPATH URL ..." NL
1296 " -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL
1297 " This option is mutually exclusive with the -D, -v and -r" NL
1300 " -D Do a dry run and don't delete anything. This option is mutually" NL
1301 " exclusive with the -d option. When doing a dry run and deleting" NL
1302 " directories with -t, the inodes reported deleted in the stats" NL
1303 " cannot take into account the directories deleted, and will be" NL
1304 " marked as an estimate." NL
1306 " -v Be verbose and print statistics. This option is mutually" NL
1307 " exclusive with the -d option." NL
1309 " -r Clean thoroughly. This assumes that the Apache web server is " NL
1310 " not running. This option is mutually exclusive with the -d" NL
1311 " option and implies -t." NL
1313 " -n Be nice. This causes slower processing in favour of other" NL
1316 " -t Delete all empty directories. By default only cache files are" NL
1317 " removed, however with some configurations the large number of" NL
1318 " directories created may require attention." NL
1320 " -p Specify PATH as the root directory of the disk cache." NL
1322 " -P Specify PIDFILE as the file to write the pid to." NL
1324 " -R Specify amount to round sizes up to." NL
1326 " -l Specify LIMIT as the total disk cache size limit. Attach 'K'" NL
1327 " or 'M' to the number for specifying KBytes or MBytes." NL
1329 " -L Specify LIMIT as the total disk cache inode limit." NL
1331 " -i Be intelligent and run only when there was a modification of" NL
1332 " the disk cache. This option is only possible together with the" NL
1335 " -a List the URLs currently stored in the cache. Variants of the" NL
1336 " same URL will be listed once for each variant." NL
1338 " -A List the URLs currently stored in the cache, along with their" NL
1339 " attributes in the following order: url, header size, body size," NL
1340 " status, entity version, date, expiry, request time," NL
1341 " response time, body present, head request." NL
1343 "Should an URL be provided on the command line, the URL will be" NL
1344 "deleted from the cache. A reverse proxied URL is made up as follows:" NL
1345 "http://<hostname>:<port><path>?[query]. So, for the path \"/\" on the" NL
1346 "host \"localhost\" and port 80, the URL to delete becomes" NL
1347 "\"http://localhost:80/?\". Note the '?' in the URL must always be" NL
1348 "specified explicitly, whether a query string is present or not." NL,
1359 static void usage_repeated_arg(apr_pool_t *pool, char option) {
1360 usage(apr_psprintf(pool,
1361 "The option '%c' cannot be specified more than once",
1365 static void log_pid(apr_pool_t *pool, const char *pidfilename, apr_file_t **pidfile)
1367 apr_status_t status;
1369 pid_t mypid = getpid();
1371 if (APR_SUCCESS == (status = apr_file_open(pidfile, pidfilename,
1372 APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE |
1373 APR_FOPEN_DELONCLOSE, APR_FPROT_UREAD | APR_FPROT_UWRITE |
1374 APR_FPROT_GREAD | APR_FPROT_WREAD, pool))) {
1375 apr_file_printf(*pidfile, "%" APR_PID_T_FMT APR_EOL_STR, mypid);
1379 apr_file_printf(errfile,
1380 "Could not write the pid file '%s': %s" APR_EOL_STR,
1382 apr_strerror(status, errmsg, sizeof errmsg));
1391 int main(int argc, const char * const argv[])
1393 apr_off_t max, inodes, round;
1394 apr_time_t current, repeat, delay, previous;
1395 apr_status_t status;
1396 apr_pool_t *pool, *instance;
1399 apr_file_t *pidfile;
1400 int retries, isdaemon, limit_found, inodes_found, intelligent, dowork;
1403 char *proxypath, *path, *pidfilename;
1420 previous = 0; /* avoid compiler warning */
1424 if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
1427 atexit(apr_terminate);
1430 shortname = apr_filepath_name_get(argv[0]);
1433 if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
1436 apr_pool_abort_set(oom, pool);
1437 apr_file_open_stderr(&errfile, pool);
1438 apr_file_open_stdout(&outfile, pool);
1439 apr_signal(SIGINT, setterm);
1440 apr_signal(SIGTERM, setterm);
1442 apr_getopt_init(&o, pool, argc, argv);
1445 status = apr_getopt(o, "iDnvrtd:l:L:p:P:R:aA", &opt, &arg);
1446 if (status == APR_EOF) {
1449 else if (status != APR_SUCCESS) {
1458 usage_repeated_arg(pool, opt);
1465 usage_repeated_arg(pool, opt);
1472 usage_repeated_arg(pool, opt);
1479 usage_repeated_arg(pool, opt);
1486 usage_repeated_arg(pool, opt);
1493 usage_repeated_arg(pool, opt);
1501 usage_repeated_arg(pool, opt);
1504 repeat = apr_atoi64(arg);
1505 repeat *= SECS_PER_MIN;
1506 repeat *= APR_USEC_PER_SEC;
1511 usage_repeated_arg(pool, opt);
1516 rv = apr_strtoff(&max, arg, &end, 10);
1517 if (rv == APR_SUCCESS) {
1518 if ((*end == 'K' || *end == 'k') && !end[1]) {
1521 else if ((*end == 'M' || *end == 'm') && !end[1]) {
1524 else if ((*end == 'G' || *end == 'g') && !end[1]) {
1527 else if (*end && /* neither empty nor [Bb] */
1528 ((*end != 'B' && *end != 'b') || end[1])) {
1532 if (rv != APR_SUCCESS) {
1533 usage(apr_psprintf(pool, "Invalid limit: %s"
1534 APR_EOL_STR APR_EOL_STR, arg));
1541 usage_repeated_arg(pool, opt);
1546 rv = apr_strtoff(&inodes, arg, &end, 10);
1547 if (rv == APR_SUCCESS) {
1548 if ((*end == 'K' || *end == 'k') && !end[1]) {
1551 else if ((*end == 'M' || *end == 'm') && !end[1]) {
1554 else if ((*end == 'G' || *end == 'g') && !end[1]) {
1557 else if (*end && /* neither empty nor [Bb] */
1558 ((*end != 'B' && *end != 'b') || end[1])) {
1562 if (rv != APR_SUCCESS) {
1563 usage(apr_psprintf(pool, "Invalid limit: %s"
1564 APR_EOL_STR APR_EOL_STR, arg));
1571 usage_repeated_arg(pool, opt);
1578 usage_repeated_arg(pool, opt);
1586 usage_repeated_arg(pool, opt);
1588 proxypath = apr_pstrdup(pool, arg);
1589 if ((status = apr_filepath_set(proxypath, pool)) != APR_SUCCESS) {
1590 usage(apr_psprintf(pool, "Could not set filepath to '%s': %s",
1591 proxypath, apr_strerror(status, errmsg, sizeof errmsg)));
1597 usage_repeated_arg(pool, opt);
1599 pidfilename = apr_pstrdup(pool, arg);
1604 usage_repeated_arg(pool, opt);
1606 rv = apr_strtoff(&round, arg, &end, 10);
1607 if (rv == APR_SUCCESS) {
1609 usage(apr_psprintf(pool, "Invalid round value: %s"
1610 APR_EOL_STR APR_EOL_STR, arg));
1612 else if (round < 0) {
1613 usage(apr_psprintf(pool, "Round value must be positive: %s"
1614 APR_EOL_STR APR_EOL_STR, arg));
1617 if (rv != APR_SUCCESS) {
1618 usage(apr_psprintf(pool, "Invalid round value: %s"
1619 APR_EOL_STR APR_EOL_STR, arg));
1631 if (o->ind < argc) {
1635 usage("Option -d cannot be used with URL arguments, aborting");
1638 usage("Option -i cannot be used with URL arguments, aborting");
1641 usage("Option -l cannot be used with URL arguments, aborting");
1643 while (o->ind < argc) {
1644 status = delete_url(pool, proxypath, argv[o->ind]);
1645 if (APR_SUCCESS == status) {
1647 apr_file_printf(errfile, "Removed: %s" APR_EOL_STR,
1652 else if (APR_ENOENT == status) {
1654 apr_file_printf(errfile, "Not cached: %s" APR_EOL_STR,
1660 apr_file_printf(errfile, "Error while removed: %s" APR_EOL_STR,
1667 return error ? 1 : deleted ? 0 : 2;
1670 if (isdaemon && repeat <= 0) {
1671 usage("Option -d must be greater than zero");
1674 if (isdaemon && (verbose || realclean || dryrun || listurls)) {
1675 usage("Option -d cannot be used with -v, -r, -L or -D");
1678 if (!isdaemon && intelligent) {
1679 usage("Option -i cannot be used without -d");
1683 usage("Option -p must be specified");
1686 if (!listurls && max <= 0 && inodes <= 0) {
1687 usage("At least one of option -l or -L must be greater than zero");
1690 if (apr_filepath_get(&path, 0, pool) != APR_SUCCESS) {
1691 usage(apr_psprintf(pool, "Could not get the filepath: %s",
1692 apr_strerror(status, errmsg, sizeof errmsg)));
1694 baselen = strlen(path);
1697 log_pid(pool, pidfilename, &pidfile); /* before daemonizing, so we
1703 list_urls(path, pool, round);
1704 return (interrupted != 0);
1709 apr_file_close(errfile);
1712 apr_file_close(pidfile); /* delete original pidfile only in parent */
1714 apr_proc_detach(APR_PROC_DETACH_DAEMONIZE);
1716 log_pid(pool, pidfilename, &pidfile);
1722 apr_pool_create(&instance, pool);
1724 now = apr_time_now();
1725 APR_RING_INIT(&root, _entry, link);
1730 switch (intelligent) {
1736 retries = STAT_ATTEMPTS;
1737 status = APR_SUCCESS;
1740 if (status != APR_SUCCESS) {
1741 apr_sleep(STAT_DELAY);
1743 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
1744 } while (status != APR_SUCCESS && !interrupted && --retries);
1746 if (status == APR_SUCCESS) {
1747 previous = info.mtime;
1754 retries = STAT_ATTEMPTS;
1755 status = APR_SUCCESS;
1758 if (status != APR_SUCCESS) {
1759 apr_sleep(STAT_DELAY);
1761 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
1762 } while (status != APR_SUCCESS && !interrupted && --retries);
1764 if (status == APR_SUCCESS) {
1765 if (previous != info.mtime) {
1768 previous = info.mtime;
1776 if (dowork && !interrupted) {
1777 apr_off_t nodes = 0;
1778 if (!process_dir(path, instance, &nodes) && !interrupted) {
1779 purge(path, instance, max, inodes, nodes, round);
1781 else if (!isdaemon && !interrupted) {
1782 apr_file_printf(errfile, "An error occurred, cache cleaning "
1783 "aborted." APR_EOL_STR);
1787 if (intelligent && !interrupted) {
1788 retries = STAT_ATTEMPTS;
1789 status = APR_SUCCESS;
1791 if (status != APR_SUCCESS) {
1792 apr_sleep(STAT_DELAY);
1794 status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
1795 } while (status != APR_SUCCESS && !interrupted && --retries);
1797 if (status == APR_SUCCESS) {
1798 previous = info.mtime;
1807 apr_pool_destroy(instance);
1809 current = apr_time_now();
1810 if (current < now) {
1813 else if (current - now >= repeat) {
1817 delay = now + repeat - current;
1820 /* we can't sleep the whole delay time here in one go as this is racy
1821 * with respect to interrupt delivery - think about what happens
1822 * if we have tested for an interrupt, then get scheduled
1823 * before the apr_sleep() call and while waiting for the cpu
1824 * we do get an interrupt
1827 while (delay && !interrupted) {
1828 if (delay > APR_USEC_PER_SEC) {
1829 apr_sleep(APR_USEC_PER_SEC);
1830 delay -= APR_USEC_PER_SEC;
1838 } while (isdaemon && !interrupted);
1840 if (!isdaemon && interrupted) {
1841 apr_file_printf(errfile, "Cache cleaning aborted due to user "
1842 "request." APR_EOL_STR);