From e7f90638f7a0cccd20ffed72946f81c7e03cabb0 Mon Sep 17 00:00:00 2001 From: Graham Leggett Date: Mon, 29 Mar 2010 22:30:46 +0000 Subject: [PATCH] htcacheclean: Introduce the ability to clean specific URLs from the cache, if provided as an optional parameter on the command line. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@928921 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES | 4 + docs/manual/programs/htcacheclean.xml | 35 +++- support/htcacheclean.c | 244 +++++++++++++++++++++++++- 3 files changed, 277 insertions(+), 6 deletions(-) diff --git a/CHANGES b/CHANGES index ab616cc91e..af2920d0e4 100644 --- a/CHANGES +++ b/CHANGES @@ -28,6 +28,10 @@ Changes with Apache 2.3.7 processing is completed, avoiding orphaned callback pointers. [Brett Gervasoni , Jeff Trawick] + *) htcacheclean: Introduce the ability to clean specific URLs from the + cache, if provided as an optional parameter on the command line. + [Graham Leggett] + *) core: Introduce the IncludeStrict directive, which explicitly fails server startup if no files or directories match a wildcard path. [Graham Leggett] diff --git a/docs/manual/programs/htcacheclean.xml b/docs/manual/programs/htcacheclean.xml index 973f48ce75..e7338c9542 100644 --- a/docs/manual/programs/htcacheclean.xml +++ b/docs/manual/programs/htcacheclean.xml @@ -29,9 +29,12 @@

htcacheclean is used to keep the size of mod_disk_cache's storage within a certain limit. This tool can run either manually or in daemon mode. When running in - daemon mode, it sleeps in the background and checks the cache directories + daemon mode, it sleeps in the background and checks the cache directory at regular intervals for cached content to be removed. You can stop the daemon - cleanly by sending it a TERM or INT signal.

+ cleanly by sending it a TERM or INT signal. When run manually, a once off + check of the cache directory is made for cached content to be removed. If + one or more URLs are specified, each URL will be deleted from the cache, if + present.

mod_disk_cache @@ -53,6 +56,13 @@ -dinterval -ppath -llimit

+ +

htcacheclean + [ -D ] + [ -v ] + [ -t ] + -ppath + url

Options @@ -113,9 +123,28 @@
+
Deleting a specific URL +

If htcacheclean is passed one or more URLs, each URL will + be deleted from the cache. If multiple variants of an URL exists, all + variants would be deleted.

+ +

When a reverse proxied URL is to be deleted, the effective URL is + constructed from the Host header, the + port, the path and the + query. Note the '?' in the URL must always be specified + explicitly, whether a query string is present or not. For example, an + attempt to delete the path / from the server + localhost, the URL to delete would be + http://localhost:80/?.

+ +
+
Exit Status

htcacheclean returns a zero status ("true") if all - operations were successful, 1 otherwise.

+ operations were successful, 1 otherwise. If an URL is + specified, and the URL was cached and successfully removed, + 0 is returned, 2 otherwise. If an error + occurred during URL removal, 1 is returned.

diff --git a/support/htcacheclean.c b/support/htcacheclean.c index da780a54bf..72983ce558 100644 --- a/support/htcacheclean.c +++ b/support/htcacheclean.c @@ -32,6 +32,7 @@ #include "apr_thread_proc.h" #include "apr_signal.h" #include "apr_getopt.h" +#include "apr_md5.h" #include "apr_ring.h" #include "apr_date.h" #include "apr_buckets.h" @@ -722,6 +723,199 @@ static void purge(char *path, apr_pool_t *pool, apr_off_t max) } } +static apr_status_t remove_directory(apr_pool_t *pool, const char *dir) +{ + apr_status_t rv; + apr_dir_t *dirp; + apr_finfo_t dirent; + + rv = apr_dir_open(&dirp, dir, pool); + if (rv == APR_ENOENT) { + return rv; + } + if (rv != APR_SUCCESS) { + char errmsg[120]; + apr_file_printf(errfile, "Could not open directory %s: %s" APR_EOL_STR, + dir, apr_strerror(rv, errmsg, sizeof errmsg)); + return rv; + } + + while (apr_dir_read(&dirent, APR_FINFO_DIRENT | APR_FINFO_TYPE, dirp) + == APR_SUCCESS) { + if (dirent.filetype == APR_DIR) { + if (strcmp(dirent.name, ".") && strcmp(dirent.name, "..")) { + rv = remove_directory(pool, apr_pstrcat(pool, dir, "/", + dirent.name, NULL)); + /* tolerate the directory not being empty, the cache may have + * attempted to recreate the directory in the mean time. + */ + if (APR_SUCCESS != rv && APR_ENOTEMPTY != rv) { + break; + } + } + } else { + const char *file = apr_pstrcat(pool, dir, "/", dirent.name, NULL); + rv = apr_file_remove(file, pool); + if (APR_SUCCESS != rv) { + char errmsg[120]; + apr_file_printf(errfile, + "Could not remove file '%s': %s" APR_EOL_STR, file, + apr_strerror(rv, errmsg, sizeof errmsg)); + break; + } + } + } + + apr_dir_close(dirp); + + if (rv == APR_SUCCESS) { + rv = apr_dir_remove(dir, pool); + if (APR_ENOTEMPTY == rv) { + rv = APR_SUCCESS; + } + if (rv != APR_SUCCESS) { + char errmsg[120]; + apr_file_printf(errfile, "Could not remove directory %s: %s" APR_EOL_STR, + dir, apr_strerror(rv, errmsg, sizeof errmsg)); + } + } + + return rv; +} + +static apr_status_t find_directory(apr_pool_t *pool, const char *base, + const char *rest) +{ + apr_status_t rv; + apr_dir_t *dirp; + apr_finfo_t dirent; + int found = 0, files = 0; + const char *header = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX, NULL); + const char *data = apr_pstrcat(pool, rest, CACHE_DATA_SUFFIX, NULL); + const char *vdir = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX, + CACHE_VDIR_SUFFIX, NULL); + const char *dirname = NULL; + + rv = apr_dir_open(&dirp, base, pool); + if (rv != APR_SUCCESS) { + char errmsg[120]; + apr_file_printf(errfile, "Could not open directory %s: %s" APR_EOL_STR, + base, apr_strerror(rv, errmsg, sizeof errmsg)); + return rv; + } + + rv = APR_ENOENT; + + while (apr_dir_read(&dirent, APR_FINFO_DIRENT | APR_FINFO_TYPE, dirp) + == APR_SUCCESS) { + int len = strlen(dirent.name); + int restlen = strlen(rest); + if (dirent.filetype == APR_DIR && !strncmp(rest, dirent.name, len)) { + dirname = apr_pstrcat(pool, base, "/", dirent.name, NULL); + rv = find_directory(pool, dirname, rest + (len < restlen ? len + : restlen)); + if (APR_SUCCESS == rv) { + found = 1; + } + } + if (dirent.filetype == APR_DIR) { + if (!strcmp(dirent.name, vdir)) { + files = 1; + } + } + if (dirent.filetype == APR_REG) { + if (!strcmp(dirent.name, header) || !strcmp(dirent.name, data)) { + files = 1; + } + } + } + + apr_dir_close(dirp); + + if (files) { + rv = APR_SUCCESS; + if (!dryrun) { + const char *remove; + apr_status_t status; + + remove = apr_pstrcat(pool, base, "/", header, NULL); + status = apr_file_remove(remove, pool); + if (status != APR_SUCCESS && status != APR_ENOENT) { + char errmsg[120]; + apr_file_printf(errfile, "Could not remove file %s: %s" APR_EOL_STR, + remove, apr_strerror(status, errmsg, sizeof errmsg)); + rv = status; + } + + remove = apr_pstrcat(pool, base, "/", data, NULL); + status = apr_file_remove(remove, pool); + if (status != APR_SUCCESS && status != APR_ENOENT) { + char errmsg[120]; + apr_file_printf(errfile, "Could not remove file %s: %s" APR_EOL_STR, + remove, apr_strerror(status, errmsg, sizeof errmsg)); + rv = status; + } + + status = remove_directory(pool, apr_pstrcat(pool, base, "/", vdir, NULL)); + if (status != APR_SUCCESS && status != APR_ENOENT) { + rv = status; + } + } + } + + /* If asked to delete dirs, do so now. We don't care if it fails. + * If it fails, it likely means there was something else there. + */ + if (dirname && deldirs && !dryrun) { + apr_dir_remove(dirname, pool); + } + + if (found) { + return APR_SUCCESS; + } + + return rv; +} + +/** + * Delete a specific URL from the cache. + */ +static apr_status_t delete_url(apr_pool_t *pool, const char *proxypath, const char *url) +{ + apr_md5_ctx_t context; + unsigned char digest[16]; + char tmp[23]; + int i, k; + unsigned int x; + static const char enc_table[64] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@"; + + apr_md5_init(&context); + apr_md5_update(&context, (const unsigned char *) url, strlen(url)); + apr_md5_final(digest, &context); + + /* encode 128 bits as 22 characters, using a modified uuencoding + * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is + * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters + */ + for (i = 0, k = 0; i < 15; i += 3) { + x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2]; + tmp[k++] = enc_table[x >> 18]; + tmp[k++] = enc_table[(x >> 12) & 0x3f]; + tmp[k++] = enc_table[(x >> 6) & 0x3f]; + tmp[k++] = enc_table[x & 0x3f]; + } + + /* one byte left */ + x = digest[15]; + tmp[k++] = enc_table[x >> 2]; /* use up 6 bits */ + tmp[k++] = enc_table[(x << 4) & 0x3f]; + tmp[k] = 0; + + /* automatically find the directory levels */ + return find_directory(pool, proxypath, tmp); +} + /* * usage info */ @@ -735,6 +929,7 @@ static void usage(const char *error) "%s -- program for cleaning the disk cache." NL "Usage: %s [-Dvtrn] -pPATH -lLIMIT [-PPIDFILE]" NL " %s [-nti] -dINTERVAL -pPATH -lLIMIT [-PPIDFILE]" NL + " %s [-Dvt] -pPATH URL ..." NL NL "Options:" NL " -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL @@ -767,7 +962,15 @@ static void usage(const char *error) NL " -i Be intelligent and run only when there was a modification of" NL " the disk cache. This option is only possible together with the" NL - " -d option." NL, + " -d option." NL + NL + "Should an URL be provided on the command line, the URL will be" NL + "deleted from the cache. A reverse proxied URL is made up as follows:" NL + "http://:?[query]. So, for the path \"/\" on the" NL + "host \"localhost\" and port 80, the URL to delete becomes" NL + "\"http://localhost:80/?\". Note the '?' in the URL must always be" NL + "specified explicitly, whether a query string is present or not." NL, + shortname, shortname, shortname, shortname @@ -980,8 +1183,43 @@ int main(int argc, const char * const argv[]) usage(NULL); } - if (o->ind != argc) { - usage("Additional parameters specified on the command line, aborting"); + if (o->ind < argc) { + int deleted = 0; + int error = 0; + if (isdaemon) { + usage("Option -d cannot be used with URL arguments, aborting"); + } + if (intelligent) { + usage("Option -i cannot be used with URL arguments, aborting"); + } + if (limit_found) { + usage("Option -l cannot be used with URL arguments, aborting"); + } + while (o->ind < argc) { + status = delete_url(pool, proxypath, argv[o->ind]); + if (APR_SUCCESS == status) { + if (verbose) { + apr_file_printf(errfile, "Removed: %s" APR_EOL_STR, + argv[o->ind]); + } + deleted = 1; + } + else if (APR_ENOENT == status) { + if (verbose) { + apr_file_printf(errfile, "Not cached: %s" APR_EOL_STR, + argv[o->ind]); + } + } + else { + if (verbose) { + apr_file_printf(errfile, "Error while removed: %s" APR_EOL_STR, + argv[o->ind]); + } + error = 1; + } + o->ind++; + } + return error ? 1 : deleted ? 0 : 2; } if (isdaemon && repeat <= 0) { -- 2.40.0