From c1bce715620f15fe06cbcf1dbab91f205dcadd41 Mon Sep 17 00:00:00 2001 From: Bill Stoddard Date: Thu, 23 Aug 2001 14:15:00 +0000 Subject: [PATCH] Experimental cache based on Graham Leggett's layered cache design. mod_cache implements a quick handler, and three filters. The filters are CACHE_IN for loading the cache, CACHE_OUT for serving content out of the cache and CACHE_CONDITIONAL, which handles stale entries in the cache. mod_cache implements code that makes RFC compliant caching decisions. It interfaces with the actual storage mechanism via calls to functions defined in cache_storage.c. This commit includes a simple in memory (malloc'ed memory) cache implementation that demonstrates autoloading and serving files keyed on URL. This is not even close to production ready. You have been warned :-) git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@90549 13f79535-47bb-0310-9956-ffa450edef68 --- modules/experimental/cache_storage.c | 276 ++++++++++ modules/experimental/cache_util.c | 206 +++++++ modules/experimental/mod_cache.c | 769 +++++++++++++++++++++++++-- modules/experimental/mod_cache.h | 206 ++++++- modules/experimental/mod_mem_cache.c | 509 ++++++++++++++++++ 5 files changed, 1919 insertions(+), 47 deletions(-) create mode 100644 modules/experimental/cache_storage.c create mode 100644 modules/experimental/cache_util.c create mode 100644 modules/experimental/mod_mem_cache.c diff --git a/modules/experimental/cache_storage.c b/modules/experimental/cache_storage.c new file mode 100644 index 0000000000..050628dad1 --- /dev/null +++ b/modules/experimental/cache_storage.c @@ -0,0 +1,276 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2000-2001 The Apache Software Foundation. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ *
+ * Portions of this software are based upon public domain software
+ * originally written at the National Center for Supercomputing Applications,
+ * University of Illinois, Urbana-Champaign.
+ */
+
+#define CORE_PRIVATE
+
+#include "mod_cache.h"
+
+APR_HOOK_STRUCT(
+    APR_HOOK_LINK(remove_url)
+    APR_HOOK_LINK(create_entity)
+    APR_HOOK_LINK(open_entity)
+)
+
+module AP_MODULE_DECLARE_DATA tcache_module;
+
+/* -------------------------------------------------------------- */
+
+/*
+ * delete all URL entities from the cache
+ *
+ * r:     current request; r->pool holds the temporary type tokens
+ * types: comma separated list of cache types, may be NULL
+ * url:   key of the entities to remove
+ *
+ * The remove_url hook is run once for every non-empty type in the
+ * list.  The result of the hook is not examined; OK is always returned.
+ */
+int cache_remove_url(request_rec *r, const char *types, char *url)
+{
+    const char *next = types;
+    const char *type;
+
+    /* for each specified cache type, delete the URL.
+     *
+     * ap_cache_tokstr() stores NULL in next once the last token has
+     * been handed out, so next is tested before every call rather than
+     * passing a NULL list back in.
+     */
+    while (next) {
+        type = ap_cache_tokstr(r->pool, next, &next);
+        if (type) {
+            cache_run_remove_url(type, url);
+        }
+    }
+    return OK;
+}
+
+
+/*
+ * create a new URL entity in the cache
+ *
+ * It is possible to store more than one entity per URL. This
+ * function will always create a new entity, regardless of whether
+ * other entities already exist for the same URL.
+ *
+ * The size of the entity is provided so that a cache module can
+ * decide whether or not it wants to cache this particular entity.
+ * If the size is unknown, a size of -1 should be set.
+ *
+ * Every non-empty cache type in types is offered the entity in turn
+ * through the create_entity hook:
+ *   OK        the handle is stored in the per request config and OK
+ *             is returned
+ *   DECLINED  the next type in the list is tried
+ *   other     returned to the caller as is
+ * DECLINED is returned when the list is exhausted.
+ */
+int cache_create_entity(request_rec *r, const char *types, char *url, apr_size_t size)
+{
+    cache_handle *h;
+    const char *next = types;
+    const char *type;
+    apr_status_t rv;
+    cache_request_rec *cache = (cache_request_rec *) ap_get_module_config(r->request_config,
+                                                                          &tcache_module);
+
+    /* for each specified cache type, try to create the entity */
+    while (next) {
+        type = ap_cache_tokstr(r->pool, next, &next);
+        if (!type) {
+            /* empty list element, there is no provider to ask */
+            continue;
+        }
+        switch (rv = cache_run_create_entity(&h, type, url, size)) {
+        case OK: {
+            cache->handle = h;
+            return OK;
+        }
+        case DECLINED: {
+            continue;
+        }
+        default: {
+            return rv;
+        }
+        }
+    }
+    return DECLINED;
+}
+
+/*
+ * remove a specific URL entity from the cache
+ *
+ * The specific entity referenced by the cache_handle is removed
+ * from the cache, and the cache_handle is closed.
+ *
+ * NOTE(review): the body below is still a stub.  It walks the type
+ * list without acting on it, never touches h, and returns 1 rather
+ * than OK or DECLINED.
+ */
+int cache_remove_entity(request_rec *r, const char *types, cache_handle *h)
+{
+    const char *next = types;
+    const char *type;
+
+    while (next) {
+        type = ap_cache_tokstr(r->pool, next, &next);
+    }
+    return 1;
+}
+
+/*
+ * select a specific URL entity in the cache
+ *
+ * It is possible to store more than one entity per URL. Content
+ * negotiation is used to select an entity. Once an entity is
+ * selected, details of it are stored in the per request
+ * config to save time when serving the request later.
+ *
+ * This function returns OK if successful, DECLINED if no
+ * cached entity fits the bill.
+ */
+int cache_select_url(request_rec *r, const char *types, char *url)
+{
+    cache_handle *h;
+    const char *next = types;
+    const char *type;
+    apr_status_t rv;
+    cache_request_rec *cache = (cache_request_rec *) ap_get_module_config(r->request_config,
+                                                                          &tcache_module);
+
+    /* go through the cache types till we get a match */
+    while (next) {
+        type = ap_cache_tokstr(r->pool, next, &next);
+        if (!type) {
+            /* empty list element, there is no provider to ask */
+            continue;
+        }
+        switch ((rv = cache_run_open_entity(&h, type, url))) {
+        case OK: {
+            /* cool bananas! */
+            cache->handle = h;
+/*** loop through returned entities */
+/*** do freshness calculation here */
+            /* until the calculation exists every hit counts as fresh */
+            cache->fresh = 1;
+/*** do content negotiation here */
+            return OK;
+        }
+        case DECLINED: {
+            /* try again with next cache type */
+            continue;
+        }
+        default: {
+            /* oo-er! an error */
+            return rv;
+        }
+        }
+    }
+    return DECLINED;
+}
+
+/*
+ * hand the response headers of an entity to the cache
+ *
+ * The Content-Type response header of r is copied into info, then info
+ * is given to the write_headers function of the handle.  The result of
+ * write_headers is not examined and APR_SUCCESS is always returned.
+ * headers_in and headers_out are not used yet; the content type is
+ * read from r->headers_out.
+ */
+apr_status_t cache_write_entity_headers(cache_handle *h, request_rec *r, cache_info *info,
+                                        apr_table_t *headers_in, apr_table_t *headers_out)
+{
+    const char *ct;
+
+    ct = ap_table_get(r->headers_out, "Content-Type");
+    info->content_type = ct;
+    h->write_headers(h, r, info);
+    return APR_SUCCESS;
+}
+
+/*
+ * hand a brigade of body data to the cache
+ *
+ * Returns APR_SUCCESS when the write_body function of the handle
+ * answers OK and APR_EGENERAL otherwise, so that a failed store is no
+ * longer reported as a success.
+ */
+apr_status_t cache_write_entity_body(cache_handle *h, apr_bucket_brigade *b)
+{
+    if (h->write_body(h, b) != OK) {
+        return APR_EGENERAL;
+    }
+    return APR_SUCCESS;
+}
+
+/*
+ * build the response header table of a cached entity
+ *
+ * *headers always receives a freshly made table from r->pool, also
+ * when the provider has nothing to offer.  Content-Length is the only
+ * header filled in so far; a cached content type is copied into
+ * r->content_type.
+ *
+ * Returns APR_SUCCESS, or APR_EGENERAL when read_headers left no
+ * cache_info behind.
+ */
+apr_status_t cache_read_entity_headers(cache_handle *h, request_rec *r,
+                                       apr_table_t **headers)
+{
+    cache_info *info = NULL;
+
+    /* make the table first so the caller never sees a wild pointer */
+    *headers = apr_table_make(r->pool, 15);
+
+    /* Be careful to not modify info. */
+    h->read_headers(h, r, &info);
+    if (!info) {
+        return APR_EGENERAL;
+    }
+
+    /* Build the header table from info in the info struct */
+    /* Content-Length */
+    if (info->len)
+        apr_table_set(*headers, "Content-Length",
+                      apr_psprintf(r->pool, "%lu", (unsigned long) info->len));
+
+    /* Last-Modified */
+    if (info->lastmod) {
+        /* not implemented yet */
+    }
+    /* Expires */
+    if (info->expire) {
+        /* not implemented yet */
+    }
+    if (info->content_type) {
+        r->content_type = apr_pstrdup(r->pool, info->content_type);
+    }
+    /* Date */
+
+    return APR_SUCCESS;
+}
+
+/*
+ * let the handle fill the brigade with the cached body
+ *
+ * The result of read_body is not examined; APR_SUCCESS is always
+ * returned.
+ */
+apr_status_t cache_read_entity_body(cache_handle *h, apr_bucket_brigade *b)
+{
+    h->read_body(h, b);
+    return APR_SUCCESS;
+}
+
+APR_IMPLEMENT_EXTERNAL_HOOK_RUN_FIRST(cache, CACHE, int, create_entity,
+                                      (cache_handle **hp, const char *type,
+                                      char *url, apr_size_t len),(hp,type,url,len),DECLINED)
+APR_IMPLEMENT_EXTERNAL_HOOK_RUN_FIRST(cache, CACHE, int, open_entity,
+                                      (cache_handle **hp, const char *type,
+                                      char *url),(hp,type,url),DECLINED)
+APR_IMPLEMENT_EXTERNAL_HOOK_RUN_ALL(cache, CACHE, int, remove_url,
+                                    (const char *type, char *url),(type,url),OK,DECLINED)
+#if 0
+/* BillS doesn't think these should be hooks.
+ * All functions which accept a cache_handle * argument should use
+ * function pointers in the cache_handle. Leave them here for now as
+ * points for discussion...
+ */ + +APR_IMPLEMENT_EXTERNAL_HOOK_RUN_FIRST(cache, CACHE, int, remove_entity, + (cache_handle *h),(h),DECLINED) + +APR_IMPLEMENT_EXTERNAL_HOOK_RUN_FIRST(cache, CACHE, int, read_entity_headers, + (cache_handle *h, request_rec *r, + apr_table_t **headers), + (h,info,headers_in,headers_out),DECLINED) +APR_IMPLEMENT_EXTERNAL_HOOK_RUN_FIRST(cache, CACHE, int, read_entity_body, + (cache_handle *h, + apr_bucket_brigade *out),(h,out),DECLINED) +APR_IMPLEMENT_EXTERNAL_HOOK_RUN_FIRST(cache, CACHE, int, write_entity_headers, + (cache_handle *h, cache_info *info, + apr_table_t *headers_in, + apr_table_t *headers_out), + (h,info,headers_in,headers_out),DECLINED) +APR_IMPLEMENT_EXTERNAL_HOOK_RUN_FIRST(cache, CACHE, int, write_entity_body, + (cache_handle *h, + apr_bucket_brigade *in),(h,in),DECLINED) +#endif diff --git a/modules/experimental/cache_util.c b/modules/experimental/cache_util.c new file mode 100644 index 0000000000..cc37aa972c --- /dev/null +++ b/modules/experimental/cache_util.c @@ -0,0 +1,206 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2000-2001 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." 
+ * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + * Portions of this software are based upon public domain software + * originally written at the National Center for Supercomputing Applications, + * University of Illinois, Urbana-Champaign. 
+ */
+
+#define CORE_PRIVATE
+
+#include "mod_cache.h"
+
+
+
+/* -------------------------------------------------------------- */
+
+/* return true if the request is conditional
+ *
+ * A request counts as conditional when it carries at least one of the
+ * If-Match, If-None-Match, If-Modified-Since or If-Unmodified-Since
+ * headers.  Returns 1 in that case and 0 otherwise.
+ */
+int ap_cache_request_is_conditional(request_rec *r)
+{
+    if (apr_table_get(r->headers_in, "If-Match") ||
+        apr_table_get(r->headers_in, "If-None-Match") ||
+        apr_table_get(r->headers_in, "If-Modified-Since") ||
+        apr_table_get(r->headers_in, "If-Unmodified-Since")) {
+
+        return 1;
+    }
+    return 0;
+}
+
+
+/* remove other filters from filter stack
+ *
+ * Every output filter of the request is removed except the ones named
+ * CORE, CONTENT_LENGTH and HTTP_HEADER (compared without regard to
+ * case).
+ */
+void ap_cache_reset_output_filters(request_rec *r)
+{
+    ap_filter_t *f = r->output_filters;
+
+    while (f) {
+        /* fetch the successor before f is removed from the stack */
+        ap_filter_t *next = f->next;
+
+        if (strcasecmp(f->frec->name, "CORE") &&
+            strcasecmp(f->frec->name, "CONTENT_LENGTH") &&
+            strcasecmp(f->frec->name, "HTTP_HEADER")) {
+            ap_remove_output_filter(f);
+        }
+        f = next;
+    }
+}
+
+/*
+ * work out which cache types are configured for a URL
+ *
+ * The url is compared, without regard to case, against the prefix of
+ * every cacheenable entry; the types of all matching entries are joined
+ * with commas in configuration order (the joined string comes from
+ * r->pool).  A single matching cachedisable prefix then cancels the
+ * result.
+ *
+ * Returns the type list, or NULL when caching does not apply or url is
+ * NULL.
+ */
+const char *ap_cache_get_cachetype(request_rec *r, cache_server_conf *conf, const char *url)
+{
+    const char *type = NULL;
+    int i;
+
+    /* nothing can match a missing URL, and strncasecmp() must not see NULL */
+    if (!url) {
+        return NULL;
+    }
+
+    /* loop through all the cacheenable entries */
+    for (i = 0; i < conf->cacheenable->nelts; i++) {
+        struct cache_enable *ent = (struct cache_enable *) conf->cacheenable->elts;
+        const char *thisurl = ent[i].url;
+        const char *thistype = ent[i].type;
+        if ((thisurl) && !strncasecmp(thisurl, url, strlen(thisurl))) {
+            if (!type) {
+                type = thistype;
+            }
+            else {
+                type = apr_pstrcat(r->pool, type, ",", thistype, NULL);
+            }
+        }
+    }
+
+    /* then loop through all the cachedisable entries */
+    for (i = 0; i < conf->cachedisable->nelts; i++) {
+        struct cache_disable *ent = (struct cache_disable *) conf->cachedisable->elts;
+        const char *thisurl = ent[i].url;
+        if ((thisurl) && !strncasecmp(thisurl, url, strlen(thisurl))) {
+            type = NULL;
+        }
+    }
+
+    return type;
+}
+
+/*
+ * list is a comma-separated list of case-insensitive tokens, with
+ * optional whitespace around the tokens.
+ * The function returns 1 if the token key is found in the list, or 0
+ * otherwise.
+ */
+int ap_cache_liststr(const char *list, const char *key, char **val)
+{
+    /*
+     * A list element is either "name" or "name=value".  Only the name
+     * part, with surrounding whitespace trimmed, is compared with key.
+     *
+     * When val is not NULL and the key is found, *val is set to the
+     * first non-blank character after the '=' of that element, or to
+     * NULL when the element has no '='.  The pointer refers into list
+     * itself: it stays valid as long as list does, it is NOT cut off at
+     * the next comma, and it must not be written through.  (No copy is
+     * made because this function has no pool to allocate from, and a
+     * local buffer would be gone by the time the caller looks at it.)
+     * *val is left alone when the key is not found.
+     *
+     * Quoted values that contain commas are not understood.
+     * A NULL list or key yields 0.
+     */
+    apr_size_t keylen;
+    const char *next;
+
+    if (list == NULL || key == NULL) {
+        return 0;
+    }
+    keylen = strlen(key);
+
+    for (; list != NULL; list = next) {
+        const char *end;        /* one past the last char of this element */
+        const char *eq;         /* the '=' inside this element, if any */
+        const char *name_end;   /* one past the last char of the name */
+
+        while (ap_isspace(*list)) {
+            list++;
+        }
+
+        end = strchr(list, ',');
+        if (end != NULL) {
+            next = end + 1;
+        }
+        else {
+            end = list + strlen(list);
+            next = NULL;
+        }
+
+        eq = memchr(list, '=', end - list);
+        name_end = (eq != NULL) ? eq : end;
+        while (name_end > list && ap_isspace(name_end[-1])) {
+            name_end--;
+        }
+
+        if ((apr_size_t) (name_end - list) == keylen
+            && strncasecmp(list, key, keylen) == 0) {
+            if (val) {
+                if (eq != NULL) {
+                    const char *v = eq + 1;
+                    while (v < end && ap_isspace(*v)) {
+                        v++;
+                    }
+                    *val = (char *) v;
+                }
+                else {
+                    *val = NULL;
+                }
+            }
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* return each comma separated token, one at a time
+ *
+ * p:    pool the returned copy is allocated from
+ * list: where to start reading; NULL means the list is exhausted
+ * str:  receives the start of the following token (whitespace after the
+ *       comma already skipped), or NULL when list held the last token
+ *
+ * Returns a copy of the first token of list with trailing whitespace
+ * removed, or NULL when that token is empty or list is NULL.
+ */
+const char *ap_cache_tokstr(apr_pool_t *p, const char *list, const char **str)
+{
+    apr_size_t i;
+    const char *s;
+
+    if (list == NULL) {
+        *str = NULL;
+        return NULL;
+    }
+
+    s = ap_strchr_c(list, ',');
+    if (s != NULL) {
+        i = s - list;
+        do
+            s++;
+        while (apr_isspace(*s));
+    }
+    else
+        i = strlen(list);
+
+    while (i > 0 && apr_isspace(list[i - 1]))
+        i--;
+
+    *str = s;
+    /* i is the trimmed token length worked out above */
+    if (i)
+        return apr_pstrndup(p, list, i);
+    else
+        return NULL;
+
+}
diff --git a/modules/experimental/mod_cache.c b/modules/experimental/mod_cache.c
index 91059ebd37..97bd257f41 100644
--- a/modules/experimental/mod_cache.c
+++ b/modules/experimental/mod_cache.c
@@ -56,69 +56,750 @@
  * University of Illinois, Urbana-Champaign.
*/ -#include "apr_strings.h" -#include "ap_config.h" -#include "util_filter.h" -#include "httpd.h" -#include "http_config.h" -#include "http_request.h" -#include "http_core.h" -#include "http_protocol.h" -#include "http_log.h" -#include "http_main.h" -#include "util_script.h" -#include "http_core.h" +#define CORE_PRIVATE + #include "mod_cache.h" -#include "apr_hooks.h" -AP_DECLARE_DATA module cache_module; +module AP_MODULE_DECLARE_DATA tcache_module; + + + +/* -------------------------------------------------------------- */ + -APR_HOOK_STRUCT( - APR_HOOK_LINK(serve_cache) - APR_HOOK_LINK(store_cache) -) -AP_IMPLEMENT_HOOK_RUN_FIRST(int,serve_cache,(request_rec *r),(r),DECLINED) -AP_IMPLEMENT_HOOK_RUN_FIRST(int,store_cache,(request_rec *r, apr_bucket_brigade *bb, void **cf), - (r, bb, cf),DECLINED) +/* + * CACHE handler + * ------------- + * + * Can we deliver this request from the cache? + * If yes: + * deliver the content by installing the CACHE_OUT filter. + * If no: + * check whether we're allowed to try cache it + * If yes: + * add CACHE_IN filter + * If No: + * oh well. + */ + +int ap_url_cache_handler(request_rec *r); -static int cache_handler(request_rec *r) +int ap_url_cache_handler(request_rec *r) { - /* I am sure there is common error checking that belongs in this function, - * but I'm not sure what it is. + apr_status_t rv; + const char *cc_in; + apr_pool_t *p = r->pool; + apr_uri_components uri = r->parsed_uri; + char *url = r->unparsed_uri; + char *path = uri.path; + const char *types; + cache_info *info = NULL; + cache_request_rec *cache; + cache_server_conf *conf = (cache_server_conf *) ap_get_module_config(r->server->module_config, + &tcache_module); + + /* we don't handle anything but GET */ + if (r->method_number != M_GET) return DECLINED; + + /* + * Which cache module (if any) should handle this request? 
+ */ + if (!(types = ap_cache_get_cachetype(r, conf, path))) { + return DECLINED; + } + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: URL %s is being handled by %s", path, types); + + /* make space for the per request config */ + cache = (cache_request_rec *) ap_get_module_config(r->request_config, &tcache_module); + if (!cache) { + cache = ap_pcalloc(r->pool, sizeof(cache_request_rec)); + ap_set_module_config(r->request_config, &tcache_module, cache); + } + + /* save away the type */ + cache->types = types; + + /* + * Are we allowed to serve cached info at all? + */ + + /* find certain cache controlling headers */ + cc_in = ap_table_get(r->headers_in, "Cache-Control"); + + /* first things first - does the request allow us to return + * cached information at all? If not, just decline the request. + * + * Note that there is a big difference between not being allowed + * to cache a request (no-store) and not being allowed to return + * a cached request without revalidation (max-age=0). + * + * Caching is forbidden under the following circumstances: + * + * - RFC2616 14.9.2 Cache-Control: no-store + * we are not supposed to store this request at all. Behave as a + * tunnel. + */ + if (ap_cache_liststr(cc_in, "no-store", NULL)) { + /* delete the previously cached file */ + cache_remove_url(r, cache->types, url); + + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: no-store forbids caching of %s", url); + return DECLINED; + } + + /* + * Try serve this request from the cache. 
+ * + * If no existing cache file + * add cache_in filter + * If stale cache file + * If conditional request + * add cache_in filter + * If non-conditional request + * fudge response into a conditional + * add cache_conditional filter + * If fresh cache file + * clear filter stack + * add cache_out filter */ - return ap_run_serve_cache(r); + + rv = cache_select_url(r, cache->types, url); + if (DECLINED == rv) { + /* no existing cache file */ + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: no cache - add cache_in filter and DECLINE"); + /* add cache_in filter to cache this request */ + ap_add_output_filter("CACHE_IN", NULL, r, r->connection); + /* return DECLINED */ + return DECLINED; + } + else if (OK == rv) { + /* cache file exists */ + if (cache->fresh) { + apr_bucket_brigade *out; + + /* fresh data available */ + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: fresh cache - add cache_out filter and handle request"); + + /* We are in the quick handler hook, which means that no output + * filters have been set. So lets run the insert_filter hook. + * Humm... Probably should not go through most of these hooks + * for a proxy request, so take out all but the basics. + */ + ap_run_insert_filter(r); + if (r->proxyreq) { + ap_cache_reset_output_filters(r); + } + /* Now add the cache_out filter. cache_out is a FTYPE_CONTENT + * which means it will be inserted first in the stream, which + * is exactly what we need. 
+ */ + ap_add_output_filter("CACHE_OUT", NULL, r, r->connection); + + /* kick off the filter stack */ + out = apr_brigade_create(r->pool); + if (APR_SUCCESS != (rv = ap_pass_brigade(r->output_filters, out))) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "cache: error returned while trying to return %s cached data", cache->type); + return rv; + } + return OK; + } + else { + /* stale data available */ + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: stale cache - test conditional"); + /* if conditional request */ + if (ap_cache_request_is_conditional(r)) { + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: conditional - add cache_in filter and DECLINE"); + /* add cache_in filter */ + ap_add_output_filter("CACHE_IN", NULL, r, r->connection); + /* return DECLINED */ + return DECLINED; + } + /* else if non-conditional request */ + else { + /* fudge response into a conditional */ + if (info && info->etag) { + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: nonconditional - fudge conditional by etag"); + /* if we have a cached etag */ + apr_table_set(r->headers_in, "If-None-Match", info->etag); + } + else if (info && info->lastmods) { + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: nonconditional - fudge conditional by lastmod"); + /* if we have a cached IMS */ + apr_table_set(r->headers_in, "If-Modified-Since", info->lastmods); + } + else { + /* something else - pretend there was no cache */ + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: nonconditional - no cached etag/lastmods - add cache_in and DECLINE"); + /* add cache_in filter to cache this request */ + ap_add_output_filter("CACHE_IN", NULL, r, r->connection); + /* return DECLINED */ + return DECLINED; + } + /* add cache_conditional filter */ + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: nonconditional - add 
cache_conditional and DECLINE"); + ap_add_output_filter("CACHE_CONDITIONAL", NULL, r, r->connection); + /* return DECLINED */ + return DECLINED; + } + } + } + else { + /* error */ + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "cache: error returned while checking for cached file by %s cache", cache->type); + return DECLINED; + } } -typedef struct cache_struct { - void *cf; -} cache_struct; +/* + * CACHE_OUT filter + * ---------------- + * + * Deliver cached content (headers and body) up the stack. + */ +int ap_cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb); -static int cache_filter(ap_filter_t *f, apr_bucket_brigade *bb) +int ap_cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) { - cache_struct *ctx = f->ctx; + cache_info *info = NULL; + request_rec *r = f->r; + apr_table_t *headers; + cache_request_rec *cache = (cache_request_rec *) ap_get_module_config(r->request_config, + &tcache_module); + + ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server, + "cache: running CACHE_OUT filter"); + + cache_read_entity_headers(cache->handle, r, &headers); + r->headers_out = headers; + cache_read_entity_body(cache->handle, bb); + + /* This filter is done once it has served up its content */ + ap_remove_output_filter(f); + return ap_pass_brigade(f->next, bb); +} + + +/* + * CACHE_CONDITIONAL filter + * ------------------------ + * + * Decide whether or not cached content should be delivered + * based on our fudged conditional request. 
+ * If response HTTP_NOT_MODIFIED
+ *   replace ourselves with cache_out filter
+ * Otherwise
+ *   replace ourselves with cache_in filter
+ */
+int ap_cache_conditional_filter(ap_filter_t *f, apr_bucket_brigade *in);
+
+int ap_cache_conditional_filter(ap_filter_t *f, apr_bucket_brigade *in)
+{
+    ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, f->r->server,
+                 "cache: running CACHE_CONDITIONAL filter");
+
+    if (f->r->status == HTTP_NOT_MODIFIED) {
+        /* replace ourselves with CACHE_OUT filter */
+        ap_add_output_filter("CACHE_OUT", NULL, f->r, f->r->connection);
+    }
+    else {
+        /* replace ourselves with CACHE_IN filter */
+        ap_add_output_filter("CACHE_IN", NULL, f->r, f->r->connection);
+    }
+    ap_remove_output_filter(f);
+
+    return ap_pass_brigade(f->next, in);
+}
+
+
+/*
+ * CACHE_IN filter
+ * ---------------
+ *
+ * Decide whether or not this content should be cached.
+ * If we decide no it should:
+ *   remove the filter from the chain
+ * If we decide yes it should:
+ *   pass the data to the storage manager
+ *   pass the data to the next filter (the network)
+ *
+ * The first call for a request makes the decision, opens the cache
+ * entity and writes its headers; cache->in_checked records that this
+ * has happened.  Later calls only hand their brigade to the cache.
+ * Every call passes the brigade on to the next filter and returns the
+ * result of doing so.
+ */
+int ap_cache_in_filter(ap_filter_t *f, apr_bucket_brigade *in);
+
+int ap_cache_in_filter(ap_filter_t *f, apr_bucket_brigade *in)
+{
+    request_rec *r = f->r;
+    char *url = r->unparsed_uri;
+
+    const char *cc_out = ap_table_get(r->headers_out, "Cache-Control");
+    const char *exps, *lastmods, *dates, *etag;
+    apr_time_t exp, date, lastmod, now;
+    apr_size_t size;
+    int rv;
+
+    cache_info *info;
+    void *sconf = r->server->module_config;
+    cache_server_conf *conf =
+        (cache_server_conf *) ap_get_module_config(sconf, &tcache_module);
+    void *scache = r->request_config;
+    cache_request_rec *cache =
+        (cache_request_rec *) ap_get_module_config(scache, &tcache_module);
+
+
+    ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, f->r->server,
+                 "cache: running CACHE_IN filter");
+
+    /* check first whether running this filter has any point or not */
+    if(r->no_cache) {
+        return ap_pass_brigade(f->next, in);
+    }
+
+    /*
+     * Pass Data to Cache
+     * ------------------
+     * This section passes the brigades into the cache modules, but only
+     * if the setup section (see below) is complete.
+     */
+
+    /* have we already run the cachability check and set up the cached file handle? */
+    if(cache->in_checked) {
+        /* pass the brigades into the cache, then pass them
+         * up the filter stack
+         */
+        cache_write_entity_body(cache->handle, in);
+        return ap_pass_brigade(f->next, in);
+    }
+
+    /*
+     * Setup Data in Cache
+     * -------------------
+     * This section opens the cache entity and sets various caching parameters,
+     * and decides whether this URL should be cached at all. This section is
+     * run before the above section.
+     */
+    info = ap_pcalloc(r->pool, sizeof(cache_info));
+
+    /* read expiry date; if a bad date, then leave it so the client can
+     * read it.  exps stays set in that case: the check for a broken
+     * Expires header further down depends on seeing both the header
+     * and the failed parse.
+     */
+    exps = ap_table_get(r->headers_out, "Expires");
+    if (exps != NULL) {
+        exp = apr_date_parse_http(exps);
+    }
+    else {
+        exp = APR_DATE_BAD;
+    }
+
+    /* read the last-modified date; if the date is bad, then delete it */
+    lastmods = ap_table_get(r->headers_out, "Last-Modified");
+    if (lastmods != NULL) {
+        if (APR_DATE_BAD == (lastmod = apr_date_parse_http(lastmods))) {
+            lastmods = NULL;
+        }
+    }
+    else {
+        lastmod = APR_DATE_BAD;
+    }
+
+    /* read the etag from the entity */
+    etag = ap_table_get(r->headers_out, "Etag");
+
+    /*
+     * what responses should we not cache?
+     *
+     * At this point we decide based on the response headers whether it
+     * is appropriate _NOT_ to cache the data from the server. There are
+     * a whole lot of conditions that prevent us from caching this data.
+     * They are tested here one by one to be clear and unambiguous.
+     */
+
+    /* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
+     * We don't cache 206, because we don't (yet) cache partial responses.
+     * We include 304 Not Modified here too as this is the origin server
+     * telling us to serve the cached copy. */
+    if ((r->status != HTTP_OK && r->status != HTTP_NON_AUTHORITATIVE &&
+         r->status != HTTP_MULTIPLE_CHOICES &&
+         r->status != HTTP_MOVED_PERMANENTLY && r->status != HTTP_NOT_MODIFIED) ||
+
+        /* if a broken Expires header is present, don't cache it */
+        (exps != NULL && exp == APR_DATE_BAD) ||
+
+        /* if the server said 304 Not Modified but we have no cache file - pass
+         * this untouched to the user agent, it's not for us. */
+        (r->status == HTTP_NOT_MODIFIED && (NULL == cache->handle)) ||
+
+        /* 200 OK response from HTTP/1.0 and up without a Last-Modified header/Etag */
+        (r->status == HTTP_OK && lastmods == NULL && etag == NULL) ||
+
+        /* HEAD requests */
+        r->header_only ||
+
+        /* RFC2616 14.9.2 Cache-Control: no-store response indicating do not
+         * cache, or stop now if you are trying to cache it */
+        ap_cache_liststr(cc_out, "no-store", NULL) ||
+
+        /* RFC2616 14.9.1 Cache-Control: private
+         * this object is marked for this user's eyes only. Behave as a tunnel. */
+        ap_cache_liststr(cc_out, "private", NULL) ||
+
+        /* RFC2616 14.8 Authorisation:
+         * if authorisation is included in the request, we don't cache, but we
+         * can cache if the following exceptions are true:
+         * 1) If Cache-Control: s-maxage is included
+         * 2) If Cache-Control: must-revalidate is included
+         * 3) If Cache-Control: public is included
+         */
+        (ap_table_get(r->headers_in, "Authorization") != NULL &&
+         !(ap_cache_liststr(cc_out, "s-maxage", NULL) ||
+           ap_cache_liststr(cc_out, "must-revalidate", NULL) ||
+           ap_cache_liststr(cc_out, "public", NULL))
+        ) ||
+
+        /* or we've been asked not to cache it above */
+        r->no_cache) {
+
+        ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server,
+                     "cache: response is not cachable");
+
+        /* remove this object from the cache */
+        cache_remove_url(r, cache->types, url);
+
+        /* remove this filter from the chain */
+        ap_remove_output_filter(f);
+
+        /* ship the data up the stack */
+        return ap_pass_brigade(f->next, in);
+    }
+
+    /* Set the content length if known. We almost certainly do NOT want to
+     * cache streams with unknown content lengths in the in-memory cache.
+     * Streams with unknown content length should be first cached in the
+     * file system. If they are within acceptable limits, then they can be
+     * moved to the in-memory cache.
+     */
+    {
+        const char* cl;
+        cl = apr_table_get(r->headers_out, "Content-Length");
+        if (cl) {
+            size = atol(cl);
+        }
+        else
+            /* the "size unknown" marker cache_create_entity() asks for */
+            size = (apr_size_t) -1;
+    }
+
+    /* It's safe to cache the response.
+     *
+     * There are two possibilities at this point:
+     * - cache->handle == NULL. In this case there is no previously
+     * cached entity anywhere on the system. We must create a brand
+     * new entity and store the response in it.
+     * - cache->handle != NULL. In this case there is a stale
+     * entity in the system which needs to be replaced by new
+     * content (unless the result was 304 Not Modified, which means
+     * the cached entity is actually fresh, and we should update
+     * the headers).
+     */
+    /* no cache handle, create a new entity */
+    if (!cache->handle) {
+        rv = cache_create_entity(r, cache->types, url, size);
+    }
+    /* pre-existing cache handle and 304, make entity fresh */
+    else if (r->status == HTTP_NOT_MODIFIED) {
+        /* update headers */
+
+        /* remove this filter ??? */
+        rv = OK;
+    }
+    /* pre-existing cache handle and new entity, replace entity with this one */
+    else {
+        cache_remove_entity(r, cache->types, cache->handle);
+        rv = cache_create_entity(r, cache->types, url, size);
+    }
+
+    /* no cache type took the entity: there is no handle to write to, so
+     * step aside and let the response through untouched
+     */
+    if (rv != OK || !cache->handle) {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server,
+                     "cache: no cache type accepted %s", url);
+        ap_remove_output_filter(f);
+        return ap_pass_brigade(f->next, in);
+    }
+
+    /* later calls go straight to the pass-data section at the top */
+    cache->in_checked = 1;
+
-    if (ctx == NULL) {
-        f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
+    /*
+     * We now want to update the cache file header information with
+     * the new date, last modified, expire and content length and write
+     * it away to our cache file. First, we determine these values from
+     * the response, using heuristics if appropriate.
+     *
+     * In addition, we make HTTP/1.1 age calculations and write them away
+     * too.
+     */
+
+    /* Read the date. Generate one if one is not supplied */
+    dates = ap_table_get(r->headers_out, "Date");
+    if (dates != NULL)
+        date = apr_date_parse_http(dates);
+    else
+        date = APR_DATE_BAD;
+
+    now = apr_time_now();
+    if (date == APR_DATE_BAD) {    /* No, or bad date */
+        char *newdate;
+        /* no date header! */
+        /* add one; N.B. use the time _now_ rather than when we were checking the cache */
+        date = now;
+        newdate = apr_pcalloc(r->pool, MAX_STRING_LEN);
+        apr_rfc822_date(newdate, now);
+        ap_table_set(r->headers_out, "Date", newdate);
+        /* keep the text in step with date, lastmods may borrow it below */
+        dates = newdate;
+        ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server,
+                     "cache: Added date header");
+    }
+    info->date = date;
+
+    /* set response_time for HTTP/1.1 age calculations */
+    info->response_time = now;
+
+    /* get the request time */
+    info->request_time = r->request_time;
+
+    /* check last-modified date */
+    if (lastmod != APR_DATE_BAD && lastmod > date)
+    {
+        /* if its in the future, then replace by date */
+        lastmod = date;
+        lastmods = dates;
+        ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server,
+                     "cache: Last modified is in the future, replacing with now");
+    }
+    info->lastmod = lastmod;
+
+    /* so we now have the expiry date */
+    /* if no expiry date then
+     *   if lastmod
+     *      expiry date = now + min((date - lastmod) * factor, maxexpire)
+     *   else
+     *      expire date = now + defaultexpire
+     */
+    ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server,
+                 "cache: Expiry date is %ld", (long)exp);
+    if (exp == APR_DATE_BAD) {
+        if (lastmod != APR_DATE_BAD) {
+            double x = (double) (date - lastmod) * conf->factor;
+            double maxex = conf->maxex;
+            if (x > maxex)
+                x = maxex;
+            exp = now + (int) x;
+        }
+        else
+            exp = now + conf->defex;
+        ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server,
+                     "cache: Expiry date calculated %ld", (long)exp);
     }
+    info->expire = exp;
-    ap_run_store_cache(f->r, bb, &ctx->cf);
-    ap_pass_brigade(f->next, bb);
-    return APR_SUCCESS;
+    /*
+     * Write away header information to cache.
+     */
+    cache_write_entity_headers(cache->handle, r, info, r->headers_in, r->headers_out);
+
+    /* NOTE(review): the brigade that triggered this setup pass goes on
+     * without being given to cache_write_entity_body(); only brigades of
+     * later calls are stored.  Confirm whether that is intended.
+     */
+    return ap_pass_brigade(f->next, in);
 }
-static void cache_register_hook(apr_pool_t *p)
+/* -------------------------------------------------------------- */
+/* Setup configurable data */
+
+static void * create_cache_config(apr_pool_t *p, server_rec *s)
 {
-    ap_hook_handler(cache_handler, NULL, NULL, APR_HOOK_MIDDLE);
-    ap_register_output_filter("CACHE", cache_filter, AP_FTYPE_HTTP_HEADER);
+    cache_server_conf *ps = ap_pcalloc(p, sizeof(cache_server_conf));
+
+    /* 1 if the cache is enabled, 0 otherwise */
+    ps->cacheon = 0;
+    ps->cacheon_set = 0;
+    /* array of URL prefixes for which caching is enabled */
+    ps->cacheenable = ap_make_array(p, 10, sizeof(struct cache_enable));
+    /* array of URL prefixes for which caching is disabled */
+    ps->cachedisable = ap_make_array(p, 10, sizeof(struct cache_disable));
+    /* maximum time to cache a document */
+    ps->maxex = DEFAULT_CACHE_MAXEXPIRE;
+    ps->maxex_set = 0;
+    /* default time to cache a document */
+    ps->defex = DEFAULT_CACHE_EXPIRE;
+    ps->defex_set = 0;
+    /* factor used to estimate Expires date from LastModified date */
+    ps->factor = DEFAULT_CACHE_LMFACTOR;
+    ps->factor_set = 0;
+    /* default percentage to force cache completion */
+    ps->complete = DEFAULT_CACHE_COMPLETION;
+    ps->complete_set = 0;
+    return ps;
 }
-AP_DECLARE_DATA module cache_module = {
+static void * merge_cache_config(apr_pool_t *p, void *basev, void *overridesv)
+{
+    cache_server_conf *ps = ap_pcalloc(p, sizeof(cache_server_conf));
+    cache_server_conf *base = (cache_server_conf *) basev;
+    cache_server_conf *overrides = (cache_server_conf *) overridesv;
+
+    /* 1 if the cache is enabled, 0 otherwise */
+    ps->cacheon = (overrides->cacheon_set == 0) ?
base->cacheon : overrides->cacheon; + /* array of URL prefixes for which caching is disabled */ + ps->cachedisable = ap_append_arrays(p, base->cachedisable, overrides->cachedisable); + /* array of URL prefixes for which caching is enabled */ + ps->cacheenable = ap_append_arrays(p, base->cacheenable, overrides->cacheenable); + /* maximum time to cache a document */ + ps->maxex = (overrides->maxex_set == 0) ? base->maxex : overrides->maxex; + /* default time to cache a document */ + ps->defex = (overrides->defex_set == 0) ? base->defex : overrides->defex; + /* factor used to estimate Expires date from LastModified date */ + ps->factor = (overrides->factor_set == 0) ? base->factor : overrides->factor; + /* default percentage to force cache completion */ + ps->complete = (overrides->complete_set == 0) ? base->complete : overrides->complete; + + return ps; +} + +static const char +*set_cache_on(cmd_parms *parms, void *dummy, int flag) +{ + cache_server_conf *conf = ap_get_module_config(parms->server->module_config, &tcache_module); + + conf->cacheon = 1; + conf->cacheon_set = 1; + return NULL; +} + +static const char +*add_cache_enable(cmd_parms *parms, void *dummy, const char *type, const char *url) +{ + cache_server_conf *conf = ap_get_module_config(parms->server->module_config, &tcache_module); + struct cache_enable *new; + + new = apr_array_push(conf->cacheenable); + new->type = type; + new->url = url; + return NULL; +} + +static const char +*add_cache_disable(cmd_parms *parms, void *dummy, const char *url) +{ + cache_server_conf *conf = ap_get_module_config(parms->server->module_config, &tcache_module); + struct cache_enable *new; + + new = apr_array_push(conf->cachedisable); + new->url = url; + return NULL; +} + +static const char +*set_cache_maxex(cmd_parms *parms, void *dummy, const char *arg) +{ + cache_server_conf *conf = ap_get_module_config(parms->server->module_config, &tcache_module); + double val; + + if (sscanf(arg, "%lg", &val) != 1) + return 
"CacheMaxExpire value must be a float"; + conf->maxex = (apr_time_t) (val * MSEC_ONE_HR); + conf->maxex_set = 1; + return NULL; +} + +static const char +*set_cache_defex(cmd_parms *parms, void *dummy, const char *arg) +{ + cache_server_conf *conf = ap_get_module_config(parms->server->module_config, &tcache_module); + double val; + + if (sscanf(arg, "%lg", &val) != 1) + return "CacheDefaultExpire value must be a float"; + conf->defex = (apr_time_t) (val * MSEC_ONE_HR); + conf->defex_set = 1; + return NULL; +} + +static const char +*set_cache_factor(cmd_parms *parms, void *dummy, const char *arg) +{ + cache_server_conf *conf = ap_get_module_config(parms->server->module_config, &tcache_module); + double val; + + if (sscanf(arg, "%lg", &val) != 1) + return "CacheLastModifiedFactor value must be a float"; + conf->factor = val; + conf->factor_set = 1; + return NULL; +} + +static const char +*set_cache_complete(cmd_parms *parms, void *dummy, const char *arg) +{ + cache_server_conf *conf = ap_get_module_config(parms->server->module_config, &tcache_module); + int val; + + if (sscanf(arg, "%u", &val) != 1) + return "CacheForceCompletion value must be a percentage"; + conf->complete = val; + conf->complete_set = 1; + return NULL; +} + +static const command_rec cache_cmds[] = +{ + /* XXX + * Consider a new config directive that enables loading specific cache + * implememtations (like mod_cache_mem, mod_cache_file, etc.). + * Rather than using a LoadModule directive, admin would use something + * like CacheModule mem_cache_module | file_cache_module, etc, + * which would cause the approprpriate cache module to be loaded. + * This is more intuitive that requiring a LoadModule directive. 
+ */ + + AP_INIT_FLAG("CacheOn", set_cache_on, NULL, RSRC_CONF, + "On if the transparent cache should be enabled"), + AP_INIT_TAKE2("CacheEnable", add_cache_enable, NULL, RSRC_CONF, + "A cache type and partial URL prefix below which caching is enabled"), + AP_INIT_TAKE1("CacheDisable", add_cache_disable, NULL, RSRC_CONF, + "A partial URL prefix below which caching is disabled"), + AP_INIT_TAKE1("CacheMaxExpire", set_cache_maxex, NULL, RSRC_CONF, + "The maximum time in hours to cache a document"), + AP_INIT_TAKE1("CacheDefaultExpire", set_cache_defex, NULL, RSRC_CONF, + "The default time in hours to cache a document"), + AP_INIT_TAKE1("CacheLastModifiedFactor", set_cache_factor, NULL, RSRC_CONF, + "The factor used to estimate Expires date from LastModified date"), + AP_INIT_TAKE1("CacheForceCompletion", set_cache_complete, NULL, RSRC_CONF, + "Percentage of download to arrive for the cache to force complete transfer"), + {NULL} +}; + +static void +register_hooks(apr_pool_t *p) +{ + /* cache initializer */ + /* cache handler */ + ap_hook_quick_handler(ap_url_cache_handler, NULL, NULL, APR_HOOK_FIRST); + /* cache filters */ + ap_register_output_filter("CACHE_IN", ap_cache_in_filter, AP_FTYPE_NETWORK); + ap_register_output_filter("CACHE_OUT", ap_cache_out_filter, AP_FTYPE_CONTENT); + ap_register_output_filter("CACHE_CONDITIONAL", ap_cache_conditional_filter, AP_FTYPE_NETWORK); +} + +module AP_MODULE_DECLARE_DATA tcache_module = +{ STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */ - NULL, /* merge per-directory config structures */ - NULL, /* create per-server config structure */ - NULL, /* merge per-server config structures */ - NULL, /* command apr_table_t */ - cache_register_hook /* register hooks */ + NULL, /* merge per-directory config structures */ + create_cache_config, /* create per-server config structure */ + merge_cache_config, /* merge per-server config structures */ + cache_cmds, /* command apr_table_t */ + register_hooks }; + diff 
--git a/modules/experimental/mod_cache.h b/modules/experimental/mod_cache.h
index 7f2cf9b0c1..923b5f7c2c 100644
--- a/modules/experimental/mod_cache.h
+++ b/modules/experimental/mod_cache.h
@@ -56,10 +56,210 @@
  * University of Illinois, Urbana-Champaign.
  */
 
-#include "apr_buckets.h"
+#ifndef MOD_CACHE_H
+#define MOD_CACHE_H
+
+/*
+ * Main include file for the Apache Transparent Cache
+ */
+
+#define CORE_PRIVATE
+
 #include "apr_hooks.h"
+#include "apr.h"
+#include "apr_compat.h"
+#include "apr_lib.h"
+#include "apr_strings.h"
+#include "apr_buckets.h"
+#include "apr_md5.h"
+#include "apr_pools.h"
+#include "apr_strings.h"
+
 #include "httpd.h"
+#include "http_config.h"
+#include "ap_config.h"
+#include "http_core.h"
+#include "http_protocol.h"
+#include "http_request.h"
+#include "http_vhost.h"
+#include "http_main.h"
+#include "http_log.h"
+#include "http_connection.h"
+#include "util_filter.h"
+#include "apr_date.h"
+#include "apr_uri.h"
+
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+
+#ifdef HAVE_NETINET_IN_H
+#include <netinet/in.h>
+#endif
+
+#ifdef HAVE_ARPA_INET_H
+#include <arpa/inet.h>
+#endif
+
+#ifndef MAX
+#define MAX(a,b)                ((a) > (b) ? (a) : (b))
+#endif
+#ifndef MIN
+#define MIN(a,b)                ((a) < (b) ? (a) : (b))
+#endif
+
+
+/* default completion is 60% */
+#define DEFAULT_CACHE_COMPLETION (60)
+
+#define MSEC_ONE_DAY ((apr_time_t)86400 * APR_USEC_PER_SEC) /* one day, in microseconds; widened before multiplying */
+#define MSEC_ONE_HR  ((apr_time_t)3600 * APR_USEC_PER_SEC)  /* one hour, in microseconds; widened before multiplying */
+
+#define DEFAULT_CACHE_MAXEXPIRE MSEC_ONE_DAY
+#define DEFAULT_CACHE_EXPIRE    MSEC_ONE_HR
+#define DEFAULT_CACHE_LMFACTOR  (0.1)
+
+struct cache_enable {
+    const char *url;
+    const char *type;
+};
+
+struct cache_disable {
+    const char *url;
+};
+
+/* static information about the local cache */
+typedef struct {
+    int cacheon;                        /* Cache enabled? */
+    int cacheon_set;
+    apr_array_header_t *cacheenable;    /* URLs to cache */
+    apr_array_header_t *cachedisable;   /* URLs not to cache */
+    apr_time_t maxex;                   /* Maximum time to keep cached files, in microseconds */
+    int maxex_set;
+    apr_time_t defex;                   /* default time to keep cached file, in microseconds */
+    int defex_set;
+    double factor;                      /* factor for estimating expires date */
+    int factor_set;
+    int complete;                       /* Force cache completion after this point */
+    int complete_set;
+
+} cache_server_conf;
+
+/* cache info information */
+typedef struct cache_info cache_info;
+struct cache_info {
+    char *content_type;
+    const char *etag;
+    const char *lastmods;               /* last modified of cache entity */
+    apr_time_t date;
+    apr_time_t lastmod;
+    char lastmod_str[APR_RFC822_DATE_LEN];
+    apr_time_t expire;
+    apr_time_t request_time;
+    apr_time_t response_time;
+    apr_size_t len;
+};
+
+/* cache handle information */
+typedef struct cache_handle cache_handle;
+struct cache_handle {
+    cache_info *info;
+    cache_handle *next;
+    void *cache_obj;                    /* Pointer to cache specific object */
+
+    /* Cache call back functions */
+    int (*remove_entity) (cache_handle *h);
+    int (*write_headers)(cache_handle *h, request_rec *r, cache_info *i);
+    int (*write_body)(cache_handle *h, apr_bucket_brigade *b);
+    int (*read_headers) (cache_handle *h, request_rec *r, cache_info **i);
+    int (*read_body) (cache_handle *h, apr_bucket_brigade *bb);
+
+};
+
+/* per request cache information */
+typedef struct {
+    const char *types;                  /* the types of caches allowed */
+    const char *type;                   /* the type of cache selected */
+    int fresh;                          /* is the entity fresh? */
+    cache_handle *handle;               /* current cache handle */
+    int in_checked;                     /* CACHE_IN must cache the entity */
+} cache_request_rec;
+
+
+/* cache_util.c */
+int ap_cache_request_is_conditional(request_rec *r);
+void ap_cache_reset_output_filters(request_rec *r);
+const char *ap_cache_get_cachetype(request_rec *r, cache_server_conf *conf, const char *url);
+int ap_cache_liststr(const char *list, const char *key, char **val);
+const char *ap_cache_tokstr(apr_pool_t *p, const char *list, const char **str);
+
+/**
+ * cache_storage.c
+ */
+int cache_remove_url(request_rec *r, const char *types, char *url);
+int cache_create_entity(request_rec *r, const char *types, char *url, apr_size_t size);
+int cache_remove_entity(request_rec *r, const char *types, cache_handle *h);
+int cache_select_url(request_rec *r, const char *types, char *url);
+
+apr_status_t cache_write_entity_headers(cache_handle *h, request_rec *r, cache_info *info,
+                                        apr_table_t *headers_in, apr_table_t *headers_out);
+apr_status_t cache_write_entity_body(cache_handle *h, apr_bucket_brigade *bb);
+
+apr_status_t cache_read_entity_headers(cache_handle *h, request_rec *r, apr_table_t **headers);
+apr_status_t cache_read_entity_body(cache_handle *h, apr_bucket_brigade *bb);
+
+
+/* hooks */
+
+/* Create a set of CACHE_DECLARE(type), CACHE_DECLARE_NONSTD(type) and
+ * CACHE_DECLARE_DATA with appropriate export and import tags for the platform
+ */
+#if !defined(WIN32)
+#define CACHE_DECLARE(type)            type
+#define CACHE_DECLARE_NONSTD(type)     type
+#define CACHE_DECLARE_DATA
+#elif defined(CACHE_DECLARE_STATIC)
+#define CACHE_DECLARE(type)            type __stdcall
+#define CACHE_DECLARE_NONSTD(type)     type
+#define CACHE_DECLARE_DATA
+#elif defined(CACHE_DECLARE_EXPORT)
+#define CACHE_DECLARE(type)            __declspec(dllexport) type __stdcall
+#define CACHE_DECLARE_NONSTD(type)     __declspec(dllexport) type
+#define CACHE_DECLARE_DATA             __declspec(dllexport)
+#else
+#define CACHE_DECLARE(type)            __declspec(dllimport) type __stdcall
+#define CACHE_DECLARE_NONSTD(type)     __declspec(dllimport) type
+#define CACHE_DECLARE_DATA             __declspec(dllimport)
+#endif
+
+APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, create_entity,
+                          (cache_handle **hp, const char *type,
+                           char *url, apr_size_t len))
+APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, open_entity,
+                          (cache_handle **hp, const char *type,
+                           char *url))
+APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, remove_url,
+                          (const char *type, char *url))
 
-AP_DECLARE_HOOK(int,serve_cache,(request_rec *r));
-AP_DECLARE_HOOK(int,store_cache,(request_rec *r, apr_bucket_brigade *bb, void **cf));
+#if 0
+APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, remove_entity,
+                          (cache_handle *h))
+APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, read_entity_headers,
+                          (cache_handle *h, cache_info **info,
+                           apr_table_t **headers_in, apr_table_t **headers_out))
+APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, read_entity_body,
+                          (cache_handle *h,
+                           apr_bucket_brigade *out))
+APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, write_entity_headers,
+                          (cache_handle *h, cache_info *info,
+                           apr_table_t *headers_in, apr_table_t *headers_out))
+APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, write_entity_body,
+                          (cache_handle *h,
+                           apr_bucket_brigade *in))
+#endif
 
+#endif /*MOD_CACHE_H*/
diff --git a/modules/experimental/mod_mem_cache.c b/modules/experimental/mod_mem_cache.c
new file mode 100644
index 0000000000..2d2781a1a2
--- /dev/null
+++ b/modules/experimental/mod_mem_cache.c
@@ -0,0 +1,509 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2000-2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ * Portions of this software are based upon public domain software
+ * originally written at the National Center for Supercomputing Applications,
+ * University of Illinois, Urbana-Champaign.
+ */
+
+#define CORE_PRIVATE
+
+#include "mod_cache.h"
+#define MAX_CACHE 5000
+module AP_MODULE_DECLARE_DATA mem_cache_module;
+
+/*
+ * XXX
+ * This cache uses apr_hash functions which leak storage when something is removed
+ * from the cache. This can be fixed in the apr_hash functions by making them use
+ * malloc/free rather than pools to manage their storage requirements.
+ */
+
+/*
+ * XXX Introduce a type field that identifies whether the cache obj
+ * references malloc'ed or mmap storage or a file descriptor
+ */
+typedef enum {
+    CACHE_TYPE_FILE = 1,
+    CACHE_TYPE_MALLOC,
+    CACHE_TYPE_MMAP
+} cache_type_e;
+
+/* One cached entity.  Every pointer member is heap (malloc) owned by the
+ * object and released by cleanup_cache_object().
+ */
+typedef struct {
+    cache_type_e type;
+    char *key;          /* NUL terminated hash key (the URL) */
+    void *m;            /* body storage, m_len bytes, NULL until written */
+    apr_size_t m_len;
+    cache_info info;
+    int complete;       /* non-zero once the body may be served */
+} cache_object_t;
+
+typedef struct {
+    apr_lock_t *lock;       /* guards cacheht */
+    apr_hash_t *cacheht;    /* key -> cache_object_t */
+    int space;
+    apr_time_t maxexpire;
+    apr_time_t defaultexpire;
+} mem_cache_conf;
+static mem_cache_conf *sconf;
+
+#define DEFAULT_CACHE_SPACE (100*1024)
+#define CACHEFILE_LEN 20
+
+/* Forward declarations */
+static int remove_entity(cache_handle *h);
+static int write_headers(cache_handle *h, request_rec *r, cache_info *i);
+static int write_body(cache_handle *h, apr_bucket_brigade *b);
+static int read_headers(cache_handle *h, request_rec *r, cache_info **info);
+static int read_body(cache_handle *h, apr_bucket_brigade *bb);
+
+/* Release a cache object and everything it owns.  The caller must already
+ * have unlinked the object from the hash table.
+ */
+static void cleanup_cache_object(cache_object_t *obj)
+{
+    /* The cache object has been removed from the cache. Now clean
+     * it up, freeing any storage, closing file descriptors, etc.
+     */
+    /* XXX -
+     * The action of freeing a cache entry is asynchronous with the rest of
+     * the operation of the cache. Frees can be driven by garbage collection,
+     * the result of some command or an HTTP request. It is okay to remove
+     * an entry from the cache at anytime but we need a mechanism to keep
+     * us from cleaning up the cache entry out from under other threads
+     * that may still be referencing it.
+     *
+     * Bill thinks that we need a special purpose reference counted
+     * bucket (or three).  When an entry is removed from the cache, the
+     * bucket for that entry is marked for cleanup. A bucket marked for
+     * cleanup is freed by the last routine referencing the bucket,
+     * either during brigade destroy or this routine.
+     */
+
+    /*
+     * Ref count decrementing and checking needs to be atomic
+
+       obj->ref_count--;
+       if (obj->ref_count) {
+           defer_cleanup (let the brigade cleanup free the bucket)
+       }
+       else {
+           free the bucket
+       }
+    */
+
+    /* free(NULL) is a no-op, and objects are zero-initialised on creation,
+     * so members that were never populated are safe to pass here.
+     */
+    free(obj->info.content_type);
+    free(obj->key);
+    free(obj->m);
+
+    free(obj);
+}
+
+/* Pool cleanup: free every object still held in the hash table.
+ * sconfv is the mem_cache_conf registered in create_cache_config().
+ */
+static apr_status_t cleanup_cache_mem(void *sconfv)
+{
+    cache_object_t *obj;
+    apr_hash_index_t *hi;
+    mem_cache_conf *co = (mem_cache_conf*) sconfv;
+
+    if (!co) {
+        return APR_SUCCESS;
+    }
+
+    /* Iterate over the frag hash table and clean up each entry */
+    /* XXX need to lock the hash */
+    for (hi = apr_hash_first(NULL, co->cacheht); hi; hi=apr_hash_next(hi)) {
+        apr_hash_this(hi, NULL, NULL, (void **)&obj);
+        if (obj)
+            cleanup_cache_object(obj);
+    }
+    return APR_SUCCESS;
+}
+
+/* Create the per-server configuration: default size, lock and hash table. */
+static void *create_cache_config(apr_pool_t *p, server_rec *s)
+{
+    sconf = apr_pcalloc(p, sizeof(mem_cache_conf));
+    sconf->space = DEFAULT_CACHE_SPACE;
+#if 0
+    sconf->maxexpire = DEFAULT_CACHE_MAXEXPIRE;
+    sconf->defaultexpire = DEFAULT_CACHE_EXPIRE;
+#endif
+
+    apr_lock_create(&sconf->lock, APR_MUTEX, APR_INTRAPROCESS, "foo", p);
+    sconf->cacheht = apr_hash_make(p);
+    /* Pass the config as cleanup data; with NULL the cleanup returned
+     * immediately and the malloc'ed objects were never released.
+     */
+    apr_pool_cleanup_register(p, sconf, cleanup_cache_mem, apr_pool_cleanup_null);
+
+    return sconf;
+}
+
+/* create_entity hook: allocate a handle and an (incomplete) cache object
+ * for 'key' and insert the object into the cache.
+ *
+ * Returns OK with *hp set to the new handle, or DECLINED when the type is
+ * not "mem", the entity is too large, or memory cannot be allocated (in
+ * the allocation-failure case *hp is set to NULL).
+ */
+static int create_entity(cache_handle **hp, const char *type, char *key, apr_size_t len)
+{
+    cache_object_t *obj;
+    cache_handle *h;
+    apr_size_t key_len;
+
+    /* Create the cache handle and begin populating it.
+     */
+    if (strcasecmp(type, "mem")) {
+        return DECLINED;
+    }
+
+    /* Check len to see if it is within acceptable bounds
+     * XXX max cache check should be configurable variable.
+     *
+     * len is unsigned, so a "length unknown" value of -1 supplied by the
+     * caller arrives as a huge number and is rejected by this one test.
+     */
+    if (len > MAX_CACHE) {
+        return DECLINED;
+    }
+    /* Check total cache size and number of entries. Are they within the
+     * configured limits? If not, kick off garbage collection thread.
+     */
+
+    /* Allocate the cache_handle and set up call back functions specific to
+     * this cache handler.  calloc so that info/next start out NULL.
+     */
+    h = calloc(1, sizeof(*h));
+    if (!h) {
+        *hp = NULL;
+        return DECLINED;
+    }
+    h->remove_entity = &remove_entity;
+    h->read_body = &read_body;
+    h->read_headers = &read_headers;
+    h->write_body = &write_body;
+    h->write_headers = &write_headers;
+
+    /* Allocate and initialize the cache object. The cache object is
+     * unique to this implementation.  It is zero filled so that
+     * cleanup_cache_object() never frees an indeterminate pointer.
+     */
+    obj = calloc(1, sizeof(*obj));
+    if (!obj) {
+        free(h);
+        *hp = NULL;
+        return DECLINED;
+    }
+
+    /* +1 for the terminating NUL */
+    key_len = strlen(key);
+    obj->key = malloc(key_len + 1);
+    if (!obj->key) {
+        /* XXX Uuugh, there has got to be a better way to manage memory.
+         */
+        free(obj);
+        free(h);
+        *hp = NULL;
+        return DECLINED;
+    }
+    memcpy(obj->key, key, key_len + 1);
+    obj->m_len = len;     /* One of these len fields can go */
+    obj->info.len = len;
+    h->cache_obj = (void *) obj;
+
+    /* Mark the cache object as incomplete and put it into the cache */
+    obj->complete = 0;
+    *hp = h;
+
+    /* XXX Need a way to insert into the cache w/o such coarse grained locking */
+    apr_lock_acquire(sconf->lock);
+    apr_hash_set(sconf->cacheht, obj->key, key_len, obj);
+    apr_lock_release(sconf->lock);
+
+    return OK;
+}
+
+/* open_entity hook: look up a complete object for 'key' and wrap it in a
+ * freshly allocated handle.  Returns OK with *hp set, DECLINED otherwise.
+ */
+static int open_entity(cache_handle **hp, const char *type, char *key)
+{
+    cache_object_t *obj;
+    cache_handle *h;
+
+    /* Look up entity keyed to 'url' */
+    if (strcasecmp(type, "mem")) {
+        return DECLINED;
+    }
+    apr_lock_acquire(sconf->lock);
+    obj = (cache_object_t *) apr_hash_get(sconf->cacheht, key, APR_HASH_KEY_STRING);
+    apr_lock_release(sconf->lock);
+
+    /* Entries still being filled must not be served */
+    if (!obj || !(obj->complete)) {
+        return DECLINED;
+    }
+
+    /* Allocate the cache_handle and initialize it */
+    h = calloc(1, sizeof(*h));
+    *hp = h;
+    if (!h) {
+        return DECLINED;
+    }
+    h->remove_entity = &remove_entity;
+    h->read_body = &read_body;
+    h->read_headers = &read_headers;
+    h->write_body = &write_body;
+    h->write_headers = &write_headers;
+    h->cache_obj = obj;
+    return OK;
+}
+
+/* Remove the object referenced by 'h' from the cache, then free both the
+ * object and the handle.  'h' must not be used after this call.
+ */
+static int remove_entity(cache_handle *h)
+{
+    cache_object_t *obj = (cache_object_t *) h->cache_obj;
+
+    apr_lock_acquire(sconf->lock);
+    apr_hash_set(sconf->cacheht, obj->key, strlen(obj->key), NULL);
+    apr_lock_release(sconf->lock);
+
+    cleanup_cache_object(obj);
+
+    /* Reinit the cache_handle fields? */
+    h->cache_obj = NULL;
+
+    /* The caller should free the cache_handle ? */
+    free(h);
+
+    return OK;
+}
+
+/* Define request processing hook handlers */
+
+/* remove_url hook: drop the object stored under 'key', if any.
+ * Returns OK when an object was removed, DECLINED otherwise.
+ */
+static int remove_url(const char *type, char *key)
+{
+    cache_object_t *obj;
+
+    if (strcasecmp(type, "mem")) {
+        return DECLINED;
+    }
+
+    /* WIBNIF
+     * apr_hash_set(..,..,..,NULL) returned pointer to the object just removed.
+     * That way, we could free the object w/o doing another call to an
+     * apr_hash function.
+     */
+
+    /* Find the object and unlink it inside one critical section, so two
+     * threads cannot both find the same object and then both free it.
+     */
+    apr_lock_acquire(sconf->lock);
+    obj = (cache_object_t *) apr_hash_get(sconf->cacheht, key, APR_HASH_KEY_STRING);
+    if (obj) {
+        apr_hash_set(sconf->cacheht, obj->key, strlen(obj->key), NULL);
+    }
+    apr_lock_release(sconf->lock);
+
+    if (!obj) {
+        return DECLINED;
+    }
+
+    cleanup_cache_object(obj);
+
+    return OK;
+}
+
+/* Hand back a pointer to the cache_info stored inside the cache object. */
+static int read_headers(cache_handle *h, request_rec *r, cache_info **info)
+{
+    cache_object_t *obj = (cache_object_t*) h->cache_obj;
+
+    *info = &(obj->info);
+
+    return OK;
+}
+
+/* Append the cached body (as an immortal bucket) and an EOS bucket to bb. */
+static int read_body(cache_handle *h, apr_bucket_brigade *bb)
+{
+    apr_bucket *b;
+    cache_object_t *obj = h->cache_obj;
+
+    b = apr_bucket_immortal_create(obj->m, obj->m_len);
+    APR_BRIGADE_INSERT_TAIL(bb, b);
+    b = apr_bucket_eos_create();
+    APR_BRIGADE_INSERT_TAIL(bb, b);
+
+    return OK;
+}
+
+/* Copy the non-zero date fields and the content type from 'info' into the
+ * cache object.  Always returns OK; if the content type cannot be
+ * duplicated the object's content type is left NULL.
+ */
+static int write_headers(cache_handle *h, request_rec *r, cache_info *info)
+{
+    cache_object_t *obj = (cache_object_t*) h->cache_obj;
+    if (info->date) {
+        obj->info.date = info->date;
+    }
+    if (info->lastmod) {
+        obj->info.lastmod = info->lastmod;
+    }
+    if (info->expire) {
+        obj->info.expire = info->expire;
+    }
+    if (info->content_type) {
+        /* +1 for the terminating NUL */
+        apr_size_t ct_len = strlen(info->content_type) + 1;
+        char *copy = malloc(ct_len);
+
+        if (copy)
+            memcpy(copy, info->content_type, ct_len);
+        /* Copy first, release afterwards: 'info' may be the object's own
+         * cache_info (see read_headers).  Freeing the old value stops a
+         * leak when headers are written more than once.
+         */
+        free(obj->info.content_type);
+        obj->info.content_type = copy;
+    }
+
+    return OK;
+}
+
+/* Copy the brigade's data into newly allocated storage of obj->m_len
+ * bytes and mark the object complete.
+ *
+ * Returns OK on success.  Returns DECLINED, leaving the object incomplete,
+ * when storage cannot be allocated, a bucket cannot be read, or the
+ * brigade holds more than obj->m_len bytes.
+ */
+static int write_body(cache_handle *h, apr_bucket_brigade *b)
+{
+    apr_status_t rv;
+    apr_size_t copied = 0;
+    cache_object_t *obj = (cache_object_t *) h->cache_obj;
+    apr_read_type_e eblock = APR_BLOCK_READ;
+    apr_bucket *e;
+    char *m;
+
+    /* XXX mmap, malloc or file?
+     * Enable this decision to be configured....
+     *
+     * Zero filled so a short body never exposes stale heap contents;
+     * at least one byte is requested so a zero length entity does not
+     * look like an allocation failure.
+     */
+    m = calloc(1, obj->m_len ? obj->m_len : 1);
+    if (!m) {
+        return DECLINED;
+    }
+
+    /* Iterate across the brigade and populate the cache storage */
+    /* XXX doesn't handle multiple brigades */
+    APR_BRIGADE_FOREACH(e, b) {
+        const char *s;
+        apr_size_t len;
+
+        rv = apr_bucket_read(e, &s, &len, eblock);
+        /* Fail on read errors and on data that would overrun the buffer */
+        if (rv != APR_SUCCESS || len > obj->m_len - copied) {
+            free(m);
+            return DECLINED;
+        }
+        if (len) {
+            memcpy(m + copied, s, len);
+            copied += len;
+        }
+    }
+
+    /* Publish the storage only after it has been filled */
+    obj->m = m;
+    obj->type = CACHE_TYPE_MALLOC;
+
+    /* XXX - Check for EOS before setting obj->complete
+     * Open for business. This entry can be served from the cache
+     */
+    obj->complete = 1;
+    return OK;
+}
+
+/* CacheSizeMem directive handler; the signature matches what
+ * AP_INIT_TAKE1 expects (cmd_parms *, void *, const char *).
+ */
+static const char
+*set_cache_size(cmd_parms *parms, void *dummy, const char *arg)
+{
+    int val;
+
+    if (sscanf(arg, "%d", &val) != 1)
+        return "CacheSize value must be an integer (kBytes)";
+    sconf->space = val;
+    return NULL;
+}
+#if 0
+static const char
+*set_cache_factor(cmd_parms *parms, void *dummy, char *arg)
+{
+    double val;
+
+    if (sscanf(arg, "%lg", &val) != 1)
+        return "CacheLastModifiedFactor value must be a float";
+    sconf->lmfactor = val;
+
+    return NULL;
+}
+#endif
+#if 0
+static const char
+*set_cache_maxex(cmd_parms *parms, void *dummy, char *arg)
+{
+    mem_cache_conf *pc = ap_get_module_config(parms->server->module_config, &mem_cache_module);
+    double val;
+
+    if (sscanf(arg, "%lg", &val) != 1)
+        return "CacheMaxExpire value must be a float";
+    sconf->maxexpire = (apr_time_t) (val * MSEC_ONE_HR);
+    return NULL;
+}
+#endif
+#if 0
+static const char
+*set_cache_defex(cmd_parms *parms, void *dummy, char *arg)
+{
+    mem_cache_conf *pc = ap_get_module_config(parms->server->module_config, &mem_cache_module);
+    double val;
+
+    if (sscanf(arg, "%lg", &val) != 1)
+        return "CacheDefaultExpire value must be a float";
+    pc->defaultexpire = (apr_time_t) (val * MSEC_ONE_HR);
+    return NULL;
+}
+#endif
+static const command_rec cache_cmds[] =
+{
+    /* XXX
+     * What config directives does this module need?
+     * Should this module manage expire policy for its entries?
+     * Certainly cache limits like max number of entries,
+     * max entry size, and max size of the cache should
+     * be managed by this module.
+     */
+    AP_INIT_TAKE1("CacheSizeMem", set_cache_size, NULL, RSRC_CONF,
+     "The maximum memory used by the cache in Kb"),
+    {NULL}
+};
+
+/* Register this module's create/open/remove_url hook implementations. */
+static void register_hooks(apr_pool_t *p)
+{
+    /* cache initializer */
+/*    cache_hook_cache_init(cache_init, NULL, NULL, AP_HOOK_FIRST); */
+    cache_hook_create_entity(create_entity, NULL, NULL, APR_HOOK_MIDDLE);
+    cache_hook_open_entity(open_entity,  NULL, NULL, APR_HOOK_MIDDLE);
+    cache_hook_remove_url(remove_url, NULL, NULL, APR_HOOK_MIDDLE);
+}
+
+module AP_MODULE_DECLARE_DATA mem_cache_module =
+{
+    STANDARD20_MODULE_STUFF,
+    NULL,                    /* create per-directory config structure */
+    NULL,                    /* merge per-directory config structures */
+    create_cache_config,     /* create per-server config structure */
+    NULL,                    /* merge per-server config structures */
+    cache_cmds,              /* command apr_table_t */
+    register_hooks
+};
+
-- 
2.40.0