1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "apr_file_io.h"
19 #include "apr_strings.h"
20 #include "mod_cache.h"
21 #include "mod_disk_cache.h"
22 #include "http_config.h"
24 #include "http_core.h"
25 #include "ap_provider.h"
26 #include "util_filter.h"
27 #include "util_script.h"
28 #include "util_charset.h"
/*
 * mod_disk_cache: Disk Based HTTP 1.1 Cache.
 *
 * Flow to Find the .data file:
 *   Incoming client requests URI /foo/bar/baz
 *   Generate <hash> off of /foo/bar/baz
 *   Read in <hash>.header file (may contain Format #1 or Format #2)
 *   If format #1 (Contains a list of Vary Headers):
 *      Use each header name (from .header) with our request values (headers_in) to
 *      regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz
 *      re-read in <hash>.header (must be format #2)
 *
 * Format #1:
 *   apr_uint32_t format;
 *   expiry time (written by write_headers() right after the format)
 *   apr_array_t vary_headers (delimited by CRLF, ended by an empty CRLF line)
 *
 * Format #2:
 *   disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
 *   entity name (dobj->name) [length is in disk_cache_info_t->name_len]
 *   r->headers_out (delimited by CRLF, ended by an empty CRLF line)
 *   r->headers_in (delimited by CRLF, ended by an empty CRLF line)
 */
/* Module record; its address is handed to ap_get_module_config() by the
 * functions below to look up this module's configuration. */
58 module AP_MODULE_DECLARE_DATA disk_cache_module;
60 /* Forward declarations */
61 static int remove_entity(cache_handle_t *h);
62 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
63 static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *in,
64 apr_bucket_brigade *out);
65 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
66 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
/* NOTE(review): the parameter list of the next prototype continues on a
 * line that is not part of this listing. */
67 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
/*
 * Local static functions
 */
74 static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
75 disk_cache_object_t *dobj, const char *name)
77 if (!dobj->hashfile) {
78 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
79 conf->dirlength, name);
83 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
84 dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
87 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
88 CACHE_HEADER_SUFFIX, NULL);
92 static char *data_file(apr_pool_t *p, disk_cache_conf *conf,
93 disk_cache_object_t *dobj, const char *name)
95 if (!dobj->hashfile) {
96 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
97 conf->dirlength, name);
101 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
102 dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
105 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
106 CACHE_DATA_SUFFIX, NULL);
/*
 * Create the directories needed for the cache file path `file`.
 *
 * The visible loop starts scanning just past the cache root prefix
 * (conf->cache_root_len + 1 characters into `file`) and calls
 * apr_dir_make() with owner read/write/execute permissions; a result of
 * "already exists" is not treated as a failure.
 *
 * NOTE(review): the local declarations, the rest of the loop body and the
 * return statements are absent from this listing, so how the path is split
 * into components and what is returned cannot be confirmed from here.
 */
110 static apr_status_t mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool)
/* The const qualifier of `file` is cast away to obtain the scan pointer. */
115 for (p = (char*)file + conf->cache_root_len + 1;;) {
121 rv = apr_dir_make(file,
122 APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool);
123 if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
/* htcacheclean may remove directories underneath us.
 * So, we'll try renaming three times at a cost of 0.002 seconds.
 */
/*
 * Rename src to dest, trying again when the first attempt fails.
 *
 * After a failed apr_file_rename() the loop runs at most two more times
 * and stops early once rv is APR_SUCCESS; the visible statements of each
 * pass call mkdir_structure() for dest and apr_file_rename() again.
 *
 * NOTE(review): the end of the parameter list, the local declarations,
 * the statement after the "1000 micro-seconds" comment, the statement
 * guarded by the mkdir_structure() failure test and the return are
 * missing from this listing.
 */
135 static apr_status_t safe_file_rename(disk_cache_conf *conf,
136 const char *src, const char *dest,
141 rv = apr_file_rename(src, dest, pool);
143 if (rv != APR_SUCCESS) {
146 for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
147 /* 1000 micro-seconds aka 0.001 seconds. */
150 rv = mkdir_structure(conf, dest, pool);
151 if (rv != APR_SUCCESS)
154 rv = apr_file_rename(src, dest, pool);
/*
 * Move a temporary cache file to its final name.
 *
 * safe_file_rename() moves file->tempfile to file->file.  When that fails
 * a warning carrying the status is logged for request r and the temporary
 * file is removed.  rv is initialised to APR_SUCCESS.
 *
 * NOTE(review): the end of the parameter list, any condition placed around
 * the rename, and the return statement are missing from this listing.
 */
161 static apr_status_t file_cache_el_final(disk_cache_conf *conf, disk_cache_file_t *file,
164 apr_status_t rv = APR_SUCCESS;
166 /* This assumes that the tempfiles are on the same file system
167 * as the cache_root. If not, then we need a file copy/move
168 * rather than a rename.
171 /* move the file over */
174 rv = safe_file_rename(conf, file->tempfile, file->file, file->pool);
175 if (rv != APR_SUCCESS) {
176 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r,
177 "disk_cache: rename tempfile to file failed:"
178 " %s -> %s", file->tempfile, file->file);
/* The result of this removal is not checked. */
179 apr_file_remove(file->tempfile, file->pool);
/*
 * Cleanup callback for a disk_cache_file_t; file_cache_create() registers
 * it with apr_pool_cleanup_register().
 *
 * dummy is the disk_cache_file_t.  The visible statements remove
 * file->tempfile from disk and reset the tempfile pointer to NULL.
 *
 * NOTE(review): any condition around the removal, further resets and the
 * return statement are missing from this listing.
 */
188 static apr_status_t file_cache_temp_cleanup(void *dummy) {
189 disk_cache_file_t *file = (disk_cache_file_t *)dummy;
191 /* clean up the temporary file */
193 apr_file_remove(file->tempfile, file->pool);
196 file->tempfile = NULL;
/*
 * Prepare a disk_cache_file_t for writing through a temporary file.
 *
 * file->tempfile is set to conf->cache_root concatenated with AP_TEMPFILE
 * (allocated from pool), and file_cache_temp_cleanup() is registered on
 * pool with file as its argument.
 *
 * NOTE(review): the end of the parameter list, any other initialisation
 * and the return statement are missing from this listing.
 */
202 static apr_status_t file_cache_create(disk_cache_conf *conf, disk_cache_file_t *file,
206 file->tempfile = apr_pstrcat(pool, conf->cache_root, AP_TEMPFILE, NULL);
208 apr_pool_cleanup_register(pool, file, file_cache_temp_cleanup, apr_pool_cleanup_null);
/* These two functions get and put state information into the data
 * file for an ap_cache_el; this state information is read and
 * written transparently to clients of this module.
 */
/*
 * Read the stored metadata of a cache entry from the header file fd.
 *
 * A disk_cache_info_t record is read into dobj->disk_info and its status,
 * date, expire, request_time, response_time and control fields are copied
 * into info.  The entity name that follows (disk_info.name_len bytes) is
 * read into a buffer from r->pool, NUL-terminated and compared with
 * dobj->name.
 *
 * NOTE(review): name_len is taken from the file as-is and sizes the
 * allocation; no upper bound is applied in the lines visible here.
 * NOTE(review): the local declarations, the bodies of the three failure
 * branches and the final return are missing from this listing.
 */
217 static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
218 disk_cache_object_t *dobj, request_rec *r)
224 /* read the data from the cache file */
225 len = sizeof(disk_cache_info_t);
226 rv = apr_file_read_full(fd, &dobj->disk_info, len, &len);
227 if (rv != APR_SUCCESS) {
231 /* Store it away so we can get it later. */
232 info->status = dobj->disk_info.status;
233 info->date = dobj->disk_info.date;
234 info->expire = dobj->disk_info.expire;
235 info->request_time = dobj->disk_info.request_time;
236 info->response_time = dobj->disk_info.response_time;
238 memcpy(&info->control, &dobj->disk_info.control, sizeof(cache_control_t));
240 /* Note that we could optimize this by conditionally doing the palloc
241 * depending upon the size. */
242 urlbuff = apr_palloc(r->pool, dobj->disk_info.name_len + 1);
243 len = dobj->disk_info.name_len;
244 rv = apr_file_read_full(fd, urlbuff, len, &len);
245 if (rv != APR_SUCCESS) {
248 urlbuff[dobj->disk_info.name_len] = '\0';
250 /* check that we have the same URL */
251 /* Would strncmp be correct? */
252 if (strcmp(urlbuff, dobj->name) != 0) {
/*
 * Derive a new cache key from the Vary header names in varray, the
 * matching values found in headers, and oldkey.
 *
 * An iovec array with (2 * varray->nelts) + 1 slots is allocated from p.
 * For each name the loop looks the header up with apr_table_get(); the
 * visible assignments store the name and the header value, and oldkey is
 * stored last before the first k pieces are joined with apr_pstrcatv().
 *
 * NOTE(review): the local declarations, the increments of k and whatever
 * is done when apr_table_get() finds no value are on lines missing from
 * this listing; strlen(header) needs header to be non-NULL - confirm.
 * NOTE(review): the prose lines in the middle of the function are the
 * body of a block comment whose opening and closing lines are missing
 * from this listing.
 */
259 static const char* regen_key(apr_pool_t *p, apr_table_t *headers,
260 apr_array_header_t *varray, const char *oldkey)
268 nvec = (varray->nelts * 2) + 1;
269 iov = apr_palloc(p, sizeof(struct iovec) * nvec);
270 elts = (const char **) varray->elts;
273 * - Handle multiple-value headers better. (sort them?)
274 * - Handle Case in-sensitive Values better.
275 * This isn't the end of the world, since it just lowers the cache
276 * hit rate, but it would be nice to fix.
278 * The majority are case insenstive if they are values (encoding etc).
279 * Most of rfc2616 is case insensitive on header contents.
281 * So the better solution may be to identify headers which should be
282 * treated case-sensitive?
283 * HTTP URI's (3.2.3) [host and scheme are insensitive]
284 * HTTP method (5.1.1)
285 * HTTP-date values (3.3.1)
286 * 3.7 Media Types [exerpt]
287 * The type, subtype, and parameter attribute names are case-
288 * insensitive. Parameter values might or might not be case-sensitive,
289 * depending on the semantics of the parameter name.
290 * 4.20 Except [exerpt]
291 * Comparison of expectation values is case-insensitive for unquoted
292 * tokens (including the 100-continue token), and is case-sensitive for
293 * quoted-string expectation-extensions.
296 for(i=0, k=0; i < varray->nelts; i++) {
297 header = apr_table_get(headers, elts[i]);
301 iov[k].iov_base = (char*) elts[i];
302 iov[k].iov_len = strlen(elts[i]);
304 iov[k].iov_base = (char*) header;
305 iov[k].iov_len = strlen(header);
308 iov[k].iov_base = (char*) oldkey;
309 iov[k].iov_len = strlen(oldkey);
312 return apr_pstrcatv(p, iov, k, NULL);
/*
 * qsort() comparator for an array of C string pointers.
 *
 * fn1, fn2  each points at one array element, i.e. at a pointer to a
 *           NUL-terminated string
 *
 * Returns the strcmp() ordering of the two strings (negative, zero or
 * positive).  The elements are accessed through const-qualified pointers
 * so the const qualifier of the arguments is not cast away.
 */
static int array_alphasort(const void *fn1, const void *fn2)
{
    const char *const *lhs = fn1;
    const char *const *rhs = fn2;

    return strcmp(*lhs, *rhs);
}
320 static void tokens_to_array(apr_pool_t *p, const char *data,
321 apr_array_header_t *arr)
325 while ((token = ap_get_list_item(p, &data)) != NULL) {
326 *((const char **) apr_array_push(arr)) = token;
329 /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
330 qsort((void *) arr->elts, arr->nelts,
331 sizeof(char *), array_alphasort);
/*
 * Hook and mod_cache callback functions
 */
/*
 * mod_cache callback: set up a new cache entry for key.
 *
 * Visible checks, in order: a cache root must be configured; a response
 * with status HTTP_PARTIAL_CONTENT is logged as not cached; len is
 * compared against the per-directory maxfs limit and, when len >= 0,
 * against the minfs limit.  The cache_object_t and disk_cache_object_t
 * are then allocated zeroed from r->pool, the key is duplicated, the
 * cache root is saved, a pool is created from r->pool and passed to
 * file_cache_create() for the hdrs, vary and data files, and the final
 * file names are computed.  hdrs.file and vary.file are both produced by
 * header_file() with the same key.
 *
 * NOTE(review): the values returned from each branch, several
 * declarations and some log arguments are on lines missing from this
 * listing.
 */
337 static int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len,
338 apr_bucket_brigade *bb)
340 disk_cache_dir_conf *dconf = ap_get_module_config(r->per_dir_config, &disk_cache_module);
341 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
344 disk_cache_object_t *dobj;
347 if (conf->cache_root == NULL) {
351 /* we don't support caching of range requests (yet) */
352 if (r->status == HTTP_PARTIAL_CONTENT) {
353 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
354 "disk_cache: URL %s partial content response not cached",
359 /* Note, len is -1 if unknown so don't trust it too hard */
360 if (len > dconf->maxfs) {
361 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
362 "disk_cache: URL %s failed the size check "
363 "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
364 key, len, dconf->maxfs);
367 if (len >= 0 && len < dconf->minfs) {
368 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
369 "disk_cache: URL %s failed the size check "
370 "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")",
371 key, len, dconf->minfs);
375 /* Allocate and initialize cache_object_t and disk_cache_object_t */
376 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj));
377 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj));
379 obj->key = apr_pstrdup(r->pool, key);
381 dobj->name = obj->key;
383 /* Save the cache root */
384 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
385 dobj->root_len = conf->cache_root_len;
387 apr_pool_create(&pool, r->pool);
389 file_cache_create(conf, &dobj->hdrs, pool);
390 file_cache_create(conf, &dobj->vary, pool);
391 file_cache_create(conf, &dobj->data, pool);
/* data_file() runs first, so the hashed name it stores in dobj->hashfile
 * is reused by the two header_file() calls that follow. */
393 dobj->data.file = data_file(r->pool, conf, dobj, key);
394 dobj->hdrs.file = header_file(r->pool, conf, dobj, key);
395 dobj->vary.file = header_file(r->pool, conf, dobj, key);
397 dobj->disk_info.header_only = r->header_only;
/*
 * mod_cache callback: look for a stored entry for key and open it.
 *
 * Visible flow:
 *  - a missing cache root is reported as an error;
 *  - the cache_object_t / disk_cache_object_t pair is allocated zeroed
 *    from r->pool and the cache root is saved;
 *  - the file named by header_file() is opened and its leading format
 *    word is read;
 *  - VARY_FORMAT_VERSION: an expiry value and the array of Vary header
 *    names are read, the key is regenerated with regen_key() from
 *    r->headers_in, and the header file for that key is opened with the
 *    vary file path as prefix;
 *  - a value other than DISK_FORMAT_VERSION is logged as a version
 *    mismatch;
 *  - in the remaining case the already opened file is reused as the
 *    header file and rewound to offset 0;
 *  - file_cache_recall_mydata() loads the metadata; an entry stored for a
 *    HEAD request is ignored for a non-HEAD request;
 *  - when disk_info.has_body is set the data file is opened and its size
 *    and identity are obtained; the entry is accepted only when inode and
 *    device equal the values recorded in the header.
 *
 * NOTE(review): numerous lines (declarations, return statements, closing
 * braces, some log arguments) are missing from this listing, so return
 * codes and the exact branch structure cannot be confirmed from here.
 */
402 static int open_entity(cache_handle_t *h, request_rec *r, const char *key)
408 static int error_logged = 0;
409 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
411 #ifdef APR_SENDFILE_ENABLED
412 core_dir_config *coreconf = ap_get_module_config(r->per_dir_config,
418 disk_cache_object_t *dobj;
424 /* Look up entity keyed to 'url' */
425 if (conf->cache_root == NULL) {
428 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
429 "disk_cache: Cannot cache files to disk without a CacheRoot specified.");
434 /* Create and init the cache object */
435 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
436 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
440 /* Open the headers file */
443 /* Save the cache root */
444 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
445 dobj->root_len = conf->cache_root_len;
447 dobj->vary.file = header_file(r->pool, conf, dobj, key);
448 flags = APR_READ|APR_BINARY|APR_BUFFERED;
449 rc = apr_file_open(&dobj->vary.fd, dobj->vary.file, flags, 0, r->pool);
450 if (rc != APR_SUCCESS) {
454 /* read the format from the cache file */
455 len = sizeof(format);
/* NOTE(review): the result of this read is discarded; format is tested
 * below without knowing whether the read succeeded. */
456 apr_file_read_full(dobj->vary.fd, &format, len, &len);
458 if (format == VARY_FORMAT_VERSION) {
459 apr_array_header_t* varray;
462 len = sizeof(expire);
/* NOTE(review): the result of this read is discarded as well. */
463 apr_file_read_full(dobj->vary.fd, &expire, len, &len);
465 varray = apr_array_make(r->pool, 5, sizeof(char*));
466 rc = read_array(r, varray, dobj->vary.fd);
467 if (rc != APR_SUCCESS) {
468 ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r,
469 "disk_cache: Cannot parse vary header file: %s",
471 apr_file_close(dobj->vary.fd);
474 apr_file_close(dobj->vary.fd);
476 nkey = regen_key(r->pool, r->headers_in, varray, key);
478 dobj->hashfile = NULL;
479 dobj->prefix = dobj->vary.file;
480 dobj->hdrs.file = header_file(r->pool, conf, dobj, nkey);
482 flags = APR_READ|APR_BINARY|APR_BUFFERED;
483 rc = apr_file_open(&dobj->hdrs.fd, dobj->hdrs.file, flags, 0, r->pool);
484 if (rc != APR_SUCCESS) {
488 else if (format != DISK_FORMAT_VERSION) {
489 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
490 "disk_cache: File '%s' has a version mismatch. File had version: %d.",
491 dobj->vary.file, format);
492 apr_file_close(dobj->vary.fd);
496 apr_off_t offset = 0;
498 /* oops, not vary as it turns out */
499 dobj->hdrs.fd = dobj->vary.fd;
500 dobj->vary.fd = NULL;
501 dobj->hdrs.file = dobj->vary.file;
503 /* This wasn't a Vary Format file, so we must seek to the
504 * start of the file again, so that later reads work.
506 apr_file_seek(dobj->hdrs.fd, APR_SET, &offset);
514 apr_pool_create(&pool, r->pool);
516 file_cache_create(conf, &dobj->hdrs, pool);
517 file_cache_create(conf, &dobj->vary, pool);
518 file_cache_create(conf, &dobj->data, pool);
520 dobj->data.file = data_file(r->pool, conf, dobj, nkey);
522 /* Read the bytes to setup the cache_info fields */
523 rc = file_cache_recall_mydata(dobj->hdrs.fd, info, dobj, r);
524 if (rc != APR_SUCCESS) {
525 ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r,
526 "disk_cache: Cannot read header file %s", dobj->hdrs.file);
527 apr_file_close(dobj->hdrs.fd);
/* NOTE(review): hdrs.fd appears to be closed here although it is closed
 * again in the data-file failure path below and is read later by
 * recall_headers() - confirm against the complete source. */
531 apr_file_close(dobj->hdrs.fd);
533 /* Is this a cached HEAD request? */
534 if (dobj->disk_info.header_only && !r->header_only) {
535 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r,
536 "disk_cache: HEAD request cached, non-HEAD requested, ignoring: %s",
541 /* Open the data file */
542 if (dobj->disk_info.has_body) {
543 flags = APR_READ | APR_BINARY;
544 #ifdef APR_SENDFILE_ENABLED
545 /* When we are in the quick handler we don't have the per-directory
546 * configuration, so this check only takes the global setting of
547 * the EnableSendFile directive into account.
549 flags |= AP_SENDFILE_ENABLED(coreconf->enable_sendfile);
551 rc = apr_file_open(&dobj->data.fd, dobj->data.file, flags, 0, r->pool);
552 if (rc != APR_SUCCESS) {
553 ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r,
554 "disk_cache: Cannot open data file %s", dobj->data.file);
555 apr_file_close(dobj->hdrs.fd);
559 rc = apr_file_info_get(&finfo, APR_FINFO_SIZE | APR_FINFO_IDENT,
561 if (rc == APR_SUCCESS) {
562 dobj->file_size = finfo.size;
565 /* Atomic check - does the body file belong to the header file? */
566 if (dobj->disk_info.inode == finfo.inode &&
567 dobj->disk_info.device == finfo.device) {
569 /* Initialize the cache_handle callback functions */
570 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
571 "disk_cache: Recalled cached URL info header %s", dobj->name);
581 /* Oh dear, no luck matching header to the body */
582 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
583 "disk_cache: Cached URL info header '%s' didn't match body, ignoring this entry",
/*
 * mod_cache callback: detach the cache object from the handle h.
 *
 * NOTE(review): only the signature and the comment below are visible; the
 * statements of the body and the return value are missing from this
 * listing.
 */
589 static int remove_entity(cache_handle_t *h)
591 /* Null out the cache object pointer so next time we start from scratch */
/*
 * mod_cache callback: delete the files of a cache entry and then prune
 * the directories that held them.
 *
 * The headers file and the data file are each removed with
 * apr_file_remove(); a failure other than "no such file" is logged at
 * debug level.  Afterwards a copy of the headers file path (or the data
 * file path when there is no headers file) is made and, with q starting
 * dobj->root_len characters into that copy, apr_dir_remove() is applied
 * while the path is shortened at its last '/'.
 *
 * NOTE(review): the local declarations, the statements that truncate the
 * path, the loop exit and the return values are on lines missing from
 * this listing.  The prose lines before the for loop are the body of a
 * block comment whose opening and closing lines are also missing.
 */
596 static int remove_url(cache_handle_t *h, request_rec *r)
599 disk_cache_object_t *dobj;
601 /* Get disk cache object from cache handle */
602 dobj = (disk_cache_object_t *) h->cache_obj->vobj;
607 /* Delete headers file */
608 if (dobj->hdrs.file) {
609 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
610 "disk_cache: Deleting %s from cache.", dobj->hdrs.file);
612 rc = apr_file_remove(dobj->hdrs.file, r->pool);
613 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
614 /* Will only result in an output if httpd is started with -e debug.
615 * For reason see log_error_core for the case s == NULL.
617 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r,
618 "disk_cache: Failed to delete headers file %s from cache.",
624 /* Delete data file */
625 if (dobj->data.file) {
626 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
627 "disk_cache: Deleting %s from cache.", dobj->data.file);
629 rc = apr_file_remove(dobj->data.file, r->pool);
630 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
631 /* Will only result in an output if httpd is started with -e debug.
632 * For reason see log_error_core for the case s == NULL.
634 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r,
635 "disk_cache: Failed to delete data file %s from cache.",
641 /* now delete directories as far as possible up to our cache root */
643 const char *str_to_copy;
645 str_to_copy = dobj->hdrs.file ? dobj->hdrs.file : dobj->data.file;
647 char *dir, *slash, *q;
649 dir = apr_pstrdup(r->pool, str_to_copy);
651 /* remove filename */
652 slash = strrchr(dir, '/');
656 * now walk our way back to the cache root, delete everything
657 * in the way as far as possible
659 * Note: due to the way we constructed the file names in
660 * header_file and data_file, we are guaranteed that the
661 * cache_root is suffixed by at least one '/' which will be
662 * turned into a terminating null by this loop. Therefore,
663 * we won't either delete or go above our cache root.
665 for (q = dir + dobj->root_len; *q ; ) {
666 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
667 "disk_cache: Deleting directory %s from cache",
670 rc = apr_dir_remove(dir, r->pool);
671 if (rc != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rc)) {
674 slash = strrchr(q, '/');
/*
 * Read lines from a file and append a pool-allocated copy of each one to
 * arr.
 *
 * Each line is fetched with apr_file_gets() into a MAX_STRING_LEN buffer;
 * the visible tests look for a trailing '\n' optionally preceded by CR.
 * Copies are made with apr_pstrdup() from r->pool.
 *
 * NOTE(review): the end of the parameter list, the loop header, the
 * statements that strip the line ending, the end-of-array test and the
 * return statements are missing from this listing.
 */
683 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
686 char w[MAX_STRING_LEN];
691 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
692 if (rv != APR_SUCCESS) {
/* NOTE(review): the status argument is 0 here, so the failure code in rv
 * does not appear in the log entry. */
693 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
694 "Premature end of vary array.");
699 if (p > 0 && w[p - 1] == '\n') {
700 if (p > 1 && w[p - 2] == CR) {
708 /* If we've finished reading the array, break out of the loop. */
713 *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
/*
 * Write every string in arr to fd, one per line.
 *
 * Each element is written followed by CRLF with one apr_file_writev()
 * call; after the loop a lone CRLF is written and the status of that
 * final write is returned.
 *
 * NOTE(review): the local declarations, the last argument of the
 * apr_file_writev() calls and the body of the failure branch are missing
 * from this listing.
 */
719 static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr)
727 elts = (const char **) arr->elts;
729 for (i = 0; i < arr->nelts; i++) {
730 iov[0].iov_base = (char*) elts[i];
731 iov[0].iov_len = strlen(elts[i]);
732 iov[1].iov_base = CRLF;
733 iov[1].iov_len = sizeof(CRLF) - 1;
735 rv = apr_file_writev(fd, (const struct iovec *) &iov, 2,
737 if (rv != APR_SUCCESS) {
/* An empty line marks the end of the array. */
742 iov[0].iov_base = CRLF;
743 iov[0].iov_len = sizeof(CRLF) - 1;
745 return apr_file_writev(fd, (const struct iovec *) &iov, 1,
/*
 * Read "name: value" header lines from file and add them to table.
 *
 * Lines are fetched with apr_file_gets() into a MAX_STRING_LEN buffer and
 * tested for a trailing '\n' optionally preceded by CR.  The first ':'
 * in the line is located, white space is skipped, and the pair is stored
 * with apr_table_add().  When APR_CHARSET_EBCDIC is set, a line without
 * ':' is examined byte by byte and, if it looks more like ASCII than
 * EBCDIC, converted in place.
 *
 * NOTE(review): the loop header, the statements that strip the line
 * ending and split the line at ':', the end-of-headers test and the
 * return statements are missing from this listing.
 */
749 static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
750 apr_table_t *table, apr_file_t *file)
752 char w[MAX_STRING_LEN];
759 /* ### What about APR_EOF? */
760 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
761 if (rv != APR_SUCCESS) {
/* NOTE(review): the status argument is 0 here, so the failure code in rv
 * does not appear in the log entry. */
762 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
763 "Premature end of cache headers.");
767 /* Delete terminal (CR?)LF */
770 /* Indeed, the host's '\n':
771 '\012' for UNIX; '\015' for MacOS; '\025' for OS/390
772 -- whatever the script generates.
774 if (p > 0 && w[p - 1] == '\n') {
775 if (p > 1 && w[p - 2] == CR) {
783 /* If we've finished reading the headers, break out of the loop. */
788 #if APR_CHARSET_EBCDIC
789 /* Chances are that we received an ASCII header text instead of
790 * the expected EBCDIC header lines. Try to auto-detect:
792 if (!(l = strchr(w, ':'))) {
793 int maybeASCII = 0, maybeEBCDIC = 0;
794 unsigned char *cp, native;
795 apr_size_t inbytes_left, outbytes_left;
797 for (cp = w; *cp != '\0'; ++cp) {
798 native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp);
799 if (apr_isprint(*cp) && !apr_isprint(native))
801 if (!apr_isprint(*cp) && apr_isprint(native))
804 if (maybeASCII > maybeEBCDIC) {
805 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
806 "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
808 inbytes_left = outbytes_left = cp - w;
809 apr_xlate_conv_buffer(ap_hdrs_from_ascii,
810 w, &inbytes_left, w, &outbytes_left);
813 #endif /*APR_CHARSET_EBCDIC*/
815 /* if we see a bogus header don't ignore it. Shout and scream */
816 if (!(l = strchr(w, ':'))) {
821 while (*l && apr_isspace(*l)) {
825 apr_table_add(table, w, l);
/*
 * Reads headers from a buffer and returns an array of headers.
 * Returns NULL on file error
 * This routine tries to deal with too long lines and continuation lines.
 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
 * Is that okay, or should they be collapsed where possible?
 */
/*
 * mod_cache callback: load the stored response and request headers of a
 * cache entry into h->resp_hdrs and h->req_hdrs.
 *
 * Both tables are created from r->pool and filled by read_table() from
 * dobj->hdrs.fd, response headers first; the file is closed afterwards
 * and a debug message is logged.  A missing header fd is logged as an
 * error.
 *
 * NOTE(review): the return statements are missing from this listing.
 */
838 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
840 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
842 /* This case should not happen... */
843 if (!dobj->hdrs.fd) {
844 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
845 "disk_cache: recalling headers; but no header fd for %s", dobj->name);
849 h->req_hdrs = apr_table_make(r->pool, 20);
850 h->resp_hdrs = apr_table_make(r->pool, 20);
852 /* Call routine to read the header lines/status line */
/* NOTE(review): the status returned by both read_table() calls is
 * ignored, so a truncated header file is not detected here. */
853 read_table(h, r, h->resp_hdrs, dobj->hdrs.fd);
854 read_table(h, r, h->req_hdrs, dobj->hdrs.fd);
856 apr_file_close(dobj->hdrs.fd);
858 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
859 "disk_cache: Recalled headers for URL %s", dobj->name);
/*
 * mod_cache callback: hand the cached body to the caller.
 *
 * The visible statement inserts dobj->data.fd into the brigade bb,
 * covering offsets 0 to dobj->file_size, using pool p.
 *
 * NOTE(review): any condition around the insertion and the return
 * statement are missing from this listing.
 */
863 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb)
865 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
868 apr_brigade_insert_file(bb, dobj->data.fd, 0, dobj->file_size, p);
/*
 * Write the entries of table to fd as "key: value" lines.
 *
 * Entries whose key is NULL are skipped.  Each remaining entry is written
 * as key, ": ", value and CRLF with one apr_file_writev() call; after the
 * loop a lone CRLF is written.
 *
 * NOTE(review): the local declarations, the last argument of the
 * apr_file_writev() calls, the body of the failure branch and the return
 * statement are missing from this listing.
 */
874 static apr_status_t store_table(apr_file_t *fd, apr_table_t *table)
880 apr_table_entry_t *elts;
882 elts = (apr_table_entry_t *) apr_table_elts(table)->elts;
883 for (i = 0; i < apr_table_elts(table)->nelts; ++i) {
884 if (elts[i].key != NULL) {
885 iov[0].iov_base = elts[i].key;
886 iov[0].iov_len = strlen(elts[i].key);
887 iov[1].iov_base = ": ";
888 iov[1].iov_len = sizeof(": ") - 1;
889 iov[2].iov_base = elts[i].val;
890 iov[2].iov_len = strlen(elts[i].val);
891 iov[3].iov_base = CRLF;
892 iov[3].iov_len = sizeof(CRLF) - 1;
894 rv = apr_file_writev(fd, (const struct iovec *) &iov, 4,
896 if (rv != APR_SUCCESS) {
/* An empty line marks the end of the table. */
901 iov[0].iov_base = CRLF;
902 iov[0].iov_len = sizeof(CRLF) - 1;
903 rv = apr_file_writev(fd, (const struct iovec *) &iov, 1,
/*
 * mod_cache callback: remember the cache_info and the cacheable headers
 * of the request so they can be written out later.
 *
 * *info is copied into h->cache_obj->info; the results of
 * ap_cache_cacheable_headers_out() and ap_cache_cacheable_headers_in()
 * are stored on the disk cache object.  The visible statements do not
 * write anything to disk.
 *
 * NOTE(review): any condition around the headers_in assignment and the
 * return statement are missing from this listing.
 */
908 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *info)
910 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
912 memcpy(&h->cache_obj->info, info, sizeof(cache_info));
914 if (r->headers_out) {
915 dobj->headers_out = ap_cache_cacheable_headers_out(r);
919 dobj->headers_in = ap_cache_cacheable_headers_in(r);
/*
 * Write the vary file (for a response carrying a Vary header) and the
 * header file of an entry into their temporary files.
 *
 * Visible behaviour:
 *  - the "Vary" value is looked up in dobj->headers_out; in the vary
 *    branch the directory structure is created, a temp file is made for
 *    dobj->vary, and VARY_FORMAT_VERSION, the expiry time and the Vary
 *    header names (sorted by tokens_to_array()) are written to it; the
 *    key is then regenerated with regen_key() and the data/header file
 *    names are recomputed with the previous header path as prefix;
 *  - a temp file is made for dobj->hdrs and a disk_cache_info_t followed
 *    by dobj->name is written with one apr_file_writev() call;
 *  - headers_out and headers_in, when present, are appended with
 *    store_table(); each failure is logged as a warning and the temp file
 *    is closed;
 *  - the temp file is finally closed, which also flushes it.
 *
 * NOTE(review): the status returned by mkdir_structure() is replaced by
 * the result of the following apr_file_mktemp() call before any visible
 * check.
 * NOTE(review): the results of both apr_file_write() calls, of
 * store_array() and of the apr_file_close() calls are not checked.
 * NOTE(review): disk_info.entity_version receives the value held before
 * the post-increment of dobj->disk_info.entity_version.
 * NOTE(review): declarations, branch conditions, closing braces and
 * return statements are missing from this listing.
 */
925 static apr_status_t write_headers(cache_handle_t *h, request_rec *r)
927 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
931 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
933 disk_cache_info_t disk_info;
936 memset(&disk_info, 0, sizeof(disk_cache_info_t));
938 if (dobj->headers_out) {
941 tmp = apr_table_get(dobj->headers_out, "Vary");
944 apr_array_header_t* varray;
945 apr_uint32_t format = VARY_FORMAT_VERSION;
947 /* If we were initially opened as a vary format, rollback
948 * that internal state for the moment so we can recreate the
949 * vary format hints in the appropriate directory.
952 dobj->hdrs.file = dobj->prefix;
956 rv = mkdir_structure(conf, dobj->hdrs.file, r->pool);
958 rv = apr_file_mktemp(&dobj->vary.tempfd, dobj->vary.tempfile,
959 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
962 if (rv != APR_SUCCESS) {
963 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r,
964 "disk_cache: could not create temp file %s",
965 dobj->vary.tempfile);
969 amt = sizeof(format);
970 apr_file_write(dobj->vary.tempfd, &format, &amt);
972 amt = sizeof(h->cache_obj->info.expire);
973 apr_file_write(dobj->vary.tempfd, &h->cache_obj->info.expire, &amt);
975 varray = apr_array_make(r->pool, 6, sizeof(char*));
976 tokens_to_array(r->pool, tmp, varray);
978 store_array(dobj->vary.tempfd, varray);
980 apr_file_close(dobj->vary.tempfd);
982 tmp = regen_key(r->pool, dobj->headers_in, varray, dobj->name);
983 dobj->prefix = dobj->hdrs.file;
984 dobj->hashfile = NULL;
985 dobj->data.file = data_file(r->pool, conf, dobj, tmp);
986 dobj->hdrs.file = header_file(r->pool, conf, dobj, tmp);
991 rv = apr_file_mktemp(&dobj->hdrs.tempfd, dobj->hdrs.tempfile,
992 APR_CREATE | APR_WRITE | APR_BINARY |
993 APR_BUFFERED | APR_EXCL, dobj->hdrs.pool);
995 if (rv != APR_SUCCESS) {
996 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r,
997 "disk_cache: could not create temp file %s",
998 dobj->hdrs.tempfile);
1002 disk_info.format = DISK_FORMAT_VERSION;
1003 disk_info.date = h->cache_obj->info.date;
1004 disk_info.expire = h->cache_obj->info.expire;
1005 disk_info.entity_version = dobj->disk_info.entity_version++;
1006 disk_info.request_time = h->cache_obj->info.request_time;
1007 disk_info.response_time = h->cache_obj->info.response_time;
1008 disk_info.status = h->cache_obj->info.status;
1009 disk_info.inode = dobj->disk_info.inode;
1010 disk_info.device = dobj->disk_info.device;
1011 disk_info.has_body = dobj->disk_info.has_body;
1012 disk_info.header_only = dobj->disk_info.header_only;
1014 disk_info.name_len = strlen(dobj->name);
1016 memcpy(&disk_info.control, &h->cache_obj->info.control, sizeof(cache_control_t));
1018 iov[0].iov_base = (void*)&disk_info;
1019 iov[0].iov_len = sizeof(disk_cache_info_t);
1020 iov[1].iov_base = (void*)dobj->name;
1021 iov[1].iov_len = disk_info.name_len;
1023 rv = apr_file_writev(dobj->hdrs.tempfd, (const struct iovec *) &iov, 2, &amt);
1024 if (rv != APR_SUCCESS) {
1025 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r,
1026 "disk_cache: could not write info to header file %s",
1027 dobj->hdrs.tempfile);
1028 apr_file_close(dobj->hdrs.tempfd);
1032 if (dobj->headers_out) {
1033 rv = store_table(dobj->hdrs.tempfd, dobj->headers_out);
1034 if (rv != APR_SUCCESS) {
1035 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r,
1036 "disk_cache: could not write out-headers to header file %s",
1037 dobj->hdrs.tempfile);
1038 apr_file_close(dobj->hdrs.tempfd);
1043 /* Parse the vary header and dump those fields from the headers_in. */
1044 /* FIXME: Make call to the same thing cache_select calls to crack Vary. */
1045 if (dobj->headers_in) {
1046 rv = store_table(dobj->hdrs.tempfd, dobj->headers_in);
1047 if (rv != APR_SUCCESS) {
1048 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r,
1049 "disk_cache: could not write in-headers to header file %s",
1050 dobj->hdrs.tempfile);
1051 apr_file_close(dobj->hdrs.tempfd);
1056 apr_file_close(dobj->hdrs.tempfd); /* flush and close */
/*
 * mod_cache callback: write body buckets from `in` to the temporary data
 * file and pass them on in `out`.
 *
 * Visible behaviour:
 *  - dobj->bb is created to hold buckets on their way to `out`;
 *  - dobj->offset is loaded from dconf->readsize when it is zero, and a
 *    deadline of now + dconf->readtime is set when readtime is configured
 *    and no deadline exists yet;
 *  - EOS and FLUSH buckets move the contents of dobj->bb and then
 *    themselves to `out`; other metadata buckets are appended to dobj->bb
 *    unchanged;
 *  - data buckets are read in blocking mode, moved to dobj->bb and written
 *    with apr_file_write_full(); the temp file is created when no tempfd
 *    exists yet, and its device/inode are recorded in dobj->disk_info
 *    together with has_body = 1;
 *  - a read or write error, or a size above dconf->maxfs, destroys
 *    dobj->data.pool and moves dobj->bb to `out`;
 *  - dobj->offset is reduced by each bucket length; reaching zero or
 *    passing the deadline moves dobj->bb to `out`;
 *  - after the final bucket the temp file is closed and the entry is
 *    discarded when the connection was aborted or r->no_cache is set,
 *    when the size is below dconf->minfs, or when it differs from the
 *    Content-Length response header.
 *
 * NOTE(review): both "Error when ..." log calls pass 0 as status, so the
 * failure code in rv does not appear in the log entry.
 * NOTE(review): the "connection has been aborted" message is also used
 * when only r->no_cache is set.
 * NOTE(review): errno is tested after apr_atoi64() but no reset of errno
 * before the call is visible - confirm.
 * NOTE(review): declarations, several conditions, break/return statements
 * and closing braces are missing from this listing.
 */
1061 static apr_status_t store_body(cache_handle_t *h, request_rec *r,
1062 apr_bucket_brigade *in, apr_bucket_brigade *out)
1065 apr_status_t rv = APR_SUCCESS;
1066 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1067 disk_cache_dir_conf *dconf = ap_get_module_config(r->per_dir_config, &disk_cache_module);
1071 dobj->bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
1073 if (!dobj->offset) {
1074 dobj->offset = dconf->readsize;
1076 if (!dobj->timeout && dconf->readtime) {
1077 dobj->timeout = apr_time_now() + dconf->readtime;
1081 apr_brigade_partition(in, dobj->offset, &e);
1084 while (APR_SUCCESS == rv && !APR_BRIGADE_EMPTY(in)) {
1086 apr_size_t length, written;
1088 e = APR_BRIGADE_FIRST(in);
1090 /* are we done completely? if so, pass any trailing buckets right through */
1092 APR_BUCKET_REMOVE(e);
1093 APR_BRIGADE_INSERT_TAIL(out, e);
1097 /* have we seen eos yet? */
1098 if (APR_BUCKET_IS_EOS(e)) {
1101 APR_BUCKET_REMOVE(e);
1102 APR_BRIGADE_CONCAT(out, dobj->bb);
1103 APR_BRIGADE_INSERT_TAIL(out, e);
1107 /* honour flush buckets, we'll get called again */
1108 if (APR_BUCKET_IS_FLUSH(e)) {
1109 APR_BUCKET_REMOVE(e);
1110 APR_BRIGADE_CONCAT(out, dobj->bb);
1111 APR_BRIGADE_INSERT_TAIL(out, e);
1115 /* metadata buckets are preserved as is */
1116 if (APR_BUCKET_IS_METADATA(e)) {
1117 APR_BUCKET_REMOVE(e);
1118 APR_BRIGADE_INSERT_TAIL(dobj->bb, e);
1122 /* read the bucket, write to the cache */
1123 rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
1124 APR_BUCKET_REMOVE(e);
1125 APR_BRIGADE_INSERT_TAIL(dobj->bb, e);
1126 if (rv != APR_SUCCESS) {
1127 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
1128 "disk_cache: Error when reading bucket for URL %s",
1130 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1131 apr_pool_destroy(dobj->data.pool);
1132 APR_BRIGADE_CONCAT(out, dobj->bb);
1136 /* don't write empty buckets to the cache */
1141 /* Attempt to create the data file at the last possible moment, if
1142 * the body is empty, we don't write a file at all, and save an inode.
1144 if (!dobj->data.tempfd) {
1146 rv = apr_file_mktemp(&dobj->data.tempfd, dobj->data.tempfile,
1147 APR_CREATE | APR_WRITE | APR_BINARY |
1148 APR_BUFFERED | APR_EXCL, dobj->data.pool);
1149 if (rv != APR_SUCCESS) {
1152 dobj->file_size = 0;
1153 rv = apr_file_info_get(&finfo, APR_FINFO_IDENT,
1155 if (rv != APR_SUCCESS) {
1158 dobj->disk_info.device = finfo.device;
1159 dobj->disk_info.inode = finfo.inode;
1160 dobj->disk_info.has_body = 1;
1163 /* write to the cache, leave if we fail */
1164 rv = apr_file_write_full(dobj->data.tempfd, str, length, &written);
1165 if (rv != APR_SUCCESS) {
1166 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
1167 "disk_cache: Error when writing cache file for URL %s",
1169 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1170 apr_pool_destroy(dobj->data.pool);
1171 APR_BRIGADE_CONCAT(out, dobj->bb);
1174 dobj->file_size += written;
1175 if (dobj->file_size > dconf->maxfs) {
1176 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
1177 "disk_cache: URL %s failed the size check "
1178 "(%" APR_OFF_T_FMT ">%" APR_OFF_T_FMT ")",
1179 h->cache_obj->key, dobj->file_size, dconf->maxfs);
1180 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1181 apr_pool_destroy(dobj->data.pool);
1182 APR_BRIGADE_CONCAT(out, dobj->bb);
1183 return APR_EGENERAL;
1186 /* have we reached the limit of how much we're prepared to write in one
1187 * go? If so, leave, we'll get called again. This prevents us from trying
1188 * to swallow too much data at once, or taking so long to write the data
1189 * the client times out.
1191 dobj->offset -= length;
1192 if (dobj->offset <= 0) {
1194 APR_BRIGADE_CONCAT(out, dobj->bb);
1197 if ((dconf->readtime && apr_time_now() > dobj->timeout)) {
1199 APR_BRIGADE_CONCAT(out, dobj->bb);
1205 /* Was this the final bucket? If yes, close the temp file and perform
1209 const char *cl_header = apr_table_get(r->headers_out, "Content-Length");
1211 if (dobj->data.tempfd) {
1212 apr_file_close(dobj->data.tempfd);
1215 if (r->connection->aborted || r->no_cache) {
1216 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
1217 "disk_cache: Discarding body for URL %s "
1218 "because connection has been aborted.",
1220 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1221 apr_pool_destroy(dobj->data.pool);
1222 return APR_EGENERAL;
1224 if (dobj->file_size < dconf->minfs) {
1225 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
1226 "disk_cache: URL %s failed the size check "
1227 "(%" APR_OFF_T_FMT "<%" APR_OFF_T_FMT ")",
1228 h->cache_obj->key, dobj->file_size, dconf->minfs);
1229 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1230 apr_pool_destroy(dobj->data.pool);
1231 return APR_EGENERAL;
1234 apr_int64_t cl = apr_atoi64(cl_header);
1235 if ((errno == 0) && (dobj->file_size != cl)) {
1236 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
1237 "disk_cache: URL %s didn't receive complete response, not caching",
1239 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1240 apr_pool_destroy(dobj->data.pool);
1241 return APR_EGENERAL;
1245 /* All checks were fine, we're good to go when the commit comes */
/*
 * mod_cache callback: make a stored entry permanent.
 *
 * write_headers() is called first; while rv stays APR_SUCCESS the hdrs,
 * vary and data temp files are moved to their final names, in that order,
 * with file_cache_el_final().  A debug message reports either that the
 * URL was not cached or that headers and body were cached, and
 * dobj->data.pool is destroyed in the visible tail of the function.
 *
 * NOTE(review): the declaration of rv, the clean-up statement in the
 * failure branch, the log arguments and the return statement are missing
 * from this listing.
 */
1251 static apr_status_t commit_entity(cache_handle_t *h, request_rec *r)
1253 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
1254 &disk_cache_module);
1255 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1258 /* write the headers to disk at the last possible moment */
1259 rv = write_headers(h, r);
1261 /* move header and data tempfiles to the final destination */
1262 if (APR_SUCCESS == rv) {
1263 rv = file_cache_el_final(conf, &dobj->hdrs, r);
1265 if (APR_SUCCESS == rv) {
1266 rv = file_cache_el_final(conf, &dobj->vary, r);
1268 if (APR_SUCCESS == rv) {
1269 rv = file_cache_el_final(conf, &dobj->data, r);
1272 /* remove the cached items completely on any failure */
1273 if (APR_SUCCESS != rv) {
1275 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
1276 "disk_cache: commit_entity: URL '%s' not cached due to earlier disk error.",
1280 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
1281 "disk_cache: commit_entity: Headers and body for URL %s cached.",
1285 apr_pool_destroy(dobj->data.pool);
/*
 * create_dir_config: allocate a disk_cache_dir_conf from pool p with
 * apr_pcalloc() and seed maxfs, minfs, readsize and readtime with their
 * DEFAULT_* values. No *_set flag is assigned here, so the flags keep the
 * zero value apr_pcalloc() gives them. The dummy argument is not referenced
 * in the visible lines.
 */
1290 static void *create_dir_config(apr_pool_t *p, char *dummy)
1292 disk_cache_dir_conf *dconf = apr_pcalloc(p, sizeof(disk_cache_dir_conf));
1294 dconf->maxfs = DEFAULT_MAX_FILE_SIZE;
1295 dconf->minfs = DEFAULT_MIN_FILE_SIZE;
1296 dconf->readsize = DEFAULT_READSIZE;
1297 dconf->readtime = DEFAULT_READTIME;
/*
 * merge_dir_config: build a merged per-directory configuration in pool p.
 *
 * basev and addv are both treated as disk_cache_dir_conf pointers. For each
 * of maxfs, minfs, readsize and readtime the value from addv wins when its
 * *_set flag is non-zero; otherwise the value from basev is used. Each
 * merged *_set flag is the logical OR of the two input flags, so a setting
 * made at either level stays marked as explicitly set.
 */
1302 static void *merge_dir_config(apr_pool_t *p, void *basev, void *addv) {
1303 disk_cache_dir_conf *new = (disk_cache_dir_conf *) apr_pcalloc(p, sizeof(disk_cache_dir_conf));
1304 disk_cache_dir_conf *add = (disk_cache_dir_conf *) addv;
1305 disk_cache_dir_conf *base = (disk_cache_dir_conf *) basev;
1307 new->maxfs = (add->maxfs_set == 0) ? base->maxfs : add->maxfs;
1308 new->maxfs_set = add->maxfs_set || base->maxfs_set;
1309 new->minfs = (add->minfs_set == 0) ? base->minfs : add->minfs;
1310 new->minfs_set = add->minfs_set || base->minfs_set;
1311 new->readsize = (add->readsize_set == 0) ? base->readsize : add->readsize;
1312 new->readsize_set = add->readsize_set || base->readsize_set;
1313 new->readtime = (add->readtime_set == 0) ? base->readtime : add->readtime;
1314 new->readtime_set = add->readtime_set || base->readtime_set;
/*
 * create_config: allocate a disk_cache_conf from pool p and initialise it
 * with the default directory levels and directory length and with no cache
 * root (NULL pointer, length 0). The server_rec argument s is not
 * referenced in the visible lines.
 */
1319 static void *create_config(apr_pool_t *p, server_rec *s)
1321 disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
1323 /* XXX: Set default values */
1324 conf->dirlevels = DEFAULT_DIRLEVELS;
1325 conf->dirlength = DEFAULT_DIRLENGTH;
1327 conf->cache_root = NULL;
1328 conf->cache_root_len = 0;
1334 * mod_disk_cache configuration directive handlers.
/*
 * set_cache_root: handler for the CacheRoot directive.
 *
 * Stores the arg pointer itself (no copy is made here) and its strlen() in
 * the disk_cache_conf looked up from parms->server. The value is not
 * validated or normalised in the visible lines; see the TODO below.
 * in_struct_ptr is not referenced.
 */
1337 *set_cache_root(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1339 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1340 &disk_cache_module);
1341 conf->cache_root = arg;
1342 conf->cache_root_len = strlen(arg);
1343 /* TODO: canonicalize cache_root and strip off any trailing slashes */
1349 * Consider eliminating the next two directives in favor of
1350 * Ian's prime number hash...
1351 * key = hash_fn( r->uri)
1352 * filename = "/key % prime1 /key %prime2/key %prime3"
/*
 * set_cache_dirlevels: handler for the CacheDirLevels directive.
 *
 * Converts arg with atoi(), which cannot report conversion errors, and
 * stores the result in conf->dirlevels unless one of the error strings
 * below is returned. The product check uses whatever conf->dirlength holds
 * when this directive is processed.
 *
 * NOTE(review): the condition guarding the first error string is not
 * visible here; its message implies values below 1 are rejected -- confirm.
 * NOTE(review): the second message hard-codes 20 while the test compares
 * against CACHEFILE_LEN -- confirm the two agree.
 */
1355 *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1357 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1358 &disk_cache_module);
1359 int val = atoi(arg);
1361 return "CacheDirLevels value must be an integer greater than 0";
1362 if (val * conf->dirlength > CACHEFILE_LEN)
1363 return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1364 conf->dirlevels = val;
/*
 * set_cache_dirlength: handler for the CacheDirLength directive.
 *
 * Converts arg with atoi(), which cannot report conversion errors, and
 * stores the result in conf->dirlength unless one of the error strings
 * below is returned. The product check uses whatever conf->dirlevels holds
 * when this directive is processed.
 *
 * NOTE(review): the condition guarding the first error string is not
 * visible here; its message implies values below 1 are rejected -- confirm.
 * NOTE(review): the second message hard-codes 20 while the test compares
 * against CACHEFILE_LEN -- confirm the two agree.
 */
1368 *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1370 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1371 &disk_cache_module);
1372 int val = atoi(arg);
1374 return "CacheDirLength value must be an integer greater than 0";
1375 if (val * conf->dirlevels > CACHEFILE_LEN)
1376 return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1378 conf->dirlength = val;
/*
 * set_cache_minfs: handler for the CacheMinFileSize directive.
 *
 * in_struct_ptr is used as the disk_cache_dir_conf to update. arg is parsed
 * by apr_strtoff() straight into dconf->minfs; a status other than
 * APR_SUCCESS leads to the error string below.
 *
 * NOTE(review): the second operand of the || test and whatever follows the
 * error return are not visible here -- confirm against the complete file.
 */
1383 *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1385 disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1387 if (apr_strtoff(&dconf->minfs, arg, NULL, 0) != APR_SUCCESS ||
1390 return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
/*
 * set_cache_maxfs: handler for the CacheMaxFileSize directive.
 *
 * in_struct_ptr is used as the disk_cache_dir_conf to update. arg is parsed
 * by apr_strtoff() straight into dconf->maxfs; a status other than
 * APR_SUCCESS leads to the error string below.
 *
 * NOTE(review): the second operand of the || test and whatever follows the
 * error return are not visible here -- confirm against the complete file.
 */
1396 *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1398 disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1400 if (apr_strtoff(&dconf->maxfs, arg, NULL, 0) != APR_SUCCESS ||
1403 return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
/*
 * set_cache_readsize: handler for the CacheReadSize directive.
 *
 * in_struct_ptr is used as the disk_cache_dir_conf to update. arg is parsed
 * by apr_strtoff() straight into dconf->readsize; a parse failure or a
 * negative result yields the error string below. Otherwise readsize_set is
 * set to 1 so that merge_dir_config() prefers this value.
 */
1409 *set_cache_readsize(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1411 disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1413 if (apr_strtoff(&dconf->readsize, arg, NULL, 0) != APR_SUCCESS ||
1414 dconf->readsize < 0)
1416 return "CacheReadSize argument must be a non-negative integer representing the max amount of data to cache in go.";
1418 dconf->readsize_set = 1;
/*
 * set_cache_readtime: handler for the CacheReadTime directive.
 *
 * in_struct_ptr is used as the disk_cache_dir_conf to update. arg is parsed
 * by apr_strtoff() into a local millisecond count, which is converted with
 * apr_time_from_msec() before being stored in dconf->readtime;
 * readtime_set is then set to 1 so that merge_dir_config() prefers this
 * value.
 *
 * NOTE(review): the second operand of the || test is not visible here; the
 * message implies negative values are rejected -- confirm.
 */
1423 *set_cache_readtime(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1425 disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1426 apr_off_t milliseconds;
1428 if (apr_strtoff(&milliseconds, arg, NULL, 0) != APR_SUCCESS ||
1431 return "CacheReadTime argument must be a non-negative integer representing the max amount of time taken to cache in go.";
1433 dconf->readtime = apr_time_from_msec(milliseconds);
1434 dconf->readtime_set = 1;
/*
 * Directive table for this module. Every directive takes exactly one
 * argument (AP_INIT_TAKE1).
 *
 * CacheRoot, CacheDirLevels and CacheDirLength are declared RSRC_CONF only;
 * their handlers update the disk_cache_conf obtained from parms->server.
 * The four Cache{Min,Max}FileSize / CacheRead{Size,Time} directives are
 * declared RSRC_CONF | ACCESS_CONF; their handlers update the
 * disk_cache_dir_conf passed in as in_struct_ptr.
 */
1438 static const command_rec disk_cache_cmds[] =
1440 AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
1441 "The directory to store cache files"),
1442 AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
1443 "The number of levels of subdirectories in the cache"),
1444 AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
1445 "The number of characters in subdirectory names"),
1446 AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF | ACCESS_CONF,
1447 "The minimum file size to cache a document"),
1448 AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF | ACCESS_CONF,
1449 "The maximum file size to cache a document"),
1450 AP_INIT_TAKE1("CacheReadSize", set_cache_readsize, NULL, RSRC_CONF | ACCESS_CONF,
1451 "The maximum quantity of data to attempt to read and cache in one go"),
1452 AP_INIT_TAKE1("CacheReadTime", set_cache_readtime, NULL, RSRC_CONF | ACCESS_CONF,
1453 "The maximum time taken to attempt to read and cache in go"),
/*
 * Provider record that disk_cache_register_hook() registers under the name
 * "disk".
 * NOTE(review): the initializer does not appear in the lines visible here --
 * confirm its members against the complete file.
 */
1457 static const cache_provider cache_disk_provider =
/*
 * disk_cache_register_hook: hook-registration callback named in the module
 * record. Its only visible action is to register cache_disk_provider in
 * CACHE_PROVIDER_GROUP under the name "disk" with version string "0",
 * using pool p.
 */
1470 static void disk_cache_register_hook(apr_pool_t *p)
1472 /* cache initializer */
1473 ap_register_provider(p, CACHE_PROVIDER_GROUP, "disk", "0",
1474 &cache_disk_provider);
/*
 * Module record for disk_cache. Per-directory configuration has both a
 * create and a merge function; per-server configuration has a create
 * function only (the merge slot is NULL).
 */
1477 AP_DECLARE_MODULE(disk_cache) = {
1478 STANDARD20_MODULE_STUFF,
1479 create_dir_config, /* create per-directory config structure */
1480 merge_dir_config, /* merge per-directory config structures */
1481 create_config, /* create per-server config structure */
1482 NULL, /* merge per-server config structures */
1483 disk_cache_cmds, /* command apr_table_t */
1484 disk_cache_register_hook /* register hooks */