1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "apr_file_io.h"
18 #include "apr_strings.h"
19 #include "mod_cache.h"
20 #include "mod_disk_cache.h"
21 #include "ap_provider.h"
22 #include "util_filter.h"
23 #include "util_script.h"
24 #include "util_charset.h"
27 * mod_disk_cache: Disk Based HTTP 1.1 Cache.
29 * Flow to Find the .data file:
30 * Incoming client requests URI /foo/bar/baz
31 * Generate <hash> off of /foo/bar/baz
33 * Read in <hash>.header file (may contain Format #1 or Format #2)
34 * If format #1 (Contains a list of Vary Headers):
35 * Use each header name (from .header) with our request values (headers_in) to
36 * regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz
37 * re-read in <hash>.header (must be format #2)
41 * apr_uint32_t format;
43 * apr_array_t vary_headers (delimited by CRLF)
46 * disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
47 * entity name (dobj->name) [length is in disk_cache_info_t->name_len]
48 * r->headers_out (delimited by CRLF)
50 * r->headers_in (delimited by CRLF)
/* Module record for this provider, followed by prototypes of functions that
 * are used before their definitions further down in the file. The record is
 * passed to ap_get_module_config() wherever the per-server or per-directory
 * configuration is looked up.
 */
54 module AP_MODULE_DECLARE_DATA disk_cache_module;
56 /* Forward declarations */
57 static int remove_entity(cache_handle_t *h);
58 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
59 static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *in,
60 apr_bucket_brigade *out);
61 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
62 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
63 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
67 * Local static functions
/* Build the path of the header file for a cache entry.
 *
 * p:    pool passed to ap_cache_generate_name() and apr_pstrcat().
 * conf: supplies dirlevels and dirlength for the hash name, and cache_root.
 * dobj: cache object; dobj->hashfile is generated from name on first use and
 *       reused on later calls.
 * name: key handed to ap_cache_generate_name() when no hash name exists yet.
 *
 * Two layouts are visible:
 *   dobj->prefix CACHE_VDIR_SUFFIX "/" hashfile CACHE_HEADER_SUFFIX
 *   conf->cache_root "/" hashfile CACHE_HEADER_SUFFIX
 * NOTE(review): the test that selects between them is not visible in this
 * excerpt; presumably it is whether dobj->prefix is set - confirm.
 */
70 static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
71 disk_cache_object_t *dobj, const char *name)
73 if (!dobj->hashfile) {
74 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
75 conf->dirlength, name);
79 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
80 dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
83 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
84 CACHE_HEADER_SUFFIX, NULL);
/* Build the path of the data (body) file for a cache entry.
 *
 * p:    pool passed to ap_cache_generate_name() and apr_pstrcat().
 * conf: supplies dirlevels and dirlength for the hash name, and cache_root.
 * dobj: cache object; dobj->hashfile is generated from name on first use and
 *       reused on later calls.
 * name: key handed to ap_cache_generate_name() when no hash name exists yet.
 *
 * Two layouts are visible:
 *   dobj->prefix CACHE_VDIR_SUFFIX "/" hashfile CACHE_DATA_SUFFIX
 *   conf->cache_root "/" hashfile CACHE_DATA_SUFFIX
 * NOTE(review): the test that selects between them is not visible in this
 * excerpt; presumably it is whether dobj->prefix is set - confirm.
 */
88 static char *data_file(apr_pool_t *p, disk_cache_conf *conf,
89 disk_cache_object_t *dobj, const char *name)
91 if (!dobj->hashfile) {
92 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
93 conf->dirlength, name);
97 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
98 dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
101 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
102 CACHE_DATA_SUFFIX, NULL);
/* Create the directories on the path of file.
 *
 * conf: supplies cache_root_len; the scan of file starts at
 *       file + conf->cache_root_len + 1, i.e. after the cache root prefix.
 * file: full path whose directories are to be created.
 * pool: pool handed to apr_dir_make().
 *
 * Directories are made with owner read, write and execute permission. A
 * status of EEXIST from apr_dir_make() is not treated as a failure.
 * NOTE(review): the code that walks the path components is not visible in
 * this excerpt.
 */
106 static apr_status_t mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool)
111 for (p = (char*)file + conf->cache_root_len + 1;;) {
117 rv = apr_dir_make(file,
118 APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool);
119 if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
128 /* htcacheclean may remove directories underneath us.
129 * So, we'll try renaming three times at a cost of 0.002 seconds.
/* Rename src to dest, retrying when the first attempt fails.
 *
 * conf: passed through to mkdir_structure().
 * src:  existing file.
 * dest: target path.
 *
 * After a failed apr_file_rename() the loop makes at most two further
 * attempts and stops early once rv is APR_SUCCESS. Before each retry the
 * directory structure for dest is rebuilt with mkdir_structure().
 * NOTE(review): what happens when mkdir_structure() fails, and the pause
 * mentioned by the inline comment, are on lines not visible in this excerpt.
 */
131 static apr_status_t safe_file_rename(disk_cache_conf *conf,
132 const char *src, const char *dest,
137 rv = apr_file_rename(src, dest, pool);
139 if (rv != APR_SUCCESS) {
142 for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
143 /* 1000 micro-seconds aka 0.001 seconds. */
146 rv = mkdir_structure(conf, dest, pool);
147 if (rv != APR_SUCCESS)
150 rv = apr_file_rename(src, dest, pool);
/* Move a finished temporary file to its final cache location.
 *
 * conf: passed through to safe_file_rename().
 * file: describes the temporary path (tempfile), the final path (file) and
 *       the pool used for the file operations.
 *
 * The move is a rename of file->tempfile to file->file. When the rename
 * fails a warning naming both paths is logged against r->server and the
 * temporary file is removed.
 * NOTE(review): any condition guarding the rename is not visible in this
 * excerpt.
 */
157 static apr_status_t file_cache_el_final(disk_cache_conf *conf, disk_cache_file_t *file,
162 /* This assumes that the tempfiles are on the same file system
163 * as the cache_root. If not, then we need a file copy/move
164 * rather than a rename.
167 /* move the file over */
170 rv = safe_file_rename(conf, file->tempfile, file->file, file->pool);
171 if (rv != APR_SUCCESS) {
172 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
173 "disk_cache: rename tempfile to file failed:"
174 " %s -> %s", file->tempfile, file->file);
175 apr_file_remove(file->tempfile, file->pool);
/* Pool cleanup callback that deletes the temporary file of a
 * disk_cache_file_t.
 *
 * dummy: the disk_cache_file_t that was registered with the cleanup (see
 *        file_cache_create).
 *
 * The temporary file is removed with apr_file_remove() and file->tempfile is
 * reset to NULL.
 * NOTE(review): any guard around the removal is not visible in this excerpt.
 */
184 static apr_status_t file_cache_temp_cleanup(void *dummy) {
185 disk_cache_file_t *file = (disk_cache_file_t *)dummy;
187 /* clean up the temporary file */
189 apr_file_remove(file->tempfile, file->pool);
192 file->tempfile = NULL;
/* Prepare a disk_cache_file_t for writing through a temporary file.
 *
 * Sets file->tempfile to conf->cache_root followed by AP_TEMPFILE and
 * registers file_cache_temp_cleanup on pool, as both the plain and the child
 * cleanup, with file as its argument.
 */
198 static apr_status_t file_cache_create(disk_cache_conf *conf, disk_cache_file_t *file,
202 file->tempfile = apr_pstrcat(pool, conf->cache_root, AP_TEMPFILE, NULL);
204 apr_pool_cleanup_register(pool, file, file_cache_temp_cleanup, file_cache_temp_cleanup);
209 /* These two functions get and put state information into the data
210 * file for an ap_cache_el, this state information will be read
211 * and written transparent to clients of this module
/* Read the disk_cache_info_t record and the entity name from an open header
 * file and copy the metadata out.
 *
 * fd:   header file; read sequentially from its current position.
 * info: receives status, date, expire, request_time and response_time.
 * dobj: receives a copy of the whole record in dobj->disk_info; the name read
 *       from the file is compared with dobj->name using strcmp().
 * r:    request whose pool holds the buffer for the name.
 *
 * NOTE(review): disk_info.name_len comes straight from the file and is used
 * as allocation size and read length without a bound check in the visible
 * lines.
 */
213 static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
214 disk_cache_object_t *dobj, request_rec *r)
218 disk_cache_info_t disk_info;
221 /* read the data from the cache file */
222 len = sizeof(disk_cache_info_t);
223 rv = apr_file_read_full(fd, &disk_info, len, &len);
224 if (rv != APR_SUCCESS) {
228 /* Store it away so we can get it later. */
229 dobj->disk_info = disk_info;
231 info->status = disk_info.status;
232 info->date = disk_info.date;
233 info->expire = disk_info.expire;
234 info->request_time = disk_info.request_time;
235 info->response_time = disk_info.response_time;
237 /* Note that we could optimize this by conditionally doing the palloc
238 * depending upon the size. */
239 urlbuff = apr_palloc(r->pool, disk_info.name_len + 1);
240 len = disk_info.name_len;
241 rv = apr_file_read_full(fd, urlbuff, len, &len);
242 if (rv != APR_SUCCESS) {
245 urlbuff[disk_info.name_len] = '\0';
247 /* check that we have the same URL */
248 /* Would strncmp be correct? */
249 if (strcmp(urlbuff, dobj->name) != 0) {
/* Build a new cache key from a list of header names, the values those
 * headers have in a header table, and the old key.
 *
 * p:       pool for the iovec array and the returned string.
 * headers: table in which each listed header is looked up.
 * varray:  array of header names (char pointers).
 * oldkey:  original key; added as the last piece.
 *
 * The iovec array is sized for two entries per header name plus one for
 * oldkey, and the pieces are joined with apr_pstrcatv().
 * NOTE(review): how a header that is absent from the table is handled is on
 * lines not visible in this excerpt.
 */
256 static const char* regen_key(apr_pool_t *p, apr_table_t *headers,
257 apr_array_header_t *varray, const char *oldkey)
265 nvec = (varray->nelts * 2) + 1;
266 iov = apr_palloc(p, sizeof(struct iovec) * nvec);
267 elts = (const char **) varray->elts;
270 * - Handle multiple-value headers better. (sort them?)
271 * - Handle Case in-sensitive Values better.
272 * This isn't the end of the world, since it just lowers the cache
273 * hit rate, but it would be nice to fix.
275 * The majority are case insenstive if they are values (encoding etc).
276 * Most of rfc2616 is case insensitive on header contents.
278 * So the better solution may be to identify headers which should be
279 * treated case-sensitive?
280 * HTTP URI's (3.2.3) [host and scheme are insensitive]
281 * HTTP method (5.1.1)
282 * HTTP-date values (3.3.1)
283 * 3.7 Media Types [exerpt]
284 * The type, subtype, and parameter attribute names are case-
285 * insensitive. Parameter values might or might not be case-sensitive,
286 * depending on the semantics of the parameter name.
287 * 4.20 Except [exerpt]
288 * Comparison of expectation values is case-insensitive for unquoted
289 * tokens (including the 100-continue token), and is case-sensitive for
290 * quoted-string expectation-extensions.
293 for(i=0, k=0; i < varray->nelts; i++) {
294 header = apr_table_get(headers, elts[i]);
298 iov[k].iov_base = (char*) elts[i];
299 iov[k].iov_len = strlen(elts[i]);
301 iov[k].iov_base = (char*) header;
302 iov[k].iov_len = strlen(header);
305 iov[k].iov_base = (char*) oldkey;
306 iov[k].iov_len = strlen(oldkey);
309 return apr_pstrcatv(p, iov, k, NULL);
/* qsort() comparator that orders two array elements, each a pointer to a
 * NUL-terminated string, by strcmp() of the strings they point to.
 *
 * fn1, fn2: addresses of the two elements being compared.
 * Returns a value below, equal to or above zero, exactly as strcmp() does.
 *
 * The elements are only read, so the const qualification that qsort() hands
 * in is kept instead of being cast away.
 */
static int array_alphasort(const void *fn1, const void *fn2)
{
    const char *const *lhs = fn1;
    const char *const *rhs = fn2;

    return strcmp(*lhs, *rhs);
}
/* Split data into list items and append them to arr, then sort arr.
 *
 * p:    pool handed to ap_get_list_item() for each extracted item.
 * data: text to split; items are taken until ap_get_list_item() returns
 *       NULL.
 * arr:  array of char pointers that receives the items.
 *
 * The array is sorted with array_alphasort so that the same set of names
 * yields the same sequence regardless of the order they were listed in.
 */
317 static void tokens_to_array(apr_pool_t *p, const char *data,
318 apr_array_header_t *arr)
322 while ((token = ap_get_list_item(p, &data)) != NULL) {
323 *((const char **) apr_array_push(arr)) = token;
326 /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
327 qsort((void *) arr->elts, arr->nelts,
328 sizeof(char *), array_alphasort);
332 * Hook and mod_cache callback functions
/* Set up a new cache object for a response that is about to be stored.
 *
 * h:   handle; h->cache_obj is pointed at the newly allocated object.
 * r:   current request; the objects and strings are allocated from r->pool.
 * key: cache key; duplicated into obj->key, which dobj->name also points at.
 * len: expected body length; the inline comment states it is -1 when unknown.
 *
 * Conditions tested before anything is allocated: no cache root configured,
 * a partial-content response status, len above conf->maxfs, and a
 * non-negative len below conf->minfs.
 * NOTE(review): the outcome of each test is on lines not visible in this
 * excerpt.
 *
 * The header, vary and data file slots are all set up on one subpool of
 * r->pool. hdrs.file and vary.file are both produced by header_file() with
 * the same key.
 */
334 static int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len)
336 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
339 disk_cache_object_t *dobj;
342 if (conf->cache_root == NULL) {
346 /* we don't support caching of range requests (yet) */
347 if (r->status == HTTP_PARTIAL_CONTENT) {
348 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
349 "disk_cache: URL %s partial content response not cached",
354 /* Note, len is -1 if unknown so don't trust it too hard */
355 if (len > conf->maxfs) {
356 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
357 "disk_cache: URL %s failed the size check "
358 "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
359 key, len, conf->maxfs);
362 if (len >= 0 && len < conf->minfs) {
363 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
364 "disk_cache: URL %s failed the size check "
365 "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")",
366 key, len, conf->minfs);
370 /* Allocate and initialize cache_object_t and disk_cache_object_t */
371 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj));
372 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj));
374 obj->key = apr_pstrdup(r->pool, key);
376 dobj->name = obj->key;
378 /* Save the cache root */
379 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
380 dobj->root_len = conf->cache_root_len;
382 apr_pool_create(&pool, r->pool);
384 file_cache_create(conf, &dobj->hdrs, pool);
385 file_cache_create(conf, &dobj->vary, pool);
386 file_cache_create(conf, &dobj->data, pool);
388 dobj->data.file = data_file(r->pool, conf, dobj, key);
389 dobj->hdrs.file = header_file(r->pool, conf, dobj, key);
390 dobj->vary.file = header_file(r->pool, conf, dobj, key);
/* Locate an existing cache entry for key and open its files.
 *
 * h:   handle; h->cache_obj is pointed at a newly allocated object.
 * r:   current request; allocations come from r->pool.
 * key: cache key used to derive the header file name.
 *
 * The header file for key is opened first and its leading format word read:
 *   - VARY_FORMAT_VERSION: the file holds an expiry value and a list of
 *     header names. The key is regenerated from r->headers_in with
 *     regen_key() and the real header file is opened below the vary file
 *     path, which becomes dobj->prefix.
 *   - neither that nor DISK_FORMAT_VERSION: a version mismatch is logged
 *     and the file is closed.
 *   - otherwise: the file already is the header file; its descriptor is
 *     moved to dobj->hdrs and the file is rewound to offset 0.
 * Afterwards the data file is opened, its size is stored in dobj->file_size
 * when apr_file_info_get() succeeds, and file_cache_recall_mydata() reads
 * the metadata from the header file.
 *
 * NOTE(review): the apr_file_read_full() calls that fetch format and expire
 * do not have their status checked in the visible lines, so format may be
 * compared without having been read.
 */
395 static int open_entity(cache_handle_t *h, request_rec *r, const char *key)
401 static int error_logged = 0;
402 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
404 #ifdef APR_SENDFILE_ENABLED
405 core_dir_config *coreconf = ap_get_module_config(r->per_dir_config,
411 disk_cache_object_t *dobj;
417 /* Look up entity keyed to 'url' */
418 if (conf->cache_root == NULL) {
421 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
422 "disk_cache: Cannot cache files to disk without a CacheRoot specified.");
427 /* Create and init the cache object */
428 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
429 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
433 /* Open the headers file */
436 /* Save the cache root */
437 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
438 dobj->root_len = conf->cache_root_len;
440 dobj->vary.file = header_file(r->pool, conf, dobj, key);
441 flags = APR_READ|APR_BINARY|APR_BUFFERED;
442 rc = apr_file_open(&dobj->vary.fd, dobj->vary.file, flags, 0, r->pool);
443 if (rc != APR_SUCCESS) {
447 /* read the format from the cache file */
448 len = sizeof(format);
449 apr_file_read_full(dobj->vary.fd, &format, len, &len);
451 if (format == VARY_FORMAT_VERSION) {
452 apr_array_header_t* varray;
455 len = sizeof(expire);
456 apr_file_read_full(dobj->vary.fd, &expire, len, &len);
458 varray = apr_array_make(r->pool, 5, sizeof(char*));
459 rc = read_array(r, varray, dobj->vary.fd);
460 if (rc != APR_SUCCESS) {
461 ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
462 "disk_cache: Cannot parse vary header file: %s",
464 apr_file_close(dobj->vary.fd);
467 apr_file_close(dobj->vary.fd);
469 nkey = regen_key(r->pool, r->headers_in, varray, key);
471 dobj->hashfile = NULL;
472 dobj->prefix = dobj->vary.file;
473 dobj->hdrs.file = header_file(r->pool, conf, dobj, nkey);
475 flags = APR_READ|APR_BINARY|APR_BUFFERED;
476 rc = apr_file_open(&dobj->hdrs.fd, dobj->hdrs.file, flags, 0, r->pool);
477 if (rc != APR_SUCCESS) {
481 else if (format != DISK_FORMAT_VERSION) {
482 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
483 "disk_cache: File '%s' has a version mismatch. File had version: %d.",
484 dobj->vary.file, format);
485 apr_file_close(dobj->vary.fd);
489 apr_off_t offset = 0;
491 /* oops, not vary as it turns out */
492 dobj->hdrs.fd = dobj->vary.fd;
493 dobj->vary.fd = NULL;
494 dobj->hdrs.file = dobj->vary.file;
496 /* This wasn't a Vary Format file, so we must seek to the
497 * start of the file again, so that later reads work.
499 apr_file_seek(dobj->hdrs.fd, APR_SET, &offset);
507 apr_pool_create(&pool, r->pool);
509 file_cache_create(conf, &dobj->hdrs, pool);
510 file_cache_create(conf, &dobj->vary, pool);
511 file_cache_create(conf, &dobj->data, pool);
513 dobj->data.file = data_file(r->pool, conf, dobj, nkey);
515 /* Open the data file */
516 flags = APR_READ|APR_BINARY;
517 #ifdef APR_SENDFILE_ENABLED
518 /* When we are in the quick handler we don't have the per-directory
519 * configuration, so this check only takes the global setting of
520 * the EnableSendFile directive into account.
522 flags |= AP_SENDFILE_ENABLED(coreconf->enable_sendfile);
524 rc = apr_file_open(&dobj->data.fd, dobj->data.file, flags, 0, r->pool);
525 if (rc != APR_SUCCESS) {
526 ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
527 "disk_cache: Cannot open data file %s", dobj->data.file);
528 apr_file_close(dobj->hdrs.fd);
532 rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->data.fd);
533 if (rc == APR_SUCCESS) {
534 dobj->file_size = finfo.size;
537 /* Read the bytes to setup the cache_info fields */
538 rc = file_cache_recall_mydata(dobj->hdrs.fd, info, dobj, r);
539 if (rc != APR_SUCCESS) {
540 ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
541 "disk_cache: Cannot read header file %s", dobj->hdrs.file);
542 apr_file_close(dobj->hdrs.fd);
546 /* Initialize the cache_handle callback functions */
547 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
548 "disk_cache: Recalled cached URL info header %s", dobj->name);
550 apr_file_close(dobj->hdrs.fd);
/* Drop the cache object attached to handle h.
 * NOTE(review): the statements of the body are not visible in this excerpt;
 * the inline comment says the cache object pointer is nulled out.
 */
555 static int remove_entity(cache_handle_t *h)
557 /* Null out the cache object pointer so next time we start from scratch */
/* Delete the header and data files of a cache entry, then try to remove the
 * directories above them.
 *
 * h: handle whose cache object (h->cache_obj->vobj) names the files.
 * p: pool for the path copy and the APR calls.
 *
 * Each file is removed only when its path is set. A removal status of ENOENT
 * is not reported as a failure. Messages are logged at debug level with a
 * NULL server.
 * Directory removal works on a copy of the header file path, or of the data
 * file path when there is no header path, and scans from
 * dir + dobj->root_len, i.e. past the stored cache root prefix.
 */
562 static int remove_url(cache_handle_t *h, apr_pool_t *p)
565 disk_cache_object_t *dobj;
567 /* Get disk cache object from cache handle */
568 dobj = (disk_cache_object_t *) h->cache_obj->vobj;
573 /* Delete headers file */
574 if (dobj->hdrs.file) {
575 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
576 "disk_cache: Deleting %s from cache.", dobj->hdrs.file);
578 rc = apr_file_remove(dobj->hdrs.file, p);
579 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
580 /* Will only result in an output if httpd is started with -e debug.
581 * For reason see log_error_core for the case s == NULL.
583 ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
584 "disk_cache: Failed to delete headers file %s from cache.",
590 /* Delete data file */
591 if (dobj->data.file) {
592 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
593 "disk_cache: Deleting %s from cache.", dobj->data.file);
595 rc = apr_file_remove(dobj->data.file, p);
596 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
597 /* Will only result in an output if httpd is started with -e debug.
598 * For reason see log_error_core for the case s == NULL.
600 ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
601 "disk_cache: Failed to delete data file %s from cache.",
607 /* now delete directories as far as possible up to our cache root */
609 const char *str_to_copy;
611 str_to_copy = dobj->hdrs.file ? dobj->hdrs.file : dobj->data.file;
613 char *dir, *slash, *q;
615 dir = apr_pstrdup(p, str_to_copy);
617 /* remove filename */
618 slash = strrchr(dir, '/');
622 * now walk our way back to the cache root, delete everything
623 * in the way as far as possible
625 * Note: due to the way we constructed the file names in
626 * header_file and data_file, we are guaranteed that the
627 * cache_root is suffixed by at least one '/' which will be
628 * turned into a terminating null by this loop. Therefore,
629 * we won't either delete or go above our cache root.
631 for (q = dir + dobj->root_len; *q ; ) {
632 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
633 "disk_cache: Deleting directory %s from cache",
636 rc = apr_dir_remove(dir, p);
637 if (rc != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rc)) {
640 slash = strrchr(q, '/');
/* Read strings, one per line, from a file and append a copy of each to arr.
 *
 * r:   request used for logging; its pool holds the stored copies.
 * arr: array of char pointers that receives the strings.
 *
 * Lines are read with apr_file_gets() into a MAX_STRING_LEN stack buffer. A
 * trailing newline, and a CR directly before it, are recognised. A failed
 * read is logged as a premature end of the vary array.
 * NOTE(review): the end-of-array test is on a line not visible here; given
 * the format written by store_array it is presumably an empty line -
 * confirm.
 */
649 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
652 char w[MAX_STRING_LEN];
657 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
658 if (rv != APR_SUCCESS) {
659 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
660 "Premature end of vary array.");
665 if (p > 0 && w[p - 1] == '\n') {
666 if (p > 1 && w[p - 2] == CR) {
674 /* If we've finished reading the array, break out of the loop. */
679 *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
/* Write every string in arr to fd, one per line, and finish with an empty
 * line.
 *
 * fd:  file to write to.
 * arr: array of char pointers.
 *
 * Each element is written together with a trailing CRLF in a single
 * apr_file_writev() call. After the last element a bare CRLF is written and
 * the status of that final write is returned.
 */
685 static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr)
693 elts = (const char **) arr->elts;
695 for (i = 0; i < arr->nelts; i++) {
696 iov[0].iov_base = (char*) elts[i];
697 iov[0].iov_len = strlen(elts[i]);
698 iov[1].iov_base = CRLF;
699 iov[1].iov_len = sizeof(CRLF) - 1;
701 rv = apr_file_writev(fd, (const struct iovec *) &iov, 2,
703 if (rv != APR_SUCCESS) {
708 iov[0].iov_base = CRLF;
709 iov[0].iov_len = sizeof(CRLF) - 1;
711 return apr_file_writev(fd, (const struct iovec *) &iov, 1,
/* Read header lines of the form name: value from a file into a table.
 *
 * handle: cache handle (not referenced in the visible lines).
 * r:      request used for logging.
 * table:  destination; each header is added with apr_table_add().
 * file:   open file, read line by line with apr_file_gets() into a
 *         MAX_STRING_LEN stack buffer.
 *
 * A failed read is logged as a premature end of the cache headers. The line
 * is searched for a colon with strchr(); whitespace after it is skipped
 * before the value is stored. On builds with APR_CHARSET_EBCDIC a line
 * without a colon is examined first, and converted in place when it looks
 * like ASCII.
 * NOTE(review): the handling of a line that has no colon, and the
 * end-of-headers test, are on lines not visible in this excerpt.
 */
715 static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
716 apr_table_t *table, apr_file_t *file)
718 char w[MAX_STRING_LEN];
725 /* ### What about APR_EOF? */
726 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
727 if (rv != APR_SUCCESS) {
728 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
729 "Premature end of cache headers.");
733 /* Delete terminal (CR?)LF */
736 /* Indeed, the host's '\n':
737 '\012' for UNIX; '\015' for MacOS; '\025' for OS/390
738 -- whatever the script generates.
740 if (p > 0 && w[p - 1] == '\n') {
741 if (p > 1 && w[p - 2] == CR) {
749 /* If we've finished reading the headers, break out of the loop. */
754 #if APR_CHARSET_EBCDIC
755 /* Chances are that we received an ASCII header text instead of
756 * the expected EBCDIC header lines. Try to auto-detect:
758 if (!(l = strchr(w, ':'))) {
759 int maybeASCII = 0, maybeEBCDIC = 0;
760 unsigned char *cp, native;
761 apr_size_t inbytes_left, outbytes_left;
763 for (cp = w; *cp != '\0'; ++cp) {
764 native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp);
765 if (apr_isprint(*cp) && !apr_isprint(native))
767 if (!apr_isprint(*cp) && apr_isprint(native))
770 if (maybeASCII > maybeEBCDIC) {
771 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
772 "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
774 inbytes_left = outbytes_left = cp - w;
775 apr_xlate_conv_buffer(ap_hdrs_from_ascii,
776 w, &inbytes_left, w, &outbytes_left);
779 #endif /*APR_CHARSET_EBCDIC*/
781 /* if we see a bogus header don't ignore it. Shout and scream */
782 if (!(l = strchr(w, ':'))) {
787 while (*l && apr_isspace(*l)) {
791 apr_table_add(table, w, l);
798 * Reads headers from a buffer and returns an array of headers.
799 * Returns NULL on file error
800 * This routine tries to deal with too long lines and continuation lines.
801 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
802 * Is that okay, or should they be collapsed where possible?
/* Load the cached response and request headers of an entry into the handle.
 *
 * h: handle; h->req_hdrs and h->resp_hdrs are created in r->pool.
 * r: current request.
 *
 * The response headers are read first and the request headers second, both
 * from dobj->hdrs.fd with read_table(); the header file is then closed. A
 * missing header descriptor is logged as an error.
 * NOTE(review): the status returned by the two read_table() calls is not
 * checked in the visible lines.
 */
804 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
806 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
808 /* This case should not happen... */
809 if (!dobj->hdrs.fd) {
810 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
811 "disk_cache: recalling headers; but no header fd for %s", dobj->name);
815 h->req_hdrs = apr_table_make(r->pool, 20);
816 h->resp_hdrs = apr_table_make(r->pool, 20);
818 /* Call routine to read the header lines/status line */
819 read_table(h, r, h->resp_hdrs, dobj->hdrs.fd);
820 read_table(h, r, h->req_hdrs, dobj->hdrs.fd);
822 apr_file_close(dobj->hdrs.fd);
824 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
825 "disk_cache: Recalled headers for URL %s", dobj->name);
/* Put the cached body into a brigade.
 *
 * h:  handle whose cache object holds the open data file and its size.
 * p:  pool handed to apr_brigade_insert_file().
 * bb: brigade that receives the content.
 *
 * The data file is inserted from offset 0 for dobj->file_size bytes, and an
 * EOS bucket is appended after it.
 */
829 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb)
832 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
834 apr_brigade_insert_file(bb, dobj->data.fd, 0, dobj->file_size, p);
836 e = apr_bucket_eos_create(bb->bucket_alloc);
837 APR_BRIGADE_INSERT_TAIL(bb, e);
/* Write the entries of a table to fd as header lines and finish with an
 * empty line.
 *
 * fd:    file to write to.
 * table: entries to write; an entry whose key is NULL is skipped.
 *
 * Each line is key, colon and space, value, CRLF, written with one
 * apr_file_writev() call. A bare CRLF follows the last entry.
 */
842 static apr_status_t store_table(apr_file_t *fd, apr_table_t *table)
848 apr_table_entry_t *elts;
850 elts = (apr_table_entry_t *) apr_table_elts(table)->elts;
851 for (i = 0; i < apr_table_elts(table)->nelts; ++i) {
852 if (elts[i].key != NULL) {
853 iov[0].iov_base = elts[i].key;
854 iov[0].iov_len = strlen(elts[i].key);
855 iov[1].iov_base = ": ";
856 iov[1].iov_len = sizeof(": ") - 1;
857 iov[2].iov_base = elts[i].val;
858 iov[2].iov_len = strlen(elts[i].val);
859 iov[3].iov_base = CRLF;
860 iov[3].iov_len = sizeof(CRLF) - 1;
862 rv = apr_file_writev(fd, (const struct iovec *) &iov, 4,
864 if (rv != APR_SUCCESS) {
869 iov[0].iov_base = CRLF;
870 iov[0].iov_len = sizeof(CRLF) - 1;
871 rv = apr_file_writev(fd, (const struct iovec *) &iov, 1,
/* Write the header file of a cache entry, and a vary file when the response
 * has a Vary header, into temporary files.
 *
 * h:    handle for the entry; h->cache_obj->info is overwritten with *info.
 * r:    request supplying headers_in and headers_out.
 * info: metadata copied into the on-disk disk_cache_info_t record.
 *
 * Vary branch: a temporary vary file receives the format word
 * VARY_FORMAT_VERSION, info->expire and the sorted list of header names from
 * the Vary header. The key is then regenerated with regen_key(), the old
 * header path becomes dobj->prefix, and the data and header file names are
 * rebuilt from the new key.
 * Header file: the temporary file receives the disk_cache_info_t record, the
 * entity name, the headers from ap_cache_cacheable_headers_out() and those
 * from ap_cache_cacheable_headers_in(), and is then closed. A failed write
 * is logged as a warning and the temporary file is closed.
 *
 * NOTE(review): in the vary branch the status of mkdir_structure() is
 * overwritten by the next assignment, and the two apr_file_write() calls and
 * store_array() are called without their status being used in the visible
 * lines.
 */
876 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *info)
878 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
882 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
884 disk_cache_info_t disk_info;
887 /* This is flaky... we need to manage the cache_info differently */
888 h->cache_obj->info = *info;
890 if (r->headers_out) {
893 tmp = apr_table_get(r->headers_out, "Vary");
896 apr_array_header_t* varray;
897 apr_uint32_t format = VARY_FORMAT_VERSION;
899 /* If we were initially opened as a vary format, rollback
900 * that internal state for the moment so we can recreate the
901 * vary format hints in the appropriate directory.
904 dobj->hdrs.file = dobj->prefix;
908 rv = mkdir_structure(conf, dobj->hdrs.file, r->pool);
910 rv = apr_file_mktemp(&dobj->vary.tempfd, dobj->vary.tempfile,
911 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
914 if (rv != APR_SUCCESS) {
915 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
916 "disk_cache: could not create temp file %s",
917 dobj->vary.tempfile);
921 amt = sizeof(format);
922 apr_file_write(dobj->vary.tempfd, &format, &amt);
924 amt = sizeof(info->expire);
925 apr_file_write(dobj->vary.tempfd, &info->expire, &amt);
927 varray = apr_array_make(r->pool, 6, sizeof(char*));
928 tokens_to_array(r->pool, tmp, varray);
930 store_array(dobj->vary.tempfd, varray);
932 apr_file_close(dobj->vary.tempfd);
934 tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
935 dobj->prefix = dobj->hdrs.file;
936 dobj->hashfile = NULL;
937 dobj->data.file = data_file(r->pool, conf, dobj, tmp);
938 dobj->hdrs.file = header_file(r->pool, conf, dobj, tmp);
943 rv = apr_file_mktemp(&dobj->hdrs.tempfd, dobj->hdrs.tempfile,
944 APR_CREATE | APR_WRITE | APR_BINARY |
945 APR_BUFFERED | APR_EXCL, dobj->hdrs.pool);
947 if (rv != APR_SUCCESS) {
948 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
949 "disk_cache: could not create temp file %s",
950 dobj->hdrs.tempfile);
954 disk_info.format = DISK_FORMAT_VERSION;
955 disk_info.date = info->date;
956 disk_info.expire = info->expire;
957 disk_info.entity_version = dobj->disk_info.entity_version++;
958 disk_info.request_time = info->request_time;
959 disk_info.response_time = info->response_time;
960 disk_info.status = info->status;
962 disk_info.name_len = strlen(dobj->name);
964 iov[0].iov_base = (void*)&disk_info;
965 iov[0].iov_len = sizeof(disk_cache_info_t);
966 iov[1].iov_base = (void*)dobj->name;
967 iov[1].iov_len = disk_info.name_len;
969 rv = apr_file_writev(dobj->hdrs.tempfd, (const struct iovec *) &iov, 2, &amt);
970 if (rv != APR_SUCCESS) {
971 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
972 "disk_cache: could not write info to header file %s",
973 dobj->hdrs.tempfile);
974 apr_file_close(dobj->hdrs.tempfd);
978 if (r->headers_out) {
979 apr_table_t *headers_out;
981 headers_out = ap_cache_cacheable_headers_out(r);
983 rv = store_table(dobj->hdrs.tempfd, headers_out);
984 if (rv != APR_SUCCESS) {
985 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
986 "disk_cache: could not write out-headers to header file %s",
987 dobj->hdrs.tempfile);
988 apr_file_close(dobj->hdrs.tempfd);
993 /* Parse the vary header and dump those fields from the headers_in. */
994 /* FIXME: Make call to the same thing cache_select calls to crack Vary. */
996 apr_table_t *headers_in;
998 headers_in = ap_cache_cacheable_headers_in(r);
1000 rv = store_table(dobj->hdrs.tempfd, headers_in);
1001 if (rv != APR_SUCCESS) {
1002 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
1003 "disk_cache: could not write in-headers to header file %s",
1004 dobj->hdrs.tempfile);
1005 apr_file_close(dobj->hdrs.tempfd);
1010 apr_file_close(dobj->hdrs.tempfd); /* flush and close */
/* Copy body data from the in brigade into the data temp file while handing
 * the buckets on to out.
 *
 * h:   handle for the entry being stored.
 * r:   current request; supplies configuration, logging and headers_out.
 * in:  brigade holding the next part of the response body.
 * out: brigade that receives the buckets once they have been processed.
 *
 * The temp file is created when dobj->data.tempfd is not yet set, and
 * dobj->file_size is reset to 0. Processed buckets are parked in dobj->bb
 * and moved to out at an EOS or FLUSH bucket, on errors, and when a
 * per-call limit is hit. The limits come from the per-directory readsize
 * (byte budget in dobj->offset) and readtime (deadline in dobj->timeout).
 *
 * A failed bucket read, a failed write, or a file size above conf->maxfs
 * destroys dobj->data.pool and passes the parked buckets on. Once the body
 * is complete the temp file is closed and the entry is rejected when the
 * connection was aborted or r->no_cache is set, when the size is below
 * conf->minfs, or when the size differs from the Content-Length response
 * header.
 *
 * NOTE(review): the two error messages for a failed bucket read and a failed
 * cache write pass 0 as the status argument although rv holds the failure
 * code at that point.
 */
1015 static apr_status_t store_body(cache_handle_t *h, request_rec *r,
1016 apr_bucket_brigade *in, apr_bucket_brigade *out)
1019 apr_status_t rv = APR_SUCCESS;
1020 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1021 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
1022 &disk_cache_module);
1023 disk_cache_dir_conf *dconf = ap_get_module_config(r->per_dir_config, &disk_cache_module);
1026 /* We write to a temp file and then atomically rename the file over
1027 * in file_cache_el_final().
1029 if (!dobj->data.tempfd) {
1030 rv = apr_file_mktemp(&dobj->data.tempfd, dobj->data.tempfile,
1031 APR_CREATE | APR_WRITE | APR_BINARY |
1032 APR_BUFFERED | APR_EXCL, dobj->data.pool);
1033 if (rv != APR_SUCCESS) {
1036 dobj->file_size = 0;
1039 dobj->bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
1041 if (!dobj->offset) {
1042 dobj->offset = dconf->readsize;
1044 if (!dobj->timeout && dconf->readtime) {
1045 dobj->timeout = apr_time_now() + dconf->readtime;
1049 apr_brigade_partition(in, dobj->offset, &e);
1052 while (APR_SUCCESS == rv && !APR_BRIGADE_EMPTY(in)) {
1054 apr_size_t length, written;
1056 e = APR_BRIGADE_FIRST(in);
1058 /* are we done completely? if so, pass any trailing buckets right through */
1060 APR_BUCKET_REMOVE(e);
1061 APR_BRIGADE_INSERT_TAIL(out, e);
1065 /* have we seen eos yet? */
1066 if (APR_BUCKET_IS_EOS(e)) {
1069 APR_BUCKET_REMOVE(e);
1070 APR_BRIGADE_CONCAT(out, dobj->bb);
1071 APR_BRIGADE_INSERT_TAIL(out, e);
1075 /* honour flush buckets, we'll get called again */
1076 if (APR_BUCKET_IS_FLUSH(e)) {
1077 APR_BUCKET_REMOVE(e);
1078 APR_BRIGADE_CONCAT(out, dobj->bb);
1079 APR_BRIGADE_INSERT_TAIL(out, e);
1083 /* metadata buckets are preserved as is */
1084 if (APR_BUCKET_IS_METADATA(e)) {
1085 APR_BUCKET_REMOVE(e);
1086 APR_BRIGADE_INSERT_TAIL(dobj->bb, e);
1090 /* read the bucket, write to the cache */
1091 rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
1092 APR_BUCKET_REMOVE(e);
1093 APR_BRIGADE_INSERT_TAIL(dobj->bb, e);
1094 if (rv != APR_SUCCESS) {
1095 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1096 "disk_cache: Error when reading bucket for URL %s",
1098 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1099 apr_pool_destroy(dobj->data.pool);
1100 APR_BRIGADE_CONCAT(out, dobj->bb);
1104 /* write to the cache, leave if we fail */
1105 rv = apr_file_write_full(dobj->data.tempfd, str, length, &written);
1106 if (rv != APR_SUCCESS) {
1107 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1108 "disk_cache: Error when writing cache file for URL %s",
1110 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1111 apr_pool_destroy(dobj->data.pool);
1112 APR_BRIGADE_CONCAT(out, dobj->bb);
1115 dobj->file_size += written;
1116 if (dobj->file_size > conf->maxfs) {
1117 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1118 "disk_cache: URL %s failed the size check "
1119 "(%" APR_OFF_T_FMT ">%" APR_OFF_T_FMT ")",
1120 h->cache_obj->key, dobj->file_size, conf->maxfs);
1121 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1122 apr_pool_destroy(dobj->data.pool);
1123 APR_BRIGADE_CONCAT(out, dobj->bb);
1124 return APR_EGENERAL;
1127 /* have we reached the limit of how much we're prepared to write in one
1128 * go? If so, leave, we'll get called again. This prevents us from trying
1129 * to swallow too much data at once, or taking so long to write the data
1130 * the client times out.
1132 dobj->offset -= length;
1133 if (dobj->offset <= 0) {
1135 APR_BRIGADE_CONCAT(out, dobj->bb);
1138 if ((dconf->readtime && apr_time_now() > dobj->timeout)) {
1140 APR_BRIGADE_CONCAT(out, dobj->bb);
1146 /* Was this the final bucket? If yes, close the temp file and perform
1150 const char *cl_header = apr_table_get(r->headers_out, "Content-Length");
1152 apr_file_close(dobj->data.tempfd);
1154 if (r->connection->aborted || r->no_cache) {
1155 ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
1156 "disk_cache: Discarding body for URL %s "
1157 "because connection has been aborted.",
1159 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1160 apr_pool_destroy(dobj->data.pool);
1161 return APR_EGENERAL;
1163 if (dobj->file_size < conf->minfs) {
1164 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1165 "disk_cache: URL %s failed the size check "
1166 "(%" APR_OFF_T_FMT "<%" APR_OFF_T_FMT ")",
1167 h->cache_obj->key, dobj->file_size, conf->minfs);
1168 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1169 apr_pool_destroy(dobj->data.pool);
1170 return APR_EGENERAL;
1173 apr_int64_t cl = apr_atoi64(cl_header);
1174 if ((errno == 0) && (dobj->file_size != cl)) {
1175 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1176 "disk_cache: URL %s didn't receive complete response, not caching",
1178 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1179 apr_pool_destroy(dobj->data.pool);
1180 return APR_EGENERAL;
1184 /* All checks were fine, we're good to go when the commit comes */
/* Publish a stored entry by moving its temporary files into place.
 *
 * h: handle for the entry.
 * r: current request; supplies configuration, logging and the pool used
 *    when the entry has to be removed again.
 *
 * The header, vary and data files are finalised in that order with
 * file_cache_el_final(), each step only when the previous one succeeded. On
 * any failure the entry is removed with remove_url() and a debug message is
 * logged. dobj->data.pool is destroyed at the end.
 */
1190 static apr_status_t commit_entity(cache_handle_t *h, request_rec *r)
1192 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
1193 &disk_cache_module);
1194 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1197 /* move header and data tempfiles to the final destination */
1198 rv = file_cache_el_final(conf, &dobj->hdrs, r);
1199 if (APR_SUCCESS == rv) {
1200 rv = file_cache_el_final(conf, &dobj->vary, r);
1202 if (APR_SUCCESS == rv) {
1203 rv = file_cache_el_final(conf, &dobj->data, r);
1206 /* remove the cached items completely on any failure */
1207 if (APR_SUCCESS != rv) {
1208 remove_url(h, r->pool);
1209 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1210 "disk_cache: commit_entity: URL '%s' not cached due to earlier disk error.",
1214 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1215 "disk_cache: commit_entity: Headers and body for URL %s cached.",
1219 apr_pool_destroy(dobj->data.pool);
/* Allocate a zeroed per-directory configuration from p and set readsize and
 * readtime to DEFAULT_READSIZE and DEFAULT_READTIME. The dummy argument is
 * not used in the visible lines.
 */
1224 static void *create_dir_config(apr_pool_t *p, char *dummy)
1226 disk_cache_dir_conf *dconf = apr_pcalloc(p, sizeof(disk_cache_dir_conf));
1228 dconf->readsize = DEFAULT_READSIZE;
1229 dconf->readtime = DEFAULT_READTIME;
/* Merge two per-directory configurations into a new one allocated from p.
 *
 * basev: the base configuration.
 * addv:  the configuration layered on top of it.
 *
 * For readsize and readtime the value from add is taken when add has the
 * matching _set flag, otherwise the value from base. The merged _set flag
 * is true when either side has it set.
 */
1234 static void *merge_dir_config(apr_pool_t *p, void *basev, void *addv) {
1235 disk_cache_dir_conf *new = (disk_cache_dir_conf *) apr_pcalloc(p, sizeof(disk_cache_dir_conf));
1236 disk_cache_dir_conf *add = (disk_cache_dir_conf *) addv;
1237 disk_cache_dir_conf *base = (disk_cache_dir_conf *) basev;
1239 new->readsize = (add->readsize_set == 0) ? base->readsize : add->readsize;
1240 new->readsize_set = add->readsize_set || base->readsize_set;
1241 new->readtime = (add->readtime_set == 0) ? base->readtime : add->readtime;
1242 new->readtime_set = add->readtime_set || base->readtime_set;
/*
 * Create the per-server configuration (installed as the per-server creator
 * in the module declaration).
 *
 * The structure is allocated from pool p with apr_pcalloc() and filled with
 * the DEFAULT_* values for directory levels, directory name length and the
 * maximum/minimum file size. No cache root is set yet: the pointer is NULL
 * and its recorded length is 0.
 * The server_rec argument s is not referenced in the lines shown here.
 */
1247 static void *create_config(apr_pool_t *p, server_rec *s)
1249 disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
1251 /* XXX: Set default values */
1252 conf->dirlevels = DEFAULT_DIRLEVELS;
1253 conf->dirlength = DEFAULT_DIRLENGTH;
1254 conf->maxfs = DEFAULT_MAX_FILE_SIZE;
1255 conf->minfs = DEFAULT_MIN_FILE_SIZE;
/* cache_root stays unset until the CacheRoot handler stores a value */
1257 conf->cache_root = NULL;
1258 conf->cache_root_len = 0;
1264 * mod_disk_cache configuration directive handlers.
/*
 * Handler for the CacheRoot directive.
 *
 * Stores the argument pointer itself (no copy is made here) in the
 * per-server config of parms->server, together with its strlen().
 * in_struct_ptr is not referenced in the lines shown here.
 */
1267 *set_cache_root(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1269 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1270 &disk_cache_module);
1271 conf->cache_root = arg;
/* length is cached alongside the pointer so it need not be recomputed */
1272 conf->cache_root_len = strlen(arg);
1273 /* TODO: canonicalize cache_root and strip off any trailing slashes */
1279 * Consider eliminating the next two directives in favor of
1280 * Ian's prime number hash...
1281 * key = hash_fn( r->uri)
1282 * filename = "/key % prime1 /key %prime2/key %prime3"
1285 *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1287 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1288 &disk_cache_module);
1289 int val = atoi(arg);
1291 return "CacheDirLevels value must be an integer greater than 0";
1292 if (val * conf->dirlength > CACHEFILE_LEN)
1293 return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1294 conf->dirlevels = val;
1298 *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1300 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1301 &disk_cache_module);
1302 int val = atoi(arg);
1304 return "CacheDirLength value must be an integer greater than 0";
1305 if (val * conf->dirlevels > CACHEFILE_LEN)
1306 return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1308 conf->dirlength = val;
/*
 * Handler for the CacheMinFileSize directive.
 *
 * The argument is parsed by apr_strtoff() (base argument 0) straight into
 * conf->minfs of the per-server config, so the field is overwritten even
 * when the handler goes on to return its error message.
 * The end pointer passed is NULL, so characters following the number are
 * not inspected by this handler.
 */
1313 *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1315 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1316 &disk_cache_module);
1318 if (apr_strtoff(&conf->minfs, arg, NULL, 0) != APR_SUCCESS ||
1321 return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
/*
 * Handler for the CacheMaxFileSize directive.
 *
 * The argument is parsed by apr_strtoff() (base argument 0) straight into
 * conf->maxfs of the per-server config, so the field is overwritten even
 * when the handler goes on to return its error message.
 * The end pointer passed is NULL, so characters following the number are
 * not inspected by this handler.
 */
1327 *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1329 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1330 &disk_cache_module);
1331 if (apr_strtoff(&conf->maxfs, arg, NULL, 0) != APR_SUCCESS ||
1334 return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
/*
 * Handler for the CacheReadSize directive.
 *
 * Unlike the per-server handlers in this file, this one writes to the
 * structure passed as in_struct_ptr, treated as a disk_cache_dir_conf.
 * The argument is parsed by apr_strtoff() (base argument 0, NULL end
 * pointer) directly into dconf->readsize; a parse failure or a negative
 * result yields the error message. parms is not referenced in the lines
 * shown here.
 */
1340 *set_cache_readsize(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1342 disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1344 if (apr_strtoff(&dconf->readsize, arg, NULL, 0) != APR_SUCCESS ||
1345 dconf->readsize < 0)
1347 return "CacheReadSize argument must be a non-negative integer representing the max amount of data to cache in go.";
/* mark the value as explicitly configured; merge_dir_config() keys on this */
1349 dconf->readsize_set = 1;
/*
 * Handler for the CacheReadTime directive.
 *
 * Writes to the structure passed as in_struct_ptr, treated as a
 * disk_cache_dir_conf. The argument is parsed by apr_strtoff() (base
 * argument 0, NULL end pointer) into a local millisecond count, which is
 * then converted with apr_time_from_msec() before being stored.
 * parms is not referenced in the lines shown here.
 */
1354 *set_cache_readtime(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1356 disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1357 apr_off_t milliseconds;
1359 if (apr_strtoff(&milliseconds, arg, NULL, 0) != APR_SUCCESS ||
1362 return "CacheReadTime argument must be a non-negative integer representing the max amount of time taken to cache in go.";
/* the directive is given in milliseconds; readtime holds the converted value */
1364 dconf->readtime = apr_time_from_msec(milliseconds);
/* mark the value as explicitly configured; merge_dir_config() keys on this */
1365 dconf->readtime_set = 1;
/*
 * Directive table for this module: each entry binds a directive name to its
 * handler, all taking exactly one argument (AP_INIT_TAKE1) and all
 * registered with RSRC_CONF.
 *
 * NOTE(review): set_cache_readsize() and set_cache_readtime() store into the
 * per-directory structure they receive as in_struct_ptr, yet both entries
 * are registered with RSRC_CONF only -- confirm whether ACCESS_CONF was
 * meant to be allowed as well.
 */
1369 static const command_rec disk_cache_cmds[] =
1371 AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
1372 "The directory to store cache files"),
1373 AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
1374 "The number of levels of subdirectories in the cache"),
1375 AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
1376 "The number of characters in subdirectory names"),
1377 AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF,
1378 "The minimum file size to cache a document"),
1379 AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF,
1380 "The maximum file size to cache a document"),
1381 AP_INIT_TAKE1("CacheReadSize", set_cache_readsize, NULL, RSRC_CONF,
1382 "The maximum quantity of data to attempt to read and cache in one go"),
1383 AP_INIT_TAKE1("CacheReadTime", set_cache_readtime, NULL, RSRC_CONF,
1384 "The maximum time taken to attempt to read and cache in go"),
/* Cache provider object for this module; disk_cache_register_hook()
 * registers its address under the provider name "disk". */
1388 static const cache_provider cache_disk_provider =
/*
 * Hook registration callback (last entry of the module declaration).
 * Its only visible action is to register cache_disk_provider in
 * CACHE_PROVIDER_GROUP under the name "disk", version "0", using pool p.
 */
1401 static void disk_cache_register_hook(apr_pool_t *p)
1403 /* cache initializer */
1404 ap_register_provider(p, CACHE_PROVIDER_GROUP, "disk", "0",
1405 &cache_disk_provider);
/*
 * Module declaration: wires the configuration creators/merger, the directive
 * table and the hook registration function defined in this file into the
 * server. No per-server merge function is supplied (NULL).
 */
1408 AP_DECLARE_MODULE(disk_cache) = {
1409 STANDARD20_MODULE_STUFF,
1410 create_dir_config, /* create per-directory config structure */
1411 merge_dir_config, /* merge per-directory config structures */
1412 create_config, /* create per-server config structure */
1413 NULL, /* merge per-server config structures */
1414 disk_cache_cmds, /* command apr_table_t */
1415 disk_cache_register_hook /* register hooks */