1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "mod_cache.h"
19 #include "cache_storage.h"
20 #include "cache_util.h"
/*
 * File-scope declarations shared by the functions in this file.
 * NOTE(review): APLOG_USE_MODULE is a macro defined outside this file;
 * presumably it associates the ap_log_error() calls below with the cache
 * module for per-module log levels - confirm against http_log.h.
 */
22 APLOG_USE_MODULE(cache);
/*
 * Pointer to the optional key-generation function. It is declared extern
 * here and is invoked below as cache_generate_key(r, r->pool, &cache->key).
 */
24 extern APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
/* Module record of the cache module; declared extern, defined elsewhere. */
26 extern module AP_MODULE_DECLARE_DATA cache_module;
28 /* -------------------------------------------------------------- */
31 * delete all URL entities from the cache
/*
 * cache_remove_url - ask the cache provider(s) to drop a cached URL.
 *
 * cache: per-request cache state; supplies the provider list
 *        (cache->providers) and the handle to remove.
 * p:     pool handed through to the provider remove_url() hook.
 *
 * The handle that gets removed is cache->stale_handle when that is set,
 * otherwise cache->handle. The key of the chosen handle is logged at debug
 * level (with a NULL server argument) before remove_url() is invoked.
 *
 * NOTE(review): this listing is missing lines (the embedded line numbers
 * skip values). The declaration of h, any NULL check on h, the loop over
 * the provider list and the return statement are not visible here; confirm
 * them against the complete file before relying on this description.
 */
34 int cache_remove_url(cache_request_rec *cache, apr_pool_t *p)
36 cache_provider_list *list;
39 list = cache->providers;
41 /* Remove the stale cache entry if present. If not, we're
42 * being called from outside of a request; remove the
45 h = cache->stale_handle ? cache->stale_handle : cache->handle;
49 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
50 "cache: Removing url %s from the cache", h->cache_obj->key);
52 /* for each specified cache type, delete the URL */
54 list->provider->remove_url(h, p);
62 * create a new URL entity in the cache
64 * It is possible to store more than one entity per URL. This
65 * function will always create a new entity, regardless of whether
66 * other entities already exist for the same URL.
68 * The size of the entity is provided so that a cache module can
69 * decide whether or not it wants to cache this particular entity.
70 * If the size is unknown, a size of -1 should be set.
/*
 * cache_create_entity - have a cache provider create a new entity for the
 * current request.
 *
 * cache: per-request cache state; receives the generated key and, on the
 *        path shown near the end of this block, the provider and
 *        provider_name that were used.
 * r:     the request; its pool backs the new handle and the key.
 * size:  entity size, passed unchanged to the provider create_entity() hook.
 * in:    bucket brigade, passed unchanged to the provider create_entity()
 *        hook.
 *
 * Visible steps: allocate a zero-filled cache_handle_t from r->pool, log an
 * error for the case where no cache request information is available,
 * build cache->key with cache_generate_key(), then call create_entity() on
 * a provider taken from cache->providers and switch on its result.
 *
 * NOTE(review): this listing is missing lines (the embedded line numbers
 * skip values). The condition guarding the error log, the declaration of
 * rv, the provider loop, the case labels of the switch and every return
 * statement are not visible here; confirm them against the complete file.
 */
72 int cache_create_entity(cache_request_rec *cache, request_rec *r,
73 apr_off_t size, apr_bucket_brigade *in)
75 cache_provider_list *list;
76 cache_handle_t *h = apr_pcalloc(r->pool, sizeof(cache_handle_t));
80 /* This should never happen */
81 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,
82 "cache: No cache request information available for key"
/* The key is generated from the request and stored in cache->key. */
87 rv = cache_generate_key(r, r->pool, &cache->key);
88 if (rv != APR_SUCCESS) {
92 list = cache->providers;
93 /* for each specified cache type, try to create the entity */
95 switch (rv = list->provider->create_entity(h, r, cache->key, size, in)) {
/* Record the provider and its name on the per-request cache state. */
98 cache->provider = list->provider;
99 cache->provider_name = list->provider_name;
/*
 * set_cookie_doo_doo - callback handed to apr_table_do() by
 * cache_accept_headers().
 *
 * v:   destination table (passed as the opaque callback argument).
 * key: header name of the entry being visited.
 * val: header value of the entry being visited.
 *
 * Adds the pair to the destination table with apr_table_addn(), which
 * stores the key and value pointers without copying them and keeps
 * duplicate keys as separate entries.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
114 static int set_cookie_doo_doo(void *v, const char *key, const char *val)
116 apr_table_addn(v, key, val);
121 * Take headers from the cache, and overlap them over the existing response
/*
 * cache_accept_headers - merge cached response headers into the response
 * of request r.
 *
 * h: cache handle whose resp_hdrs table holds the cached headers. This
 *    function modifies h->resp_hdrs: Content-Type, Last-Modified and
 *    Set-Cookie are unset from it.
 * r: request whose content type, mtime, headers_out and err_headers_out
 *    are updated.
 * NOTE(review): the line carrying the remaining parameter(s) is missing
 * from this listing; cache_select() calls this function with a third
 * argument of 0.
 *
 * Visible processing order:
 *  1. Content-Type from the cache is applied with ap_set_content_type()
 *     and then removed from h->resp_hdrs, r->headers_out and
 *     r->err_headers_out.
 *  2. Last-Modified from the cache is parsed with apr_date_parse_http(),
 *     fed through ap_update_mtime() and ap_set_last_modified(), and then
 *     removed from h->resp_hdrs.
 *  3. Entries from r->err_headers_out and h->resp_hdrs are collected into a
 *     separate table via set_cookie_doo_doo() so that they are not merged.
 *     The key arguments of the apr_table_do() calls are cut off here;
 *     judging by the unset calls that follow, the key is Set-Cookie.
 *  4. The remaining headers are combined with APR_OVERLAP_TABLES_SET in one
 *     of two ways: either a copy of h->resp_hdrs is overlapped with
 *     r->headers_out and becomes the new r->headers_out, or h->resp_hdrs is
 *     overlapped onto r->headers_out directly. The condition choosing
 *     between the two is not visible - presumably the unseen parameter.
 *  5. If the collected table is not empty, r->err_headers_out is rebuilt
 *     with apr_table_overlay().
 *
 * NOTE(review): this listing is missing lines (the embedded line numbers
 * skip values), including the declaration of v and every if-condition.
 */
124 void cache_accept_headers(cache_handle_t *h, request_rec *r,
127 apr_table_t *cookie_table, *hdr_copy;
130 v = apr_table_get(h->resp_hdrs, "Content-Type");
132 ap_set_content_type(r, v);
133 apr_table_unset(h->resp_hdrs, "Content-Type");
135 * Also unset possible Content-Type headers in r->headers_out and
136 * r->err_headers_out as they may be different to what we have received
138 * Actually they are not needed as r->content_type set by
139 * ap_set_content_type above will be used in the store_headers functions
140 * of the storage providers as a fallback and the HTTP_HEADER filter
141 * does overwrite the Content-Type header with r->content_type anyway.
143 apr_table_unset(r->headers_out, "Content-Type");
144 apr_table_unset(r->err_headers_out, "Content-Type");
147 /* If the cache gave us a Last-Modified header, we can't just
148 * pass it on blindly because of restrictions on future values.
150 v = apr_table_get(h->resp_hdrs, "Last-Modified");
152 ap_update_mtime(r, apr_date_parse_http(v));
153 ap_set_last_modified(r);
154 apr_table_unset(h->resp_hdrs, "Last-Modified");
157 /* The HTTP specification says that it is legal to merge duplicate
158 * headers into one. Some browsers that support Cookies don't like
159 * merged headers and prefer that each Set-Cookie header is sent
160 * separately. Lets humour those browsers by not merging.
161 * Oh what a pain it is.
163 cookie_table = apr_table_make(r->pool, 2);
164 apr_table_do(set_cookie_doo_doo, cookie_table, r->err_headers_out,
166 apr_table_do(set_cookie_doo_doo, cookie_table, h->resp_hdrs,
168 apr_table_unset(r->err_headers_out, "Set-Cookie");
169 apr_table_unset(h->resp_hdrs, "Set-Cookie");
172 hdr_copy = apr_table_copy(r->pool, h->resp_hdrs);
173 apr_table_overlap(hdr_copy, r->headers_out, APR_OVERLAP_TABLES_SET);
174 r->headers_out = hdr_copy;
177 apr_table_overlap(r->headers_out, h->resp_hdrs, APR_OVERLAP_TABLES_SET);
179 if (!apr_is_empty_table(cookie_table)) {
180 r->err_headers_out = apr_table_overlay(r->pool, r->err_headers_out,
186 * select a specific URL entity in the cache
188 * It is possible to store more than one entity per URL. Content
189 * negotiation is used to select an entity. Once an entity is
190 * selected, details of it are stored in the per request
191 * config to save time when serving the request later.
193 * This function returns OK if successful, DECLINED if no
194 * cached entity fits the bill.
/*
 * cache_select - look for a usable cached entity for request r.
 *
 * cache: per-request cache state; receives the key, the chosen provider
 *        and provider_name and, for a stale entry that can be revalidated,
 *        stale_headers and stale_handle.
 * r:     the request; r->headers_in may be rewritten (see below).
 *
 * Visible steps:
 *  - log an error for the case where no cache request information is
 *    available;
 *  - build cache->key with cache_generate_key();
 *  - consult ap_cache_check_allowed(r);
 *  - allocate a handle from r->pool and call open_entity() on a provider
 *    from cache->providers, switching on its result;
 *  - call recall_headers() on the provider;
 *  - walk the names listed in the cached Vary response header and compare
 *    the value of each in r->headers_in with the value in h->req_hdrs; a
 *    mismatch is logged at debug level;
 *  - record the provider and its name in cache;
 *  - evaluate cache_check_freshness(h, cache, r);
 *  - for a response that is not fresh: save a table copy in
 *    cache->stale_headers, remove If-Match, If-Modified-Since,
 *    If-None-Match, If-Range, If-Unmodified-Since and Range from
 *    r->headers_in, and, when the cached response has an ETag and/or
 *    Last-Modified, set If-None-Match / If-Modified-Since and remember the
 *    handle in cache->stale_handle; otherwise the entry is removed through
 *    the provider remove_url() hook and a failure is logged at debug level;
 *  - for a fresh response: merge the cached headers with
 *    cache_accept_headers(h, r, 0).
 *
 * NOTE(review): the debug message for a Vary mismatch names
 * cache_select_url(), which is not the name of this function. It is left
 * unchanged here because it is runtime text.
 *
 * NOTE(review): this listing is missing lines (the embedded line numbers
 * skip values). Local declarations, the provider loop, the case labels of
 * the switch, most if-conditions and every return statement are not
 * visible; the comment preceding this function states the intended results
 * (OK on success, DECLINED if nothing fits) - confirm against the complete
 * file.
 */
196 int cache_select(cache_request_rec *cache, request_rec *r)
198 cache_provider_list *list;
203 /* This should never happen */
204 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,
205 "cache: No cache request information available for key"
210 rv = cache_generate_key(r, r->pool, &cache->key);
211 if (rv != APR_SUCCESS) {
215 if (!ap_cache_check_allowed(r)) {
219 /* go through the cache types till we get a match */
/*
 * NOTE(review): apr_palloc() does not zero the memory it returns, whereas
 * cache_create_entity() allocates its handle with apr_pcalloc(). Confirm
 * that every field read later is set by open_entity() / recall_headers().
 */
220 h = apr_palloc(r->pool, sizeof(cache_handle_t));
222 list = cache->providers;
225 switch ((rv = list->provider->open_entity(h, r, cache->key))) {
230 if (list->provider->recall_headers(h, r) != APR_SUCCESS) {
231 /* TODO: Handle this error */
236 * Check Content-Negotiation - Vary
238 * At this point we need to make sure that the object we found in
239 * the cache is the same object that would be delivered to the
240 * client, when the effects of content negotiation are taken into
243 * In plain english, we want to make sure that a language-negotiated
244 * document in one language is not given to a client asking for a
245 * language negotiated document in a different language by mistake.
247 * This code makes the assumption that the storage manager will
248 * cache the req_hdrs if the response contains a Vary
251 * RFC2616 13.6 and 14.44 describe the Vary mechanism.
253 vary = apr_pstrdup(r->pool, apr_table_get(h->resp_hdrs, "Vary"));
254 while (vary && *vary) {
258 /* isolate header name */
259 while (*vary && !apr_isspace(*vary) && (*vary != ','))
261 while (*vary && (apr_isspace(*vary) || (*vary == ','))) {
267 * is this header in the request and the header in the cached
268 * request identical? If not, we give up and do a straight get
270 h1 = apr_table_get(r->headers_in, name);
271 h2 = apr_table_get(h->req_hdrs, name);
273 /* both headers NULL, so a match - do nothing */
275 else if (h1 && h2 && !strcmp(h1, h2)) {
276 /* both headers exist and are equal - do nothing */
279 /* headers do not match, so Vary failed */
280 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
282 "cache_select_url(): Vary header mismatch.");
287 cache->provider = list->provider;
288 cache->provider_name = list->provider_name;
290 /* Is our cached response fresh enough? */
291 fresh = cache_check_freshness(h, cache, r);
293 const char *etag, *lastmod;
295 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
296 "Cached response for %s isn't fresh. Adding/replacing "
297 "conditional request headers.", r->uri);
299 /* Make response into a conditional */
300 cache->stale_headers = apr_table_copy(r->pool,
303 /* We can only revalidate with our own conditionals: remove the
304 * conditions from the original request.
306 apr_table_unset(r->headers_in, "If-Match");
307 apr_table_unset(r->headers_in, "If-Modified-Since");
308 apr_table_unset(r->headers_in, "If-None-Match");
309 apr_table_unset(r->headers_in, "If-Range");
310 apr_table_unset(r->headers_in, "If-Unmodified-Since");
313 * Do not do Range requests with our own conditionals: If
314 * we get 304 the Range does not matter and otherwise the
315 * entity changed and we want to have the complete entity
317 apr_table_unset(r->headers_in, "Range");
319 etag = apr_table_get(h->resp_hdrs, "ETag");
320 lastmod = apr_table_get(h->resp_hdrs, "Last-Modified");
322 if (etag || lastmod) {
323 /* If we have a cached etag and/or Last-Modified add in
324 * our own conditionals.
328 apr_table_set(r->headers_in, "If-None-Match", etag);
332 apr_table_set(r->headers_in, "If-Modified-Since",
335 cache->stale_handle = h;
341 * The copy isn't fresh enough, but we cannot revalidate.
342 * So it is the same case as if there had not been a cached
343 * entry at all. Thus delete the entry from cache.
345 irv = cache->provider->remove_url(h, r->pool);
347 ap_log_error(APLOG_MARK, APLOG_DEBUG, irv, r->server,
348 "cache: attempt to remove url from cache unsuccessful.");
355 /* Okay, this response looks okay. Merge in our stuff and go. */
356 cache_accept_headers(h, r, 0);
362 /* try again with next cache type */
367 /* oo-er! an error */
/*
 * cache_generate_key_default - build the cache key string for a request.
 *
 * r: request whose URI, proxy state and server configuration drive the key.
 * p: pool used for every string allocated here.
 * NOTE(review): the line carrying the output parameter is missing from this
 * listing; the body stores the result through *key.
 *
 * The key is assembled with apr_pstrcat() from scheme, the literal
 * separator after the scheme, hostname, port_str and further arguments.
 * Two variants are visible, selected by conf->ignorequerystring: the
 * arguments of the first are cut off in this listing, the second appends
 * path, a question mark and querystring. The components come from:
 *  - hostname: ap_get_server_name(r) for non-proxy and reverse-proxy
 *    requests, a pool copy of r->parsed_uri.hostname for other proxy
 *    requests that carry one, and the literal _default_ as fallback;
 *  - scheme: a pool copy of r->parsed_uri.scheme when this is a proxy
 *    request that carries one, otherwise ap_http_scheme(r);
 *  - port_str: for proxy requests that are not reverse-proxy requests, a
 *    lower-cased copy of r->parsed_uri.port_str or the value of
 *    apr_uri_port_of_scheme(scheme); otherwise ap_get_server_port(r);
 *  - path / querystring: the identifiers listed in conf->ignore_session_id
 *    are cut out, both as a trailing path parameter introduced by a
 *    semicolon and as a query-string parameter.
 *
 * The resulting key is logged at debug level (with a NULL server argument)
 * together with r->uri and r->parsed_uri.query.
 *
 * NOTE(review): this listing is missing lines (the embedded line numbers
 * skip values). Several declarations, the early-exit for a key that was
 * already generated, the initial assignment of path, the lower-casing
 * calls, many if/else lines and the return statement are not visible;
 * confirm them against the complete file.
 */
375 apr_status_t cache_generate_key_default(request_rec *r, apr_pool_t* p,
378 cache_server_conf *conf;
379 char *port_str, *hn, *lcs;
380 const char *hostname, *scheme;
382 char *path, *querystring;
386 * We have been here before during the processing of this request.
392 * Get the module configuration. We need this for the CacheIgnoreQueryString
395 conf = (cache_server_conf *) ap_get_module_config(r->server->module_config,
399 * Use the canonical name to improve cache hit rate, but only if this is
400 * not a proxy request or if this is a reverse proxy request.
401 * We need to handle both cases in the same manner as for the reverse proxy
402 * case we have the following situation:
404 * If a cached entry is looked up by mod_cache's quick handler r->proxyreq
405 * is still unset in the reverse proxy case as it only gets set in the
406 * translate name hook (either by ProxyPass or mod_rewrite) which is run
407 * after the quick handler hook. This is different to the forward proxy
408 * case where it gets set before the quick handler is run (in the
409 * post_read_request hook).
410 * If a cache entry is created by the CACHE_SAVE filter we always have
411 * r->proxyreq set correctly.
412 * So we must ensure that in the reverse proxy case we use the same code
413 * path and using the canonical name seems to be the right thing to do
414 * in the reverse proxy case.
416 if (!r->proxyreq || (r->proxyreq == PROXYREQ_REVERSE)) {
417 /* Use _default_ as the hostname if none present, as in mod_vhost */
418 hostname = ap_get_server_name(r);
420 hostname = "_default_";
423 else if(r->parsed_uri.hostname) {
424 /* Copy the parsed uri hostname */
425 hn = apr_pstrdup(p, r->parsed_uri.hostname);
427 /* const work-around */
431 /* We are a proxied request, with no hostname. Unlikely
432 * to get very far - but just in case */
433 hostname = "_default_";
437 * Copy the scheme, ensuring that it is lower case. If the parsed uri
438 * contains no string or if this is not a proxy request get the http
439 * scheme for this request. As r->parsed_uri.scheme is not set if this
440 * is a reverse proxy request, it is ensured that the cases
441 * "no proxy request" and "reverse proxy request" are handled in the same
442 * manner (see above why this is needed).
444 if (r->proxyreq && r->parsed_uri.scheme) {
445 /* Copy the scheme and lower-case it */
446 lcs = apr_pstrdup(p, r->parsed_uri.scheme);
448 /* const work-around */
452 scheme = ap_http_scheme(r);
456 * If this is a proxy request, but not a reverse proxy request (see comment
457 * above why these cases must be handled in the same manner), copy the
458 * URI's port-string (which may be a service name). If the URI contains
459 * no port-string, use apr-util's notion of the default port for that
460 * scheme - if available. Otherwise use the port-number of the current
463 if(r->proxyreq && (r->proxyreq != PROXYREQ_REVERSE)) {
464 if (r->parsed_uri.port_str) {
/*
 * Two bytes more than the source string: the copy loop below writes from
 * index 1, leaving index 0 free, and the zero-filled allocation supplies
 * the terminating NUL. Index 0 is presumably meant for the colon
 * separator; that assignment is not visible in this listing.
 */
465 port_str = apr_pcalloc(p, strlen(r->parsed_uri.port_str) + 2);
467 for (i = 0; r->parsed_uri.port_str[i]; i++) {
468 port_str[i + 1] = apr_tolower(r->parsed_uri.port_str[i]);
471 else if (apr_uri_port_of_scheme(scheme)) {
472 port_str = apr_psprintf(p, ":%u", apr_uri_port_of_scheme(scheme));
475 /* No port string given in the AbsoluteUri, and we have no
476 * idea what the default port for the scheme is. Leave it
477 * blank and live with the inefficiency of some extra cached
484 /* Use the server port */
485 port_str = apr_psprintf(p, ":%u", ap_get_server_port(r));
489 * Check if we need to ignore session identifiers in the URL and do so
493 querystring = r->parsed_uri.query;
494 if (conf->ignore_session_id->nelts) {
498 identifier = (char **)conf->ignore_session_id->elts;
499 for (i = 0; i < conf->ignore_session_id->nelts; i++, identifier++) {
503 len = strlen(*identifier);
505 * Check that we have a parameter separator in the last segment
506 * of the path and that the parameter matches our identifier
508 if ((param = strrchr(path, ';'))
509 && !strncmp(param + 1, *identifier, len)
510 && (*(param + len + 1) == '=')
511 && !strchr(param + len + 2, '/')) {
512 path = apr_pstrndup(p, path, param - path);
516 * Check if the identifier is in the querystring and cut it out.
520 * First check if the identifier is at the beginning of the
521 * querystring and followed by a '='
523 if (!strncmp(querystring, *identifier, len)
524 && (*(querystring + len) == '=')) {
531 * In order to avoid subkey matching (PR 48401) prepend
532 * identifier with a '&' and append a '='
534 complete = apr_pstrcat(p, "&", *identifier, "=", NULL);
535 param = strstr(querystring, complete);
536 /* If we found something we are sitting on the '&' */
544 if (querystring != param) {
545 querystring = apr_pstrndup(p, querystring,
546 param - querystring);
552 if ((amp = strchr(param + len + 1, '&'))) {
553 querystring = apr_pstrcat(p, querystring, amp + 1, NULL);
557 * If querystring is not "", then we have the case
558 * that the identifier parameter we removed was the
559 * last one in the original querystring. Hence we have
560 * a trailing '&' which needs to be removed.
563 querystring[strlen(querystring) - 1] = '\0';
571 /* Key format is a URI, optionally without the query-string */
572 if (conf->ignorequerystring) {
573 *key = apr_pstrcat(p, scheme, "://", hostname, port_str,
577 *key = apr_pstrcat(p, scheme, "://", hostname, port_str,
578 path, "?", querystring, NULL);
582 * Store the key in the request_config for the cache as r->parsed_uri
583 * might have changed in the time from our first visit here triggered by the
584 * quick handler and our possible second visit triggered by the CACHE_SAVE
585 * filter (e.g. r->parsed_uri got unescaped). In this case we would save the
586 * resource in the cache under a key where it is never found by the quick
587 * handler during following requests.
589 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
590 "cache: Key for entity %s?%s is %s", r->uri,
591 r->parsed_uri.query, *key);