From 852989856d3802a9e7bd2f1e368302d92ddf66e2 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Wed, 30 Apr 2008 21:20:08 +0000 Subject: [PATCH] - To make it easier for applications that want lots of magic stuff done on redirections and thus cannot use CURLOPT_FOLLOWLOCATION easily, we now introduce the new CURLINFO_REDIRECT_URL option that lets applications extract the URL libcurl would've redirected to if it had been told to. This then enables the application to continue to that URL as it thinks is suitable, without having to re-implement the magic of creating the new URL from the Location: header etc. Test 1029 verifies it. --- CHANGES | 9 ++ RELEASE-NOTES | 1 + docs/curl.1 | 8 +- docs/libcurl/curl_easy_getinfo.3 | 8 +- include/curl/curl.h | 3 +- lib/getinfo.c | 7 +- lib/multi.c | 6 +- lib/transfer.c | 140 +++++++++++++++++++------------ lib/transfer.h | 17 +++- lib/url.c | 5 ++ lib/urldata.h | 18 ++-- src/writeout.c | 11 ++- tests/data/Makefile.am | 3 +- tests/data/test1029 | 56 +++++++++++++ 14 files changed, 222 insertions(+), 70 deletions(-) create mode 100644 tests/data/test1029 diff --git a/CHANGES b/CHANGES index ac6bdcdf4..fc3f3f0e9 100644 --- a/CHANGES +++ b/CHANGES @@ -7,6 +7,15 @@ Changelog +Daniel Stenberg (29 Apr 2008) +- To make it easier for applications that want lots of magic stuff done on + redirections and thus cannot use CURLOPT_FOLLOWLOCATION easily, we now + introduce the new CURLINFO_REDIRECT_URL option that lets applications + extract the URL libcurl would've redirected to if it had been told to. This + then enables the application to continue to that URL as it thinks is + suitable, without having to re-implement the magic of creating the new URL + from the Location: header etc. Test 1029 verifies it. 
+ Yang Tse (29 Apr 2008) - Improved easy interface resolving timeout handling in c-ares enabled builds diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 13bd9e716..3a4fffeba 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -13,6 +13,7 @@ This release includes the following changes: o CURLFORM_STREAM was added o CURLOPT_NOBODY is now supported over SFTP o curl can now run on Symbian OS + o curl -w redirect_url and CURLINFO_REDIRECT_URL This release includes the following bugfixes: diff --git a/docs/curl.1 b/docs/curl.1 index f5e49dc05..98bfc198c 100644 --- a/docs/curl.1 +++ b/docs/curl.1 @@ -1288,7 +1288,9 @@ The URL that was fetched last. This is mostly meaningful if you've told curl to follow location: headers. .TP .B http_code -The numerical code that was found in the last retrieved HTTP(S) page. +The numerical response code that was found in the last retrieved HTTP(S) or +FTP(S) transfer. In 7.18.2 the alias \fBresponse_code\fP was added to show the +same info. .TP .B http_connect The numerical code that was found in the last response (from a proxy) to a @@ -1349,6 +1351,10 @@ Number of new connects made in the recent transfer. (Added in 7.12.3) .B num_redirects Number of redirects that were followed in the request. (Added in 7.12.3) .TP +.B redirect_url +When an HTTP request was made without -L to follow redirects, this variable +will show the actual URL a redirect \fIwould\fP take you to. (Added in 7.18.2) +.TP .B ftp_entry_path The initial path libcurl ended up in when logging on to the remote FTP server. (Added in 7.15.4) diff --git a/docs/libcurl/curl_easy_getinfo.3 b/docs/libcurl/curl_easy_getinfo.3 index 95455e3a1..be0f060d4 100644 --- a/docs/libcurl/curl_easy_getinfo.3 +++ b/docs/libcurl/curl_easy_getinfo.3 @@ -5,7 +5,7 @@ .\" * | (__| |_| | _ <| |___ .\" * \___|\___/|_| \_\_____| .\" * -.\" * Copyright (C) 1998 - 2007, Daniel Stenberg, , et al. +.\" * Copyright (C) 1998 - 2008, Daniel Stenberg, , et al.
.\" * .\" * This software is licensed as described in the file COPYING, which .\" * you should have received as part of this distribution. The terms @@ -89,6 +89,12 @@ complete execution time for multiple redirections. (Added in 7.9.7) .IP CURLINFO_REDIRECT_COUNT Pass a pointer to a long to receive the total number of redirections that were actually followed. (Added in 7.9.7) +.IP CURLINFO_REDIRECT_URL +Pass a pointer to a char pointer to receive the URL a redirect \fIwould\fP +take you to if you would enable CURLOPT_FOLLOWLOCATION. This can come in very +handy if you think using the built-in libcurl redirect logic isn't good enough +for you but you would still prefer to avoid implementing all the magic of +figuring out the new URL. (Added in 7.18.2) .IP CURLINFO_SIZE_UPLOAD Pass a pointer to a double to receive the total amount of bytes that were uploaded. diff --git a/include/curl/curl.h b/include/curl/curl.h index 8b7268188..6fcc34faf 100644 --- a/include/curl/curl.h +++ b/include/curl/curl.h @@ -1587,9 +1587,10 @@ typedef enum { CURLINFO_COOKIELIST = CURLINFO_SLIST + 28, CURLINFO_LASTSOCKET = CURLINFO_LONG + 29, CURLINFO_FTP_ENTRY_PATH = CURLINFO_STRING + 30, + CURLINFO_REDIRECT_URL = CURLINFO_STRING + 31, /* Fill in new entries below here! */ - CURLINFO_LASTONE = 30 + CURLINFO_LASTONE = 31 } CURLINFO; /* CURLINFO_RESPONSE_CODE is the new name for the option previously known as diff --git a/lib/getinfo.c b/lib/getinfo.c index b3ef2f649..145a71b22 100644 --- a/lib/getinfo.c +++ b/lib/getinfo.c @@ -5,7 +5,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2007, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2008, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -231,6 +231,11 @@ CURLcode Curl_getinfo(struct SessionHandle *data, CURLINFO info, ...)
else *param_longp = -1; break; + case CURLINFO_REDIRECT_URL: + /* Return the URL this request would have been redirected to if that + option had been enabled! */ + *param_charp = data->info.wouldredirect; + break; default: return CURLE_BAD_FUNCTION_ARGUMENT; } diff --git a/lib/multi.c b/lib/multi.c index 73e8e7e3d..df287129b 100644 --- a/lib/multi.c +++ b/lib/multi.c @@ -1262,6 +1262,7 @@ static CURLMcode multi_runsingle(struct Curl_multi *multi, else if(TRUE == done) { char *newurl; bool retry = Curl_retry_request(easy->easy_conn, &newurl); + followtype follow=FOLLOW_NONE; /* call this even if the readwrite function returned error */ Curl_posttransfer(easy->easy_handle); @@ -1278,10 +1279,13 @@ static CURLMcode multi_runsingle(struct Curl_multi *multi, then figure out the URL here */ newurl = easy->easy_handle->req.newurl; easy->easy_handle->req.newurl = NULL; + follow = FOLLOW_REDIR; } + else + follow = FOLLOW_RETRY; easy->result = Curl_done(&easy->easy_conn, CURLE_OK, FALSE); if(easy->result == CURLE_OK) - easy->result = Curl_follow(easy->easy_handle, newurl, retry); + easy->result = Curl_follow(easy->easy_handle, newurl, follow); if(CURLE_OK == easy->result) { multistate(easy, CURLM_STATE_CONNECT); result = CURLM_CALL_MULTI_PERFORM; diff --git a/lib/transfer.c b/lib/transfer.c index 0856d2a55..0ebe19652 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -250,7 +250,7 @@ CURLcode Curl_readrewind(struct connectdata *conn) err = (data->set.seek_func)(data->set.seek_client, 0, SEEK_SET); if(err) { - failf(data, "seek callback returned error %d", (int)err); + failf(data, "seek callback returned error %d", (int)err); return CURLE_SEND_FAIL_REWIND; } } @@ -1113,34 +1113,37 @@ CURLcode Curl_readwrite(struct connectdata *conn, } else if((k->httpcode >= 300 && k->httpcode < 400) && checkprefix("Location:", k->p)) { - if(data->set.http_follow_location) { - /* this is the URL that the server advices us to get instead */ - char *ptr; - char *start=k->p; - char backup; - - 
start += 9; /* pass "Location:" */ - - /* Skip spaces and tabs. We do this to support multiple - white spaces after the "Location:" keyword. */ - while(*start && ISSPACE(*start )) - start++; - - /* Scan through the string from the end to find the last - non-space. k->end_ptr points to the actual terminating zero - letter, move pointer one letter back and start from - there. This logic strips off trailing whitespace, but keeps - any embedded whitespace. */ - ptr = k->end_ptr-1; - while((ptr>=start) && ISSPACE(*ptr)) - ptr--; - ptr++; - - backup = *ptr; /* store the ending letter */ - if(ptr != start) { - *ptr = '\0'; /* zero terminate */ - data->req.newurl = strdup(start); /* clone string */ - *ptr = backup; /* restore ending letter */ + /* this is the URL that the server advices us to use instead */ + char *ptr; + char *start=k->p; + char backup; + + start += 9; /* pass "Location:" */ + + /* Skip spaces and tabs. We do this to support multiple + white spaces after the "Location:" keyword. */ + while(*start && ISSPACE(*start )) + start++; + + /* Scan through the string from the end to find the last + non-space. k->end_ptr points to the actual terminating zero + letter, move pointer one letter back and start from + there. This logic strips off trailing whitespace, but keeps + any embedded whitespace. 
*/ + ptr = k->end_ptr-1; + while((ptr>=start) && ISSPACE(*ptr)) + ptr--; + ptr++; + + backup = *ptr; /* store the ending letter */ + if(ptr != start) { + *ptr = '\0'; /* zero terminate */ + data->req.location = strdup(start); /* clone string */ + *ptr = backup; /* restore ending letter */ + if(!data->req.location) + return CURLE_OUT_OF_MEMORY; + if(data->set.http_follow_location) { + data->req.newurl = strdup(data->req.location); /* clone */ if(!data->req.newurl) return CURLE_OUT_OF_MEMORY; } @@ -1969,16 +1972,16 @@ CURLcode Curl_follow(struct SessionHandle *data, char *newurl, /* this 'newurl' is the Location: string, and it must be malloc()ed before passed here */ - bool retry) /* set TRUE if this is a request retry as - opposed to a real redirect following */ + followtype type) /* see transfer.h */ { /* Location: redirect */ char prot[16]; /* URL protocol string storage */ char letter; /* used for a silly sscanf */ size_t newlen; char *newest; + bool disallowport = FALSE; - if(!retry) { + if(type == FOLLOW_REDIR) { if((data->set.maxredirs != -1) && (data->set.followlocation >= data->set.maxredirs)) { failf(data,"Maximum (%d) redirects followed", data->set.maxredirs); @@ -1989,19 +1992,19 @@ CURLcode Curl_follow(struct SessionHandle *data, data->state.this_is_a_follow = TRUE; data->set.followlocation++; /* count location-followers */ - } - if(data->set.http_auto_referer) { - /* We are asked to automatically set the previous URL as the - referer when we get the next URL. We pick the ->url field, - which may or may not be 100% correct */ + if(data->set.http_auto_referer) { + /* We are asked to automatically set the previous URL as the referer + when we get the next URL. 
We pick the ->url field, which may or may + not be 100% correct */ - if(data->change.referer_alloc) - /* If we already have an allocated referer, free this first */ - free(data->change.referer); + if(data->change.referer_alloc) + /* If we already have an allocated referer, free this first */ + free(data->change.referer); - data->change.referer = strdup(data->change.url); - data->change.referer_alloc = TRUE; /* yes, free this later */ + data->change.referer = strdup(data->change.url); + data->change.referer_alloc = TRUE; /* yes, free this later */ + } } if(2 != sscanf(newurl, "%15[^?&/:]://%c", prot, &letter)) { @@ -2141,7 +2144,7 @@ CURLcode Curl_follow(struct SessionHandle *data, } else { /* This is an absolute URL, don't allow the custom port number */ - data->state.allow_port = FALSE; + disallowport = TRUE; if(strchr(newurl, ' ')) { /* This new URL contains at least one space, this is a mighty stupid @@ -2159,6 +2162,16 @@ CURLcode Curl_follow(struct SessionHandle *data, } + if(type == FOLLOW_FAKE) { + /* we're only figuring out the new url if we would've followed locations + but now we're done so we can get out! */ + data->info.wouldredirect = newurl; + return CURLE_OK; + } + + if(disallowport) + data->state.allow_port = FALSE; + if(data->change.url_alloc) free(data->change.url); else @@ -2289,7 +2302,9 @@ connect_host(struct SessionHandle *data, return res; } -/* Returns TRUE and sets '*url' if a request retry is wanted */ +/* Returns TRUE and sets '*url' if a request retry is wanted. + + NOTE: that the *url is malloc()ed. */ bool Curl_retry_request(struct connectdata *conn, char **url) { @@ -2335,7 +2350,7 @@ CURLcode Curl_perform(struct SessionHandle *data) CURLcode res2; struct connectdata *conn=NULL; char *newurl = NULL; /* possibly a new URL to follow to! 
*/ - bool retry = FALSE; + int follow = FOLLOW_NONE; data->state.used_interface = Curl_if_easy; @@ -2366,14 +2381,29 @@ CURLcode Curl_perform(struct SessionHandle *data) if(res == CURLE_OK) { res = Transfer(conn); /* now fetch that URL please */ if(res == CURLE_OK) { - retry = Curl_retry_request(conn, &newurl); + bool retry = Curl_retry_request(conn, &newurl); - if(!retry) + if(retry) + follow = FOLLOW_RETRY; + else { /* * We must duplicate the new URL here as the connection data may - * be free()ed in the Curl_done() function. + * be free()ed in the Curl_done() function. We prefer the newurl + * one since that's used for redirects or just further requests + * for retries or multi-stage HTTP auth methods etc. */ - newurl = data->req.newurl?strdup(data->req.newurl):NULL; + if(data->req.newurl) { + follow = FOLLOW_REDIR; + newurl = strdup(data->req.newurl); + } + else if(data->req.location) { + follow = FOLLOW_FAKE; + newurl = strdup(data->req.location); + } + } + + /* in the above cases where 'newurl' gets assigned, we have a fresh + * allocated memory pointed to */ } else { /* The transfer phase returned error, we mark the connection to get @@ -2409,11 +2439,17 @@ CURLcode Curl_perform(struct SessionHandle *data) * in 'Curl_done' or other functions. */ - if((res == CURLE_OK) && newurl) { - res = Curl_follow(data, newurl, retry); + if((res == CURLE_OK) && follow) { + res = Curl_follow(data, newurl, follow); if(CURLE_OK == res) { + /* if things went fine, Curl_follow() freed or otherwise took + responsibility for the newurl pointer */ newurl = NULL; - continue; + if(follow >= FOLLOW_RETRY) { + follow = FOLLOW_NONE; + continue; + } + /* else we break out of the loop below */ } } } diff --git a/lib/transfer.h b/lib/transfer.h index c368c4682..aad82ebaf 100644 --- a/lib/transfer.h +++ b/lib/transfer.h @@ -7,7 +7,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2007, Daniel Stenberg, , et al. 
+ * Copyright (C) 1998 - 2008, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -26,7 +26,20 @@ CURLcode Curl_perform(struct SessionHandle *data); CURLcode Curl_pretransfer(struct SessionHandle *data); CURLcode Curl_second_connect(struct connectdata *conn); CURLcode Curl_posttransfer(struct SessionHandle *data); -CURLcode Curl_follow(struct SessionHandle *data, char *newurl, bool retry); + +typedef enum { + FOLLOW_NONE, /* not used within the function, just a placeholder to + allow initing to this */ + FOLLOW_FAKE, /* only records stuff, not actually following */ + FOLLOW_RETRY, /* set if this is a request retry as opposed to a real + redirect following */ + FOLLOW_REDIR, /* a full true redirect */ + FOLLOW_LAST /* never used */ +} followtype; + +CURLcode Curl_follow(struct SessionHandle *data, char *newurl, followtype type); + + CURLcode Curl_readwrite(struct connectdata *conn, bool *done); int Curl_single_getsock(const struct connectdata *conn, curl_socket_t *socks, diff --git a/lib/url.c b/lib/url.c index 9cda6dbac..906792332 100644 --- a/lib/url.c +++ b/lib/url.c @@ -494,6 +494,7 @@ CURLcode Curl_close(struct SessionHandle *data) Curl_digest_cleanup(data); Curl_safefree(data->info.contenttype); + Curl_safefree(data->info.wouldredirect); /* this destroys the channel and we cannot use it anymore after this */ ares_destroy(data->state.areschannel); @@ -4440,6 +4441,10 @@ CURLcode Curl_done(struct connectdata **connp, free(data->req.newurl); data->req.newurl = NULL; } + if(data->req.location) { + free(data->req.location); + data->req.location = NULL; + } if(conn->dns_entry) { Curl_resolv_unlock(data, conn->dns_entry); /* done with this */ diff --git a/lib/urldata.h b/lib/urldata.h index ce6287190..fa93a6454 100644 --- a/lib/urldata.h +++ b/lib/urldata.h @@ -760,8 +760,10 @@ struct SingleRequest { bool ignorecl; /* This HTTP response has no body so we 
ignore the Content- Length: header */ - char *newurl; /* This can only be set if a Location: was in the - document headers */ + char *location; /* This points to an allocated version of the Location: + header data */ + char *newurl; /* Set to the new URL to use when a redirect or a retry is + wanted */ /* 'upload_present' is used to keep a byte counter of how much data there is still left in the buffer, aimed for upload. */ @@ -1021,21 +1023,19 @@ struct connectdata { */ struct PureInfo { int httpcode; /* Recent HTTP or FTP response code */ - int httpproxycode; - int httpversion; + int httpproxycode; /* response code from proxy when received separate */ + int httpversion; /* the http version number X.Y = X*10+Y */ long filetime; /* If requested, this is might get set. Set to -1 if the time was unretrievable. We cannot have this of type time_t, since time_t is unsigned on several platforms such as OpenVMS. */ long header_size; /* size of read header(s) in bytes */ long request_size; /* the amount of bytes sent in the request(s) */ - - long proxyauthavail; - long httpauthavail; - + long proxyauthavail; /* what proxy auth types were announced */ + long httpauthavail; /* what host auth types were announced */ long numconnects; /* how many new connection did libcurl created */ - char *contenttype; /* the content type of the object */ + char *wouldredirect; /* URL this would've been redirected to if asked to */ }; diff --git a/src/writeout.c b/src/writeout.c index b45b7cba4..2a0e37af2 100644 --- a/src/writeout.c +++ b/src/writeout.c @@ -5,7 +5,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2006, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2008, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. 
The terms @@ -61,6 +61,7 @@ typedef enum { VAR_REDIRECT_TIME, VAR_REDIRECT_COUNT, VAR_FTP_ENTRY_PATH, + VAR_REDIRECT_URL, VAR_NUM_OF_VARS /* must be the last */ } replaceid; @@ -73,6 +74,7 @@ struct variable { static const struct variable replacements[]={ {"url_effective", VAR_EFFECTIVE_URL}, {"http_code", VAR_HTTP_CODE}, + {"response_code", VAR_HTTP_CODE}, {"http_connect", VAR_HTTP_CODE_PROXY}, {"time_total", VAR_TOTAL_TIME}, {"time_namelookup", VAR_NAMELOOKUP_TIME}, @@ -90,6 +92,7 @@ static const struct variable replacements[]={ {"time_redirect", VAR_REDIRECT_TIME}, {"num_redirects", VAR_REDIRECT_COUNT}, {"ftp_entry_path", VAR_FTP_ENTRY_PATH}, + {"redirect_url", VAR_REDIRECT_URL}, {NULL, VAR_NONE} }; @@ -222,6 +225,12 @@ void ourWriteOut(CURL *curl, const char *writeinfo) && stringp) fputs(stringp, stream); break; + case VAR_REDIRECT_URL: + if((CURLE_OK == + curl_easy_getinfo(curl, CURLINFO_REDIRECT_URL, &stringp)) + && stringp) + fputs(stringp, stream); + break; default: break; } diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am index fbb553a93..7459c3e9a 100644 --- a/tests/data/Makefile.am +++ b/tests/data/Makefile.am @@ -50,11 +50,12 @@ EXTRA_DIST = test1 test108 test117 test127 test20 test27 test34 test46 \ test551 test552 test1016 test1017 test1018 test1019 test1020 test553 \ test1021 test1022 test1023 test309 test616 test617 test618 test619 \ test620 test621 test622 test623 test624 test625 test626 test627 test554 \ - test1024 test1025 test555 test1026 test1027 test1028 + test1024 test1025 test555 test1026 test1027 test1028 test1029 filecheck: @mkdir test-place; \ cp "$(top_srcdir)"/tests/data/test[0-9]* test-place/; \ + rm test-place/*~; \ for f in $(EXTRA_DIST); do \ if test -f "$(top_srcdir)/tests/data/$$f"; then \ rm -f test-place/$$f; \ diff --git a/tests/data/test1029 b/tests/data/test1029 new file mode 100644 index 000000000..c91feafc4 --- /dev/null +++ b/tests/data/test1029 @@ -0,0 +1,56 @@ + + + +HTTP +HTTP GET +redirect_url 
+followlocation + + +# Server-side + + +HTTP/1.1 301 This is a weirdo text message swsclose +Location: data/10290002.txt?coolsite=yes +Content-Length: 62 +Connection: close + +This server reply is for testing a simple Location: following + + + +# Client-side + + +http + + +HTTP Location: and 'redirect_url' check + + +http://%HOSTIP:%HTTPPORT/we/want/our/1029 -w '%{redirect_url}\n' + + + +# Verify data after the test has been "shot" + + +^User-Agent:.* + + +GET /we/want/our/1029 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +Accept: */* + + + +HTTP/1.1 301 This is a weirdo text message swsclose +Location: data/10290002.txt?coolsite=yes +Content-Length: 62 +Connection: close + +This server reply is for testing a simple Location: following +http://127.0.0.1:8990/we/want/our/data/10290002.txt?coolsite=yes + + + -- 2.40.0