From cda85a0d1ca217c927211af799072c85b04b4e89 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Tue, 24 Nov 2015 22:33:18 +0000 Subject: [PATCH] Issue #25576: Remove application/x-www-form-urlencoded charset advice No charset parameter is standardized for this Content-Type value. Also clarify that urlencode() outputs ASCII. --- Doc/howto/urllib2.rst | 6 +++--- Doc/library/urllib.parse.rst | 9 +++++---- Doc/library/urllib.request.rst | 31 ++++++++----------------------- 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index e3d77142f4..9052ed59b0 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -115,7 +115,7 @@ library. :: 'language' : 'Python' } data = urllib.parse.urlencode(values) - data = data.encode('utf-8') # data should be bytes + data = data.encode('ascii') # data should be bytes req = urllib.request.Request(url, data) with urllib.request.urlopen(req) as response: the_page = response.read() @@ -180,8 +180,8 @@ Explorer [#]_. :: 'language' : 'Python' } headers = { 'User-Agent' : user_agent } - data = urllib.parse.urlencode(values) - data = data.encode('utf-8') + data = urllib.parse.urlencode(values) + data = data.encode('ascii') req = urllib.request.Request(url, data, headers) with urllib.request.urlopen(req) as response: the_page = response.read() diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index fbbabcadf9..ac04f99deb 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -519,10 +519,11 @@ task isn't already covered by the URL parsing functions above. .. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None) Convert a mapping object or a sequence of two-element tuples, which may - contain :class:`str` or :class:`bytes` objects, to a "percent-encoded" - string. 
If the resultant string is to be used as a *data* for POST - operation with :func:`~urllib.request.urlopen` function, then it should be - properly encoded to bytes, otherwise it would result in a :exc:`TypeError`. + contain :class:`str` or :class:`bytes` objects, to a percent-encoded ASCII + text string. If the resultant string is to be used as the *data* for a POST + operation with the :func:`~urllib.request.urlopen` function, then + it should be encoded to bytes, otherwise it would result in a + :exc:`TypeError`. The resulting string is a series of ``key=value`` pairs separated by ``'&'`` characters, where both *key* and *value* are quoted using :func:`quote_plus` diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 4df0f568c8..d98f2005d8 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -36,13 +36,8 @@ The :mod:`urllib.request` module defines the following functions: *data* should be a buffer in the standard :mimetype:`application/x-www-form-urlencoded` format. The :func:`urllib.parse.urlencode` function takes a mapping or sequence of - 2-tuples and returns a string in this format. It should be encoded to bytes - before being used as the *data* parameter. The charset parameter in - ``Content-Type`` header may be used to specify the encoding. If charset - parameter is not sent with the Content-Type header, the server following the - HTTP 1.1 recommendation may assume that the data is encoded in ISO-8859-1 - encoding. It is advisable to use charset parameter with encoding used in - ``Content-Type`` header with the :class:`Request`. + 2-tuples and returns an ASCII text string in this format. It should + be encoded to bytes before being used as the *data* parameter. urllib.request module uses HTTP/1.1 and includes ``Connection:close`` header in its HTTP requests. 
@@ -179,16 +174,9 @@ The following classes are provided: the only ones that use *data*; the HTTP request will be a POST instead of a GET when the *data* parameter is provided. *data* should be a buffer in the standard :mimetype:`application/x-www-form-urlencoded` format. - The :func:`urllib.parse.urlencode` function takes a mapping or sequence of - 2-tuples and returns a string in this format. It should be encoded to bytes - before being used as the *data* parameter. The charset parameter in - ``Content-Type`` header may be used to specify the encoding. If charset - parameter is not sent with the Content-Type header, the server following the - HTTP 1.1 recommendation may assume that the data is encoded in ISO-8859-1 - encoding. It is advisable to use charset parameter with encoding used in - ``Content-Type`` header with the :class:`Request`. - + 2-tuples and returns an ASCII text string in this format. It should be + encoded to bytes before being used as the *data* parameter. *headers* should be a dictionary, and will be treated as if :meth:`add_header` was called with each key and value as arguments. @@ -201,7 +189,7 @@ The following classes are provided: ``"Python-urllib/2.6"`` (on Python 2.6). An example of using ``Content-Type`` header with *data* argument would be - sending a dictionary like ``{"Content-Type":" application/x-www-form-urlencoded;charset=utf-8"}``. + sending a dictionary like ``{"Content-Type": "application/x-www-form-urlencoded"}``. The final two arguments are only of interest for correct handling of third-party HTTP cookies: @@ -1169,7 +1157,7 @@ every :class:`Request`. To change this:: opener.open('http://www.example.com/') Also, remember that a few standard headers (:mailheader:`Content-Length`, -:mailheader:`Content-Type` without charset parameter and :mailheader:`Host`) +:mailheader:`Content-Type` and :mailheader:`Host`) are added when the :class:`Request` is passed to :func:`urlopen` (or :meth:`OpenerDirector.open`). 
@@ -1192,11 +1180,8 @@ from urlencode is encoded to bytes before it is sent to urlopen as data:: >>> import urllib.request >>> import urllib.parse >>> data = urllib.parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0}) - >>> data = data.encode('utf-8') - >>> request = urllib.request.Request("http://requestb.in/xrbl82xr") - >>> # adding charset parameter to the Content-Type header. - >>> request.add_header("Content-Type","application/x-www-form-urlencoded;charset=utf-8") - >>> with urllib.request.urlopen(request, data) as f: + >>> data = data.encode('ascii') + >>> with urllib.request.urlopen("http://requestb.in/xrbl82xr", data) as f: ... print(f.read().decode('utf-8')) ... -- 2.40.0