From: Serhiy Storchaka Date: Sun, 18 Jan 2015 09:42:50 +0000 (+0200) Subject: Issue #23181: More "codepoint" -> "code point". X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e8c9e14af9bd9f2ced50e8b6535e45f0d33d8e72;p=python Issue #23181: More "codepoint" -> "code point". --- diff --git a/Doc/library/sgmllib.rst b/Doc/library/sgmllib.rst index 1da19cfb7d..84911fec3e 100644 --- a/Doc/library/sgmllib.rst +++ b/Doc/library/sgmllib.rst @@ -153,7 +153,7 @@ A single exception is defined as well: .. method:: SGMLParser.convert_codepoint(codepoint) - Convert a codepoint to a :class:`str` value. Encodings can be handled here if + Convert a code point to a :class:`str` value. Encodings can be handled here if appropriate, though the rest of :mod:`sgmllib` is oblivious on this matter. .. versionadded:: 2.5 diff --git a/Lib/htmlentitydefs.py b/Lib/htmlentitydefs.py index 3dd14a79fa..1f40d09d57 100644 --- a/Lib/htmlentitydefs.py +++ b/Lib/htmlentitydefs.py @@ -1,6 +1,6 @@ """HTML character entity references.""" -# maps the HTML entity name to the Unicode codepoint +# maps the HTML entity name to the Unicode code point name2codepoint = { 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1 @@ -256,7 +256,7 @@ name2codepoint = { 'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070 } -# maps the Unicode codepoint to the HTML entity name +# maps the Unicode code point to the HTML entity name codepoint2name = {} # maps the HTML entity name to the character diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index 8aca3819e6..c38df8d51d 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -72,7 +72,7 @@ class Test_IncrementalEncoder(unittest.TestCase): self.assertEqual(encoder.reset(), None) def test_stateful(self): - # jisx0213 encoder is stateful for a few codepoints. eg) + # jisx0213 encoder is stateful for a few code points. eg) # U+00E6 => A9DC # U+00E6 U+0300 => ABC4 # U+0300 => ABDC diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index c5dcfa3af9..1631e46e39 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -20,7 +20,7 @@ class TestBase: roundtriptest = 1 # set if roundtrip is possible with unicode has_iso10646 = 0 # set if this encoding contains whole iso10646 map xmlcharnametest = None # string to test xmlcharrefreplace - unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped. + unmappedunicode = u'\udeee' # a unicode code point that is not mapped. def setUp(self): if self.codec is None: diff --git a/Lib/test/test_stringprep.py b/Lib/test/test_stringprep.py index 15bdf87652..f6db89e0cc 100644 --- a/Lib/test/test_stringprep.py +++ b/Lib/test/test_stringprep.py @@ -1,5 +1,5 @@ # To fully test this module, we would need a copy of the stringprep tables. -# Since we don't have them, this test checks only a few codepoints. +# Since we don't have them, this test checks only a few code points. import unittest from test import test_support diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 9f6cc75853..f2018ecd95 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -874,9 +874,9 @@ class UnicodeTest( def test_utf8_decode_invalid_sequences(self): # continuation bytes in a sequence of 2, 3, or 4 bytes continuation_bytes = map(chr, range(0x80, 0xC0)) - # start bytes of a 2-byte sequence equivalent to codepoints < 0x7F + # start bytes of a 2-byte sequence equivalent to code points < 0x7F invalid_2B_seq_start_bytes = map(chr, range(0xC0, 0xC2)) - # start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF + # start bytes of a 4-byte sequence equivalent to code points > 0x10FFFF invalid_4B_seq_start_bytes = map(chr, range(0xF5, 0xF8)) invalid_start_bytes = ( continuation_bytes + invalid_2B_seq_start_bytes + diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index ab4e659332..6e0d2ce550 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -15,7 +15,7 @@ #undef hz #endif -/* GBK and GB2312 map differently in few codepoints that are listed below: +/* GBK and GB2312 map differently in few code points that are listed below: * * gb2312 gbk * A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 558a42f89c..6ee7bb1404 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -164,7 +164,7 @@ DECODER(big5hkscs) default: return 2; } - NEXT(2, 2) /* all decoded codepoints are pairs, above. */ + NEXT(2, 2) /* all decoded code points are pairs, above. */ } return 0; diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index 9272e363e1..1fc83feaf5 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -64,7 +64,7 @@ ENCODER(euc_kr) OUT1(EUCKR_JAMO_FIRSTBYTE) OUT2(EUCKR_JAMO_FILLER) - /* All codepoints in CP949 extension are in unicode + /* All code points in CP949 extension are in unicode * Hangul Syllable area. */ assert(0xac00 <= c && c <= 0xd7a3); c -= 0xac00; diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 7e8390a2d1..399e9dca40 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -12,10 +12,10 @@ #include "multibytecodec.h" -/* a unicode "undefined" codepoint */ +/* a unicode "undefined" code point */ #define UNIINV 0xFFFE -/* internal-use DBCS codepoints which aren't used by any charsets */ +/* internal-use DBCS code points which aren't used by any charsets */ #define NOCHAR 0xFFFF #define MULTIC 0xFFFE #define DBCINV 0xFFFD diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bdb14d74c2..340f8ccc38 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2039,7 +2039,7 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s, see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt Uncomment the 2 lines below to make them invalid, - codepoints: d800-dfff; UTF-8: \xed\xa0\x80-\xed\xbf\xbf. */ + code points: d800-dfff; UTF-8: \xed\xa0\x80-\xed\xbf\xbf. */ if ((s[1] & 0xc0) != 0x80 || (s[2] & 0xc0) != 0x80 || ((unsigned char)s[0] == 0xE0 && @@ -2337,7 +2337,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, } /* On narrow builds we split characters outside the BMP into two - codepoints => count how much extra space we need. */ + code points => count how much extra space we need. */ #ifndef Py_UNICODE_WIDE for (qq = q; e - qq >= 4; qq += 4) if (qq[iorder[2]] != 0 || qq[iorder[3]] != 0) @@ -2372,7 +2372,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, if (ch >= 0x110000) { - errmsg = "codepoint not in range(0x110000)"; + errmsg = "code point not in range(0x110000)"; startinpos = ((const char *)q)-starts; endinpos = startinpos+4; goto utf32Error; @@ -2449,7 +2449,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, p += 4; \ } while(0) - /* In narrow builds we can output surrogate pairs as one codepoint, + /* In narrow builds we can output surrogate pairs as one code point, so we need less space. */ #ifndef Py_UNICODE_WIDE for (i = pairs = 0; i < size-1; i++)