- Do not compile unicodeobject, unicodectype, and unicodedata if Unicode is disabled
- Check for Py_USING_UNICODE in all places that use Unicode functions
- Disable Unicode literals and the Unicode builtin functions
- Add the types.StringTypes list
- Remove Unicode literals from most tests
#define PyInt_Check(op) ((op)->ob_type == &PyInt_Type)
extern DL_IMPORT(PyObject *) PyInt_FromString(char*, char**, int);
+#ifdef Py_USING_UNICODE
extern DL_IMPORT(PyObject *) PyInt_FromUnicode(Py_UNICODE*, int, int);
+#endif
extern DL_IMPORT(PyObject *) PyInt_FromLong(long);
extern DL_IMPORT(long) PyInt_AsLong(PyObject *);
extern DL_IMPORT(long) PyInt_GetMax(void);
#endif /* HAVE_LONG_LONG */
DL_IMPORT(PyObject *) PyLong_FromString(char *, char **, int);
+#ifdef Py_USING_UNICODE
DL_IMPORT(PyObject *) PyLong_FromUnicode(Py_UNICODE*, int, int);
+#endif
/* _PyLong_FromByteArray: View the n unsigned bytes as a binary integer in
base 256, and return a Python long with the same numeric value.
extern DL_IMPORT(void) _PyObject_Dump(PyObject *);
extern DL_IMPORT(PyObject *) PyObject_Repr(PyObject *);
extern DL_IMPORT(PyObject *) PyObject_Str(PyObject *);
+#ifdef Py_USING_UNICODE
extern DL_IMPORT(PyObject *) PyObject_Unicode(PyObject *);
+#endif
extern DL_IMPORT(int) PyObject_Compare(PyObject *, PyObject *);
extern DL_IMPORT(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int);
extern DL_IMPORT(int) PyObject_RichCompareBool(PyObject *, PyObject *, int);
/* --- Internal Unicode Format -------------------------------------------- */
+#ifndef Py_USING_UNICODE
+
+#define PyUnicode_Check(op) 0
+
+#else
+
/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
properly set, but the default rules below don't set it. I'll
sort this out some other day -- fredrik@pythonware.com */
#ifdef __cplusplus
}
#endif
+#endif /* Py_USING_UNICODE */
#endif /* !Py_UNICODEOBJECT_H */
write the configuration state in .ini format
"""
-import string
+import string, types
import re
__all__ = ["NoSectionError","DuplicateSectionError","NoOptionError",
configuration files in the list will be read. A single
filename may also be given.
"""
- if type(filenames) in [type(''), type(u'')]:
+ if type(filenames) in types.StringTypes:
filenames = [filenames]
for filename in filenames:
try:
d[types.LongType] = _copy_atomic
d[types.FloatType] = _copy_atomic
d[types.StringType] = _copy_atomic
-d[types.UnicodeType] = _copy_atomic
+try:
+ d[types.UnicodeType] = _copy_atomic
+except AttributeError:
+ pass
try:
d[types.CodeType] = _copy_atomic
except AttributeError:
d[types.LongType] = _deepcopy_atomic
d[types.FloatType] = _deepcopy_atomic
d[types.StringType] = _deepcopy_atomic
-d[types.UnicodeType] = _deepcopy_atomic
+try:
+ d[types.UnicodeType] = _deepcopy_atomic
+except AttributeError:
+ pass
d[types.CodeType] = _deepcopy_atomic
d[types.TypeType] = _deepcopy_atomic
d[types.XRangeType] = _deepcopy_atomic
encoding = "undefined"
if encoding != "ascii":
- sys.setdefaultencoding(encoding)
+ # On Non-Unicode builds this will raise an AttributeError...
+ sys.setdefaultencoding(encoding) # Needs Python Unicode build !
#
# Run custom site specific code, if available.
# test_pickle and test_cpickle both use this.
-from test_support import TestFailed
+from test_support import TestFailed, have_unicode
import sys
# break into multiple strings to please font-lock-mode
print "accepted insecure string: %s" % repr(buf)
# Test some Unicode end cases
- endcases = [u'', u'<\\u>', u'<\\\u1234>', u'<\n>', u'<\\>']
+ if have_unicode:
+ endcases = [unicode(''), unicode('<\\u>'), unicode('<\\\u1234>'),
+ unicode('<\n>'), unicode('<\\>')]
+ else:
+ endcases = []
for u in endcases:
try:
u2 = pickle.loads(pickle.dumps(u))
"""Common tests shared by test_string and test_userstring"""
import string
-from test_support import verify, verbose, TestFailed
+from test_support import verify, verbose, TestFailed, have_unicode
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
test('join', ' ', 'w x y z', Sequence())
test('join', 'a', 'abc', ('abc',))
test('join', 'a', 'z', UserList(['z']))
- test('join', u'.', u'a.b.c', ['a', 'b', 'c'])
- test('join', '.', u'a.b.c', [u'a', 'b', 'c'])
- test('join', '.', u'a.b.c', ['a', u'b', 'c'])
- test('join', '.', u'a.b.c', ['a', 'b', u'c'])
- test('join', '.', TypeError, ['a', u'b', 3])
+ if have_unicode:
+ test('join', unicode('.'), unicode('a.b.c'), ['a', 'b', 'c'])
+ test('join', '.', unicode('a.b.c'), [unicode('a'), 'b', 'c'])
+ test('join', '.', unicode('a.b.c'), ['a', unicode('b'), 'c'])
+ test('join', '.', unicode('a.b.c'), ['a', 'b', unicode('c')])
+ test('join', '.', TypeError, ['a', unicode('b'), 3])
for i in [5, 25, 125]:
test('join', '-', ((('a' * i) + '-') * i)[:-1],
['a' * i] * i)
if complex(0j, 3.14) != 3.14j: raise TestFailed, 'complex(0j, 3.14)'
if complex(0.0, 3.14) != 3.14j: raise TestFailed, 'complex(0.0, 3.14)'
if complex(" 3.14+J ") != 3.14+1j: raise TestFailed, 'complex(" 3.14+J )"'
-if complex(u" 3.14+J ") != 3.14+1j: raise TestFailed, 'complex(u" 3.14+J )"'
+if have_unicode:
+ if complex(unicode(" 3.14+J ")) != 3.14+1j:
+ raise TestFailed, 'complex(u" 3.14+J )"'
class Z:
def __complex__(self): return 3.14j
z = Z()
raise TestFailed, "eval(3)"
if eval('c', globals, locals) != 300:
raise TestFailed, "eval(4)"
-if eval(u'1+1') != 2: raise TestFailed, 'eval(u\'1+1\')'
-if eval(u' 1+1\n') != 2: raise TestFailed, 'eval(u\' 1+1\\n\')'
+if have_unicode:
+ if eval(unicode('1+1')) != 2: raise TestFailed, 'eval(u\'1+1\')'
+ if eval(unicode(' 1+1\n')) != 2: raise TestFailed, 'eval(u\' 1+1\\n\')'
globals = {'a': 1, 'b': 2}
locals = {'b': 200, 'c': 300}
-if eval(u'a', globals) != 1:
- raise TestFailed, "eval(1) == %s" % eval(u'a', globals)
-if eval(u'a', globals, locals) != 1:
- raise TestFailed, "eval(2)"
-if eval(u'b', globals, locals) != 200:
- raise TestFailed, "eval(3)"
-if eval(u'c', globals, locals) != 300:
- raise TestFailed, "eval(4)"
+if have_unicode:
+ if eval(unicode('a'), globals) != 1:
+ raise TestFailed, "eval(1) == %s" % eval(unicode('a'), globals)
+ if eval(unicode('a'), globals, locals) != 1:
+ raise TestFailed, "eval(2)"
+ if eval(unicode('b'), globals, locals) != 200:
+ raise TestFailed, "eval(3)"
+ if eval(unicode('c'), globals, locals) != 300:
+ raise TestFailed, "eval(4)"
print 'execfile'
z = 0
if float(314) != 314.0: raise TestFailed, 'float(314)'
if float(314L) != 314.0: raise TestFailed, 'float(314L)'
if float(" 3.14 ") != 3.14: raise TestFailed, 'float(" 3.14 ")'
-if float(u" 3.14 ") != 3.14: raise TestFailed, 'float(u" 3.14 ")'
-if float(u" \u0663.\u0661\u0664 ") != 3.14:
- raise TestFailed, 'float(u" \u0663.\u0661\u0664 ")'
+if have_unicode:
+ if float(unicode(" 3.14 ")) != 3.14:
+ raise TestFailed, 'float(u" 3.14 ")'
+ if float(unicode(" \u0663.\u0661\u0664 ")) != 3.14:
+ raise TestFailed, 'float(u" \u0663.\u0661\u0664 ")'
print 'getattr'
import sys
if int(-3.5) != -3: raise TestFailed, 'int(-3.5)'
# Different base:
if int("10",16) != 16L: raise TestFailed, 'int("10",16)'
-if int(u"10",16) != 16L: raise TestFailed, 'int(u"10",16)'
+if have_unicode:
+ if int(unicode("10"),16) != 16L:
+ raise TestFailed, 'int(u"10",16)'
# Test conversion from strings and various anomalies
L = [
('0', 0),
(' 1\02 ', ValueError),
('', ValueError),
(' ', ValueError),
- (' \t\t ', ValueError),
- (u'0', 0),
- (u'1', 1),
- (u'9', 9),
- (u'10', 10),
- (u'99', 99),
- (u'100', 100),
- (u'314', 314),
- (u' 314', 314),
- (u'\u0663\u0661\u0664 ', 314),
- (u' \t\t 314 \t\t ', 314),
- (u' 1x', ValueError),
- (u' 1 ', 1),
- (u' 1\02 ', ValueError),
- (u'', ValueError),
- (u' ', ValueError),
- (u' \t\t ', ValueError),
+ (' \t\t ', ValueError)
+]
+if have_unicode:
+ L += [
+ (unicode('0'), 0),
+ (unicode('1'), 1),
+ (unicode('9'), 9),
+ (unicode('10'), 10),
+ (unicode('99'), 99),
+ (unicode('100'), 100),
+ (unicode('314'), 314),
+ (unicode(' 314'), 314),
+ (unicode('\u0663\u0661\u0664 '), 314),
+ (unicode(' \t\t 314 \t\t '), 314),
+ (unicode(' 1x'), ValueError),
+ (unicode(' 1 '), 1),
+ (unicode(' 1\02 '), ValueError),
+ (unicode(''), ValueError),
+ (unicode(' '), ValueError),
+ (unicode(' \t\t '), ValueError),
]
for s, v in L:
for sign in "", "+", "-":
if long(3.5) != 3L: raise TestFailed, 'long(3.5)'
if long(-3.5) != -3L: raise TestFailed, 'long(-3.5)'
if long("-3") != -3L: raise TestFailed, 'long("-3")'
-if long(u"-3") != -3L: raise TestFailed, 'long(u"-3")'
+if have_unicode:
+ if long(unicode("-3")) != -3L:
+ raise TestFailed, 'long(u"-3")'
# Different base:
if long("10",16) != 16L: raise TestFailed, 'long("10",16)'
-if long(u"10",16) != 16L: raise TestFailed, 'long(u"10",16)'
+if have_unicode:
+ if long(unicode("10"),16) != 16L:
+ raise TestFailed, 'long(u"10",16)'
# Check conversions from string (same test set as for int(), and then some)
LL = [
('1' + '0'*20, 10L**20),
- ('1' + '0'*100, 10L**100),
- (u'1' + u'0'*20, 10L**20),
- (u'1' + u'0'*100, 10L**100),
+ ('1' + '0'*100, 10L**100)
+]
+if have_unicode:
+ L+=[
+ (unicode('1') + unicode('0')*20, 10L**20),
+ (unicode('1') + unicode('0')*100, 10L**100),
]
for s, v in L + LL:
for sign in "", "+", "-":
-from test_support import TestFailed
+from test_support import TestFailed, have_unicode
class base_set:
except TypeError:
pass
-# Test char in Unicode
-check('c' in u'abc', "'c' not in u'abc'")
-check('d' not in u'abc', "'d' in u'abc'")
+if have_unicode:
-try:
- '' in u'abc'
- check(0, "'' in u'abc' did not raise error")
-except TypeError:
- pass
+ # Test char in Unicode
-try:
- 'ab' in u'abc'
- check(0, "'ab' in u'abc' did not raise error")
-except TypeError:
- pass
+ check('c' in unicode('abc'), "'c' not in u'abc'")
+ check('d' not in unicode('abc'), "'d' in u'abc'")
-try:
- None in u'abc'
- check(0, "None in u'abc' did not raise error")
-except TypeError:
- pass
+ try:
+ '' in unicode('abc')
+ check(0, "'' in u'abc' did not raise error")
+ except TypeError:
+ pass
-# Test Unicode char in Unicode
+ try:
+ 'ab' in unicode('abc')
+ check(0, "'ab' in u'abc' did not raise error")
+ except TypeError:
+ pass
-check(u'c' in u'abc', "u'c' not in u'abc'")
-check(u'd' not in u'abc', "u'd' in u'abc'")
+ try:
+ None in unicode('abc')
+ check(0, "None in u'abc' did not raise error")
+ except TypeError:
+ pass
-try:
- u'' in u'abc'
- check(0, "u'' in u'abc' did not raise error")
-except TypeError:
- pass
+ # Test Unicode char in Unicode
-try:
- u'ab' in u'abc'
- check(0, "u'ab' in u'abc' did not raise error")
-except TypeError:
- pass
+ check(unicode('c') in unicode('abc'), "u'c' not in u'abc'")
+ check(unicode('d') not in unicode('abc'), "u'd' in u'abc'")
-# Test Unicode char in string
+ try:
+ unicode('') in unicode('abc')
+ check(0, "u'' in u'abc' did not raise error")
+ except TypeError:
+ pass
-check(u'c' in 'abc', "u'c' not in 'abc'")
-check(u'd' not in 'abc', "u'd' in 'abc'")
+ try:
+ unicode('ab') in unicode('abc')
+ check(0, "u'ab' in u'abc' did not raise error")
+ except TypeError:
+ pass
-try:
- u'' in 'abc'
- check(0, "u'' in 'abc' did not raise error")
-except TypeError:
- pass
+ # Test Unicode char in string
-try:
- u'ab' in 'abc'
- check(0, "u'ab' in 'abc' did not raise error")
-except TypeError:
- pass
+ check(unicode('c') in 'abc', "u'c' not in 'abc'")
+ check(unicode('d') not in 'abc', "u'd' in 'abc'")
+
+ try:
+ unicode('') in 'abc'
+ check(0, "u'' in 'abc' did not raise error")
+ except TypeError:
+ pass
+
+ try:
+ unicode('ab') in 'abc'
+ check(0, "u'ab' in 'abc' did not raise error")
+ except TypeError:
+ pass
# A collection of tests on builtin sequence types
a = range(10)
-from test_support import verbose
+from test_support import verbose, have_unicode
import sys
# test string formatting operator (I am not sure if this is being tested
def testboth(formatstr, *args):
testformat(formatstr, *args)
- testformat(unicode(formatstr), *args)
+ if have_unicode:
+ testformat(unicode(formatstr), *args)
testboth("%.1d", (1,), "1")
test_exc('abc %a', 1, ValueError,
"unsupported format character 'a' (0x61) at index 5")
-test_exc(u'abc %\u3000', 1, ValueError,
- "unsupported format character '?' (0x3000) at index 5")
+if have_unicode:
+ test_exc(unicode('abc %\u3000'), 1, ValueError,
+ "unsupported format character '?' (0x3000) at index 5")
# Test iterators.
import unittest
-from test_support import run_unittest, TESTFN, unlink
+from test_support import run_unittest, TESTFN, unlink, have_unicode
# Test result of triple loop (too big to inline)
TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2),
self.check_for_loop(iter("abcde"), ["a", "b", "c", "d", "e"])
# Test a Unicode string
- def test_iter_unicode(self):
- self.check_for_loop(iter(u"abcde"), [u"a", u"b", u"c", u"d", u"e"])
+ if have_unicode:
+ def test_iter_unicode(self):
+ self.check_for_loop(iter(unicode("abcde")),
+ [unicode("a"), unicode("b"), unicode("c"),
+ unicode("d"), unicode("e")])
# Test a dictionary
def test_iter_dict(self):
d = {"one": 1, "two": 2, "three": 3}
self.assertEqual(reduce(add, d), "".join(d.keys()))
+ # This test case will be removed if we don't have Unicode
def test_unicode_join_endcase(self):
# This class inserts a Unicode object into its argument's natural
i = self.i
self.i = i+1
if i == 2:
- return u"fooled you!"
+ return unicode("fooled you!")
return self.it.next()
f = open(TESTFN, "w")
# and pass that on to unicode.join().
try:
got = " - ".join(OhPhooey(f))
- self.assertEqual(got, u"a\n - b\n - fooled you! - c\n")
+ self.assertEqual(got, unicode("a\n - b\n - fooled you! - c\n"))
finally:
f.close()
try:
unlink(TESTFN)
except OSError:
pass
+ if not have_unicode:
+ def test_unicode_join_endcase(self): pass
# Test iterators with 'x in y' and 'x not in y'.
def test_in_and_not_in(self):
import test_support
+try:
+ uni = unicode
+except NameError:
+ def uni(x):return x
+
class QueryTestCase(unittest.TestCase):
def test_basic(self):
"""Verify .isrecursive() and .isreadable() w/o recursion."""
verify = self.assert_
- for safe in (2, 2.0, 2j, "abc", [3], (2,2), {3: 3}, u"yaddayadda",
+ for safe in (2, 2.0, 2j, "abc", [3], (2,2), {3: 3}, uni("yaddayadda"),
self.a, self.b):
verify(not pprint.isrecursive(safe),
"expected not isrecursive for " + `safe`)
def test_same_as_repr(self):
"Simple objects and small containers that should be same as repr()."
verify = self.assert_
- for simple in (0, 0L, 0+0j, 0.0, "", u"", (), [], {}, verify, pprint,
- -6, -6L, -6-6j, -1.5, "x", u"x", (3,), [3], {3: 6},
+ for simple in (0, 0L, 0+0j, 0.0, "", uni(""), (), [], {}, verify, pprint,
+ -6, -6L, -6-6j, -1.5, "x", uni("x"), (3,), [3], {3: 6},
(1,2), [3,4], {5: 6, 7: 8},
{"xy\tab\n": (3,), 5: [[]], (): {}},
range(10, -11, -1)
import sys
sys.path=['.']+sys.path
-from test_support import verbose, TestFailed
+from test_support import verbose, TestFailed, have_unicode
import sre
import sys, os, string, traceback
# Try the match with UNICODE locale enabled, and check
# that it still succeeds.
- obj=sre.compile(pattern, sre.UNICODE)
- result=obj.search(s)
- if result==None:
- print '=== Fails on unicode-sensitive match', t
+ if have_unicode:
+ obj=sre.compile(pattern, sre.UNICODE)
+ result=obj.search(s)
+ if result==None:
+ print '=== Fails on unicode-sensitive match', t
return cmp(len(x), len(y))
return cmp(x, y)
+try:
+ unicode
+ have_unicode = 1
+except NameError:
+ have_unicode = 0
+
import os
# Filename used for testing
if os.name == 'java':
elif os.name != 'riscos':
TESTFN = '@test'
# Unicode name only used if TESTFN_ENCODING exists for the platform.
- TESTFN_UNICODE=u"@test-\xe0\xf2" # 2 latin characters.
- if os.name=="nt":
- TESTFN_ENCODING="mbcs"
+ if have_unicode:
+ TESTFN_UNICODE=unicode("@test-\xe0\xf2", "latin-1") # 2 latin characters.
+ if os.name=="nt":
+ TESTFN_ENCODING="mbcs"
else:
TESTFN = 'test'
del os
from _winreg import *
import os, sys
-from test_support import verify
+from test_support import verify, have_unicode
test_key_name = "SOFTWARE\\Python Registry Test Key - Delete Me"
test_data = [
("Int Value", 45, REG_DWORD),
("String Val", "A string value", REG_SZ,),
- (u"Unicode Val", u"A Unicode value", REG_SZ,),
("StringExpand", "The path is %path%", REG_EXPAND_SZ),
- ("UnicodeExpand", u"The path is %path%", REG_EXPAND_SZ),
("Multi-string", ["Lots", "of", "string", "values"], REG_MULTI_SZ),
- ("Multi-unicode", [u"Lots", u"of", u"unicode", u"values"], REG_MULTI_SZ),
- ("Multi-mixed", [u"Unicode", u"and", "string", "values"],REG_MULTI_SZ),
("Raw Data", ("binary"+chr(0)+"data"), REG_BINARY),
]
+if have_unicode:
+ test_data+=[
+ (unicode("Unicode Val"), unicode("A Unicode value"), REG_SZ,),
+ ("UnicodeExpand", unicode("The path is %path%"), REG_EXPAND_SZ),
+ ("Multi-unicode", [unicode("Lots"), unicode("of"), unicode("unicode"), unicode("values")], REG_MULTI_SZ),
+ ("Multi-mixed", [unicode("Unicode"), unicode("and"), "string", "values"],REG_MULTI_SZ),
+ ]
def WriteTestData(root_key):
# Set the default value for this key.
pass
StringType = str
-UnicodeType = unicode
+try:
+ UnicodeType = unicode
+ StringTypes = [StringType, UnicodeType]
+except NameError:
+ StringTypes = [StringType]
+
BufferType = type(buffer(''))
TupleType = tuple
DLINCLDIR= @DLINCLDIR@
DYNLOADFILE= @DYNLOADFILE@
MACHDEP_OBJS= @MACHDEP_OBJS@
+UNICODE_OBJS= @UNICODE_OBJS@
PYTHON= python$(EXE)
Objects/stringobject.o \
Objects/tupleobject.o \
Objects/typeobject.o \
- Objects/unicodeobject.o \
- Objects/unicodectype.o
+ $(UNICODE_OBJS)
##########################################################################
What's New in Python 2.2a2?
===========================
+Build
+
+- configure supports a new option --enable-unicode, with the values
+ ucs2 and ucs4 (new in 2.2a1). With --disable-unicode, the Unicode
+ type and its supporting code are completely removed from the interpreter.
+
Tools
- The new Tools/scripts/cleanfuture.py can be used to automatically
sure to check the Unicode width compatibility in their extensions by
using at least one of the mangled Unicode APIs in the extension.
+- Two new flags METH_NOARGS and METH_O are available in method definition
+ tables to simplify implementation of methods with no arguments and a
+ single untyped argument. Calling such methods is more efficient than
+ calling corresponding METH_VARARGS methods. METH_OLDARGS is now
+ deprecated.
+
Windows
- "import module" now compiles module.pyw if it exists and nothing else
(These warnings currently don't conform to the warnings framework of
PEP 230; we intend to fix this in 2.2a2.)
-- Two new flags METH_NOARGS and METH_O are available in method definition
- tables to simplify implementation of methods with no arguments and a
- single untyped argument. Calling such methods is more efficient than
- calling corresponding METH_VARARGS methods. METH_OLDARGS is now
- deprecated.
-
- The UTF-16 codec was modified to be more RFC compliant. It will now
only remove BOM characters at the start of the string and then
only if running in native mode (UTF-16-LE and -BE won't remove a
return NULL;
}
+#ifdef Py_USING_UNICODE
/* --- Helpers ------------------------------------------------------------ */
static
}
#endif /* MS_WIN32 */
+#endif /* Py_USING_UNICODE */
/* --- Module API --------------------------------------------------------- */
static PyMethodDef _codecs_functions[] = {
{"register", codecregister, 1},
{"lookup", codeclookup, 1},
+#ifdef Py_USING_UNICODE
{"utf_8_encode", utf_8_encode, 1},
{"utf_8_decode", utf_8_decode, 1},
{"utf_16_encode", utf_16_encode, 1},
{"mbcs_encode", mbcs_encode, 1},
{"mbcs_decode", mbcs_decode, 1},
#endif
+#endif /* Py_USING_UNICODE */
{NULL, NULL} /* sentinel */
};
/* defining this one enables tracing */
#undef VERBOSE
-#if PY_VERSION_HEX >= 0x01060000
+#if PY_VERSION_HEX >= 0x01060000 && defined(Py_USING_UNICODE)
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif
{
if (PyString_Check(value))
return PyString_AsString(value);
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(value)) {
PyObject *v = PyUnicode_AsUTF8String(value);
if (v == NULL)
Py_DECREF(v);
return PyString_AsString(v);
}
+#endif
else {
PyObject *v = PyObject_Str(value);
if (v == NULL)
ckfree(FREECAST argv);
return result;
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(value)) {
#if TKMAJORMINOR <= 8001
/* In Tcl 8.1 we must use UTF-8 */
PyUnicode_GET_SIZE(value));
#endif /* TKMAJORMINOR > 8001 */
}
+#endif
else {
PyObject *v = PyObject_Str(value);
if (!v)
so would confuse applications that expect a string. */
char *s = Tcl_GetStringResult(interp);
char *p = s;
+
/* If the result contains any bytes with the top bit set,
it's UTF-8 and we should decode it to Unicode */
+#ifdef Py_USING_UNICODE
while (*p != '\0') {
if (*p & 0x80)
break;
p++;
}
+
if (*p == '\0')
res = PyString_FromStringAndSize(s, (int)(p-s));
else {
res = PyString_FromStringAndSize(s, (int)(p-s));
}
}
+#else
+ p = strchr(p, '\0');
+ res = PyString_FromStringAndSize(s, (int)(p-s));
+#endif
}
LEAVE_OVERLAP_TCL
}
+#ifdef Py_USING_UNICODE
/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
backslash and newline characters to \uXXXX escapes. */
static PyObject *
Py_XDECREF(repr);
return -1;
}
+#endif
static int
goto finally;
}
+#ifdef Py_USING_UNICODE
case 'u':
if ((type == &PyUnicode_Type) && (PyString_GET_SIZE(args) < 2)) {
res = save_unicode(self, args, 0);
goto finally;
}
+#endif
}
if (args->ob_refcnt > 1) {
}
break;
+#ifdef Py_USING_UNICODE
case 'u':
if (type == &PyUnicode_Type) {
res = save_unicode(self, args, 1);
goto finally;
}
break;
+#endif
case 't':
if (type == &PyTuple_Type) {
}
+#ifdef Py_USING_UNICODE
static int
load_unicode(Unpicklerobject *self) {
PyObject *str = 0;
finally:
return res;
}
+#endif
+#ifdef Py_USING_UNICODE
static int
load_binunicode(Unpicklerobject *self) {
PyObject *unicode;
PDATA_PUSH(self->stack, unicode, -1);
return 0;
}
+#endif
static int
break;
continue;
+#ifdef Py_USING_UNICODE
case UNICODE:
if (load_unicode(self) < 0)
break;
if (load_binunicode(self) < 0)
break;
continue;
+#endif
case EMPTY_TUPLE:
if (load_empty_tuple(self) < 0)
break;
continue;
+#ifdef Py_USING_UNICODE
case UNICODE:
if (load_unicode(self) < 0)
break;
if (load_binunicode(self) < 0)
break;
continue;
+#endif
case EMPTY_TUPLE:
if (load_empty_tuple(self) < 0)
#define Py_TPFLAGS_GC 0
#endif
+#if (PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION > 5) || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
+/* In Python 1.6, 2.0 and 2.1, disabling Unicode was not possible. */
+#define Py_USING_UNICODE
+#endif
+
enum HandlerTypes {
StartElement,
EndElement,
}
#endif
-#if !(PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6)
+#ifdef Py_USING_UNICODE
#if EXPAT_VERSION == 0x010200
static PyObject *
conv_atts_using_unicode(XML_Char **atts)
return res;
}
-#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
+#ifndef Py_USING_UNICODE
#define STRING_CONV_FUNC conv_string_to_utf8
#else
/* Unicode-enabled builds (Python 1.6 and later versions) */
const XML_Char *data),
("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data))
-#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
+#ifndef Py_USING_UNICODE
VOID_HANDLER(CharacterData,
(void *userData, const XML_Char *data, int len),
("(N)", conv_string_len_to_utf8(data,len)))
STRING_CONV_FUNC,notationName))
#if EXPAT_VERSION >= 0x015f00
-#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
+#ifndef Py_USING_UNICODE
VOID_HANDLER(EntityDecl,
(void *userData,
const XML_Char *entityName,
return conv_content_model(model, conv_string_to_utf8);
}
-#if !(PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6)
+#ifdef Py_USING_UNICODE
static PyObject *
conv_content_model_unicode(XML_Content * const model)
{
(void *userData),
("()"))
-#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
+#ifndef Py_USING_UNICODE
VOID_HANDLER(Default,
(void *userData, const XML_Char *s, int len),
("(N)", conv_string_len_to_utf8(s,len)))
/* ---------- */
-#if !(PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6)
+#ifdef Py_USING_UNICODE
/*
pyexpat international encoding support.
return NULL;
}
XML_SetUserData(self->itself, (void *)self);
-#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
-#else
+#ifdef Py_USING_UNICODE
XML_SetUnknownEncodingHandler(self->itself, (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
#endif
}
if (strcmp(name, "returns_unicode") == 0) {
if (PyObject_IsTrue(v)) {
-#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
+#ifndef Py_USING_UNICODE
PyErr_SetString(PyExc_ValueError,
"Cannot return Unicode strings in Python 1.5");
return -1;
info.minor, info.micro));
}
#endif
-#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
-#else
+#ifdef Py_USING_UNICODE
init_template_buffer();
#endif
/* XXX When Expat supports some way of figuring out how it was
{
if (PyString_Check(v))
return PyString_Format(v, w);
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(v))
return PyUnicode_Format(v, w);
+#endif
return binary_op(v, w, NB_SLOT(nb_remainder), "%");
}
{
if (PyString_Check(v))
return PyString_Format(v, w);
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(v))
return PyUnicode_Format(v, w);
+#endif
else
return binary_iop(v, w, NB_SLOT(nb_inplace_remainder),
NB_SLOT(nb_remainder), "%=");
if (PyString_Check(o))
return int_from_string(PyString_AS_STRING(o),
PyString_GET_SIZE(o));
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(o))
return PyInt_FromUnicode(PyUnicode_AS_UNICODE(o),
PyUnicode_GET_SIZE(o),
10);
+#endif
m = o->ob_type->tp_as_number;
if (m && m->nb_int)
return m->nb_int(o);
*/
return long_from_string(PyString_AS_STRING(o),
PyString_GET_SIZE(o));
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(o))
/* The above check is done in PyLong_FromUnicode(). */
return PyLong_FromUnicode(PyUnicode_AS_UNICODE(o),
PyUnicode_GET_SIZE(o),
10);
+#endif
m = o->ob_type->tp_as_number;
if (m && m->nb_long)
return m->nb_long(o);
int sw_error=0;
int sign;
char buffer[256]; /* For errors */
- char s_buffer[256];
int len;
if (PyString_Check(v)) {
s = PyString_AS_STRING(v);
len = PyString_GET_SIZE(v);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(v)) {
+ char s_buffer[256];
if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) {
PyErr_SetString(PyExc_ValueError,
"complex() literal too large to convert");
s = s_buffer;
len = (int)strlen(s);
}
+#endif
else if (PyObject_AsCharBuffer(v, &s, &len)) {
PyErr_SetString(PyExc_TypeError,
"complex() arg is not a string");
const char *s, *last, *end;
double x;
char buffer[256]; /* for errors */
+#ifdef Py_USING_UNICODE
char s_buffer[256]; /* for objects convertible to a char buffer */
+#endif
int len;
if (pend)
s = PyString_AS_STRING(v);
len = PyString_GET_SIZE(v);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(v)) {
if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) {
PyErr_SetString(PyExc_ValueError,
s = s_buffer;
len = (int)strlen(s);
}
+#endif
else if (PyObject_AsCharBuffer(v, &s, &len)) {
PyErr_SetString(PyExc_TypeError,
"float() needs a string argument");
return PyInt_FromLong(x);
}
+#ifdef Py_USING_UNICODE
PyObject *
PyInt_FromUnicode(Py_UNICODE *s, int length, int base)
{
return NULL;
return PyInt_FromString(buffer, NULL, base);
}
+#endif
/* Methods */
return PyNumber_Int(x);
if (PyString_Check(x))
return PyInt_FromString(PyString_AS_STRING(x), NULL, base);
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(x))
return PyInt_FromUnicode(PyUnicode_AS_UNICODE(x),
PyUnicode_GET_SIZE(x),
base);
+#endif
PyErr_SetString(PyExc_TypeError,
"int() can't convert non-string with explicit base");
return NULL;
return NULL;
}
+#ifdef Py_USING_UNICODE
PyObject *
PyLong_FromUnicode(Py_UNICODE *u, int length, int base)
{
return PyLong_FromString(buffer, NULL, base);
}
+#endif
/* forward */
static PyLongObject *x_divrem
return PyNumber_Long(x);
else if (PyString_Check(x))
return PyLong_FromString(PyString_AS_STRING(x), NULL, base);
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(x))
return PyLong_FromUnicode(PyUnicode_AS_UNICODE(x),
PyUnicode_GET_SIZE(x),
base);
+#endif
else {
PyErr_SetString(PyExc_TypeError,
"long() can't convert non-string with explicit base");
res = (*v->ob_type->tp_repr)(v);
if (res == NULL)
return NULL;
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(res)) {
PyObject* str;
str = PyUnicode_AsUnicodeEscapeString(res);
else
return NULL;
}
+#endif
if (!PyString_Check(res)) {
PyErr_Format(PyExc_TypeError,
"__repr__ returned non-string (type %.200s)",
res = (*v->ob_type->tp_str)(v);
if (res == NULL)
return NULL;
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(res)) {
PyObject* str;
str = PyUnicode_AsEncodedString(res, NULL, NULL);
else
return NULL;
}
+#endif
if (!PyString_Check(res)) {
PyErr_Format(PyExc_TypeError,
"__str__ returned non-string (type %.200s)",
return res;
}
+#ifdef Py_USING_UNICODE
PyObject *
PyObject_Unicode(PyObject *v)
{
}
return res;
}
+#endif
/* Macro to get the tp_richcompare field of a type if defined */
return (vv < ww) ? -1 : (vv > ww) ? 1 : 0;
}
+#ifdef Py_USING_UNICODE
/* Special case for Unicode */
if (PyUnicode_Check(v) || PyUnicode_Check(w)) {
c = PyUnicode_Compare(v, w);
return -2;
PyErr_Clear();
}
+#endif
/* None is smaller than anything */
if (v == Py_None)
{
PyTypeObject *tp = v->ob_type;
+#ifdef Py_USING_UNICODE
/* The Unicode to string conversion is done here because the
existing tp_getattro slots expect a string object as name
and we wouldn't want to break those. */
if (name == NULL)
return NULL;
}
+#endif
+
if (!PyString_Check(name)) {
PyErr_SetString(PyExc_TypeError,
"attribute name must be string");
PyTypeObject *tp = v->ob_type;
int err;
+#ifdef Py_USING_UNICODE
/* The Unicode to string conversion is done here because the
existing tp_setattro slots expect a string object as name
and we wouldn't want to break those. */
if (name == NULL)
return -1;
}
- else if (!PyString_Check(name)){
+ else
+#endif
+ if (!PyString_Check(name)){
PyErr_SetString(PyExc_TypeError,
"attribute name must be string");
return -1;
goto onError;
}
- if (encoding == NULL)
+ if (encoding == NULL) {
+#ifdef Py_USING_UNICODE
encoding = PyUnicode_GetDefaultEncoding();
+#else
+ PyErr_SetString(PyExc_ValueError, "no encoding specified");
+ goto onError;
+#endif
+ }
/* Decode via the codec registry */
v = PyCodec_Decode(str, encoding, errors);
if (v == NULL)
goto onError;
+#ifdef Py_USING_UNICODE
/* Convert Unicode to a string using the default encoding */
if (PyUnicode_Check(v)) {
PyObject *temp = v;
if (v == NULL)
goto onError;
}
+#endif
if (!PyString_Check(v)) {
PyErr_Format(PyExc_TypeError,
"decoder did not return a string object (type=%.400s)",
goto onError;
}
- if (encoding == NULL)
+ if (encoding == NULL) {
+#ifdef Py_USING_UNICODE
encoding = PyUnicode_GetDefaultEncoding();
+#else
+ PyErr_SetString(PyExc_ValueError, "no encoding specified");
+ goto onError;
+#endif
+ }
/* Encode via the codec registry */
v = PyCodec_Encode(str, encoding, errors);
if (v == NULL)
goto onError;
+#ifdef Py_USING_UNICODE
/* Convert Unicode to a string using the default encoding */
if (PyUnicode_Check(v)) {
PyObject *temp = v;
if (v == NULL)
goto onError;
}
+#endif
if (!PyString_Check(v)) {
PyErr_Format(PyExc_TypeError,
"encoder did not return a string object (type=%.400s)",
}
if (!PyString_Check(obj)) {
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(obj)) {
obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
if (obj == NULL)
return -1;
}
- else {
+ else
+#endif
+ {
PyErr_Format(PyExc_TypeError,
"expected string or Unicode object, "
"%.200s found", obj->ob_type->tp_name);
register unsigned int size;
register PyStringObject *op;
if (!PyString_Check(bb)) {
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(bb))
return PyUnicode_Concat((PyObject *)a, bb);
+#endif
PyErr_Format(PyExc_TypeError,
"cannot add type \"%.200s\" to string",
bb->ob_type->tp_name);
{
register char *s, *end;
register char c;
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(el))
return PyUnicode_Contains(a, el);
+#endif
if (!PyString_Check(el) || PyString_Size(el) != 1) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires character as left operand");
sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj))
return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
+#endif
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
return NULL;
if (n == 0) {
const size_t old_sz = sz;
item = PySequence_Fast_GET_ITEM(seq, i);
if (!PyString_Check(item)){
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(item)) {
/* Defer to Unicode join.
* CAUTION: There's no gurantee that the
Py_DECREF(seq);
return result;
}
+#endif
PyErr_Format(PyExc_TypeError,
"sequence item %i: expected string,"
" %.80s found",
sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj))
return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
+#endif
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
return -2;
sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj)) {
int count;
count = PyUnicode_Count((PyObject *)self, subobj, i, last);
else
return PyInt_FromLong((long) count);
}
+#endif
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
return NULL;
table1 = PyString_AS_STRING(tableobj);
tablen = PyString_GET_SIZE(tableobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(tableobj)) {
/* Unicode .translate() does not support the deletechars
parameter; instead a mapping to None will cause characters
}
return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
}
+#endif
else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
return NULL;
del_table = PyString_AS_STRING(delobj);
dellen = PyString_GET_SIZE(delobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(delobj)) {
PyErr_SetString(PyExc_TypeError,
"deletions are implemented differently for unicode");
return NULL;
}
+#endif
else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
return NULL;
sub = PyString_AS_STRING(subobj);
sub_len = PyString_GET_SIZE(subobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj))
return PyUnicode_Replace((PyObject *)self,
subobj, replobj, count);
+#endif
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
return NULL;
repl = PyString_AS_STRING(replobj);
repl_len = PyString_GET_SIZE(replobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(replobj))
return PyUnicode_Replace((PyObject *)self,
subobj, replobj, count);
+#endif
else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
return NULL;
prefix = PyString_AS_STRING(subobj);
plen = PyString_GET_SIZE(subobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj)) {
int rc;
rc = PyUnicode_Tailmatch((PyObject *)self,
else
return PyInt_FromLong((long) rc);
}
+#endif
else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
return NULL;
suffix = PyString_AS_STRING(subobj);
slen = PyString_GET_SIZE(subobj);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj)) {
int rc;
rc = PyUnicode_Tailmatch((PyObject *)self,
else
return PyInt_FromLong((long) rc);
}
+#endif
else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
return NULL;
char *fmt, *res;
int fmtcnt, rescnt, reslen, arglen, argidx;
int args_owned = 0;
- PyObject *result, *orig_args, *v, *w;
+ PyObject *result, *orig_args;
+#ifdef Py_USING_UNICODE
+ PyObject *v, *w;
+#endif
PyObject *dict = NULL;
if (format == NULL || !PyString_Check(format) || args == NULL) {
PyErr_BadInternalCall();
int sign;
int len;
char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
+#ifdef Py_USING_UNICODE
char *fmt_start = fmt;
int argidx_start = argidx;
+#endif
fmt++;
if (*fmt == '(') {
break;
case 's':
case 'r':
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(v)) {
fmt = fmt_start;
argidx = argidx_start;
goto unicode;
}
+#endif
if (c == 's')
temp = PyObject_Str(v);
else
_PyString_Resize(&result, reslen - rescnt);
return result;
+#ifdef Py_USING_UNICODE
unicode:
if (args_owned) {
Py_DECREF(args);
Py_DECREF(v);
Py_DECREF(args);
return w;
+#endif /* Py_USING_UNICODE */
error:
Py_DECREF(result);
Return a string of one character with ordinal i; 0 <= i < 256.";
+#ifdef Py_USING_UNICODE
static PyObject *
builtin_unichr(PyObject *self, PyObject *args)
{
"unichr(i) -> Unicode character\n\
\n\
Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.";
+#endif
static PyObject *
if (!PyArg_ParseTuple(args, "OO|O:getattr", &v, &name, &dflt))
return NULL;
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(name)) {
name = _PyUnicode_AsDefaultEncodedString(name, NULL);
if (name == NULL)
return NULL;
}
+#endif
if (!PyString_Check(name)) {
PyErr_SetString(PyExc_TypeError,
if (!PyArg_ParseTuple(args, "OO:hasattr", &v, &name))
return NULL;
+#ifdef Py_USING_UNICODE
if (PyUnicode_Check(name)) {
name = _PyUnicode_AsDefaultEncodedString(name, NULL);
if (name == NULL)
return NULL;
}
+#endif
if (!PyString_Check(name)) {
PyErr_SetString(PyExc_TypeError,
ord = (long)((unsigned char)*PyString_AS_STRING(obj));
return PyInt_FromLong(ord);
}
+#ifdef Py_USING_UNICODE
} else if (PyUnicode_Check(obj)) {
size = PyUnicode_GET_SIZE(obj);
if (size == 1) {
ord = (long)*PyUnicode_AS_UNICODE(obj);
return PyInt_FromLong(ord);
}
+#endif
} else {
PyErr_Format(PyExc_TypeError,
"ord() expected string of length 1, but " \
{"round", builtin_round, METH_VARARGS, round_doc},
{"setattr", builtin_setattr, METH_VARARGS, setattr_doc},
{"slice", builtin_slice, METH_VARARGS, slice_doc},
+#ifdef Py_USING_UNICODE
{"unichr", builtin_unichr, METH_VARARGS, unichr_doc},
+#endif
{"vars", builtin_vars, METH_VARARGS, vars_doc},
{"xrange", builtin_xrange, METH_VARARGS, xrange_doc},
{"zip", builtin_zip, METH_VARARGS, zip_doc},
return NULL;
if (PyDict_SetItemString(dict, "type", (PyObject *) &PyType_Type) < 0)
return NULL;
+#ifdef Py_USING_UNICODE
if (PyDict_SetItemString(dict, "unicode",
(PyObject *) &PyUnicode_Type) < 0)
return NULL;
+#endif
debug = PyInt_FromLong(Py_OptimizeFlag == 0);
if (PyDict_SetItemString(dict, "__debug__", debug) < 0) {
Py_XDECREF(debug);
static PyCodeObject *icompile(node *, struct compiling *);
static PyCodeObject *jcompile(node *, char *, struct compiling *,
PyCompilerFlags *);
-static PyObject *parsestrplus(node *);
-static PyObject *parsestr(char *);
+static PyObject *parsestrplus(struct compiling*, node *);
+static PyObject *parsestr(struct compiling *, char *);
static node *get_rawdocstring(node *);
static int get_ref_type(struct compiling *, char *);
}
static PyObject *
-parsestr(char *s)
+parsestr(struct compiling *com, char *s)
{
PyObject *v;
size_t len;
int first = *s;
int quote = first;
int rawmode = 0;
+#ifdef Py_USING_UNICODE
int unicode = 0;
+#endif
if (isalpha(quote) || quote == '_') {
if (quote == 'u' || quote == 'U') {
+#ifdef Py_USING_UNICODE
quote = *++s;
unicode = 1;
+#else
+ com_error(com, PyExc_SyntaxError,
+ "Unicode literals not supported in this Python");
+ return NULL;
+#endif
}
if (quote == 'r' || quote == 'R') {
quote = *++s;
return NULL;
}
}
+#ifdef Py_USING_UNICODE
if (unicode || Py_UnicodeFlag) {
if (rawmode)
return PyUnicode_DecodeRawUnicodeEscape(
return PyUnicode_DecodeUnicodeEscape(
s, len, NULL);
}
+#endif
if (rawmode || strchr(s, '\\') == NULL)
return PyString_FromStringAndSize(s, len);
v = PyString_FromStringAndSize((char *)NULL, len);
}
static PyObject *
-parsestrplus(node *n)
+parsestrplus(struct compiling* c, node *n)
{
PyObject *v;
int i;
REQ(CHILD(n, 0), STRING);
- if ((v = parsestr(STR(CHILD(n, 0)))) != NULL) {
+ if ((v = parsestr(c, STR(CHILD(n, 0)))) != NULL) {
/* String literal concatenation */
for (i = 1; i < NCH(n); i++) {
PyObject *s;
- s = parsestr(STR(CHILD(n, i)));
+ s = parsestr(c, STR(CHILD(n, i)));
if (s == NULL)
goto onError;
if (PyString_Check(v) && PyString_Check(s)) {
if (v == NULL)
goto onError;
}
+#ifdef Py_USING_UNICODE
else {
PyObject *temp;
temp = PyUnicode_Concat(v, s);
Py_DECREF(v);
v = temp;
}
+#endif
}
}
return v;
com_push(c, 1);
break;
case STRING:
- v = parsestrplus(n);
+ v = parsestrplus(c, n);
if (v == NULL) {
c->c_errors++;
i = 255;
return i == 0;
case STRING:
- v = parsestr(STR(n));
+ v = parsestr(c, STR(n));
if (v == NULL) {
PyErr_Clear();
break;
}
static PyObject *
-get_docstring(node *n)
+get_docstring(struct compiling *c, node *n)
{
/* Don't generate doc-strings if run with -OO */
if (Py_OptimizeFlag > 1)
n = get_rawdocstring(n);
if (n == NULL)
return NULL;
- return parsestrplus(n);
+ return parsestrplus(c, n);
}
static void
int i;
PyObject *doc;
REQ(n, file_input); /* (NEWLINE | stmt)* ENDMARKER */
- doc = get_docstring(n);
+ doc = get_docstring(c, n);
if (doc != NULL) {
int i = com_addconst(c, doc);
Py_DECREF(doc);
node *ch;
REQ(n, funcdef); /* funcdef: 'def' NAME parameters ':' suite */
c->c_name = STR(CHILD(n, 1));
- doc = get_docstring(CHILD(n, 4));
+ doc = get_docstring(c, CHILD(n, 4));
if (doc != NULL) {
(void) com_addconst(c, doc);
Py_DECREF(doc);
c->c_name = STR(CHILD(n, 1));
c->c_private = c->c_name;
ch = CHILD(n, NCH(n)-1); /* The suite */
- doc = get_docstring(ch);
+ doc = get_docstring(c, ch);
if (doc != NULL) {
int i = com_addconst(c, doc);
Py_DECREF(doc);
*p = PyString_AS_STRING(arg);
*q = PyString_GET_SIZE(arg);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(arg)) {
arg = UNICODE_DEFAULT_ENCODING(arg);
if (arg == NULL)
*p = PyString_AS_STRING(arg);
*q = PyString_GET_SIZE(arg);
}
+#endif
else { /* any buffer-like object */
char *buf;
int count = convertbuffer(arg, p, &buf);
if (PyString_Check(arg))
*p = PyString_AS_STRING(arg);
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(arg)) {
arg = UNICODE_DEFAULT_ENCODING(arg);
if (arg == NULL)
arg, msgbuf);
*p = PyString_AS_STRING(arg);
}
+#endif
else
return converterr("string", arg, msgbuf);
if ((int)strlen(*p) != PyString_Size(arg))
*p = PyString_AS_STRING(arg);
*q = PyString_GET_SIZE(arg);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(arg)) {
arg = UNICODE_DEFAULT_ENCODING(arg);
if (arg == NULL)
*p = PyString_AS_STRING(arg);
*q = PyString_GET_SIZE(arg);
}
+#endif
else { /* any buffer-like object */
char *buf;
int count = convertbuffer(arg, p, &buf);
*p = 0;
else if (PyString_Check(arg))
*p = PyString_AsString(arg);
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(arg)) {
arg = UNICODE_DEFAULT_ENCODING(arg);
if (arg == NULL)
arg, msgbuf);
*p = PyString_AS_STRING(arg);
}
+#endif
else
return converterr("string or None",
arg, msgbuf);
case 'e': {/* encoded string */
char **buffer;
const char *encoding;
- PyObject *u, *s;
+ PyObject *s;
int size, recode_strings;
/* Get 'e' parameter: the encoding name */
encoding = (const char *)va_arg(*p_va, const char *);
+#ifdef Py_USING_UNICODE
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
+#endif
/* Get output buffer parameter:
's' (recode all objects via Unicode) or
Py_INCREF(s);
}
else {
+#ifdef Py_USING_UNICODE
+ PyObject *u;
+
/* Convert object to Unicode */
u = PyUnicode_FromObject(arg);
if (u == NULL)
"(encoder failed to return a string)",
arg, msgbuf);
}
+#else
+ return converterr("string<e>", arg, msgbuf);
+#endif
}
size = PyString_GET_SIZE(s);
break;
}
+#ifdef Py_USING_UNICODE
case 'u': {/* raw unicode buffer (Py_UNICODE *) */
if (*format == '#') { /* any buffer-like object */
void **p = (void **)va_arg(*p_va, char **);
}
break;
}
+#endif
case 'S': { /* string object */
PyObject **p = va_arg(*p_va, PyObject **);
break;
}
+#ifdef Py_USING_UNICODE
case 'U': { /* Unicode object */
PyObject **p = va_arg(*p_va, PyObject **);
if (PyUnicode_Check(arg))
return converterr("unicode", arg, msgbuf);
break;
}
+#endif
case 'O': { /* object */
PyTypeObject *type;
w_long((long)n, p);
w_string(PyString_AS_STRING(v), n, p);
}
+#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(v)) {
PyObject *utf8;
utf8 = PyUnicode_AsUTF8String(v);
w_string(PyString_AS_STRING(utf8), n, p);
Py_DECREF(utf8);
}
+#endif
else if (PyTuple_Check(v)) {
w_byte(TYPE_TUPLE, p);
n = PyTuple_Size(v);
}
return v;
+#ifdef Py_USING_UNICODE
case TYPE_UNICODE:
{
char *buffer;
PyMem_DEL(buffer);
return v;
}
+#endif
case TYPE_TUPLE:
n = r_long(p);
return v;
}
+#ifdef Py_USING_UNICODE
static int
_ustrlen(Py_UNICODE *u)
{
while (*v != 0) { i++; v++; }
return i;
}
+#endif
static PyObject *
do_mktuple(char **p_format, va_list *p_va, int endchar, int n)
case 'L':
return PyLong_FromLongLong((LONG_LONG)va_arg(*p_va, LONG_LONG));
#endif
+#ifdef Py_USING_UNICODE
case 'u':
{
PyObject *v;
}
return v;
}
+#endif
case 'f':
case 'd':
return PyFloat_FromDouble(
/* Init codec registry */
_PyCodecRegistry_Init();
+#ifdef Py_USING_UNICODE
/* Init Unicode implementation; relies on the codec registry */
_PyUnicode_Init();
+#endif
bimod = _PyBuiltin_Init();
if (bimod == NULL)
/* Disable signal handling */
PyOS_FiniInterrupts();
+#ifdef Py_USING_UNICODE
/* Cleanup Unicode implementation */
_PyUnicode_Fini();
+#endif
/* Cleanup Codec registry */
_PyCodecRegistry_Fini();
Return the current default string encoding used by the Unicode \n\
implementation.";
+#ifdef Py_USING_UNICODE
+
static PyObject *
sys_setdefaultencoding(PyObject *self, PyObject *args)
{
\n\
Set the current default string encoding used by the Unicode implementation.";
+#endif
+
/*
* Cached interned string objects used for calling the profile and
* trace functions. Initialized by trace_init().
{"exc_info", (PyCFunction)sys_exc_info, METH_NOARGS, exc_info_doc},
{"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc},
{"exit", sys_exit, METH_OLDARGS, exit_doc},
+#ifdef Py_USING_UNICODE
{"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, METH_NOARGS,
getdefaultencoding_doc},
+#endif
#ifdef HAVE_DLOPEN
{"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS,
getdlopenflags_doc},
#ifdef USE_MALLOPT
{"mdebug", sys_mdebug, METH_VARARGS},
#endif
+#ifdef Py_USING_UNICODE
{"setdefaultencoding", sys_setdefaultencoding, METH_VARARGS,
setdefaultencoding_doc},
+#endif
{"setcheckinterval", sys_setcheckinterval, METH_VARARGS,
setcheckinterval_doc},
#ifdef HAVE_DLOPEN
PyDict_SetItemString(sysdict, "maxint",
v = PyInt_FromLong(PyInt_GetMax()));
Py_XDECREF(v);
+#ifdef Py_USING_UNICODE
PyDict_SetItemString(sysdict, "maxunicode",
v = PyInt_FromLong(PyUnicode_GetMax()));
Py_XDECREF(v);
+#endif
PyDict_SetItemString(sysdict, "builtin_module_names",
v = list_builtin_module_names());
Py_XDECREF(v);
;;
esac
+
if test "$enable_unicode" = "no"
then
+ UNICODE_OBJS=""
echo "$ac_t""not used" 1>&6
else
+ UNICODE_OBJS="Objects/unicodeobject.o Objects/unicodectype.o"
cat >> confdefs.h <<\EOF
#define Py_USING_UNICODE 1
EOF
# check for endianness
echo $ac_n "checking whether byte ordering is bigendian""... $ac_c" 1>&6
-echo "configure:6738: checking whether byte ordering is bigendian" >&5
+echo "configure:6741: checking whether byte ordering is bigendian" >&5
if eval "test \"`echo '$''{'ac_cv_c_bigendian'+set}'`\" = set"; then
echo $ac_n "(cached) $ac_c" 1>&6
else
ac_cv_c_bigendian=unknown
# See if sys/param.h defines the BYTE_ORDER macro.
cat > conftest.$ac_ext <<EOF
-#line 6745 "configure"
+#line 6748 "configure"
#include "confdefs.h"
#include <sys/types.h>
#include <sys/param.h>
#endif
; return 0; }
EOF
-if { (eval echo configure:6756: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:6759: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
rm -rf conftest*
# It does; now see whether it defined to BIG_ENDIAN or not.
cat > conftest.$ac_ext <<EOF
-#line 6760 "configure"
+#line 6763 "configure"
#include "confdefs.h"
#include <sys/types.h>
#include <sys/param.h>
#endif
; return 0; }
EOF
-if { (eval echo configure:6771: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+if { (eval echo configure:6774: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
rm -rf conftest*
ac_cv_c_bigendian=yes
else
{ echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; }
else
cat > conftest.$ac_ext <<EOF
-#line 6791 "configure"
+#line 6794 "configure"
#include "confdefs.h"
main () {
/* Are we little or big endian? From Harbison&Steele. */
exit (u.c[sizeof (long) - 1] == 1);
}
EOF
-if { (eval echo configure:6804: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
+if { (eval echo configure:6807: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
then
ac_cv_c_bigendian=no
else
# Check whether right shifting a negative integer extends the sign bit
# or fills with zeros (like the Cray J90, according to Tim Peters).
echo $ac_n "checking whether right shift extends the sign bit""... $ac_c" 1>&6
-echo "configure:6831: checking whether right shift extends the sign bit" >&5
+echo "configure:6834: checking whether right shift extends the sign bit" >&5
if eval "test \"`echo '$''{'ac_cv_rshift_extends_sign'+set}'`\" = set"; then
echo $ac_n "(cached) $ac_c" 1>&6
else
{ echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; }
else
cat > conftest.$ac_ext <<EOF
-#line 6840 "configure"
+#line 6843 "configure"
#include "confdefs.h"
int main()
}
EOF
-if { (eval echo configure:6849: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
+if { (eval echo configure:6852: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
then
ac_cv_rshift_extends_sign=yes
else
# check for getc_unlocked and related locking functions
echo $ac_n "checking for getc_unlocked() and friends""... $ac_c" 1>&6
-echo "configure:6874: checking for getc_unlocked() and friends" >&5
+echo "configure:6877: checking for getc_unlocked() and friends" >&5
if eval "test \"`echo '$''{'ac_cv_have_getc_unlocked'+set}'`\" = set"; then
echo $ac_n "(cached) $ac_c" 1>&6
else
cat > conftest.$ac_ext <<EOF
-#line 6880 "configure"
+#line 6883 "configure"
#include "confdefs.h"
#include <stdio.h>
int main() {
; return 0; }
EOF
-if { (eval echo configure:6892: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+if { (eval echo configure:6895: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
rm -rf conftest*
ac_cv_have_getc_unlocked=yes
else
# check for readline 4.2
echo $ac_n "checking for rl_completion_matches in -lreadline""... $ac_c" 1>&6
-echo "configure:6915: checking for rl_completion_matches in -lreadline" >&5
+echo "configure:6918: checking for rl_completion_matches in -lreadline" >&5
ac_lib_var=`echo readline'_'rl_completion_matches | sed 'y%./+-%__p_%'`
if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then
echo $ac_n "(cached) $ac_c" 1>&6
ac_save_LIBS="$LIBS"
LIBS="-lreadline -ltermcap $LIBS"
cat > conftest.$ac_ext <<EOF
-#line 6923 "configure"
+#line 6926 "configure"
#include "confdefs.h"
/* Override any gcc2 internal prototype to avoid an error. */
/* We use char because int might match the return type of a gcc2
rl_completion_matches()
; return 0; }
EOF
-if { (eval echo configure:6934: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+if { (eval echo configure:6937: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
rm -rf conftest*
eval "ac_cv_lib_$ac_lib_var=yes"
else
echo $ac_n "checking for broken nice()""... $ac_c" 1>&6
-echo "configure:6959: checking for broken nice()" >&5
+echo "configure:6962: checking for broken nice()" >&5
if eval "test \"`echo '$''{'ac_cv_broken_nice'+set}'`\" = set"; then
echo $ac_n "(cached) $ac_c" 1>&6
else
{ echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; }
else
cat > conftest.$ac_ext <<EOF
-#line 6968 "configure"
+#line 6971 "configure"
#include "confdefs.h"
int main()
}
EOF
-if { (eval echo configure:6980: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
+if { (eval echo configure:6983: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
then
ac_cv_broken_nice=yes
else
#endif
EOF
echo $ac_n "checking for socklen_t""... $ac_c" 1>&6
-echo "configure:7011: checking for socklen_t" >&5
+echo "configure:7014: checking for socklen_t" >&5
if eval "test \"`echo '$''{'ac_cv_type_socklen_t'+set}'`\" = set"; then
echo $ac_n "(cached) $ac_c" 1>&6
else
cat > conftest.$ac_ext <<EOF
-#line 7016 "configure"
+#line 7019 "configure"
#include "confdefs.h"
#include <sys/types.h>
#if STDC_HEADERS
SRCDIRS="Parser Grammar Objects Python Modules"
echo $ac_n "checking for build directories""... $ac_c" 1>&6
-echo "configure:7065: checking for build directories" >&5
+echo "configure:7068: checking for build directories" >&5
for dir in $SRCDIRS; do
if test ! -d $dir; then
mkdir $dir
s%@HAVE_GETHOSTBYNAME@%$HAVE_GETHOSTBYNAME%g
s%@LIBM@%$LIBM%g
s%@LIBC@%$LIBC%g
+s%@UNICODE_OBJS@%$UNICODE_OBJS%g
s%@SRCDIRS@%$SRCDIRS%g
CEOF
;;
esac
+AC_SUBST(UNICODE_OBJS)
if test "$enable_unicode" = "no"
then
+ UNICODE_OBJS=""
AC_MSG_RESULT(not used)
else
+ UNICODE_OBJS="Objects/unicodeobject.o Objects/unicodectype.o"
AC_DEFINE(Py_USING_UNICODE)
if test "$unicode_size" = "$ac_cv_sizeof_wchar_t"
then
if '/usr/local/include' not in self.compiler.include_dirs:
self.compiler.include_dirs.insert(0, '/usr/local/include' )
+ try:
+ have_unicode = unicode
+ except NameError:
+ have_unicode = 0
+
# lib_dirs and inc_dirs are used to search for files;
# if a file is found in one of those directories, it can
# be assumed that no additional -I,-L directives are needed.
# Python C API test module
exts.append( Extension('_testcapi', ['_testcapimodule.c']) )
# static Unicode character database
- exts.append( Extension('unicodedata', ['unicodedata.c']) )
+ if have_unicode:
+ exts.append( Extension('unicodedata', ['unicodedata.c']) )
# access to ISO C locale support
exts.append( Extension('_locale', ['_localemodule.c']) )