# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
-import StringIO
+from io import BytesIO
import unittest
import pyexpat
[0, 0],
]
- def test_returns_unicode(self):
- for x, y in self.set_get_pairs:
- self.parser.returns_unicode = x
- self.assertEquals(self.parser.returns_unicode, y)
-
def test_ordered_attributes(self):
for x, y in self.set_get_pairs:
self.parser.ordered_attributes = x
self.assertEquals(self.parser.specified_attributes, y)
-data = '''\
+data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
'ExternalEntityRefHandler'
]
- def test_utf8(self):
-
- out = self.Outputter()
- parser = expat.ParserCreate(namespace_separator='!')
- for name in self.handler_names:
- setattr(parser, name, getattr(out, name))
- parser.returns_unicode = 0
- parser.Parse(data, 1)
-
- # Verify output
- op = out.out
+ def _verify_parse_output(self, op):
self.assertEquals(op[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'')
self.assertEquals(op[1], "Comment: ' comment data '")
self.assertEquals(op[2], "Notation declared: ('notation', None, 'notation.jpeg', None)")
self.assertEquals(op[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')")
- self.assertEquals(op[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}")
+ self.assertEquals(op[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\u1f40'}")
self.assertEquals(op[5], "NS decl: 'myns' 'http://www.python.org/namespace'")
self.assertEquals(op[6], "Start element: 'http://www.python.org/namespace!subelement' {}")
self.assertEquals(op[7], "Character data: 'Contents of subelements'")
self.assertEquals(op[15], "External entity ref: (None, 'entity.file', None)")
self.assertEquals(op[16], "End element: 'root'")
+
def test_unicode(self):
# Feed `data` to a namespace-aware parser in a single Parse() call and
# check the output recorded by the Outputter via _verify_parse_output().
out = self.Outputter()
parser = expat.ParserCreate(namespace_separator='!')
- parser.returns_unicode = 1
for name in self.handler_names:
setattr(parser, name, getattr(out, name))
parser.Parse(data, 1)
op = out.out
- self.assertEquals(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
- self.assertEquals(op[1], "Comment: u' comment data '")
- self.assertEquals(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
- self.assertEquals(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
- self.assertEquals(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
- self.assertEquals(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
- self.assertEquals(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
- self.assertEquals(op[7], "Character data: u'Contents of subelements'")
- self.assertEquals(op[8], "End element: u'http://www.python.org/namespace!subelement'")
- self.assertEquals(op[9], "End of NS decl: u'myns'")
- self.assertEquals(op[10], "Start element: u'sub2' {}")
- self.assertEquals(op[11], 'Start of CDATA section')
- self.assertEquals(op[12], "Character data: u'contents of CDATA section'")
- self.assertEquals(op[13], 'End of CDATA section')
- self.assertEquals(op[14], "End element: u'sub2'")
- self.assertEquals(op[15], "External entity ref: (None, u'entity.file', None)")
- self.assertEquals(op[16], "End element: u'root'")
+ self._verify_parse_output(op)
def test_parse_file(self):
# Parse `data` through ParseFile() from an in-memory file object and
# check the output recorded by the Outputter via _verify_parse_output().
out = self.Outputter()
parser = expat.ParserCreate(namespace_separator='!')
- parser.returns_unicode = 1
for name in self.handler_names:
setattr(parser, name, getattr(out, name))
- file = StringIO.StringIO(data)
+ file = BytesIO(data)
parser.ParseFile(file)
op = out.out
- self.assertEquals(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
- self.assertEquals(op[1], "Comment: u' comment data '")
- self.assertEquals(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
- self.assertEquals(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
- self.assertEquals(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
- self.assertEquals(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
- self.assertEquals(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
- self.assertEquals(op[7], "Character data: u'Contents of subelements'")
- self.assertEquals(op[8], "End element: u'http://www.python.org/namespace!subelement'")
- self.assertEquals(op[9], "End of NS decl: u'myns'")
- self.assertEquals(op[10], "Start element: u'sub2' {}")
- self.assertEquals(op[11], 'Start of CDATA section')
- self.assertEquals(op[12], "Character data: u'contents of CDATA section'")
- self.assertEquals(op[13], 'End of CDATA section')
- self.assertEquals(op[14], "End element: u'sub2'")
- self.assertEquals(op[15], "External entity ref: (None, u'entity.file', None)")
- self.assertEquals(op[16], "End element: u'root'")
-
+ self._verify_parse_output(op)
class NamespaceSeparatorTest(unittest.TestCase):
def test_legal(self):
PyObject_HEAD
XML_Parser itself;
- int returns_unicode; /* True if Unicode strings are returned;
- if false, UTF-8 strings are returned */
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
}
-/* Convert a string of XML_Chars into an 8-bit Python string.
- Returns None if str is a null pointer. */
-
-static PyObject *
-conv_string_to_utf8(const XML_Char *str)
-{
- /* XXX currently this code assumes that XML_Char is 8-bit,
- and hence in UTF-8. */
- /* UTF-8 from Expat, UTF-8 desired */
- if (str == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- return PyString_FromString(str);
-}
-
-static PyObject *
-conv_string_len_to_utf8(const XML_Char *str, int len)
-{
- /* XXX currently this code assumes that XML_Char is 8-bit,
- and hence in UTF-8. */
- /* UTF-8 from Expat, UTF-8 desired */
- if (str == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- return PyString_FromStringAndSize((const char *)str, len);
-}
-
/* Callback routines */
static void clear_handlers(xmlparseobject *self, int initial);
return res;
}
-/* Python 2.0 and later versions, when built with Unicode support */
-#define STRING_CONV_FUNC (self->returns_unicode \
- ? conv_string_to_unicode : conv_string_to_utf8)
-
static PyObject*
string_intern(xmlparseobject *self, const char* str)
{
- PyObject *result = STRING_CONV_FUNC(str);
+ PyObject *result = conv_string_to_unicode(str);
PyObject *value;
/* result can be NULL if the unicode conversion failed. */
if (!result)
args = PyTuple_New(1);
if (args == NULL)
return -1;
- temp = (self->returns_unicode
- ? conv_string_len_to_unicode(buffer, len)
- : conv_string_len_to_utf8(buffer, len));
+ temp = (conv_string_len_to_unicode(buffer, len));
if (temp == NULL) {
Py_DECREF(args);
flag_error(self);
Py_DECREF(container);
return;
}
- v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
+ v = conv_string_to_unicode((XML_Char *) atts[i+1]);
if (v == NULL) {
flag_error(self);
Py_DECREF(container);
(void *userData,
const XML_Char *target,
const XML_Char *data),
- ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
+ ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
VOID_HANDLER(UnparsedEntityDecl,
(void *userData,
const XML_Char *notationName),
("NiNNNNN",
string_intern(self, entityName), is_parameter_entity,
- (self->returns_unicode
- ? conv_string_len_to_unicode(value, value_length)
- : conv_string_len_to_utf8(value, value_length)),
+ (conv_string_len_to_unicode(value, value_length)),
string_intern(self, base), string_intern(self, systemId),
string_intern(self, publicId),
string_intern(self, notationName)))
const XML_Char *encoding,
int standalone),
("(O&O&i)",
- STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
+ conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
standalone))
static PyObject *
if (flush_character_buffer(self) < 0)
goto finally;
- modelobj = conv_content_model(model,
- (self->returns_unicode
- ? conv_string_to_unicode
- : conv_string_to_utf8));
+ modelobj = conv_content_model(model, (conv_string_to_unicode));
if (modelobj == NULL) {
flag_error(self);
goto finally;
int isrequired),
("(NNO&O&i)",
string_intern(self, elname), string_intern(self, attname),
- STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
+ conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
isrequired))
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(Comment,
(void *userData, const XML_Char *data),
- ("(O&)", STRING_CONV_FUNC,data))
+ ("(O&)", conv_string_to_unicode ,data))
VOID_HANDLER(StartCdataSection,
(void *userData),
VOID_HANDLER(Default,
(void *userData, const XML_Char *s, int len),
- ("(N)", (self->returns_unicode
- ? conv_string_len_to_unicode(s,len)
- : conv_string_len_to_utf8(s,len))))
+ ("(N)", (conv_string_len_to_unicode(s,len))))
VOID_HANDLER(DefaultHandlerExpand,
(void *userData, const XML_Char *s, int len),
- ("(N)", (self->returns_unicode
- ? conv_string_len_to_unicode(s,len)
- : conv_string_len_to_utf8(s,len))))
+ ("(N)", (conv_string_len_to_unicode(s,len))))
INT_HANDLER(NotStandalone,
(void *userData),
const XML_Char *publicId),
int rc=0;,
("(O&NNN)",
- STRING_CONV_FUNC,context, string_intern(self, base),
+ conv_string_to_unicode ,context, string_intern(self, base),
string_intern(self, systemId), string_intern(self, publicId)),
rc = PyInt_AsLong(rv);, rc,
XML_GetUserData(parser))
goto finally;
/* XXX what to do if it returns a Unicode string? */
- if (!PyString_Check(str)) {
+ if (!PyBytes_Check(str)) {
PyErr_Format(PyExc_TypeError,
- "read() did not return a string object (type=%.400s)",
+ "read() did not return a bytes object (type=%.400s)",
Py_Type(str)->tp_name);
goto finally;
}
- len = PyString_GET_SIZE(str);
+ len = PyBytes_GET_SIZE(str);
if (len > buf_size) {
PyErr_Format(PyExc_ValueError,
"read() returned too much data: "
buf_size, len);
goto finally;
}
- memcpy(buf, PyString_AsString(str), len);
+ memcpy(buf, PyBytes_AsString(str), len);
finally:
Py_XDECREF(arg);
Py_XDECREF(str);
= XML_GetInputContext(self->itself, &offset, &size);
if (buffer != NULL)
- return PyString_FromStringAndSize(buffer + offset,
+ return PyBytes_FromStringAndSize(buffer + offset,
size - offset);
else
Py_RETURN_NONE;
}
else
new_parser->buffer = NULL;
- new_parser->returns_unicode = self->returns_unicode;
new_parser->ordered_attributes = self->ordered_attributes;
new_parser->specified_attributes = self->specified_attributes;
new_parser->in_callback = 0;
if (self == NULL)
return NULL;
- self->returns_unicode = 1;
-
self->buffer = NULL;
self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
self->buffer_used = 0;
return get_pybool(self->ns_prefixes);
if (strcmp(name, "ordered_attributes") == 0)
return get_pybool(self->ordered_attributes);
- if (strcmp(name, "returns_unicode") == 0)
- return get_pybool((long) self->returns_unicode);
if (strcmp(name, "specified_attributes") == 0)
return get_pybool((long) self->specified_attributes);
if (strcmp(name, "intern") == 0) {
APPEND(rc, "buffer_used");
APPEND(rc, "namespace_prefixes");
APPEND(rc, "ordered_attributes");
- APPEND(rc, "returns_unicode");
APPEND(rc, "specified_attributes");
APPEND(rc, "intern");
self->ordered_attributes = 0;
return 0;
}
- if (strcmp(name, "returns_unicode") == 0) {
- if (PyObject_IsTrue(v)) {
- self->returns_unicode = 1;
- }
- else
- self->returns_unicode = 0;
- return 0;
- }
if (strcmp(name, "specified_attributes") == 0) {
if (PyObject_IsTrue(v))
self->specified_attributes = 1;