self.checkequal('A', 'a', 'title')
self.checkequal(True, 'a', 'islower')
+ def test_partition(self):
+     # Separator in the middle of an ordinary sentence.
+     self.checkequal(('this', ' is ', 'the partition method'),
+         'this is the partition method', 'partition', ' is ')
+
+     # Cases from Raymond's original specification: separator in the
+     # middle, absent, at the very start, and at the very end.
+     url = 'http://www.python.org'
+     spec_cases = (
+         ('://', ('http', '://', 'www.python.org')),
+         ('?', ('http://www.python.org', '', '')),
+         ('http://', ('', 'http://', 'www.python.org')),
+         ('org', ('http://www.python.', 'org', '')),
+     )
+     for sep, expected in spec_cases:
+         self.checkequal(expected, url, 'partition', sep)
+
+     # An empty separator is a ValueError; a non-string one a TypeError.
+     for exc, bad_sep in ((ValueError, ''), (TypeError, None)):
+         self.checkraises(exc, url, 'partition', bad_sep)
+
class MixinStrStringUserStringTest:
# Additional tests for 8bit strings, i.e. str, UserString and
{
Py_ssize_t len = PyString_GET_SIZE(self), sep_len, pos;
const char *str = PyString_AS_STRING(self), *sep;
- PyObject *sepobj;
+ PyObject *sep_obj;
PyObject * out;
- if (!PyArg_ParseTuple(args, "O:partition", &sepobj))
+ if (!PyArg_ParseTuple(args, "O:partition", &sep_obj))
return NULL;
- if (PyString_Check(sepobj)) {
- sep = PyString_AS_STRING(sepobj);
- sep_len = PyString_GET_SIZE(sepobj);
+ if (PyString_Check(sep_obj)) {
+ sep = PyString_AS_STRING(sep_obj);
+ sep_len = PyString_GET_SIZE(sep_obj);
}
-#ifdef Py_USING_UNICODE_NOTYET
- else if (PyUnicode_Check(sepobj))
- return PyUnicode_Partition((PyObject *)self, sepobj);
+#ifdef Py_USING_UNICODE
+ else if (PyUnicode_Check(sep_obj))
+ return PyUnicode_Partition((PyObject *)self, sep_obj);
#endif
- else if (PyObject_AsCharBuffer(sepobj, &sep, &sep_len))
+ else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
return NULL;
if (sep_len == 0) {
Py_INCREF(nullstring);
PyTuple_SET_ITEM(out, 2, (PyObject*) nullstring);
} else {
- Py_INCREF(sepobj);
+ PyObject* obj;
PyTuple_SET_ITEM(out, 0, PyString_FromStringAndSize(str, pos));
- PyTuple_SET_ITEM(out, 1, sepobj);
- PyTuple_SET_ITEM(out, 2,
- PyString_FromStringAndSize(str + sep_len + pos,
- len - sep_len - pos)
- );
+ Py_INCREF(sep_obj);
+ PyTuple_SET_ITEM(out, 1, sep_obj);
+ pos += sep_len;
+ obj = PyString_FromStringAndSize(str + pos, len - pos);
+ PyTuple_SET_ITEM(out, 2, obj);
if (PyErr_Occurred()) {
Py_DECREF(out);
return NULL;
modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
Unicode Integration Proposal (see file Misc/unicode.txt).
+Major speed upgrades to the method implementations at the Reykjavik
+NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
+
Copyright (c) Corporation for National Research Initiatives.
--------------------------------------------------------------------
/* Resizing shared object (unicode_empty or single character
objects) in-place is not allowed. Use PyUnicode_Resize()
instead ! */
+
if (unicode == unicode_empty ||
(unicode->length == 1 &&
unicode->str[0] < 256U &&
return -1;
}
- /* We allocate one more byte to make sure the string is
- Ux0000 terminated -- XXX is this needed ? */
+ /* We allocate one more byte to make sure the string is Ux0000 terminated.
+ The overallocation is also used by fastsearch, which assumes that it's
+ safe to look at str[length] (without making any assumptions about what
+ it contains). */
+
oldstr = unicode->str;
PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1);
if (!unicode->str) {
/* --- Helpers ------------------------------------------------------------ */
-#define USE_FAST /* experimental fast search implementation */
-
/* fast search/count implementation, based on a mix between boyer-
moore and horspool, with a few more bells and whistles on the top.
for some more background, see: http://effbot.org/stringlib */
/* miss: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F))))
i = i + m;
- else {
+ else
i = i + skip;
- continue;
- }
} else {
/* skip: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F))))
if (substring->length == 0)
return (end - start + 1);
-#ifdef USE_FAST
count = fastsearch(
PyUnicode_AS_UNICODE(self) + start, end - start,
substring->str, substring->length, FAST_COUNT
);
+
if (count < 0)
count = 0; /* no match */
-#else
- end -= substring->length;
-
- while (start <= end)
- if (Py_UNICODE_MATCH(self, start, substring)) {
- count++;
- start += substring->length;
- } else
- start++;
-#endif
return count;
}
if (substring->length == 0)
return (direction > 0) ? start : end;
-#ifdef USE_FAST
if (direction > 0) {
Py_ssize_t pos = fastsearch(
PyUnicode_AS_UNICODE(self) + start, end - start,
substring->str, substring->length, FAST_SEARCH
);
- if (pos < 0)
- return pos;
- return pos + start;
- }
-#endif
-
- end -= substring->length;
-
- if (direction < 0) {
+ if (pos >= 0)
+ return pos + start;
+ } else {
+ end -= substring->length;
for (; end >= start; end--)
if (Py_UNICODE_MATCH(self, end, substring))
return end;
- } else {
- for (; start <= end; start++)
- if (Py_UNICODE_MATCH(self, start, substring))
- return start;
}
-
return -1;
}
PyObject *element)
{
PyUnicodeObject *u, *v;
- int result;
Py_ssize_t size;
-#ifdef USE_FAST
Py_ssize_t pos;
-#endif
/* Coerce the two arguments */
v = (PyUnicodeObject *) PyUnicode_FromObject(element);
size = PyUnicode_GET_SIZE(v);
if (!size) {
- result = 1;
+ pos = 0;
goto done;
}
-#ifdef USE_FAST
pos = fastsearch(
PyUnicode_AS_UNICODE(u), PyUnicode_GET_SIZE(u),
PyUnicode_AS_UNICODE(v), size, FAST_SEARCH
);
- result = (pos != -1);
-#else
- result = 0;
-
- if (size == 1) {
- Py_UNICODE chr = PyUnicode_AS_UNICODE(v)[0];
- Py_UNICODE* ptr = PyUnicode_AS_UNICODE(u);
- Py_UNICODE* end = ptr + PyUnicode_GET_SIZE(u);
- for (; ptr < end; ptr++) {
- if (*ptr == chr) {
- result = 1;
- break;
- }
- }
- } else {
- Py_ssize_t start = 0;
- Py_ssize_t end = PyUnicode_GET_SIZE(u) - size;
- for (; start <= end; start++)
- if (Py_UNICODE_MATCH(u, start, v)) {
- result = 1;
- break;
- }
- }
-#endif
done:
Py_DECREF(u);
Py_DECREF(v);
- return result;
+ return (pos != -1);
}
/* Concat to string or Unicode object giving a new Unicode object. */
return PyUnicode_Split((PyObject *)self, substring, maxcount);
}
+PyObject *
+PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
+{
+    /* Split str_in around the first occurrence of sep_in.
+
+       Both arguments are coerced with PyUnicode_FromObject().  Returns a
+       new 3-tuple (head, sep, tail); when the separator is not found the
+       tuple is (str, empty, empty).  Returns NULL with an exception set
+       if coercion or allocation fails, or with ValueError if the
+       separator is empty. */
+    PyObject* str_obj;
+    PyObject* sep_obj;
+    Py_UNICODE *str, *sep;
+    Py_ssize_t len, sep_len, pos;
+    PyObject* out;
+
+    str_obj = PyUnicode_FromObject(str_in);
+    if (!str_obj)
+        return NULL;
+    sep_obj = PyUnicode_FromObject(sep_in);
+    if (!sep_obj)
+        goto error;
+
+    str = PyUnicode_AS_UNICODE(str_obj);
+    len = PyUnicode_GET_SIZE(str_obj);
+
+    sep = PyUnicode_AS_UNICODE(sep_obj);
+    sep_len = PyUnicode_GET_SIZE(sep_obj);
+
+    if (sep_len == 0) {
+        PyErr_SetString(PyExc_ValueError, "empty separator");
+        goto error;
+    }
+
+    out = PyTuple_New(3);
+    if (!out)
+        goto error;
+
+    pos = fastsearch(str, len, sep, sep_len, FAST_SEARCH);
+    if (pos < 0) {
+        /* Not found: (str, u"", u"").  PyTuple_SET_ITEM steals a
+           reference, so each stored object is INCREF'ed first. */
+        Py_INCREF(str_obj);
+        PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
+        Py_INCREF(unicode_empty);
+        PyTuple_SET_ITEM(out, 1, (PyObject*) unicode_empty);
+        Py_INCREF(unicode_empty);
+        PyTuple_SET_ITEM(out, 2, (PyObject*) unicode_empty);
+    } else {
+        PyObject* head = PyUnicode_FromUnicode(str, pos);
+        PyObject* tail = PyUnicode_FromUnicode(str + sep_len + pos,
+                                               len - sep_len - pos);
+        /* Check the allocations directly rather than via the global
+           error indicator. */
+        if (!head || !tail) {
+            Py_XDECREF(head);
+            Py_XDECREF(tail);
+            Py_DECREF(out);
+            goto error;
+        }
+        PyTuple_SET_ITEM(out, 0, head);
+        Py_INCREF(sep_obj);
+        PyTuple_SET_ITEM(out, 1, sep_obj);
+        PyTuple_SET_ITEM(out, 2, tail);
+    }
+
+    /* Drop the references obtained from PyUnicode_FromObject(); the tuple
+       holds its own references to whatever it stores.  Without this,
+       every successful call leaked one reference to each object. */
+    Py_DECREF(sep_obj);
+    Py_DECREF(str_obj);
+    return out;
+
+error:
+    Py_XDECREF(sep_obj);
+    Py_DECREF(str_obj);
+    return NULL;
+}
+
+/* Docstring for the partition() method; referenced as partition__doc__
+   by the "partition" entry in the method table. */
+PyDoc_STRVAR(partition__doc__,
+"S.partition(sep) -> (head, sep, tail)\n\
+\n\
+Searches for the separator sep in S, and returns the part before it,\n\
+the separator itself, and the part after it. If the separator is not\n\
+found, returns S and two empty strings.");
+
+static PyObject*
+unicode_partition(PyUnicodeObject *self, PyObject *args)
+{
+    /* Method-table entry point for partition(sep): unpack the single
+       argument and delegate to PyUnicode_Partition(). */
+    PyObject *separator = NULL;
+
+    if (PyArg_ParseTuple(args, "O:partition", &separator))
+        return PyUnicode_Partition((PyObject *)self, separator);
+
+    return NULL;
+}
+
PyObject *PyUnicode_RSplit(PyObject *s,
PyObject *sep,
Py_ssize_t maxsplit)
{"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
{"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
{"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
+ {"partition", (PyCFunction) unicode_partition, METH_VARARGS, partition__doc__},
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},