--- /dev/null
+/* stringlib: replace implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+/* Overwrite occurrences of u1 with u2 directly inside the buffer [s, end).
+
+   The character at s is rewritten unconditionally, so s is presumably
+   expected to already point at an occurrence of u1 (confirm against the
+   caller).  With maxcount >= 1, at most maxcount characters are rewritten,
+   that first one included; the scan also stops as soon as end is reached. */
+Py_LOCAL_INLINE(void)
+STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
+                                 Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+    *s = u2;
+    for (;;) {
+        /* Done once the replacement budget is spent or the buffer ends. */
+        if (--maxcount == 0)
+            return;
+        if (++s == end)
+            return;
+
+        /* Locate the next character to replace.
+
+           When matches are dense, stepping through the buffer inline is
+           the cheapest way to find the next one.  When they are sparse, a
+           library search wins because the cost of the call is spread over
+           the long run of characters it skips.  So: step inline for a
+           bounded number of characters, then hand over to the library
+           search if nothing turned up. */
+        if (*s != u1) {
+            int budget = 10;
+            for (;;) {
+                ++s;
+                if (s == end)
+                    return;
+                if (*s == u1)
+                    break;
+                budget--;
+                if (budget == 0) {
+                    /* Inline scan exhausted: search the rest of the buffer
+                       with memchr() or FASTSEARCH(), starting just past
+                       the character that was examined last. */
+#if STRINGLIB_SIZEOF_CHAR == 1
+                    s = memchr(s + 1, u1, end - (s + 1));
+                    if (s == NULL)
+                        return;
+#else
+                    STRINGLIB_CHAR needle = (STRINGLIB_CHAR) u1;
+                    Py_ssize_t offset;
+                    s++;
+                    offset = FASTSEARCH(s, end - s, &needle, 1, 0, FAST_SEARCH);
+                    if (offset < 0)
+                        return;
+                    s += offset;
+#endif
+                    /* s now points at a match; leave the scan loop */
+                    break;
+                }
+            }
+        }
+        *s = u2;
+    }
+}
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
return 0;
}
+/* Replace u1 by u2 in place in the character buffer of u, starting at
+   index pos and running to the end of the string.  The work is forwarded
+   to the stringlib routine that matches the storage kind of u; maxcount is
+   passed through unchanged. */
+static void
+replace_1char_inplace(PyObject *u, Py_ssize_t pos,
+                      Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+    int kind = PyUnicode_KIND(u);
+    void *data = PyUnicode_DATA(u);
+    Py_ssize_t len = PyUnicode_GET_LENGTH(u);
+
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND: {
+        Py_UCS1 *base = (Py_UCS1 *)data;
+        ucs1lib_replace_1char_inplace(base + pos, base + len,
+                                      u1, u2, maxcount);
+        break;
+    }
+    case PyUnicode_2BYTE_KIND: {
+        Py_UCS2 *base = (Py_UCS2 *)data;
+        ucs2lib_replace_1char_inplace(base + pos, base + len,
+                                      u1, u2, maxcount);
+        break;
+    }
+    default: {
+        /* Any kind that is not 1- or 2-byte is treated as 4-byte. */
+        Py_UCS4 *base = (Py_UCS4 *)data;
+        assert(kind == PyUnicode_4BYTE_KIND);
+        ucs4lib_replace_1char_inplace(base + pos, base + len,
+                                      u1, u2, maxcount);
+        break;
+    }
+    }
+}
+
static PyObject *
replace(PyObject *self, PyObject *str1,
PyObject *str2, Py_ssize_t maxcount)
Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1);
Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2);
int mayshrink;
- Py_UCS4 maxchar, maxchar_str2;
+ Py_UCS4 maxchar, maxchar_str1, maxchar_str2;
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
if (str1 == str2)
goto nothing;
- if (skind < kind1)
- /* substring too wide to be present */
- goto nothing;
maxchar = PyUnicode_MAX_CHAR_VALUE(self);
+ maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1);
+ if (maxchar < maxchar_str1)
+ /* substring too wide to be present */
+ goto nothing;
maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2);
/* Replacing str1 with str2 may cause a maxchar reduction in the
result string. */
- mayshrink = (maxchar_str2 < maxchar);
+ mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1);
maxchar = MAX_MAXCHAR(maxchar, maxchar_str2);
if (len1 == len2) {
if (len1 == 1) {
/* replace characters */
Py_UCS4 u1, u2;
- int rkind;
- Py_ssize_t index, pos;
- char *src, *rbuf;
+ Py_ssize_t pos;
u1 = PyUnicode_READ(kind1, buf1, 0);
- pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
+ pos = findchar(sbuf, skind, slen, u1, 1);
if (pos < 0)
goto nothing;
u2 = PyUnicode_READ(kind2, buf2, 0);
u = PyUnicode_New(slen, maxchar);
if (!u)
goto error;
- _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
- rkind = PyUnicode_KIND(u);
- rbuf = PyUnicode_DATA(u);
- PyUnicode_WRITE(rkind, rbuf, pos, u2);
- index = 0;
- src = sbuf;
- while (--maxcount)
- {
- pos++;
- src += pos * PyUnicode_KIND(self);
- slen -= pos;
- index += pos;
- pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1);
- if (pos < 0)
- break;
- PyUnicode_WRITE(rkind, rbuf, index + pos, u2);
- }
+ _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
+ replace_1char_inplace(u, pos, u1, u2, maxcount);
}
else {
int rkind = skind;