From: Fredrik Lundh Date: Sun, 28 May 2006 12:06:46 +0000 (+0000) Subject: needforspeed: added Py_MEMCPY macro (currently tuned for Visual C only), X-Git-Tag: v2.5b1~386 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=80f8e80c15a784a84f77f4895318d13b831b017e;p=python needforspeed: added Py_MEMCPY macro (currently tuned for Visual C only), and use it for string copy operations. this gives a 20% speedup on some string benchmarks. --- diff --git a/Include/pyport.h b/Include/pyport.h index 74ce993497..47b9f70628 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -174,6 +174,27 @@ typedef Py_intptr_t Py_ssize_t; #define Py_LOCAL_INLINE(type) static type #endif +/* Py_MEMCPY can be used instead of memcpy in cases where the copied blocks + * are often very short. While most platforms have highly optimized code for + * large transfers, the setup costs for memcpy are often quite high. MEMCPY + * solves this by doing short copies "in line". + */ + +#if defined(_MSC_VER) +#define Py_MEMCPY(target, source, length) do { \ + size_t i_, n_ = (length); \ + char *t_ = (void*) (target); \ + const char *s_ = (void*) (source); \ + if (n_ >= 16) \ + memcpy(t_, s_, n_); \ + else \ + for (i_ = 0; i_ < n_; i_++) \ + t_[i_] = s_[i_]; \ + } while (0) +#else +#define Py_MEMCPY memcpy +#endif + #include #include /* Moved here from the math section, before extern "C" */ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 0531aedb30..8c39cfe2d6 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -357,15 +357,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE; Py_UNICODE_ISDIGIT(ch) || \ Py_UNICODE_ISNUMERIC(ch)) -/* memcpy has a considerable setup overhead on many platforms; use a - loop for short strings (the "16" below is pretty arbitary) */ -#define Py_UNICODE_COPY(target, source, length) do\ - {Py_ssize_t i_; Py_UNICODE *t_ = (target); const Py_UNICODE *s_ = (source);\ - if (length > 16)\ - memcpy(t_, s_, (length)*sizeof(Py_UNICODE));\ - else\ - for (i_ = 0; i_ < (length); i_++) t_[i_] = s_[i_];\ - } while (0) +#define Py_UNICODE_COPY(target, source, length) \ + Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) #define Py_UNICODE_FILL(target, value, length) do\ {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 6108c39a3b..a13f45867d 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -23,7 +23,6 @@ static PyStringObject *nullstring; */ static PyObject *interned; - /* For both PyString_FromString() and PyString_FromStringAndSize(), the parameter `size' denotes number of characters to allocate, not counting any @@ -80,7 +79,7 @@ PyString_FromStringAndSize(const char *str, Py_ssize_t size) op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; if (str != NULL) - memcpy(op->ob_sval, str, size); + Py_MEMCPY(op->ob_sval, str, size); op->ob_sval[size] = '\0'; /* share short strings */ if (size == 0) { @@ -134,7 +133,7 @@ PyString_FromString(const char *str) PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; - memcpy(op->ob_sval, str, size+1); + Py_MEMCPY(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { PyObject *t = (PyObject *)op; @@ -162,7 +161,7 @@ PyString_FromFormatV(const char *format, va_list vargs) PyObject* string; #ifdef VA_LIST_IS_ARRAY - memcpy(count, vargs, sizeof(va_list)); + Py_MEMCPY(count, vargs, sizeof(va_list)); #else #ifdef __va_copy __va_copy(count, vargs); @@ -304,7 +303,7 @@ PyString_FromFormatV(const char *format, va_list vargs) i = strlen(p); if (n > 0 && i > n) i = n; - memcpy(s, p, i); + Py_MEMCPY(s, p, i); s += i; break; case 'p': @@ -583,7 +582,7 @@ PyObject *PyString_DecodeEscape(const char *s, assert(PyString_Check(w)); r = PyString_AS_STRING(w); rn = PyString_GET_SIZE(w); - memcpy(p, r, rn); + Py_MEMCPY(p, r, rn); p += rn; Py_DECREF(w); s = t; @@ -967,8 +966,8 @@ string_concat(register PyStringObject *a, register PyObject *bb) PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; - memcpy(op->ob_sval, a->ob_sval, a->ob_size); - memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size); + Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size); + Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size); op->ob_sval[size] = '\0'; return (PyObject *) op; #undef b @@ -1017,12 +1016,12 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n) } i = 0; if (i < size) { - memcpy(op->ob_sval, a->ob_sval, a->ob_size); + Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size); i = a->ob_size; } while (i < size) { j = (i <= size-i) ? i : size-i; - memcpy(op->ob_sval+i, op->ob_sval, j); + Py_MEMCPY(op->ob_sval+i, op->ob_sval, j); i += j; } return (PyObject *) op; @@ -1808,10 +1807,10 @@ string_join(PyStringObject *self, PyObject *orig) size_t n; item = PySequence_Fast_GET_ITEM(seq, i); n = PyString_GET_SIZE(item); - memcpy(p, PyString_AS_STRING(item), n); + Py_MEMCPY(p, PyString_AS_STRING(item), n); p += n; if (i < seqlen - 1) { - memcpy(p, sep, seplen); + Py_MEMCPY(p, sep, seplen); p += seplen; } } @@ -1851,7 +1850,6 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir) Py_ssize_t sub_len; Py_ssize_t start=0, end=PY_SSIZE_T_MAX; - /* XXX ssize_t i */ if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return -2; @@ -1865,6 +1863,8 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir) (PyObject *)self, subobj, start, end, dir); #endif else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) + /* XXX - the "expected a character buffer object" is pretty + confusing for a non-expert. remap to something else ? */ return -2; if (dir > 0) @@ -2131,7 +2131,7 @@ string_lower(PyStringObject *self) s = PyString_AS_STRING(newobj); - memcpy(s, PyString_AS_STRING(self), n); + Py_MEMCPY(s, PyString_AS_STRING(self), n); for (i = 0; i < n; i++) { int c = Py_CHARMASK(s[i]); @@ -2164,7 +2164,7 @@ string_upper(PyStringObject *self) s = PyString_AS_STRING(newobj); - memcpy(s, PyString_AS_STRING(self), n); + Py_MEMCPY(s, PyString_AS_STRING(self), n); for (i = 0; i < n; i++) { int c = Py_CHARMASK(s[i]); @@ -2615,18 +2615,18 @@ replace_interleave(PyStringObject *self, /* TODO: special case single character, which doesn't need memcpy */ /* Lay the first one down (guaranteed this will occur) */ - memcpy(result_s, to_s, to_len); + Py_MEMCPY(result_s, to_s, to_len); result_s += to_len; count -= 1; for (i=0; itp_alloc(type, n); if (pnew != NULL) { - memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); + Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); ((PyStringObject *)pnew)->ob_shash = ((PyStringObject *)tmp)->ob_shash; ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; @@ -4792,7 +4792,7 @@ PyString_Format(PyObject *format, PyObject *args) *res++ = *pbuf++; } } - memcpy(res, pbuf, len); + Py_MEMCPY(res, pbuf, len); res += len; rescnt -= len; while (--width >= len) {