]> granicus.if.org Git - python/commitdiff
needforspeed: use "fastsearch" for count. this results in a 3x speedup
authorFredrik Lundh <fredrik@pythonware.com>
Thu, 25 May 2006 17:55:31 +0000 (17:55 +0000)
committerFredrik Lundh <fredrik@pythonware.com>
Thu, 25 May 2006 17:55:31 +0000 (17:55 +0000)
for the related stringbench tests.

Objects/stringobject.c

index b0c4640a0cb252a05bd877071acc1dc83cca421e..95ade513b1fea5d96f8d6f73ad82f82dd4b5e82e 100644 (file)
@@ -5,6 +5,18 @@
 
 #include <ctype.h>
 
+#undef USE_INLINE /* XXX - set via configure? */
+
+#if defined(_MSC_VER) /* this is taken from _sre.c */
+#pragma warning(disable: 4710)
+/* fastest possible local call under MSVC */
+#define LOCAL(type) static __inline type __fastcall
+#elif defined(USE_INLINE)
+#define LOCAL(type) static inline type
+#else
+#define LOCAL(type) static type
+#endif
+
 #ifdef COUNT_ALLOCS
 int null_strings, one_strings;
 #endif
@@ -763,6 +775,108 @@ PyString_AsStringAndSize(register PyObject *obj,
        return 0;
 }
 
+/* -------------------------------------------------------------------- */
+/* Helpers */
+
+#define USE_FAST /* experimental fast search implementation */
+
+/* XXX - this code is copied from unicodeobject.c.  we really should
+   refactor the core implementations (see _sre.c for how this can be
+   done), but that'll have to wait -- fredrik */
+
+/* fast search/count implementation, based on a mix between boyer-
+   moore and horspool, with a few more bells and whistles on the top.
+   for some more background, see: http://effbot.org/stringlib */
+
+/* note: fastsearch may access s[n], which isn't a problem when using
+   Python's ordinary string types, but may cause problems if you're
+   using this code in other contexts.  also, the count mode returns -1
+   if there cannot possible be a match in the target string, and 0 if
+   it has actually checked for matches, but didn't find any.  callers
+   beware! */
+
+#define FAST_COUNT 0
+#define FAST_SEARCH 1
+
+LOCAL(Py_ssize_t)
+       fastsearch(const unsigned char* s, Py_ssize_t n, const unsigned char* p,
+                  Py_ssize_t m, int mode)
+{
+       long mask;
+       int skip, count = 0;
+       Py_ssize_t i, j, mlast, w;
+
+       w = n - m;
+
+       if (w < 0)
+               return -1;
+
+       /* look for special cases */
+       if (m <= 1) {
+               if (m <= 0)
+                       return -1;
+               /* use special case for 1-character strings */
+               if (mode == FAST_COUNT) {
+                       for (i = 0; i < n; i++)
+                               if (s[i] == p[0])
+                                       count++;
+                       return count;
+               } else {
+                       for (i = 0; i < n; i++)
+                               if (s[i] == p[0])
+                                       return i;
+               }
+               return -1;
+       }
+
+       mlast = m - 1;
+
+       /* create compressed boyer-moore delta 1 table */
+       skip = mlast - 1;
+       /* process pattern[:-1] */
+       for (mask = i = 0; i < mlast; i++) {
+               mask |= (1 << (p[i] & 0x1F));
+               if (p[i] == p[mlast])
+                       skip = mlast - i - 1;
+       }
+       /* process pattern[-1] outside the loop */
+       mask |= (1 << (p[mlast] & 0x1F));
+
+       for (i = 0; i <= w; i++) {
+               /* note: using mlast in the skip path slows things down on x86 */
+               if (s[i+m-1] == p[m-1]) {
+                       /* candidate match */
+                       for (j = 0; j < mlast; j++)
+                               if (s[i+j] != p[j])
+                                       break;
+                       if (j == mlast) {
+                               /* got a match! */
+                               if (mode != FAST_COUNT)
+                                       return i;
+                               count++;
+                               i = i + mlast;
+                               continue;
+                       }
+                       /* miss: check if next character is part of pattern */
+                       if (!(mask & (1 << (s[i+m] & 0x1F))))
+                               i = i + m;
+                       else {
+                               i = i + skip;
+                               continue;
+                       }
+               } else {
+                       /* skip: check if next character is part of pattern */
+                       if (!(mask & (1 << (s[i+m] & 0x1F))))
+                               i = i + m;
+               }
+       }
+
+       if (mode != FAST_COUNT)
+               return -1;
+       return count;
+}
+
+/* -------------------------------------------------------------------- */
 /* Methods */
 
 static int
@@ -2177,7 +2291,7 @@ as in slice notation.");
 static PyObject *
 string_count(PyStringObject *self, PyObject *args)
 {
-       const char *s = PyString_AS_STRING(self), *sub, *t;
+       const char *s = PyString_AS_STRING(self), *sub;
        Py_ssize_t len = PyString_GET_SIZE(self), n;
        Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
        Py_ssize_t m, r;
@@ -2210,8 +2324,14 @@ string_count(PyStringObject *self, PyObject *args)
        if (n == 0)
                return PyInt_FromSsize_t(m-i);
 
+#ifdef USE_FAST
+       r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
+       if (r < 0)
+               r = 0; /* no match */
+#else
        r = 0;
        while (i < m) {
+               const char *t
                if (!memcmp(s+i, sub, n)) {
                        r++;
                        i += n;
@@ -2225,6 +2345,7 @@ string_count(PyStringObject *self, PyObject *args)
                        break;
                i = t - s;
        }
+#endif
        return PyInt_FromSsize_t(r);
 }