Committing patch #591250 which provides "str1 in str2" when str1 is a string of any length (previously str1 had to be a single character).

author Barry Warsaw <barry@python.org>

Tue, 6 Aug 2002 16:58:21 +0000 (16:58 +0000)

committer Barry Warsaw <barry@python.org>

Tue, 6 Aug 2002 16:58:21 +0000 (16:58 +0000)
author Barry Warsaw <barry@python.org>
Tue, 6 Aug 2002 16:58:21 +0000 (16:58 +0000)
committer Barry Warsaw <barry@python.org>
Tue, 6 Aug 2002 16:58:21 +0000 (16:58 +0000)
diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex

index 87d5402d05c1044e64184c39d9dc65c22c2317ea..df602cda988a152307442a9874a2add9ea79e205 100644 (file)
--- a/Doc/lib/libstdtypes.tex
+++ b/Doc/lib/libstdtypes.tex
@@ -432,15 +432,15 @@ This table lists the sequence operations sorted in ascending priority
  and \var{j} are integers:
  
  \begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes}
-  \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{}
+  \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{(1)}
    \lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
-equal to \var{x}, else \code{1}}{}
+equal to \var{x}, else \code{1}}{(1)}
    \hline
    \lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
-  \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(1)}
+  \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
    \hline
-  \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(2)}
-  \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(2), (3)}
+  \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
+  \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(3), (4)}
    \hline
    \lineiii{len(\var{s})}{length of \var{s}}{}
    \lineiii{min(\var{s})}{smallest item of \var{s}}{}
@@ -461,7 +461,12 @@ equal to \var{x}, else \code{1}}{}
  Notes:
  
  \begin{description}
-\item[(1)] Values of \var{n} less than \code{0} are treated as
+\item[(1)] When \var{s} is a string or Unicode string object the
+\code{in} and \code{not in} operations act like a substring test.  In
+Python versions before 2.3, \var{x} had to be a string of length 1.
+In Python 2.3 and beyond, \var{x} may be a string of any length.
+
+\item[(2)] Values of \var{n} less than \code{0} are treated as
    \code{0} (which yields an empty sequence of the same type as
    \var{s}).  Note also that the copies are shallow; nested structures
    are not copied.  This often haunts new Python programmers; consider:
@@ -489,12 +494,12 @@ Notes:
  [[3], [5], [7]]
  \end{verbatim}
  
-\item[(2)] If \var{i} or \var{j} is negative, the index is relative to
+\item[(3)] If \var{i} or \var{j} is negative, the index is relative to
    the end of the string: \code{len(\var{s}) + \var{i}} or
    \code{len(\var{s}) + \var{j}} is substituted.  But note that \code{-0} is
    still \code{0}.
  
-\item[(3)] The slice of \var{s} from \var{i} to \var{j} is defined as
+\item[(4)] The slice of \var{s} from \var{i} to \var{j} is defined as
    the sequence of items with index \var{k} such that \code{\var{i} <=
    \var{k} < \var{j}}.  If \var{i} or \var{j} is greater than
    \code{len(\var{s})}, use \code{len(\var{s})}.  If \var{i} is omitted,
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py

index 47d7510c075acf60ba3f39f4e2559854d00f72ce..836836b1f09770b39c7ee47a31d801622bcb8bb0 100644 (file)
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1,7 +1,7 @@
  """Common tests shared by test_string and test_userstring"""
  
  import string
-from test.test_support import verify, verbose, TestFailed, have_unicode
+from test.test_support import verify, vereq, verbose, TestFailed, have_unicode
  
  transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
  
@@ -295,3 +295,23 @@ def run_method_tests(test):
          data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
          verify('hello world'.encode('zlib') == data)
          verify(data.decode('zlib') == 'hello world')
+
+def test_exception(lhs, rhs, msg):
+    try:
+        lhs in rhs
+    except TypeError:
+        pass
+    else:
+        raise TestFailed, msg
+
+def run_contains_tests(test):
+    vereq('' in '', True)
+    vereq('' in 'abc', True)
+    vereq('\0' in 'abc', False)
+    vereq('\0' in '\0abc', True)
+    vereq('\0' in 'abc\0', True)
+    vereq('a' in '\0abc', True)
+    vereq('asdf' in 'asdf', True)
+    vereq('asdf' in 'asd', False)
+    vereq('asdf' in '', False)
+
diff --git a/Lib/test/test_contains.py b/Lib/test/test_contains.py

index 9abed1512bd3139c9f6fab9b652f113ccf10e67f..04eedf1757a5016f21fb80d8d17237039741a702 100644 (file)
--- a/Lib/test/test_contains.py
+++ b/Lib/test/test_contains.py
@@ -45,17 +45,8 @@ except TypeError:
  check('c' in 'abc', "'c' not in 'abc'")
  check('d' not in 'abc', "'d' in 'abc'")
  
-try:
-    '' in 'abc'
-    check(0, "'' in 'abc' did not raise error")
-except TypeError:
-    pass
-
-try:
-    'ab' in 'abc'
-    check(0, "'ab' in 'abc' did not raise error")
-except TypeError:
-    pass
+check('' in '', "'' not in ''")
+check('' in 'abc', "'' not in 'abc'")
  
  try:
      None in 'abc'
@@ -71,17 +62,12 @@ if have_unicode:
      check('c' in unicode('abc'), "'c' not in u'abc'")
      check('d' not in unicode('abc'), "'d' in u'abc'")
  
-    try:
-        '' in unicode('abc')
-        check(0, "'' in u'abc' did not raise error")
-    except TypeError:
-        pass
-
-    try:
-        'ab' in unicode('abc')
-        check(0, "'ab' in u'abc' did not raise error")
-    except TypeError:
-        pass
+    check('' in unicode(''), "'' not in u''")
+    check(unicode('') in '', "u'' not in ''")
+    check(unicode('') in unicode(''), "u'' not in u''")
+    check('' in unicode('abc'), "'' not in u'abc'")
+    check(unicode('') in 'abc', "u'' not in 'abc'")
+    check(unicode('') in unicode('abc'), "u'' not in u'abc'")
  
      try:
          None in unicode('abc')
@@ -94,35 +80,11 @@ if have_unicode:
      check(unicode('c') in unicode('abc'), "u'c' not in u'abc'")
      check(unicode('d') not in unicode('abc'), "u'd' in u'abc'")
  
-    try:
-        unicode('') in unicode('abc')
-        check(0, "u'' in u'abc' did not raise error")
-    except TypeError:
-        pass
-
-    try:
-        unicode('ab') in unicode('abc')
-        check(0, "u'ab' in u'abc' did not raise error")
-    except TypeError:
-        pass
-
      # Test Unicode char in string
  
      check(unicode('c') in 'abc', "u'c' not in 'abc'")
      check(unicode('d') not in 'abc', "u'd' in 'abc'")
  
-    try:
-        unicode('') in 'abc'
-        check(0, "u'' in 'abc' did not raise error")
-    except TypeError:
-        pass
-
-    try:
-        unicode('ab') in 'abc'
-        check(0, "u'ab' in 'abc' did not raise error")
-    except TypeError:
-        pass
-
  # A collection of tests on builtin sequence types
  a = range(10)
  for i in a:
diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py

index af8c1bce66d15d9dc09bddbb5c99fcf3568be598..c92f5f7d1f77806f0a4a36e1f3361e0c8f6dac2d 100644 (file)
--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@@ -51,6 +51,7 @@ def test(name, input, output, *args):
  
  string_tests.run_module_tests(test)
  string_tests.run_method_tests(test)
+string_tests.run_contains_tests(test)
  
  string.whitespace
  string.lowercase
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 028e97ad802c1c051083e6e99aa4b4a90ffbb614..f38467ad0d7305101513aa967f4fdf73f03bd243 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -6,7 +6,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
  
  """#"
-from test.test_support import verify, verbose, TestFailed
+from test.test_support import verify, vereq, verbose, TestFailed
  import sys, string
  
  if not sys.platform.startswith('java'):
@@ -396,23 +396,23 @@ test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c
  
  # Contains:
  print 'Testing Unicode contains method...',
-verify(('a' in u'abdb') == 1)
-verify(('a' in u'bdab') == 1)
-verify(('a' in u'bdaba') == 1)
-verify(('a' in u'bdba') == 1)
-verify(('a' in u'bdba') == 1)
-verify((u'a' in u'bdba') == 1)
-verify((u'a' in u'bdb') == 0)
-verify((u'a' in 'bdb') == 0)
-verify((u'a' in 'bdba') == 1)
-verify((u'a' in ('a',1,None)) == 1)
-verify((u'a' in (1,None,'a')) == 1)
-verify((u'a' in (1,None,u'a')) == 1)
-verify(('a' in ('a',1,None)) == 1)
-verify(('a' in (1,None,'a')) == 1)
-verify(('a' in (1,None,u'a')) == 1)
-verify(('a' in ('x',1,u'y')) == 0)
-verify(('a' in ('x',1,None)) == 0)
+vereq(('a' in u'abdb'), True)
+vereq(('a' in u'bdab'), True)
+vereq(('a' in u'bdaba'), True)
+vereq(('a' in u'bdba'), True)
+vereq(('a' in u'bdba'), True)
+vereq((u'a' in u'bdba'), True)
+vereq((u'a' in u'bdb'), False)
+vereq((u'a' in 'bdb'), False)
+vereq((u'a' in 'bdba'), True)
+vereq((u'a' in ('a',1,None)), True)
+vereq((u'a' in (1,None,'a')), True)
+vereq((u'a' in (1,None,u'a')), True)
+vereq(('a' in ('a',1,None)), True)
+vereq(('a' in (1,None,'a')), True)
+vereq(('a' in (1,None,u'a')), True)
+vereq(('a' in ('x',1,u'y')), False)
+vereq(('a' in ('x',1,None)), False)
  print 'done.'
  
  # Formatting:
@@ -758,3 +758,42 @@ print u'abc\n',
  print u'def\n'
  print u'def\n'
  print 'done.'
+
+def test_exception(lhs, rhs, msg):
+    try:
+        lhs in rhs
+    except TypeError:
+        pass
+    else:
+        raise TestFailed, msg
+
+def run_contains_tests():
+    vereq(u'' in '', True)
+    vereq('' in u'', True)
+    vereq(u'' in u'', True)
+    vereq(u'' in 'abc', True)
+    vereq('' in u'abc', True)
+    vereq(u'' in u'abc', True)
+    vereq(u'\0' in 'abc', False)
+    vereq('\0' in u'abc', False)
+    vereq(u'\0' in u'abc', False)
+    vereq(u'\0' in '\0abc', True)
+    vereq('\0' in u'\0abc', True)
+    vereq(u'\0' in u'\0abc', True)
+    vereq(u'\0' in 'abc\0', True)
+    vereq('\0' in u'abc\0', True)
+    vereq(u'\0' in u'abc\0', True)
+    vereq(u'a' in '\0abc', True)
+    vereq('a' in u'\0abc', True)
+    vereq(u'a' in u'\0abc', True)
+    vereq(u'asdf' in 'asdf', True)
+    vereq('asdf' in u'asdf', True)
+    vereq(u'asdf' in u'asdf', True)
+    vereq(u'asdf' in 'asd', False)
+    vereq('asdf' in u'asd', False)
+    vereq(u'asdf' in u'asd', False)
+    vereq(u'asdf' in '', False)
+    vereq('asdf' in u'', False)
+    vereq(u'asdf' in u'', False)
+
+run_contains_tests()
diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py

index 78af807b53dd970b1e7459a0a1da0f5afb77be81..5492f2e526d23122c686106d68cc71d008267d5b 100755 (executable)
--- a/Lib/test/test_userstring.py
+++ b/Lib/test/test_userstring.py
@@ -41,3 +41,4 @@ def test(methodname, input, output, *args):
          print (methodname, input, output, args, res[0], res[1], res[2])
  
  string_tests.run_method_tests(test)
+string_tests.run_contains_tests(test)
diff --git a/Objects/stringobject.c b/Objects/stringobject.c

index 3c1b303a503ed73c2155c9f561c33620c2be3d68..1d5277c0c78e3064b5a6b61c59037fad7a7bd441 100644 (file)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -803,24 +803,31 @@ string_slice(register PyStringObject *a, register int i, register int j)
  static int
  string_contains(PyObject *a, PyObject *el)
  {
-       register char *s, *end;
-       register char c;
+       const char *lhs, *rhs, *end;
+       int size;
  #ifdef Py_USING_UNICODE
         if (PyUnicode_Check(el))
                 return PyUnicode_Contains(a, el);
  #endif
-       if (!PyString_Check(el) || PyString_Size(el) != 1) {
+       if (!PyString_Check(el)) {
                 PyErr_SetString(PyExc_TypeError,
-                   "'in <string>' requires character as left operand");
+                             "'in <string>' requires string as left operand");
                 return -1;
         }
-       c = PyString_AsString(el)[0];
-       s = PyString_AsString(a);
-       end = s + PyString_Size(a);
-       while (s < end) {
-               if (c == *s++)
+       size = PyString_Size(el);
+       rhs = PyString_AS_STRING(el);
+       lhs = PyString_AS_STRING(a);
+
+       /* optimize for a single character */
+       if (size == 1)
+               return memchr(lhs, *rhs, PyString_Size(a)) != NULL;
+
+       end = lhs + (PyString_Size(a) - size);
+       while (lhs <= end) {
+               if (memcmp(lhs++, rhs, size) == 0)
                         return 1;
         }
+
         return 0;
  }
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 6ca709b8d308894ffcd2f0529e89ddc4231456b7..a577bfd4d77f8e16e5708e5933285a1c27c16547 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3732,15 +3732,14 @@ int PyUnicode_Contains(PyObject *container,
                        PyObject *element)
  {
      PyUnicodeObject *u = NULL, *v = NULL;
-    int result;
-    register const Py_UNICODE *p, *e;
-    register Py_UNICODE ch;
+    int result, size;
+    register const Py_UNICODE *lhs, *end, *rhs;
  
      /* Coerce the two arguments */
      v = (PyUnicodeObject *)PyUnicode_FromObject(element);
      if (v == NULL) {
         PyErr_SetString(PyExc_TypeError,
-           "'in <string>' requires character as left operand");
+           "'in <string>' requires string as left operand");
         goto onError;
      }
      u = (PyUnicodeObject *)PyUnicode_FromObject(container);
@@ -3749,20 +3748,27 @@ int PyUnicode_Contains(PyObject *container,
         goto onError;
      }
  
-    /* Check v in u */
-    if (PyUnicode_GET_SIZE(v) != 1) {
-       PyErr_SetString(PyExc_TypeError,
-           "'in <string>' requires character as left operand");
-       goto onError;
-    }
-    ch = *PyUnicode_AS_UNICODE(v);
-    p = PyUnicode_AS_UNICODE(u);
-    e = p + PyUnicode_GET_SIZE(u);
+    size = PyUnicode_GET_SIZE(v);
+    rhs = PyUnicode_AS_UNICODE(v);
+    lhs = PyUnicode_AS_UNICODE(u);
+
      result = 0;
-    while (p < e) {
-       if (*p++ == ch) {
-           result = 1;
-           break;
+    if (size == 1) {
+       end = lhs + PyUnicode_GET_SIZE(u);
+       while (lhs < end) {
+           if (*lhs++ == *rhs) {
+               result = 1;
+               break;
+           }
+       }
+    }
+    else {
+       end = lhs + (PyUnicode_GET_SIZE(u) - size);
+       while (lhs <= end) {
+           if (memcmp(lhs++, rhs, size) == 0) {
+               result = 1;
+               break;
+           }
         }
      }
author	Barry Warsaw <barry@python.org>
	Tue, 6 Aug 2002 16:58:21 +0000 (16:58 +0000)
committer	Barry Warsaw <barry@python.org>
	Tue, 6 Aug 2002 16:58:21 +0000 (16:58 +0000)
Doc/lib/libstdtypes.tex		patch | blob | history
Lib/test/string_tests.py		patch | blob | history
Lib/test/test_contains.py		patch | blob | history
Lib/test/test_string.py		patch | blob | history
Lib/test/test_unicode.py		patch | blob | history
Lib/test/test_userstring.py		patch | blob | history
Objects/stringobject.c		patch | blob | history
Objects/unicodeobject.c		patch | blob | history