Minor code clean up and improvements in the re module.

author Serhiy Storchaka <storchaka@gmail.com>

Tue, 11 Nov 2014 19:13:28 +0000 (21:13 +0200)

committer Serhiy Storchaka <storchaka@gmail.com>

Tue, 11 Nov 2014 19:13:28 +0000 (21:13 +0200)
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 11 Nov 2014 19:13:28 +0000 (21:13 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Tue, 11 Nov 2014 19:13:28 +0000 (21:13 +0200)
diff --git a/Lib/re.py b/Lib/re.py

index a4de5cc3ef6fda513be595c0184e81614b13f9c8..788fa6bd789749a5161e887793f772217652999e 100644 (file)
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -363,7 +363,7 @@ class Scanner:
          append = result.append
          match = self.scanner.scanner(string).match
          i = 0
-        while 1:
+        while True:
              m = match()
              if not m:
                  break
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py

index f5aef7a2e5f19992d29d63bb5e9b11176f61428c..1241a01c3eabc19f011a28542c2fea350b58c19e 100644 (file)
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -16,11 +16,6 @@ from sre_constants import *
  
  assert _sre.MAGIC == MAGIC, "SRE module mismatch"
  
-if _sre.CODESIZE == 2:
-    MAXCODE = 65535
-else:
-    MAXCODE = 0xFFFFFFFF
-
  _LITERAL_CODES = {LITERAL, NOT_LITERAL}
  _REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
  _SUCCESS_CODES = {SUCCESS, FAILURE}
@@ -191,7 +186,7 @@ def _compile(code, pattern, flags):
                  emit(JUMP)
                  tailappend(_len(code)); emit(0)
                  code[skip] = _len(code) - skip
-            emit(0) # end of branch
+            emit(FAILURE) # end of branch
              for tail in tail:
                  code[tail] = _len(code) - tail
          elif op is CATEGORY:
@@ -374,6 +369,7 @@ def _optimize_charset(charset, fixup, fixes):
      return out
  
  _CODEBITS = _sre.CODESIZE * 8
+MAXCODE = (1 << _CODEBITS) - 1
  _BITS_TRANS = b'0' + b'1' * 255
  def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
      s = bits.translate(_BITS_TRANS)[::-1]
@@ -477,9 +473,9 @@ def _compile_info(code, pattern, flags):
              elif op is IN:
                  charset = av
  ##     if prefix:
-##         print "*** PREFIX", prefix, prefix_skip
+##         print("*** PREFIX", prefix, prefix_skip)
  ##     if charset:
-##         print "*** CHARSET", charset
+##         print("*** CHARSET", charset)
      # add an info block
      emit = code.append
      emit(INFO)
@@ -489,9 +485,9 @@ def _compile_info(code, pattern, flags):
      if prefix:
          mask = SRE_INFO_PREFIX
          if len(prefix) == prefix_skip == len(pattern.data):
-            mask = mask + SRE_INFO_LITERAL
+            mask = mask | SRE_INFO_LITERAL
      elif charset:
-        mask = mask + SRE_INFO_CHARSET
+        mask = mask | SRE_INFO_CHARSET
      emit(mask)
      # pattern length
      if lo < MAXCODE:
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py

index aa2d64bb40c048d4316f3919bd43793b895d82a6..45411f89f16e9ff10a77738be76fbac34a46feec 100644 (file)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -103,18 +103,18 @@ class SubPattern:
          seqtypes = (tuple, list)
          for op, av in self.data:
              print(level*"  " + str(op), end='')
-            if op == IN:
+            if op is IN:
                  # member sublanguage
                  print()
                  for op, a in av:
                      print((level+1)*"  " + str(op), a)
-            elif op == BRANCH:
+            elif op is BRANCH:
                  print()
                  for i, a in enumerate(av[1]):
                      if i:
                          print(level*"  " + "OR")
                      a.dump(level+1)
-            elif op == GROUPREF_EXISTS:
+            elif op is GROUPREF_EXISTS:
                  condgroup, item_yes, item_no = av
                  print('', condgroup)
                  item_yes.dump(level+1)
@@ -607,7 +607,7 @@ def _parse(source, state):
                  item = subpattern[-1:]
              else:
                  item = None
-            if not item or (_len(item) == 1 and item[0][0] == AT):
+            if not item or (_len(item) == 1 and item[0][0] is AT):
                  raise source.error("nothing to repeat",
                                     source.tell() - here + len(this))
              if item[0][0] in _REPEATCODES:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py

index b30abadd520d53663f6693064b8f0ec7543602a7..7bc1e935d4683b76d7db77a287c88ffa71741f16 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1101,8 +1101,8 @@ class ReTests(unittest.TestCase):
  
      def test_inline_flags(self):
          # Bug #1700
-        upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
-        lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
+        upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
+        lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
  
          p = re.compile(upper_char, re.I | re.U)
          q = p.match(lower_char)
author	Serhiy Storchaka <storchaka@gmail.com>
	Tue, 11 Nov 2014 19:13:28 +0000 (21:13 +0200)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Tue, 11 Nov 2014 19:13:28 +0000 (21:13 +0200)
Lib/re.py		patch | blob | history
Lib/sre_compile.py		patch | blob | history
Lib/sre_parse.py		patch | blob | history
Lib/test/test_re.py		patch | blob | history