The problem is in sre_compile.py: the call to
_compile_charset near the end of _compile_info forgets to
pass in the flags, so that the info charset is not compiled
with re.U. (The info charset is used when searching to find
the first character at which a match could start; it is not
generated for patterns beginning with a repeat like '\w{1}'.)
table[i+1] = table[table[i+1]-1]+1
code.extend(table[1:]) # don't store first entry
elif charset:
- _compile_charset(charset, 0, code)
+ _compile_charset(charset, flags, code)
code[skip] = len(code) - skip
STRING_TYPES = [type("")]
# bug 410271: \b broken under locales
(r'\b.\b', 'a', SUCCEED, 'found', 'a'),
(r'(?u)\b.\b', u, SUCCEED, 'found', u),
+ (r'(?u)\w', u, SUCCEED, 'found', u),
])